Why is each epoch taking longer than the last?

Each epoch takes 5 minutes longer than the last one. How can I improve my code?

def train(loader, model, optimizer, loss_fn, scaler):
    loop = tqdm(loader)
    history = []
    for batch_idx, (data, targets) in enumerate(loop):
        data = data.to(device = DEVICE)
        targets = targets.to(device = DEVICE)
        
        #forward
        with torch.cuda.amp.autocast():
            predictions = model(data)
            loss = loss_fn(predictions, targets)
            history.append(loss)
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        loop.set_postfix(loss = loss)
    return history

You are appending the live loss tensor to the history list, and each of those tensors keeps its entire computation graph alive. The graphs accumulate across batches, so memory usage keeps growing (which is why each epoch gets slower) until you eventually run out of memory. Detach the loss tensor (or convert it to a Python float with .item()) before appending it.
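
Something like the following should work; it is a sketch built on your code as posted (it still assumes tqdm, torch, and DEVICE are defined elsewhere in your script). loss.item() returns a plain Python float, which detaches the value from the graph:

def train(loader, model, optimizer, loss_fn, scaler):
    loop = tqdm(loader)
    history = []
    for batch_idx, (data, targets) in enumerate(loop):
        data = data.to(device=DEVICE)
        targets = targets.to(device=DEVICE)

        # forward
        with torch.cuda.amp.autocast():
            predictions = model(data)
            loss = loss_fn(predictions, targets)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # .item() stores only the scalar value, so the computation graph
        # built for this batch is freed instead of being kept in the list
        history.append(loss.item())
        loop.set_postfix(loss=loss.item())
    return history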