I have the following code, but CUDA keeps running out of memory after a varying number of steps (sometimes 3, sometimes 50). Is there something I'm doing wrong?
```python
import torch
from torch import Tensor
from torch.nn import BCELoss
from torch.optim import Optimizer
from torch.utils.data import DataLoader


def _evaluate_single_batch(self, batch: tuple[Tensor]) -> Tensor:
    input_ids = batch[0]
    attention_mask = batch[1]
    # Remember how many chunks belong to each document so predictions
    # can be pooled back per document afterwards.
    number_of_chunks = [len(x) for x in input_ids]
    # Flatten every document's chunks into one combined batch for the model.
    input_ids_combined = []
    for x in input_ids:
        input_ids_combined.extend(x.tolist())
    input_ids_combined_tensors = torch.stack(
        [torch.tensor(x) for x in input_ids_combined]
    ).to(self.device)
    attention_mask_combined = []
    for x in attention_mask:
        attention_mask_combined.extend(x.tolist())
    attention_mask_combined_tensors = torch.stack(
        [torch.tensor(x) for x in attention_mask_combined]
    ).to(self.device)
    preds_out = self.neural_network(input_ids_combined_tensors, attention_mask_combined_tensors)
    preds = preds_out.flatten().cpu()
    del input_ids_combined_tensors, attention_mask_combined_tensors, preds_out
    torch.cuda.empty_cache()
    # Mean-pool the chunk-level predictions back into one score per document.
    preds_split = preds.split(number_of_chunks)
    pooled_preds = torch.cat([torch.mean(x).reshape(1) for x in preds_split])
    return pooled_preds
```
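For context, each batch is structured so that `input_ids[i]` holds all the chunks of document `i` (that's why `number_of_chunks` is built with `len(x)`), and the number of chunks varies per document. A rough illustration, where names and shapes are made up for the example (my real chunks are longer):

```python
import torch

# Illustrative batch only: two documents, split into 3 and 2 chunks of 8 token IDs.
# In my real data the chunk length is fixed but the number of chunks varies per
# document, so the size of the combined tensor the model sees changes every batch.
doc_a = torch.randint(0, 1000, (3, 8))  # document with 3 chunks
doc_b = torch.randint(0, 1000, (2, 8))  # document with 2 chunks
input_ids = [doc_a, doc_b]
attention_mask = [torch.ones_like(doc_a), torch.ones_like(doc_b)]
labels = torch.tensor([1.0, 0.0])
batch = (input_ids, attention_mask, labels)
# number_of_chunks would be [3, 2] here, so the forward pass runs on 5 chunks at once.
```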
```python
def _train_single_epoch(self, dataloader: DataLoader, optimizer: Optimizer) -> None:
    self.neural_network.train()
    cross_entropy = BCELoss()
    for step, batch in enumerate(dataloader):
        labels = batch[-1].float().cpu()
        predictions = self._evaluate_single_batch(batch)
        # Scale the loss so gradients average correctly across accumulation steps.
        loss = cross_entropy(predictions, labels) / self.accumulation_steps
        loss.backward()
        if ((step + 1) % self.accumulation_steps == 0) or (step + 1 == len(dataloader)):
            optimizer.step()
            optimizer.zero_grad()
        torch.cuda.empty_cache()
        del batch, labels, predictions, loss
```
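If it helps with diagnosing this, I can add memory logging around the loop. Below is a sketch of what I have in mind; the `_log_cuda_memory` helper is hypothetical (not part of my class yet), and `torch.cuda.memory_allocated` / `torch.cuda.max_memory_allocated` are the only introspection calls I know of:

```python
def _log_cuda_memory(self, step: int) -> None:
    # Hypothetical helper: report current and peak usage in MiB so I can tell
    # whether memory grows steadily step over step or just spikes on some batches.
    allocated = torch.cuda.memory_allocated(self.device) / 2**20
    peak = torch.cuda.max_memory_allocated(self.device) / 2**20
    print(f"step {step}: allocated={allocated:.1f} MiB, peak={peak:.1f} MiB")
```

I would call `self._log_cuda_memory(step)` right after `loss.backward()`, but I'm not sure what pattern in the numbers to look for.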