CUDA out of memory after some batches


I am getting a CUDA out-of-memory error after iterating over some batches. My guess is that the computation graph is being kept alive somewhere, but I can’t figure out where. Here is the traceback:

  File "/home/ubuntu/repositories/GraphExp/", line 105, in train
    out, loss_value = self._pass(input, phase="train")
  File "/home/ubuntu/repositories/GraphExp/", line 90, in _pass
  File "/home/ubuntu/miniconda3/envs/gatv2/lib/python3.7/site-packages/torch/", line 363, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "/home/ubuntu/miniconda3/envs/gatv2/lib/python3.7/site-packages/torch/autograd/", line 175, in backward
    allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
RuntimeError: CUDA out of memory. Tried to allocate 1.84 GiB (GPU 0; 11.17 GiB total capacity; 10.03 GiB already allocated; 316.25 MiB free; 10.50 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

Here is my training loop:

    def train(self):
        """Run the training loop over every epoch and every "train" batch.

        For each batch this calls ``self._pass(..., phase="train")``, feeds the
        returned loss and the top-1 / top-5 accuracies into running
        ``AverageMeter`` instances (weighted by ``input.num_graphs``) and logs
        the batch through ``self._log_training``.
        """
        # breakpoint()
        # Running averages; they are created once and never reset inside the
        # loops below, so they aggregate over all epochs.
        avg_loss = AverageMeter()
        avg_top1 = AverageMeter()
        avg_top5 = AverageMeter()
        # NOTE(review): initialised but not used anywhere in the visible loop.
        early_stopping_counter = 0
        for epoch in range(self.config["training_config"]["epoch"]):
            # NOTE(review): `input` shadows the Python builtin of the same name.
            for batch_idx, input in enumerate(self.dataloaders["train"]):
                # NOTE(review): the right-hand side of the next assignment is
                # missing in this listing (presumably a transfer of the batch
                # to the training device) - restore it before running.
                input = 
                # `loss_value` comes back from `_pass` as a plain Python number
                # (`.item()`), so the meter below does not hold on to a tensor.
                out, loss_value = self._pass(input, phase="train")
                avg_loss.update(loss_value, input.num_graphs)
                # Only detached CPU copies are handed to the accuracy helper.
                # NOTE(review): this call is cut off after `topk=(1, 5),` - the
                # remaining argument(s) and the closing parenthesis are missing.
                top1, top5 = self.cls_accuracy(output=out.detach().cpu().data, target=input.y.detach().cpu().data, topk=(1, 5),
                avg_top1.update(top1, input.num_graphs)
                avg_top5.update(top5, input.num_graphs)
                self._log_training(epoch, batch_idx, loss_value)

class AverageMeter:
    """Track the latest value and the weighted running mean of a metric.

    Public attributes:
        val:   value passed to the most recent ``update`` call.
        sum:   weighted sum of all values seen since the last reset.
        count: total weight seen since the last reset.
        avg:   ``sum / count`` (stays 0 while ``count`` is 0).

    Pass plain Python numbers (e.g. ``loss.item()``) rather than tensors:
    accumulating a tensor that is still attached to an autograd graph in
    ``sum`` would keep that graph alive across iterations.
    """

    def __init__(self):
        # Single source of truth for the initial state.
        self.reset()

    def reset(self):
        """Clear every accumulated statistic back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean.

        Args:
            val: the new observation (e.g. the mean loss of a batch).
            n: weight of the observation (e.g. the number of samples in the
                batch). Defaults to 1.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # An empty batch (n == 0) on a fresh meter would otherwise divide by
        # zero; keep the mean at 0 until some weight has been accumulated.
        self.avg = self.sum / self.count if self.count else 0

    def value(self):
        """Return the running mean rounded to 8 decimal places."""
        return round(self.avg, 8)

    def _pass(self, data, phase="val"):
        """Run one forward pass on ``data`` and compute the loss.

        Args:
            data: a batch exposing ``x``, ``edge_index``, ``batch`` and ``y``.
            phase: ``"train"`` selects the training branch; defaults to
                ``"val"``.

        Returns:
            A tuple ``(out, loss)`` where ``out`` is the softmax of the model
            output (not detached) and ``loss`` is the loss as a Python number.
        """
        out = self.model(data.x, data.edge_index, data.batch)
        # NOTE(review): softmax is applied before the criterion. If
        # `self.criterion` is a loss that expects raw logits (e.g.
        # `nn.CrossEntropyLoss`), this normalises twice - confirm.
        out = F.softmax(out, dim=1)
        loss_value = self.criterion(out, data.y)
        # NOTE(review): the body of this branch is missing from the listing.
        # The accompanying traceback shows `backward` being called from
        # `_pass`, so presumably the backward pass / optimizer step belongs
        # here - restore it before running.
        if phase =="train":
        # The loss is converted to a plain Python number, so callers do not
        # receive a tensor that references the autograd graph.
        return out, loss_value.detach().cpu().item()

    def cls_accuracy(output, target, n_classes, topk=(1,)):
        """Compute the top-k accuracy for every ``k`` in ``topk``.

        Samples whose highest-scoring class index is not below ``n_classes``
        are dropped (together with their targets) before accuracy is computed.

        Args:
            output: score tensor; the top-k is taken along dimension 1
                (assumes shape (num_samples, num_scores) - TODO confirm).
            target: tensor of ground-truth class indices, one per sample.
            n_classes: exclusive upper bound on the top-1 index of the samples
                that are kept.
            topk: iterable of ``k`` values to evaluate.

        Returns:
            A list of Python floats, one per entry of ``topk`` and in the same
            order: the fraction of kept samples whose target is among the
            ``k`` highest-scoring classes.
        """
        # NOTE(review): there is no `self` parameter although the training
        # loop invokes this as `self.cls_accuracy(...)`; presumably a
        # `@staticmethod` decorator was lost - confirm.
        max_k = max(topk)
        _, ranked = output.topk(max_k, dim=1, largest=True, sorted=True)

        # Keep only the samples whose best prediction is a valid class index.
        keep = ranked[:, 0] < n_classes
        kept_target = target[keep]
        ranked = ranked[keep, :].t()  # one column per kept sample

        hits = ranked.eq(kept_target.view(1, -1).expand_as(ranked))
        n_kept = kept_target.size(0)
        # A sample counts as correct for k if any of its first k rows matches.
        return [
            (hits[:k].float().max(0)[0].sum() / n_kept).detach().item()
            for k in topk
        ]

>>> input
DataBatch(x=[874, 1032], edge_index=[2, 24908], edge_attr=[24908, 2], y=[48], batch=[874], ptr=[49])
>>> input.x.is_leaf

I’m using PyTorch Geometric.
CUDA runs out of memory after 78 of the 119 batches.
Do you see any memory leak here?
Thank you

In my experience, this has always happened to me when I was accumulating a value with `+=`, as you do in `AverageMeter.update`. I see that `val` is detached, but what about `n`?

input.num_graphs is a float

1 Like