GPU RAM usage increases after every batch

Hello,
I am trying to adapt the finetuning tutorial to my own data. I have created a dataset class, but when I try to train the network the program runs out of memory: the GPU memory usage increases after every batch.

import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image

class PennFudanDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "input"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "gt"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "input", self.imgs[idx])
        mask_path = os.path.join(self.root, "gt", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        obj_ids = []
        class_ids = []
        kontrol = np.zeros(4)  # background color; the mask is assumed to have 4 channels (RGBA)
        mymask = np.zeros((img.size[1], img.size[0]))  # will hold one instance id per pixel
        mask = np.array(mask)
        denemece = np.where(np.any(mask != 0, axis=-1))  # (row, col) indices of non-background pixels
        boxes = []
        # peel one instance color off the mask at a time until only background is left
        while len(denemece[0]) != 0:
            kontrol1 = np.array(mask[denemece[0][0]][denemece[1][0]], dtype=float)
            # indices of every pixel carrying this instance's color
            ilkeslesme = np.where(np.all(mask == kontrol1, axis=-1))
            # the instance id is encoded as G * 256 + B (cast to int to avoid uint8 overflow)
            n = int(mask[ilkeslesme[0][0]][ilkeslesme[1][0]][1]) * 256 + int(mask[ilkeslesme[0][0]][ilkeslesme[1][0]][2])
            obj_ids.append(n)
            # the class id is encoded in the R channel
            class_ids.append(mask[ilkeslesme[0][0]][ilkeslesme[1][0]][0])
            # write the instance id into mymask and clear its pixels to background
            mymask[ilkeslesme] = n
            mask[ilkeslesme] = kontrol
            # pixels that have not been cleared to background yet
            denemece = np.where(np.any(mask != 0, axis=-1))
            xmin = np.min(ilkeslesme[1])
            xmax = np.max(ilkeslesme[1])
            ymin = np.min(ilkeslesme[0])
            ymax = np.max(ilkeslesme[0])
            boxes.append([xmin, ymin, xmax, ymax])
        obj_ids = np.array(obj_ids)
        # split the color-encoded mask into a set of binary masks
        masks = mymask == obj_ids[:, None, None]
        num_objs = len(obj_ids)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # class labels decoded from the R channel of the mask
        labels = torch.as_tensor(np.array(class_ids), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        
        # note: deleting these local names does not free the tensors that are already stored in `target`
        del img_path, mask_path, mask, obj_ids, class_ids, kontrol, mymask, denemece, boxes, kontrol1, ilkeslesme, n, xmin, xmax, ymin, ymax, masks, num_objs, labels, image_id, area, iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)
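
For reference, my masks encode the class id in the R channel and the instance id as G * 256 + B, which is what the loop above decodes. The same colors could also be collected in a single pass with np.unique (just a sketch, not the code I am running; it assumes a 4-channel RGBA mask):

import numpy as np
from PIL import Image

# find all distinct non-background colors at once instead of peeling them off one by one
mask = np.array(Image.open(mask_path))                     # mask_path as in __getitem__ above
colors = np.unique(mask.reshape(-1, mask.shape[-1]), axis=0)
colors = colors[np.any(colors != 0, axis=1)]               # drop the background color
class_ids = colors[:, 0].astype(int)                       # R channel = class id
obj_ids = colors[:, 1].astype(int) * 256 + colors[:, 2]    # G * 256 + B = instance id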

The rest is the same as https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html.
Thanks in advance,
Muhammet

Could you check if you are storing the output, the loss, or any other tensors that might be attached to the computation graph in a list, dict, or any other container?
If so, you should .detach() these tensors before storing them, as otherwise the whole computation graph will be stored as well.
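
For example, a loop like this (a minimal, self-contained sketch; the model and the data are just placeholders) will increase memory usage in every iteration:

import torch

model = torch.nn.Linear(10, 1)
losses = []

for step in range(100):
    x = torch.randn(32, 10)
    loss = model(x).pow(2).mean()
    loss.backward()

    losses.append(loss)              # leak: each stored loss keeps a reference to its computation graph
    # losses.append(loss.detach())   # fix: detach from the graph before storing
    # losses.append(loss.item())     # or store a plain Python float instead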

I think I’ve found the problem. It’s about my dataset: some images have too many annotations, so the program fails. Thanks for the reply.
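
In case someone else runs into this: a possible workaround (just a sketch; max_instances is an arbitrary, untested cutoff) would be to cap the number of instances per image inside __getitem__, right before building target:

max_instances = 100  # hypothetical cutoff, tune to your GPU memory

# keep only the first max_instances objects
boxes = boxes[:max_instances]
labels = labels[:max_instances]
masks = masks[:max_instances]
area = area[:max_instances]
iscrowd = iscrowd[:max_instances]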