Memory leak in dataloader

When training a model on the MS COCO dataset, I encountered a memory leak. Even if I set num_workers=0, the problem is still not solved. The code for the dataset and dataloader is shown below. Any help or suggestions are appreciated!

class CocoDetection(Dataset):
    """Dataset yielding ``(image, target)`` pairs from a COCO annotation file.

    Args:
        root: Directory that contains the ``train2017a`` and ``train2017b``
            image sub-directories.
        annFile: Path of the annotation file handed to ``COCO``.
        transform: Optional callable invoked as
            ``transform(image, target, coco)`` that returns the transformed
            ``(image, target)`` pair.
    """

    def __init__(self, root, annFile, transform=None):
        self.root = root
        self.coco = COCO(annFile)
        self.ids = list(self.coco.imgs.keys())
        self.transform = transform

    def __getitem__(self, index):
        """Load the image and annotations stored at position ``index``.

        Returns:
            ``(image, target)``: the image with its channel axis reversed
            relative to what ``cv2.imread`` produced, and a list of
            annotation dicts owned by the caller.

        Raises:
            IOError: If the image file cannot be read.
        """
        # Local import so the block does not depend on a file-level import.
        import copy

        img_id = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        # loadAnns hands back the annotation dicts kept inside the COCO
        # index. Deep-copy them so that whatever the transform writes into
        # them (masks, rescaled boxes, remapped category ids) is released
        # with the sample instead of accumulating in self.coco, and so the
        # same annotation is not transformed again on a later visit.
        target = copy.deepcopy(self.coco.loadAnns(ann_ids))

        name = self.coco.loadImgs(img_id)[0]['file_name']
        # The images are split over two directories by numeric file name.
        idx = int(name.split('.')[0])
        subdir = 'train2017a' if idx < 295669 else 'train2017b'
        path = os.path.join(self.root, subdir, name)

        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None rather than
            # raising; report the offending path explicitly.
            raise IOError('cannot read image: {}'.format(path))
        # Reverse the channel axis (OpenCV loads BGR, so this yields RGB).
        image = image[:, :, ::-1]

        if self.transform is not None:
            image, target = self.transform(image, target, self.coco)
        return image, target

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.ids)



class CocoTransform():
    """Resize and normalise an image and rescale its COCO annotations.

    Args:
        size: Output size, either a single int (square output) or a
            ``(height, width)`` tuple.
    """

    def __init__(self, size=(640, 800)):
        assert isinstance(size, (int, tuple))
        # Per-channel statistics shaped (3, 1, 1) so they broadcast over a
        # channel-first image (presumably the usual ImageNet mean/std).
        self.mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)
        self.std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
        if isinstance(size, int):
            self.h, self.w = size, size
        else:
            assert len(size) == 2
            self.h, self.w = size[0], size[1]

    def __call__(self, image, target, coco):
        """Transform one sample.

        Args:
            image: Array of shape ``(height, width, channels)``.
            target: List of annotation dicts for ``image``; each needs the
                keys ``'bbox'`` and ``'category_id'`` and must be accepted
                by ``coco.annToMask``.
            coco: Object providing ``annToMask(annotation)``.

        Returns:
            ``(image, target)``: the resized image in channel-first layout,
            scaled to [0, 1] and normalised with ``self.mean``/``self.std``,
            and a NEW list of NEW annotation dicts with a resized ``'mask'``,
            a remapped ``'category_id'`` and a rescaled ``'bbox'``.
            The dicts passed in are left untouched.
        """
        h0, w0, _ = np.shape(image)
        image = cv2.resize(image, (self.w, self.h),
                           interpolation=cv2.INTER_AREA)
        h_ratio, w_ratio = self.h/h0, self.w/w0
        image = np.transpose(image, (2, 0, 1))
        image = (image/255.0 - self.mean)/self.std

        # Build fresh dicts instead of writing into the ones we were given.
        # The incoming dicts are typically the very objects cached inside the
        # COCO index, so mutating them would (a) pin one mask per annotation
        # in memory for the lifetime of the dataset and (b) rescale the box
        # and remap the category again each time the sample is revisited.
        transformed = []
        for elem in target:
            mask = coco.annToMask(elem)
            mask = cv2.resize(mask, (self.w, self.h),
                              interpolation=cv2.INTER_AREA)

            bbox = list(elem['bbox'])
            bbox[0] *= w_ratio
            bbox[2] *= w_ratio
            bbox[1] *= h_ratio
            bbox[3] *= h_ratio

            new_elem = dict(elem)
            new_elem['mask'] = mask
            new_elem['category_id'] = COCO_DICT[str(elem['category_id'])]
            new_elem['bbox'] = bbox
            transformed.append(new_elem)

        return image, transformed



def coco_collate(batch):
    """Collate ``(image, target)`` samples into one batch.

    The images are stacked into a single NumPy array; the per-image targets
    are returned as a plain list, one entry per sample.
    """
    images = []
    targets = []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])
    return np.array(images), targets

I did some experiments and found that the memory leak was in the code below:

# Excerpt from CocoTransform.__call__: every statement in the loop body
# writes into `elem` (a new 'mask' key, a replaced 'category_id', and the
# 'bbox' entries scaled in place).
# NOTE(review): if `target` holds the same dict objects that the COCO index
# keeps (as returned by coco.loadAnns), these writes outlive the iteration
# -- TODO confirm against pycocotools.
for elem in target:
    mask = coco.annToMask(elem)
    mask = cv2.resize(mask, (self.w, self.h),
                              interpolation=cv2.INTER_AREA)
    elem['mask'] = mask
    elem['category_id'] = COCO_DICT[str(elem['category_id'])]
    elem['bbox'][0] *= w_ratio 
    elem['bbox'][2] *= w_ratio 
    elem['bbox'][1] *= h_ratio 
    elem['bbox'][3] *= h_ratio

How did you solve the issue?
I don’t see anything that could cause a memory leak.
Since you are storing the mask in each element of the target I would expect an increase in memory usage, but not a leak.

I commented out these lines, and then everything works fine:

# Excerpt from the loop body: builds the annotation's mask, resizes it to
# (self.w, self.h) and stores the resulting array on the annotation dict.
mask = coco.annToMask(elem)
mask = cv2.resize(mask, (self.w, self.h),
                              interpolation=cv2.INTER_AREA)
elem['mask'] = mask

I guess it's because the original dict 'elem' has no key named 'mask', so the newly added key and value occupy memory that is not freed even after the iteration is over. I am not sure whether this is a problem with PyTorch, with Python, or with my own coding logic. But thankfully this problem can be solved by moving this part of the code elsewhere.