PyTorch DataLoader causes a memory leak during training

I’ve made a custom dataset in PyTorch for image segmentation. My images are 640x640 and my masks are 320x320. When I loop over the DataLoader, RAM usage keeps increasing over time until my code crashes.
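A minimal sketch for watching the resident memory grow per batch, assuming the psutil package is available (train_data_loader is the loader defined further down):

import os
import psutil

proc = psutil.Process(os.getpid())

for i, (batch_images, masks) in enumerate(train_data_loader):
    # Print the resident set size after every batch; with the leak it climbs steadily
    rss_mb = proc.memory_info().rss / 1024 ** 2
    print(f"batch {i}: RSS = {rss_mb:.1f} MiB")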

import torch
from torch.utils.data import Dataset, DataLoader
from prefetch_generator import BackgroundGenerator
import os
import cv2
import numpy as np
import json
import matplotlib.pyplot as plt
from PIL import Image

class CustomDataset(Dataset):
    def __init__(self, data_path, label_path, image_size=(720, 1280), normalize=True, class_mapping=None):
        self.data_path = data_path
        self.image_size = image_size
        self.normalize = normalize
        self.class_mapping = class_mapping
        self.label_path = label_path
        self.annotations = self.load_data()
        self.num_samples = len(self.annotations)

    def load_data(self):
        # Annotations live in a single labels.json next to the images
        json_file_path = os.path.join(self.label_path, 'labels.json')
        with open(json_file_path, 'r') as f:
            annotations = json.load(f)

        return self.filter_invalid_images(annotations)
    
    def filter_invalid_images(self, annotations):
        # Keep only annotations whose image file actually exists on disk
        valid_annotations = []
        for annotation in annotations:
            image_path = os.path.join(self.data_path, annotation['name'])
            if os.path.exists(image_path):
                valid_annotations.append(annotation)
        return valid_annotations
    
    
    def process(self, annotation):
        image_name = annotation['name']
        image_path = os.path.join(self.data_path, image_name)

        dri_mask, mark_mask = self.create_masks(annotation)
        image = Image.open(image_path)

        # Resize the image and both masks to the desired sizes
        image = self.resize_image(image)
        if self.normalize:
            image = self.normalize_image(image)
        mark_mask = self.resize_mask(mark_mask)
        dri_mask = self.resize_mask(dri_mask)

        # Stack the drivable-area and lane-mark masks into one (2, H, W) array
        combined_mask = np.stack([dri_mask, mark_mask])
        return np.array(image), combined_mask
    
    
    def create_masks(self, annotation):
        # ImageSize and poly2ds_to_mask are assumed to be imported from the
        # BDD100K/scalabel label tooling that rasterizes poly2d annotations
        mark_masks = np.zeros((720, 1280))
        dri_masks = np.zeros((720, 1280))
        image_size = ImageSize(width=1280, height=720)

        for label_info in annotation['labels']:
            category = label_info['category']
            if category == 'mark' and label_info['attributes']['mark_dir'] == 'parallel':
                # Accumulate every parallel lane-marking polygon into the mask
                mark_masks += poly2ds_to_mask(image_size, label_info['poly2d'])

            if category == 'dri':
                # Accumulate every drivable-area polygon into the mask
                dri_masks += poly2ds_to_mask(image_size, label_info['poly2d'])

        return dri_masks, mark_masks
    
    def resize_image(self, image):
        # Ensure 3 channels, then resize; PIL expects (width, height),
        # which is fine here because image_size is square
        resized_image = image.convert('RGB').resize(self.image_size, Image.BILINEAR)
        return np.array(resized_image)

    def resize_mask(self, mask):
        # Nearest-neighbor keeps mask values discrete; target is half the image size
        mask_pil = Image.fromarray(mask)
        resized_mask = mask_pil.resize((self.image_size[1] // 2, self.image_size[0] // 2), Image.NEAREST)
        return np.array(resized_mask)
    
    def resize_obj_mask(self, obj_mask):
        obj_mask_pil = Image.fromarray(obj_mask)
        resized_obj_mask = obj_mask_pil.resize((self.image_size[1] // 32, self.image_size[0] // 32), Image.NEAREST)
        return np.array(resized_obj_mask)
    
    def normalize_image(self, image):
        return image.astype(np.float32) / 255.0
    
    def __len__(self):
        return self.num_samples
    
    def __getitem__(self, index):
        annotation = self.annotations[index]
        image, mask = self.process(annotation)

        # Reorder the image from HWC to CHW as PyTorch expects
        image = image.transpose((2, 0, 1))

        return image, mask

class DataLoaderX(DataLoader):
    # Prefetch batches in a background thread via prefetch_generator
    def __iter__(self):
        return BackgroundGenerator(super().__iter__())

data_path = "/content/dataset/images/val/"
label_path = '/content/dataset/labels/'
class_mapping = {
    'obj1': 0,
    'obj2': 1,
    'obj3': 2
}
dataset = CustomDataset(data_path, label_path, image_size=(640, 640), normalize=True, class_mapping=class_mapping)
batch_size = 8  # Choose your desired batch size
train_data_loader = DataLoaderX(dataset, batch_size=batch_size, shuffle=False, pin_memory=False, num_workers=0)

for i, (batch_images, masks) in enumerate(train_data_loader):
    # Perform your training process here; for now just print the batch shapes
    print(batch_images.shape)
    print(masks.shape)
    print(i)

I really need help and don’t know why it causes the memory leak. When I commented out image, mask = self.process(annotation) in __getitem__ and returned zero arrays instead of the real image and mask, memory usage stayed stable, so the problem seems to be in the process function.
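The stub for that test looked roughly like this (a sketch, not the exact code, with shapes matching the 640x640 images and the two stacked 320x320 masks):

def __getitem__(self, index):
    # Debug stub: bypass self.process entirely and return constant zero arrays;
    # with this version, RAM usage stays flat across the whole epoch
    image = np.zeros((3, 640, 640), dtype=np.float32)
    mask = np.zeros((2, 320, 320), dtype=np.float32)
    return image, mask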
I’ve also tried reading the image file with cv2 instead of PIL, but nothing changed.
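One suspect worth ruling out in process is that Image.open is lazy and keeps the file handle alive; a sketch of a loading step that forces the decode and releases the file immediately (the load_and_resize helper name is hypothetical, not from my code):

def load_and_resize(image_path, image_size):
    # Hypothetical helper: the context manager closes the underlying file
    # as soon as the pixels have been decoded and copied out
    with Image.open(image_path) as img:
        img = img.convert('RGB')  # force the actual decode while the file is open
        resized = img.resize(image_size, Image.BILINEAR)
    return np.asarray(resized, dtype=np.float32) / 255.0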

Didn’t you solve the issue already here by isolating it down to OpenCV?

No, it actually was never solved. I made a mistake in assuming it had been.