Hello guys, I need help
I created a custom PyTorch Dataset whose `__getitem__` function loads the images and builds a whole batch per call, and when I iterate over it in the training for-loop the RAM usage gradually increases.
The images are 640x640 and the masks are 320x320, and it takes roughly 300 images to fill up the RAM.
It has nothing to do with the prefetching data loader, because I tested without it too.
import torch
from torch.utils.data import Dataset, DataLoader
from prefetch_generator import BackgroundGenerator


class CustomDataset(Dataset):
    """Dataset whose items are whole, already-assembled batches.

    Each index maps to a contiguous slice of ``batch_size`` annotations,
    which is handed to ``CustomDataLoader.process_batch`` to produce the
    images and masks for that slice.

    Args:
        data_path: Forwarded unchanged to ``CustomDataLoader``.
        label_path: Forwarded unchanged to ``CustomDataLoader``.
        image_size: Forwarded to ``CustomDataLoader`` and stored on the
            instance.
        normalize: Forwarded unchanged to ``CustomDataLoader``.
        class_mapping: Forwarded unchanged to ``CustomDataLoader``.
        batch_size: Number of annotations grouped into one item.
    """

    def __init__(self, data_path, label_path, image_size=(720, 1280),
                 normalize=True, class_mapping=None, batch_size=8):
        # CustomDataLoader is defined elsewhere in the project; this class
        # relies on its `num_samples`, `annotations` and `process_batch`.
        self.data_loader = CustomDataLoader(
            data_path, label_path, image_size, normalize, class_mapping,
            batch_size,
        )
        print("one")
        self.batch_size = batch_size
        self.image_size = image_size

    def __len__(self) -> int:
        """Return the number of full batches available."""
        # NOTE(review): floor division means a trailing partial batch is
        # never served, so the `min(...)` clamp in __getitem__ is not reached
        # for any index below len(self). Confirm whether dropping the
        # remainder is intended.
        return self.data_loader.num_samples // self.batch_size

    def __getitem__(self, index):
        """Build and return the batch at position ``index``.

        Returns:
            A ``(batch_images, batch_masks)`` pair of float32 tensors. The
            masks tensor has two channels along dim 1: the mask and its
            complement ``1 - mask``.
        """
        start_idx = index * self.batch_size
        end_idx = min((index + 1) * self.batch_size,
                      self.data_loader.num_samples)
        batch_annotations = self.data_loader.annotations[start_idx:end_idx]
        batch_images, batch_masks = self.data_loader.process_batch(
            batch_annotations
        )

        # Move the last axis to position 1, i.e. [B, H, W, C] -> [B, C, H, W]
        # (assumes process_batch returns channel-last NumPy arrays -- TODO
        # confirm). torch.tensor() always copies its input.
        batch_images = torch.tensor(
            batch_images.transpose(0, 3, 1, 2), dtype=torch.float32
        )

        # Drop the extra dimension at index 1 of the masks.
        batch_masks = torch.tensor(batch_masks[:, 0, :, :],
                                   dtype=torch.float32)

        # Stack the mask with its complement along a new dim 1.
        batch_masks = torch.stack((batch_masks, 1 - batch_masks), dim=1)

        return batch_images, batch_masks


class DataLoaderX(DataLoader):
    """prefetch dataloader"""

    def __iter__(self):
        # Wrap the regular DataLoader iterator in a BackgroundGenerator.
        return BackgroundGenerator(super().__iter__())


dataset = CustomDataset(data_path, label_path, image_size=(640, 640),
                        normalize=True, class_mapping=class_mapping)

batch_size = 1  # Choose your desired batch size

# NOTE(review): the dataset already returns whole batches, so with
# batch_size=1 the loader's automatic batching presumably adds a leading
# dimension of size 1 to every tensor. PyTorch documents batch_size=None as
# the way to disable automatic batching -- confirm which shape is wanted.
train_data_loader = DataLoaderX(dataset, batch_size=batch_size, shuffle=False,
                                pin_memory=False, num_workers=0)

# Example of how to iterate through the data loader during training
print(len(train_data_loader))

for i, (batch_images, masks) in enumerate(train_data_loader):
    # Here you can use batch_images and masks for training.
    # Remember that the batch size is determined by the 'batch_size'
    # parameter you set above.
    # Perform your training process here
    try:
        print(batch_images[:, 0, :, :].shape)
        print(masks[:, 0, :, :].shape)
        print(i * batch_size)  # Print the starting index of each batch
    except Exception as exc:
        # Best-effort debug output: report the problem and move on.
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate so the loop can still be stopped.
        print(f"skipping batch {i}: {exc!r}")
        continue