Hello guys, I need help
I created a custom PyTorch Dataset whose `__getitem__` function loads the images and builds a whole batch at a time, and when I iterate over it in the training for loop the RAM usage gradually increases.
The images are 640x640 and the masks are 320x320, and it takes about 300 images to fill up the RAM.
It has nothing to do with the prefetching data loader, because I tested without it too.
import torch
from torch.utils.data import Dataset, DataLoader
from prefetch_generator import BackgroundGenerator
class CustomDataset(Dataset):
    """Dataset whose items are whole batches built by ``CustomDataLoader``.

    Item ``index`` covers the annotations
    ``[index * batch_size, (index + 1) * batch_size)``, clamped to the number
    of available samples, so the last item may hold fewer than ``batch_size``
    samples.

    Args:
        data_path: Forwarded unchanged to ``CustomDataLoader``.
        label_path: Forwarded unchanged to ``CustomDataLoader``.
        image_size: Forwarded to ``CustomDataLoader`` and stored on the
            instance.
        normalize: Forwarded unchanged to ``CustomDataLoader``.
        class_mapping: Forwarded unchanged to ``CustomDataLoader``.
        batch_size: Number of annotations grouped into one item.
    """

    def __init__(self, data_path, label_path, image_size=(720, 1280), normalize=True, class_mapping=None, batch_size=8):
        self.data_loader = CustomDataLoader(data_path, label_path, image_size, normalize, class_mapping, batch_size)
        print("one")
        self.batch_size = batch_size
        self.image_size = image_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceiling division: __getitem__ clamps the end index, so a final
        # short batch is valid and must be counted (floor division would
        # silently drop up to batch_size - 1 samples).
        return (self.data_loader.num_samples + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Build and return the batch at position ``index``.

        Returns:
            A ``(images, masks)`` tuple of float32 tensors. ``images`` is the
            array returned by ``process_batch`` with axes reordered from
            ``[B, H, W, C]`` to ``[B, C, H, W]``. ``masks`` has shape
            ``[B, 2, H, W]``: channel 0 is the mask taken from index 0 of the
            second axis of the returned mask array, channel 1 is its
            complement ``1 - mask``.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        num_batches = len(self)
        if index < 0:
            index += num_batches
        if not 0 <= index < num_batches:
            # Required for the sequence protocol: plain ``for b in dataset``
            # stops on IndexError instead of processing empty slices forever.
            raise IndexError(f"batch index {index} out of range for {num_batches} batches")

        start_idx = index * self.batch_size
        end_idx = min(start_idx + self.batch_size, self.data_loader.num_samples)
        batch_annotations = self.data_loader.annotations[start_idx:end_idx]
        batch_images, batch_masks = self.data_loader.process_batch(batch_annotations)

        # Channels-last -> channels-first: [B, H, W, C] -> [B, C, H, W].
        batch_images = torch.tensor(batch_images.transpose(0, 3, 1, 2), dtype=torch.float32)
        # Drop the singleton axis at position 1 of the masks.
        batch_masks = torch.tensor(batch_masks[:, 0, :, :], dtype=torch.float32)
        # Two-channel target: the mask and its complement.
        batch_masks = torch.stack((batch_masks, 1 - batch_masks), dim=1)
        return batch_images, batch_masks
class DataLoaderX(DataLoader):
    """DataLoader whose iterator is wrapped in a ``BackgroundGenerator``."""

    def __iter__(self):
        """Return the parent class's iterator wrapped for prefetching."""
        base_iterator = super().__iter__()
        return BackgroundGenerator(base_iterator)
dataset = CustomDataset(data_path, label_path, image_size=(640, 640), normalize=True, class_mapping=class_mapping)

# DataLoader-level batch size. NOTE: each dataset item is already a whole
# batch (see CustomDataset.__getitem__), so with default collation the loader
# stacks `batch_size` of those items along a new leading dimension.
batch_size = 1
train_data_loader = DataLoaderX(dataset, batch_size=batch_size, shuffle=False, pin_memory=False, num_workers=0)

# Number of items the loader yields per pass over the dataset.
print(len(train_data_loader))

# Example iteration: replace the prints with the actual training step.
for i, (batch_images, masks) in enumerate(train_data_loader):
    try:
        print(batch_images[:, 0, :, :].shape)
        print(masks[:, 0, :, :].shape)
        print(i * batch_size)  # Print the starting index of each batch
    except Exception as err:
        # Best-effort: skip a bad batch, but say why. A bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit and hide every error.
        print(f"skipping batch {i}: {err!r}")
        continue