I am new to PyTorch and have a small issue with creating Data Loaders for huge datasets. I have a folder “/train” with two folders “/images” and “/labels”. Now, these folders further have 1000 folders that contain 1000 images and 1000 labels in each. I am new to creating custom data loaders. After reading the PyTorch documentation I was able to create the following class but since the dataset is too big 350 GB my code will not work. Can someone please point me in the right direction?
import numpy as np import os from torch.utils.data import Dataset from PIL import Image class CustomDataset(Dataset): def __init__(self, data_root): self.data_root = data_root self.images =  self.labels =  for filenames in os.listdir(self.data_root) data = np.load(data_root+'/'+filename) img = data["images"] lbl = data["labels"] for image, label in zip(img,lbl): img = Image.fromarray(image) self.images.append(img) self.labels.append(label) def __len__(self): return len(self.filenames)*1000 def __getitem__(self, index): return self.images[index], self.labels[index]