I am new to PyTorch and have a small issue with creating Data Loaders for huge datasets. I have a folder “/train” with two folders “/images” and “/labels”. Now, these folders further have 1000 folders that contain 1000 images and 1000 labels in each. I am new to creating custom data loaders. After reading the PyTorch documentation I was able to create the following class but since the dataset is too big 350 GB my code will not work. Can someone please point me in the right direction?
import numpy as np
import os
from torch.utils.data import Dataset
from PIL import Image
class CustomDataset(Dataset):
def __init__(self, data_root):
self.data_root = data_root
self.images = []
self.labels = []
for filenames in os.listdir(self.data_root)
data = np.load(data_root+'/'+filename)
img = data["images"]
lbl = data["labels"]
for image, label in zip(img,lbl):
img = Image.fromarray(image)
self.images.append(img)
self.labels.append(label)
def __len__(self):
return len(self.filenames)*1000
def __getitem__(self, index):
return self.images[index], self.labels[index]