Hi,
I have written a custom dataloader to load a huge amount of data, but it seems to be very slow. The model takes up around ~9 GB of GPU memory, yet the "Volatile GPU-Util" reading (GPU utilization, not memory) sits at 0% almost all the time, spiking to 100% only for a second — which suggests the GPU is starved waiting on data. I am reading the dataset from an SSD.
class CustomDataset(Dataset):
    """Dataset pairing the ``.jpg`` files in *root_dir* with landmark targets
    read from a CSV file.

    The CSV is assumed to contain at least an ``id`` column (image filename
    without extension) and a ``landmark_id`` column (the target) — TODO
    confirm against the actual data.
    """

    def __init__(self, root_dir, csv_file, transform=None):
        """
        Args:
            root_dir: directory containing the ``.jpg`` images.
            csv_file: path to a CSV with ``id`` and ``landmark_id`` columns.
            transform: optional callable applied to each loaded PIL image.
        """
        self.targets_csv = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.image_names = [i for i in sorted(os.listdir(self.root_dir))
                            if i.endswith('.jpg')]
        # Precompute id -> landmark_id ONCE.  The original code ran
        # `self.targets_csv.index[self.targets_csv['id'] == ...]` inside
        # __getitem__, an O(len(csv)) scan per sample — the likely cause of
        # the dataloader starving the GPU.  A dict lookup is O(1).
        self._id_to_target = dict(
            zip(self.targets_csv['id'].astype(str),
                self.targets_csv['landmark_id'].astype('float'))
        )

    def __len__(self):
        """Number of ``.jpg`` images found in *root_dir*."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the *idx*-th image (sorted order).

        ``image`` is the PIL image (transformed if a transform was given);
        ``target`` is the float ``landmark_id`` for that image.
        """
        img_name = os.path.join(self.root_dir, self.image_names[idx])
        image = Image.open(img_name).convert('RGB')
        # os.path-based key extraction; the original
        # split('.')[0].split('/')[-1] broke on Windows separators or any
        # '.' appearing in root_dir.
        key = os.path.splitext(os.path.basename(img_name))[0]
        target = self._id_to_target[key]
        if self.transform:
            image = self.transform(image)
        # Bug fix: the original returned `img`, which is undefined when
        # transform is None (NameError).  Always return `image`.
        return image, target