So I need 5 sizes of every picture.
I preprocess and save everything with numpy, but loading files from disk (probably) takes most of the time (much more than model/tensor processing).
My dataloader looks like this:
class GanDataset(Dataset):
def __init__(self, samples):
self.samples = samples
def __getitem__(self, index):
sample = self.samples[index]
return sample
def __len__(self):
return len(self.samples)
class GanCollate():
    """Collate callable that loads pre-resized ``.npy`` arrays per sample.

    For every configured size it reads ``<size>/<sample>.npy`` from disk
    (paths relative to the current working directory — TODO confirm) and
    stacks the arrays into one batch tensor per size.
    """

    def __init__(self, sizes):
        # One directory name per image resolution, e.g. [4, 8, 16, 32, 64].
        self.sizes = sizes

    def __call__(self, batch):
        # `batch` is the list of sample names produced by GanDataset.
        batched_per_size = []
        for size in self.sizes:
            tensors = []
            for name in batch:
                path = os.path.join(str(size), name) + '.npy'
                tensors.append(torch.from_numpy(np.load(path)))
            batched_per_size.append(torch.stack(tensors))
        return batched_per_size
# Build the loader: shuffled, fixed-size batches (drop_last), with the
# per-size .npy loading done inside GanCollate by the worker processes.
dataloader = DataLoader(
    GanDataset(pics),
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    pin_memory=False,
    drop_last=True,
    collate_fn=GanCollate(sizes),
)
Full code
https://colab.research.google.com/drive/1yWkyqyzE0chTs2ciBcLn_l2T3MXDEjaP