The torchvision dataset ImageFolder reads images sequentially from the directories. Is it possible to shuffle them before loading them into the DataLoader? That is, I want a different way to shuffle the dataset, not the DataLoader's shuffle option.
You can use a custom Sampler to specify the sampling order for the data loader. Specifically, write the shuffling order in the __iter__() method of the Sampler.
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch
import torch.utils.data as data
class IndexSampler(data.Sampler):
    r"""Custom sampler that yields every index of ``dataset`` exactly once.

    By default the indices come out in sequential order
    (``0, 1, ..., len(dataset) - 1``). With ``shuffle=True`` a new random
    permutation of the indices is drawn every time the sampler is iterated.

    Arguments:
        dataset (Dataset): dataset to sample from; only ``len(dataset)`` is
            used by the sampler
        shuffle (bool, optional): if ``True``, yield the indices in random
            order instead of sequentially (default: ``False``)
        generator (torch.Generator, optional): random number generator passed
            to ``torch.randperm`` when shuffling; ``None`` uses torch's
            default generator (default: ``None``)
    """

    def __init__(self, dataset, shuffle=False, generator=None):
        self.dataset = dataset
        self.shuffle = shuffle
        self.generator = generator

    def __iter__(self):
        """Return an iterator over the dataset indices in sampling order."""
        num_samples = len(self.dataset)
        if self.shuffle:
            # .tolist() converts the permutation tensor to plain Python ints,
            # the same element type the sequential branch yields.
            permutation = torch.randperm(num_samples, generator=self.generator)
            return iter(permutation.tolist())
        return iter(range(num_samples))

    def __len__(self):
        """Return the number of indices produced by one pass of the sampler."""
        return len(self.dataset)
if __name__ == '__main__':
    # Demo: wrap the integers 0..19 in a TensorDataset and iterate over it in
    # batches of two, letting the custom sampler decide the index order.
    values = torch.arange(20)
    demo_dataset = data.TensorDataset(values)
    loader = DataLoader(
        demo_dataset,
        batch_size=2,
        sampler=IndexSampler(demo_dataset),
    )
    # Materialise every batch so the whole sampling order is printed at once.
    batches = list(loader)
    print(batches)