Randomize ImageFolder

Torchvision's ImageFolder dataset reads images sequentially from the directories. Is it possible to shuffle them before they are loaded into the DataLoader? That is, I am looking for a different way to shuffle the dataset, rather than the DataLoader's shuffle option.

You can use a custom Sampler to specify the sampling order for the data loader. Specifically, implement the shuffling order in the __iter__() method of the Sampler.

import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch
import torch.utils.data as data

class IndexSampler(data.Sampler):
    r"""Custom sampler that yields dataset indices, optionally shuffled.

    By default the indices ``0 .. len(dataset) - 1`` are yielded in
    sequential order. With ``shuffle=True`` a new random permutation of
    those indices is drawn each time the sampler is iterated.

    Arguments:
        dataset (Dataset): dataset to sample from; only its length is used
        shuffle (bool, optional): if ``True``, yield the indices in random
            order instead of sequentially. Default: ``False``
        generator (torch.Generator, optional): random number generator used
            to draw the permutation when ``shuffle`` is ``True``; pass a
            seeded generator for a reproducible order. Default: ``None``
            (torch's global RNG is used)
    """

    def __init__(self, dataset, *, shuffle=False, generator=None):
        self.dataset = dataset
        self.shuffle = shuffle
        self.generator = generator

    def __iter__(self):
        """Return an iterator over the indices in the configured order."""
        n = len(self.dataset)
        if self.shuffle:
            # randperm draws a permutation of 0..n-1; converting to a list
            # makes the sampler yield plain Python ints, like range() does.
            return iter(torch.randperm(n, generator=self.generator).tolist())
        return iter(range(n))

    def __len__(self):
        """Return the number of indices yielded per iteration."""
        return len(self.dataset)

if __name__ == '__main__':
    # Demo: wrap the integers 0..19 in a dataset and read them back in
    # batches of two, in the order dictated by the custom sampler.
    values = torch.arange(20)
    demo_dataset = data.TensorDataset(values)
    sampler = IndexSampler(demo_dataset)
    loader = DataLoader(demo_dataset, batch_size=2, sampler=sampler)
    # Materialise every batch so the sampling order is visible in one print.
    batches = list(loader)
    print(batches)