Is it possible to use a DataLoader to repeat the same batch with a different augmentation?

Is it possible to use a DataLoader to repeat the same batch with a different augmentation? For example, I would like to generate a batch with images 1 to 10 four times, each time with a different augmentation, and then images 11 to 20, and so on. My problem is that I do not know how to stop the DataLoader from advancing the index.
Thanks.

Are you writing/using your own dataset class?

Yes. I have created my own Dataset class that reads the images and masks and then applies the data augmentation, everything inside the __getitem__ function.

This is the loader code that I am using:

import os
import ntpath

import numpy as np
import imgaug.augmenters as iaa
from PIL import Image
from torch.utils import data

# _make_dataset, default_loader, LabelToLongTensor, from_pil and to_pil
# are project-specific helpers defined elsewhere.
class my_loader(data.Dataset):

    def __init__(self, root, split='train', joint_transform=None,
                 transform=None, target_transform=LabelToLongTensor(),
                 download=False,
                 loader=default_loader, train=True, augm=True):
        self.root = root
        assert split in ('train', 'val', 'test', 'test_all')
        self.split = split
        self.transform = transform
        self.target_transform = target_transform
        self.joint_transform = joint_transform
        self.loader = loader
        self.train = train
        self.augm = augm
        
        if download:
            self.download()
        
        self.imgs = _make_dataset(os.path.join(self.root, self.split + '/images'))
        
        if self.augm:
            self.affine_seq = iaa.Sequential([
                iaa.Fliplr(0.5),
                iaa.Sometimes(0.33, 
                    iaa.Affine(translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}, mode='symmetric')
                    ),
                ], random_order=True)
            self.intensity_seq = iaa.Noop()
        else:
            self.affine_seq = iaa.Noop()
            self.intensity_seq = iaa.Noop()
        
    def __getitem__(self, index):
        path = self.imgs[index]
        img = self.loader(path)

        # Freeze the random parameters once per sample so that the image
        # and its mask receive exactly the same affine transformation.
        affine_seq_deter = self.affine_seq.to_deterministic()

        if self.train:
            target = Image.open(path.replace(self.split + '/images', self.split + '/masks'))
            target = target.convert('L')
            target = from_pil(target).astype(np.uint8)
            target = target.reshape((target.shape[0], target.shape[1], 1))
            target = affine_seq_deter.augment_images([target])[0]
            target = target.reshape((target.shape[0], target.shape[1]))
            target = (target > 128).astype(np.uint8)  # binarize the mask
            target = to_pil(target)

        img = from_pil(img).astype(np.uint8)
        img = affine_seq_deter.augment_images([img])[0].astype(np.uint8)
        img = self.intensity_seq.augment_images([img])[0].astype(np.uint8)
        img = to_pil(img)

        if self.joint_transform is not None:
            if self.train:
                img, target = self.joint_transform([img, target])
            else:
                img = self.joint_transform([img])[0]

        if self.transform is not None:
            img = self.transform(img)

        if self.train:
            target = self.target_transform(target)
        else:
            target = []  # the default collate function does not accept None

        path = ntpath.basename(path)
        return img, target, path, index

    def __len__(self):
        return len(self.imgs)

    def download(self):
        raise NotImplementedError

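The call to self.affine_seq.to_deterministic() is the key detail: a deterministic copy of the augmenter samples its random parameters once, so reusing it applies the same geometric transform to both the image and its mask. A minimal standalone sketch of the idea with imgaug (the array shapes here are just illustrative):

import numpy as np
import imgaug.augmenters as iaa

seq = iaa.Sequential([iaa.Fliplr(0.5)])

img = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
mask = np.random.randint(0, 2, (64, 64, 1), dtype=np.uint8)

# The deterministic copy samples its random state once; reusing it
# applies the same flip decision to both arrays.
det = seq.to_deterministic()
img_aug = det.augment_images([img])[0]
mask_aug = det.augment_images([mask])[0]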
If anyone is interested, I solved it by using:
sampler = torch.utils.data.SubsetRandomSampler(indices=index_you_want_to_use)
and then defining a new DataLoader with this sampler. I am sure there is a better way to do it, but it works. Personally I don't like this method because it shuffles the subset, so you have to keep track of the new indices.
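A minimal sketch of that approach, assuming the my_loader class defined above (the root path, batch size, and index range are just example values):

import torch
from torch.utils.data import DataLoader, SubsetRandomSampler

dataset = my_loader(root='data/', split='train')  # hypothetical root path

# Restrict the loader to the first ten images; every pass over this
# loader re-runs __getitem__, which draws fresh augmentations for the
# same images.
sampler = SubsetRandomSampler(indices=list(range(10)))
loader = DataLoader(dataset, batch_size=10, sampler=sampler)

# Repeat the same ten images four times, each time with new augmentation.
for _ in range(4):
    for img, target, path, index in loader:
        pass  # training step goes here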
