Is it possible to use a DataLoader to repeat the same batch with a different augmentation? For example, I would like to generate a batch with images from 1 to 10 four times with different augmentations, then for images from 11 to 20, etc. My problem is that I do not know how to prevent the DataLoader from advancing the index.
Thanks.
Are you writing/using your own dataset class?
Yes. I have created my own Dataset class that reads the images and masks, and then it applies the data augmentation, everything inside the
__getitem__
function.
This is the loader code that I am using:
class my_loader(data.Dataset):
    """Segmentation dataset that loads image/mask pairs and applies imgaug augmentation.

    Images are read from ``<root>/<split>/images`` and (when ``train`` is True)
    the matching masks from ``<root>/<split>/masks`` under the same filename.
    Geometric (affine) augmentation is applied identically to the image and its
    mask by freezing the random parameters with ``to_deterministic()``.

    Parameters
    ----------
    root : str
        Dataset root directory.
    split : str
        One of 'train', 'val', 'test', 'test_all'; selects the subdirectory.
    joint_transform : callable or None
        Transform applied jointly to [img, target] (or [img] at test time).
    transform : callable or None
        Image-only transform applied after the joint transform.
    target_transform : callable
        Transform applied to the mask (train mode only).
    download : bool
        If True, call ``self.download()`` (not implemented).
    loader : callable
        Function that opens an image path and returns a PIL image.
    train : bool
        If True, masks are loaded and returned; otherwise target is ``[]``.
    augm : bool
        If True, enable random flip/translate augmentation; otherwise no-op.
    """

    def __init__(self, root, split='train', joint_transform=None,
                 transform=None, target_transform=LabelToLongTensor(),
                 download=False,
                 loader=default_loader, train=True, augm=True):
        self.root = root
        assert split in ('train', 'val', 'test', 'test_all')
        self.split = split
        self.transform = transform
        self.target_transform = target_transform
        self.joint_transform = joint_transform
        self.loader = loader
        self.train = train
        self.augm = augm
        if download:
            self.download()
        self.imgs = _make_dataset(os.path.join(self.root, self.split + '/images'))
        if self.augm:
            # Random horizontal flip plus an occasional small translation.
            self.affine_seq = iaa.Sequential([
                iaa.Fliplr(0.5),
                iaa.Sometimes(0.33,
                              iaa.Affine(translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
                                         mode='symmetric')
                              ),
            ], random_order=True)
            self.intensity_seq = iaa.Noop()
        else:
            self.affine_seq = iaa.Noop()
            self.intensity_seq = iaa.Noop()

    def __getitem__(self, index):
        """Return ``(img, target, basename, index)`` for the sample at ``index``.

        ``target`` is ``[]`` when ``self.train`` is False (None is not accepted
        by the default collate function).
        """
        path = self.imgs[index]
        img = self.loader(path)
        img = from_pil(img).astype(np.uint8)

        # BUGFIX: the deterministic augmenter must be created BEFORE it is used
        # on the mask; the original code referenced `affine_seq_deter` in the
        # training branch before assigning it, raising UnboundLocalError.
        # Freezing the parameters here guarantees the image and its mask get
        # exactly the same geometric transform.
        affine_seq_deter = self.affine_seq.to_deterministic()

        if self.train:
            target = Image.open(path.replace(self.split + '/images', self.split + '/masks'))
            target = target.convert('L')
            target = from_pil(target).astype(np.uint8)
            # imgaug expects HxWxC, so add a channel axis for the augmentation
            # and drop it again afterwards.
            target = target.reshape((target.shape[0], target.shape[1], 1))
            target = affine_seq_deter.augment_images([target])[0]
            target = target.reshape((target.shape[0], target.shape[1]))
            # Re-binarize: interpolation during the affine warp can produce
            # intermediate gray values.
            target = (target > 128).astype(np.uint8)
            target = to_pil(target)

        img = affine_seq_deter.augment_images([img])[0].astype(np.uint8)
        img = self.intensity_seq.augment_images([img])[0].astype(np.uint8)
        img = to_pil(img)

        if self.joint_transform is not None:
            if self.train:
                img, target = self.joint_transform([img, target])
            else:
                img = self.joint_transform([img])
                img = img[0]
        if self.transform is not None:
            img = self.transform(img)
        if self.train:
            target = self.target_transform(target)
        else:
            target = []  # None is not accepted by the default collate_fn.

        # ntpath.basename also strips Windows-style '\' separators, which
        # os.path.basename would not do on POSIX.
        import ntpath
        path = ntpath.basename(path)
        return img, target, path, index

    def __len__(self):
        """Number of samples (image files) in the selected split."""
        return len(self.imgs)

    def download(self):
        # Downloading is intentionally unsupported for this dataset.
        raise NotImplementedError
In case anyone is interested, I have done it by using:
sampler = torch.utils.data.SubsetRandomSampler(indices=index_you_want_to_use)
and then defining a new DataLoader with this sampler. I am sure there is a much better way to do that, but it works. Personally I don’t like this method because it shuffles the dataset, so you have to be careful with the new indexes.
2 Likes