I am trying to use data augmentation for each of the epoch on train set, but I also need the filenames of testloader for later.
So, I used a custom ImageFolderWithPaths
to generate tuple for image, label, path.
But when combined with a wrapper dataset to build using augmentation have some issues.
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import transforms
import torchvision
from sklearn.model_selection import KFold
from torchvision import datasets, transforms, models
Custom datasets.ImageFolder
to return a tuple of (image, label, path)
class ImageFolderWithPaths(datasets.ImageFolder):
def __getitem__(self, index):
return super(ImageFolderWithPaths, self).__getitem__(index) + (self.imgs[index][0],)
Sample output after creating dataset
data_dir = '/content/drive/MyDrive/Colab Notebooks/CBIR study/Dataset/temp'
dataset = ImageFolderWithPaths(data_dir)
for i, data in enumerate(dataset):
imgs, label, path = data
print(path)
Wrapper dataset to use transforms for augmentation of train within k-fold from trainloader and testloader. Code from here: https://stackoverflow.com/a/57539790.
class WrapperDataset:
def __init__(self, dataset, transform=None, target_transform=None):
self.dataset = dataset
self.transform = transform
self.target_transform = target_transform
def __getitem__(self, index):
# this is what ImageFolder normally returns
image, label = super(datasets.ImageFolder, self).__getitem__(index)
# the image file path
path = self.imgs[index][0]
if self.transform is not None:
image = self.transform(image)
if self.target_transform is not None:
label = self.target_transform(label)
return (image, label, path)
def __len__(self):
return len(self.dataset)
data_transforms = {
'train_transforms': transforms.Compose([
transforms.Resize([224,224]),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'test_transforms': transforms.Compose([
transforms.Resize([224,224]),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
The below code works for a normal dataset created using datasets.ImageFolder
, but gives an error with the custom ImageFolderWithPaths
.
Any thoughts on how to tweak this?
k_folds = 5
torch.manual_seed(42)
# Define the K-fold Cross Validator
kfold = KFold(n_splits=k_folds, shuffle=True)
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
# Print
print('\nKfold: {%d}' %(fold+1))
print('--------------------------------')
print(train_ids, test_ids)
# Sample elements randomly from a given list of ids, no replacement.
train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
trainloader = torch.utils.data.DataLoader(
WrapperDataset(dataset, transform=data_transforms['train_transforms']),
batch_size=4, sampler=train_subsampler)
testloader = torch.utils.data.DataLoader(
WrapperDataset(dataset, transform=data_transforms['test_transforms']),
batch_size=4, sampler=train_subsampler)
for i, data in enumerate(trainloader):
img, label, path = data
print(label, path)