Hello, I have created a data pipeline that builds train, validation and test sets with various augmentations, using albumentations. The code is below:
class GetDataset(Dataset):
    """Image dataset backed by a DataFrame of file names and optional labels.

    Each item is read from ``imgDir`` with OpenCV, converted from BGR to RGB,
    optionally passed through ``transform`` and returned as a channel-first
    tensor.

    Args:
        imgDir: Directory that contains the image files.
        dataset: Frame with an ``image`` column of file names and, when
            ``labeled`` is true, an ``individual_label`` column.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping holding the result under ``"image"``.
        labeled: Whether ``__getitem__`` also returns the label.
    """

    def __init__(self, imgDir: str, dataset: pd.DataFrame, transform=None, labeled=True):
        self.imgDir = imgDir
        self.dataset = dataset
        self.labeled = labeled
        self.transform = transform

    def __len__(self) -> int:
        """Return the number of rows in the backing frame."""
        return len(self.dataset)

    @staticmethod
    def _to_chw_tensor(image: np.ndarray) -> torch.Tensor:
        """Convert an (H, W, C) array into a (C, H, W) tensor."""
        # (2, 0, 1) moves the channel axis first while keeping rows before
        # columns. An axis order of (2, 1, 0) would additionally swap height
        # and width, i.e. hand back every image transposed.
        return torch.tensor(image.transpose(2, 0, 1))

    def __getitem__(self, idx: int) -> "torch.Tensor | tuple[torch.Tensor, torch.Tensor]":
        """Load sample ``idx``.

        Returns:
            ``(image, target)`` when the dataset is labeled, otherwise just
            ``image``. ``image`` is float32 when a transform is set; without
            one it keeps the dtype OpenCV produced.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = os.path.join(self.imgDir, self.dataset['image'].iloc[idx])
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image'].astype(np.float32)

        image = self._to_chw_tensor(image)

        if not self.labeled:
            return image
        target = torch.tensor(self.dataset['individual_label'].iloc[idx])
        return image, target
def transform_ds(trainFlag=True):
    """Build the albumentations pipeline for one dataset split.

    Args:
        trainFlag: When true, random augmentations are inserted between the
            resize and normalize steps; otherwise only resize and normalize
            are applied.

    Returns:
        An ``A.Compose`` pipeline.
    """
    # Steps common to both pipelines: fixed 224x224 input, then per-channel
    # normalization with the given mean/std.
    resize = A.Resize(224, 224)
    normalize = A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    )

    if not trainFlag:
        return A.Compose([resize, normalize])

    spatial_steps = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
    ]
    pixel_steps = [
        A.GaussianBlur(blur_limit=(3, 3), p=0.2),
        A.ColorJitter(0.45, 0.45, 0.45, 0.45),
    ]
    return A.Compose([resize, *spatial_steps, *pixel_steps, normalize])
I then use the code below to create my train, validation and test datasets:
# Number of shuffled samples held out of the training frame for validation.
_HOLDOUT = 1000

# One pipeline per dataset: augmenting for training, plain resize/normalize
# for validation and test.
_train_tf = transform_ds(trainFlag=True)
_val_tf = transform_ds(trainFlag=False)
_test_tf = transform_ds(trainFlag=False)

# `tex` and `vex` wrap the same frame and differ only in their transform.
tex = GetDataset(imgDir=train_imgs, dataset=df_copy, transform=_train_tf)
vex = GetDataset(imgDir=train_imgs, dataset=df_copy, transform=_val_tf)
tester = GetDataset(
    imgDir=test_imgs,
    dataset=test_copy,
    transform=_test_tf,
    labeled=False,
)

# Random permutation of row positions; the last _HOLDOUT entries become the
# validation subset and everything before them the training subset.
indices = torch.randperm(len(df_copy)).tolist()
train_eg = torch.utils.data.Subset(tex, indices[:-_HOLDOUT])
val_eg = torch.utils.data.Subset(vex, indices[-_HOLDOUT:])

ex_t = DataLoader(dataset=train_eg, batch_size=16, shuffle=True)
ex_v = DataLoader(dataset=val_eg, batch_size=16, shuffle=True)
test = DataLoader(dataset=tester, batch_size=16)
But when I try to visualize them, I realize that my validation set uses the same augmentations as my train set, even though I am explicitly using the trainFlag to control which transforms are applied to which dataset. I noticed that this behavior only occurs when I use transforms from albumentations; the code works as expected when I use torchvision transforms.