okay, I followed the post and implemented the following dataloader class keeping in mind the same random locations for both input and target.
First, I want to show the required transformation by me which I have applied in a normal way if I don’t need to apply them simultaneously-
data_transforms = transforms.Compose([transforms.RandomCrop((512,512)),
transforms.Lambda(gaussian_blur),
transforms.Lambda(elastic_transform),
transforms.RandomRotation([+90,+180]),
transforms.RandomRotation([+180,+270]),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=train_mean, std=train_std)
])
Now, the thing is I can apply RandomCrop, Random Rotation, ToTensor
in the class but I have no idea how to apply gaussian_blur,elastic_transform and Normalization with custom mean
Also, below is my dataloader class
class DataLoaderSegmentation(data.Dataset):
def __init__(self,folder_path):
super(DataLoaderSegmentation, self).__init__()
self.img_files = glob.glob(os.path.join(folder_path,'images','*.tif'))
self.mask_files = glob.glob(os.path.join(folder_path,'mask','*.bmp'))
def mask_to_class(self,mask):
target = torch.from_numpy(mask)
h,w = target.shape[0],target.shape[1]
masks = torch.empty(h, w, dtype=torch.long)
colors = torch.unique(target.view(-1,target.size(2)),dim=0).numpy()
target = target.permute(2, 0, 1).contiguous()
mapping = {tuple(c): t for c, t in zip(colors.tolist(), range(len(colors)))}
for k in mapping:
idx = (target==torch.tensor(k, dtype=torch.uint8).unsqueeze(1).unsqueeze(2))
validx = (idx.sum(0) == 3)
masks[validx] = torch.tensor(mapping[k], dtype=torch.long)
return masks
def transform(self,image,mask):
i, j, h, w = transforms.RandomCrop.get_params(
image, output_size=(512, 512))
image = TF.crop(image, i, j, h, w)
mask = TF.crop(mask, i, j, h, w)
# Random horizontal flipping
if random.random() > 0.5:
image = TF.hflip(image)
mask = TF.hflip(mask)
image = TF.rotate(image,90)
mask = TF.rotate(mask,90)
image = TF.rotate(image,180)
mask = TF.rotate(mask,180)
image = TF.rotate(image,270)
mask = TF.rotate(mask,270)
# Transform to tensor
image = TF.to_tensor(image)
mask = TF.to_tensor(mask)
return image, mask
def __getitem__(self, index):
img_path = self.img_files[index]
mask_path = self.mask_files[index]
data = Image.open(img_path)
label = Image.open(mask_path)
data,label = self.transform(data,label)
label = np.array(label)
mask = self.mask_to_class(label)
return data,mask
def __len__(self):
return len(self.img_files)
I am obtaining the following error -
RuntimeError: Assertion `input0 == target0 && input2 == target1 && input3 == target2' failed. size mismatch (got input: 5x4x504x504, target: 5x584x565) at ../aten/src/THNN/generic/SpatialClassNLLCriterion.c:59
Because size of my label is now batch_size*height*width
, so does this implies I have to do certain change in my network too or is someplace else I am screwing up?