Multiclass Segmentation

Okay, I followed the post and implemented the dataloader class below, keeping the same random locations for both input and target.
First, I want to show the transformations I need; this is how I would apply them the normal way, if I did not have to apply them to the image and mask simultaneously:

data_transforms = transforms.Compose([transforms.RandomCrop((512, 512)),
                                      transforms.Lambda(gaussian_blur),
                                      transforms.Lambda(elastic_transform),
                                      transforms.RandomRotation([+90, +180]),
                                      transforms.RandomRotation([+180, +270]),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=train_mean, std=train_std)
                                      ])


Now, the thing is that I can apply RandomCrop, RandomRotation and ToTensor inside the class, but I have no idea how to apply gaussian_blur, elastic_transform and Normalize with my custom mean and std there.
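
The closest I can get is a rough sketch like the one below (transform_pair is just a placeholder name, and it reuses the transforms / TF aliases and the gaussian_blur, elastic_transform, train_mean, train_std names from my code; in particular I am assuming elastic_transform could be rewritten to take the image and mask together and warp both with the same displacement field, which I have not verified):

def transform_pair(image, mask):
    # same crop parameters reused for image and mask
    i, j, h, w = transforms.RandomCrop.get_params(image, output_size=(512, 512))
    image = TF.crop(image, i, j, h, w)
    mask = TF.crop(mask, i, j, h, w)

    # intensity-only augmentation: applied to the image alone,
    # since blurring must not change the class labels in the mask
    image = gaussian_blur(image)

    # elastic_transform changes the geometry, so it would have to warp
    # image and mask with the same displacement field (assumed signature,
    # not my original Lambda version)
    image, mask = elastic_transform(image, mask)

    image = TF.to_tensor(image)
    # Normalize works on tensors and should only touch the image;
    # the mask keeps its raw colours for mask_to_class
    image = TF.normalize(image, mean=train_mean, std=train_std)
    return image, mask

Is this the right way to slot them in, or should the mask go through some of these steps as well?
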
Also, below is my dataloader class

import glob
import os
import random

import numpy as np
import torch
from PIL import Image
from torch.utils import data
from torchvision import transforms
import torchvision.transforms.functional as TF


class DataLoaderSegmentation(data.Dataset):
    def __init__(self,folder_path):
        super(DataLoaderSegmentation, self).__init__()
        # pair up image and mask files; sorted so the i-th image matches the i-th mask
        self.img_files = sorted(glob.glob(os.path.join(folder_path, 'images', '*.tif')))
        self.mask_files = sorted(glob.glob(os.path.join(folder_path, 'mask', '*.bmp')))
  
    def mask_to_class(self, mask):
        # map every unique RGB colour in the mask to an integer class index
        target = torch.from_numpy(mask)
        h,w = target.shape[0],target.shape[1]
        masks = torch.empty(h, w, dtype=torch.long)
        colors = torch.unique(target.view(-1,target.size(2)),dim=0).numpy()
        target = target.permute(2, 0, 1).contiguous()
        mapping = {tuple(c): t for c, t in zip(colors.tolist(), range(len(colors)))}
        for k in mapping:
            # pixels where all three channels match this colour belong to class mapping[k]
            idx = (target == torch.tensor(k, dtype=torch.uint8).unsqueeze(1).unsqueeze(2))
            validx = (idx.sum(0) == 3)
            masks[validx] = torch.tensor(mapping[k], dtype=torch.long)
        return masks
    
    def transform(self, image, mask):
        # Random crop, reusing the same crop parameters for image and mask
        i, j, h, w = transforms.RandomCrop.get_params(image, output_size=(512, 512))
        image = TF.crop(image, i, j, h, w)
        mask = TF.crop(mask, i, j, h, w)

        # Random horizontal flipping
        if random.random() > 0.5:
            image = TF.hflip(image)
            mask = TF.hflip(mask)
        
        # Rotations (note: these three are applied unconditionally, one after another)
        image = TF.rotate(image, 90)
        mask = TF.rotate(mask, 90)
        image = TF.rotate(image, 180)
        mask = TF.rotate(mask, 180)
        image = TF.rotate(image, 270)
        mask = TF.rotate(mask, 270)

        # Transform to tensor (to_tensor also rescales pixel values to floats in [0, 1],
        # for the mask as well)
        image = TF.to_tensor(image)
        mask = TF.to_tensor(mask)
        return image, mask
    
    def __getitem__(self, index):
        img_path = self.img_files[index]
        mask_path = self.mask_files[index]
        data = Image.open(img_path)
        label = Image.open(mask_path)
        data,label = self.transform(data,label)
        label = np.array(label)
        mask = self.mask_to_class(label)
        return data,mask
           
    def __len__(self):
        return len(self.img_files)
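
For completeness, this is roughly how I feed the dataset to a DataLoader (the path is just a placeholder, and the shape comments are what I expected to get, which clearly does not match the error below):

train_set = DataLoaderSegmentation('path/to/train')   # placeholder path
train_loader = data.DataLoader(train_set, batch_size=5, shuffle=True)

images, masks = next(iter(train_loader))
print(images.shape)   # I expected something like torch.Size([5, 3, 512, 512]) for RGB .tif inputs
print(masks.shape)    # and torch.Size([5, 512, 512]) of long class indices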

I am getting the following error:

RuntimeError: Assertion `input0 == target0 && input2 == target1 && input3 == target2' failed. size mismatch (got input: 5x4x504x504, target: 5x584x565) at ../aten/src/THNN/generic/SpatialClassNLLCriterion.c:59

Because the size of my label is now batch_size x height x width, does this imply that I have to change something in my network too, or am I screwing up somewhere else?
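
For reference, this is the toy shape check I used to convince myself that a batch_size x height x width target is what nn.CrossEntropyLoss expects for segmentation (the 5, 4 and 512 are made-up numbers mirroring my setup):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
output = torch.randn(5, 4, 512, 512)           # N x C x H x W logits from the network
target = torch.randint(0, 4, (5, 512, 512))    # N x H x W long class indices
loss = criterion(output, target)               # runs fine when the spatial sizes match
print(loss.item())

That runs, so my current guess is that the assertion is complaining about the spatial sizes (504x504 output vs 584x565 target) rather than about the target layout itself, but I would appreciate confirmation.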