Adding augmentation to both image and segmentation mask

I am lost: I am having trouble augmenting both the image and its mask consistently. At times the image is flipped and the mask is not, or the mask is flipped and the image is not. Here is my code:

# Spatial size handed to ``v2.Resize`` in the training pipeline.
im_size = (256, 256)

# Use the GPU only when PyTorch reports that CUDA is available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Training-time pipeline, applied in order: resize to ``im_size``, random
# horizontal and vertical flips (each with p=0.5), then ``ToImage`` followed
# by a float32 conversion with ``scale=True``.
# Disabled experiments, kept for reference:
#   v2.RandomPerspective(distortion_scale=.16, p=1.0)
#   v2.RandomRotation(degrees=[90, 180])
_train_steps = [
    v2.Resize(im_size),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomVerticalFlip(p=0.5),
    v2.ToImage(),
    v2.ToDtype(dtype=torch.float32, scale=True),
]
train_transform = v2.Compose(_train_steps)

# Validation-time pipeline: no random augmentation, only ``ToImage`` followed
# by a float32 conversion with ``scale=True``.
# Resizing is currently disabled here:
#   v2.Resize(im_size)
_val_steps = [
    v2.ToImage(),
    v2.ToDtype(dtype=torch.float32, scale=True),
]
val_transform = v2.Compose(_val_steps)


class barbadosLandSurveyDataset(Dataset):
    """Dataset yielding ``(image, mask)`` pairs described by a DataFrame.

    Each row of ``df_`` must provide an ``"image_path"`` entry (opened with
    PIL) and a ``"geometry"`` entry (rasterised by ``polygon_to_mask``).

    Args:
        df_: Table with one sample per row.
        tf: Callable applied to the ``(image, mask)`` tuple in a single call;
            it must return the transformed pair.
        im_size: ``(width, height)`` the image is resized to when loaded.
            Defaults to ``(256, 256)``, the value that was previously
            hard-coded in ``__getitem__``.
    """

    def __init__(self, df_: pd.DataFrame, tf, im_size: tuple = (256, 256)):
        self.df_ = df_
        self.tf = tf
        self.im_size = tuple(im_size)

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df_)

    def __getitem__(self, idx):
        """Load, rasterise and jointly transform the sample at ``idx``.

        Returns:
            The ``(image, mask)`` pair produced by ``self.tf``.
        """
        # Fetch the row once instead of indexing the DataFrame per column.
        row = self.df_.iloc[idx]

        # ``resize`` returns a new image, so the source file can be released
        # as soon as the ``with`` block exits.
        with Image.open(row["image_path"]) as src:
            im_ = src.resize(self.im_size)

        # NOTE(review): only the image is resized here; the mask keeps
        # whatever size ``polygon_to_mask`` produces. Confirm the two match
        # when ``tf`` contains no resize step.
        mask_arr = polygon_to_mask(polygon=row["geometry"]) * 255.0
        mask_ = Image.fromarray(mask_arr).convert("L")

        # Both inputs go through ``tf`` in one call.
        # NOTE(review): the mask is passed as a plain PIL image, so
        # torchvision v2 presumably treats it like a regular image; wrapping
        # it in ``tv_tensors.Mask`` may be preferable - confirm against the
        # transforms v2 documentation.
        im_, mask_ = self.tf((im_, mask_))
        # im_ = v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(im_)
        return im_, mask_
        


def return_dataloader(ds, bsz: int, shuffle: bool, num_workers: int, pin_memory: bool, persistent_workers: bool):
    """Wrap ``ds`` in a ``DataLoader`` configured with the given options.

    Args:
        ds: Dataset to iterate over.
        bsz: Forwarded as ``batch_size``.
        shuffle: Forwarded as ``shuffle``.
        num_workers: Forwarded as ``num_workers``.
        pin_memory: Forwarded as ``pin_memory``.
        persistent_workers: Forwarded as ``persistent_workers``.

    Returns:
        The constructed ``DataLoader``.
    """
    loader_options = {
        "batch_size": bsz,
        "shuffle": shuffle,
        "num_workers": num_workers,
        "pin_memory": pin_memory,
        "persistent_workers": persistent_workers,
    }
    return DataLoader(ds, **loader_options)

What could possibly be wrong?

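Could you try wrapping the image and the mask in `tv_tensors` (for example `tv_tensors.Image` and `tv_tensors.Mask`), as described in the torchvision transforms v2 documentation?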

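Alternatively, you could use the functional API (`torchvision.transforms.v2.functional`), as shown in the torchvision documentation example: make each random decision once, then apply the same operation to both the image and the mask.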