Hey!
I'm trying to use RandomResizedCrop from transforms.v2 for a segmentation model, but I can't get it to transform the images and the masks together.
My code:
transforms_train['shared'] = T.Compose([
    T.RandomResizedCrop(size=[512, 512],
                        scale=(0.25, 2.0),
                        ratio=(0.5, 2.0),
                        interpolation=T.InterpolationMode.BILINEAR,
                        antialias=True),
])
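For reference, this is the behaviour I'm expecting, based on the transforms v2 docs. A minimal standalone sketch with random tensors (not my actual data):

import torch
from torchvision import tv_tensors
from torchvision.transforms import v2 as T

tfm = T.Compose([
    T.RandomResizedCrop(size=[512, 512],
                        scale=(0.25, 2.0),
                        ratio=(0.5, 2.0),
                        interpolation=T.InterpolationMode.BILINEAR,
                        antialias=True),
])

# Fake image / mask pair with the same shapes as one of my real samples
img = tv_tensors.Image(torch.randint(0, 256, (3, 375, 500), dtype=torch.uint8))
mask = tv_tensors.Mask(torch.randint(0, 21, (1, 375, 500), dtype=torch.uint8))

out_img, out_mask = tfm(img, mask)
print(out_img.shape, out_mask.shape)  # I'd expect 3 x 512 x 512 and 1 x 512 x 512

As far as I understand, the mask should automatically get nearest-neighbour interpolation here even though interpolation is set to BILINEAR for the image.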
def __getitem__(self, i):
    '''
    Parameter:
        i [int] : Index of the image to get
    Returns:
        image_tensor [torch.Tensor] : 3 x H x W Tensor
        label_tensor [torch.Tensor] : H x W Tensor as semantic map
    '''
    image = tv_tensors.Image(read_image(self.dataset_dict[i]['img_path'], mode=ImageReadMode.RGB))
    seg_map = tv_tensors.Mask(read_image(self.dataset_dict[i]['semantic_path'], mode=ImageReadMode.GRAY))
    if self.reduce_zero_label:
        seg_map = self._reduce_zero_label(seg_map)
    image, seg_map = self._preprocess(image, seg_map)
    seg_map = seg_map
def _preprocess(self, image, seg_map):
    # Apply 'shared' transform first, if it exists
    if 'shared' in self.transforms:
        print(f"Before shared transform")
        print(f"Dataset mode is: {self.mode}")
        print(f"Image type: {type(image)}, shape: {image.shape}")
        print(f"Image dtype: {image.dtype}")
        print(f"Segmentation map type: {type(seg_map)}, shape: {seg_map.shape}")
        print(f"Segmentation map dtype: {seg_map.dtype}")
        image, seg_map = self.transforms['shared']((image, seg_map))
        print(f"After shared transform")
        print(f"Image type: {type(image)}, shape: {image.shape}")
        print(f"Segmentation map type: {type(seg_map)}, shape: {seg_map.shape}")
    # Apply 'img' transform, if it exists
    if 'img' in self.transforms:
        image = self.transforms['img'](image)
    # Apply 'mask' transform, if it exists
    if 'mask' in self.transforms:
        seg_map = self.transforms['mask'](seg_map)
    return image, seg_map
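For what it's worth, passing the pair as a single tuple (the double parentheses in _preprocess) should be fine as far as I can tell: v2 transforms traverse the input structure and return the same structure back. A quick sketch of that calling convention, again with random tensors:

import torch
from torchvision import tv_tensors
from torchvision.transforms import v2 as T

crop = T.RandomResizedCrop(size=[128, 128], antialias=True)
img = tv_tensors.Image(torch.randint(0, 256, (3, 200, 300), dtype=torch.uint8))
mask = tv_tensors.Mask(torch.randint(0, 21, (1, 200, 300), dtype=torch.uint8))

out_img, out_mask = crop((img, mask))   # one tuple in, a matching tuple back
print(out_img.shape, out_mask.shape)    # both cropped/resized to 128 x 128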
The output I'm getting:
Before shared transform
Dataset mode is: train
Image type: <class 'torchvision.tv_tensors._image.Image'>, shape: torch.Size([3, 375, 500])
Image dtype: torch.uint8
Segmentation map type: <class 'torch.Tensor'>, shape: torch.Size([1, 375, 500])
Segmentation map dtype: torch.uint8
After shared transform
Image type: <class 'torchvision.tv_tensors._image.Image'>, shape: torch.Size([3, 512, 512])
Segmentation map type: <class 'torch.Tensor'>, shape: torch.Size([1, 375, 500])
Before shared transform
Dataset mode is: train
Image type: <class 'torchvision.tv_tensors._image.Image'>, shape: torch.Size([3, 335, 500])
Image dtype: torch.uint8
Segmentation map type: <class 'torch.Tensor'>, shape: torch.Size([1, 335, 500])
Segmentation map dtype: torch.uint8
After shared transform
Image type: <class 'torchvision.tv_tensors._image.Image'>, shape: torch.Size([3, 512, 512])
Segmentation map type: <class 'torch.Tensor'>, shape: torch.Size([1, 335, 500])
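One thing that stands out in these prints: the segmentation map reaches the shared transform as a plain torch.Tensor rather than a tv_tensors.Mask, even though I wrap it in __getitem__. A quick standalone check (a sketch with a random tensor, nothing from my actual pipeline) suggests that ordinary tensor ops drop the Mask wrapper, and _reduce_zero_label presumably modifies the mask in some way, so maybe that's related:

import torch
from torchvision import tv_tensors

m = tv_tensors.Mask(torch.randint(0, 21, (1, 375, 500), dtype=torch.uint8))
print(type(m))                  # tv_tensors Mask, as expected
print(type(m.to(torch.int64)))  # .to() seems to keep the Mask wrapper
print(type(m - 1))              # plain torch.Tensor -- the wrapper is gone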
Either way, I'd expect the segmentation maps to come out the same size as the images (512 x 512). This happens with or without explicitly wrapping the inputs in tv_tensors, and with or without casting the dtypes to float32 / int64 beforehand. Any input on what I'm doing wrong would be greatly appreciated!
Joosep