Height and Width of image, mask or masks should be equal. You can disable shapes check by setting a parameter is_check_shapes=False of Compose class (do it only if you are sure about your data consistency)

I am unable to match the height and width of the image and mask.
Here is my code; can anyone help me with this?

!pip install segmentation-models-pytorch
!pip install -U git+https://github.com/albumentations-team/albumentations
!pip install --upgrade opencv-contrib-python

!git clone https://github.com/parth1620/Human-Segmentation-Dataset-master.git

import sys
sys.path.append('/content/Human-Segmentation-Dataset-master')

import torch 
import cv2

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 

from sklearn.model_selection import train_test_split
from tqdm import tqdm

import helper

CSV_FILE = '/content/Human-Segmentation-Dataset-master/train.csv'
DATA_DIR = '/content/'

DEVICE = 'cuda'

EPOCHS = 25
LR = 0.003
IMG_SIZE = 320
BATCH_SIZE = 16

ENCODER = 'timm-efficientnet-b0'
WEIGHT = 'imagenet'

df = pd.read_csv(CSV_FILE)
train_df, valid_df = train_test_split(df, test_size = 0.2, random_state = 42)

import albumentations as A

def get_train_augs():
  return A.Compose([
      A.Resize(IMG_SIZE, IMG_SIZE),
      A.HorizontalFlip(p = 0.5),
      A.VerticalFlip(p = 0.5),
  ])

def get_valid_augs():
  return A.Compose([
      A.Resize(IMG_SIZE, IMG_SIZE),
  ])

from torch.utils.data import Dataset

class SegmentationDataset(Dataset):

  def __init__(self, df, augmentations):

    self.df = df
    self.augmentations = augmentations
    
  def __len__(self):
    return len(self.df)
  
  def __getitem__(self, idx):
    
    row = self.df.iloc[idx]

    image_path = row.images
    mask_path = row.masks

    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    mask = np.expand_dims(mask, axis = -1)  # (H, W) -> (H, W, 1)

    if self.augmentations:
      data = self.augmentations(image = image, mask = mask)
      image = data['image']
      mask = data['mask']

    # (H, W, C) -> (C, H, W), the channel order PyTorch expects
    image = np.transpose(image, (2,0,1)).astype(np.float32)
    mask = np.transpose(mask, (2,0,1)).astype(np.float32)

    image = torch.Tensor(image) / 255.0
    mask = torch.round(torch.Tensor(mask) / 255.0)  # binarise mask to {0, 1}

    return image, mask

trainset = SegmentationDataset(train_df, get_train_augs())
validset = SegmentationDataset(valid_df, get_valid_augs())

print(f"Size of Trainset : {len(trainset)}")
print(f"Size of Validset : {len(validset)}")

idx = 3

image, mask = trainset[idx]
helper.show_image(image, mask)

from torch.utils.data import DataLoader

trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle = True)
validloader = DataLoader(validset, batch_size=BATCH_SIZE)

print(f"Total no. of batches in trainloader : {len(trainloader)}")
print(f"Total no. of batches in validloader : {len(validloader)}")

for image, mask in trainloader:
  break

print(f"One batch images shape : {image.shape}")
print(f"One batch masks shape : {mask.shape}")

I am facing the error in the last for loop; the error is the shape-mismatch message quoted at the top.

Can anyone please help me with this?

Could you print the shape of image and mask inside the Dataset.__getitem__ method before and after applying the transformation, please?
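
For instance, a minimal sketch reusing the names from the code above (the index 3 is arbitrary):

    # Debug sketch: load one image/mask pair exactly as the Dataset does and
    # compare their shapes. If they differ, Compose raises the shape-mismatch
    # error before the "after" line is ever reached.
    row = df.iloc[3]

    image = cv2.cvtColor(cv2.imread(row.images), cv2.COLOR_BGR2RGB)
    mask = np.expand_dims(cv2.imread(row.masks, cv2.IMREAD_GRAYSCALE), axis=-1)
    print("before:", image.shape, mask.shape)  # e.g. (H, W, 3) and (H, W, 1)

    data = get_train_augs()(image=image, mask=mask)
    print("after:", data['image'].shape, data['mask'].shape)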

I ran the code; here are the results:

Before Augmentation:
[[[ 62  90  50]
  [ 70  98  60]
  [ 83 108  76]
  ...
  [ 49  74  96]]
 ...]
[[[0]
  [0]
  [0]
  ...]]

After Augmentation:
[[[ 89  62  53]
  [ 91  64  55]
  [ 93  70  54]
  ...]]
[[[0]
  [0]
  [0]
  ...]]

After Transpose:
[[[ 89.  91.  93. ... 103. 106. 108.]
  [ 84.  87.  89. ... 104. 106. 108.]
  ...]]
[[[0. 0. 0. ... 0. 0. 0.]
  ...]]

After Tensor Transform:
tensor([[[0.3490, 0.3569, 0.3647,  ..., 0.4039, 0.4157, 0.4235],
         [0.3294, 0.3412, 0.3490,  ..., 0.4078, 0.4157, 0.4235],
         ...]])
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         ...]])

(array printouts truncated; these are the pixel values rather than the .shape tuples that were asked for)

I have found the solution. You have to change the Compose call in get_train_augs()/get_valid_augs(), adding is_check_shapes=False:

def get_train_augs():
  return A.Compose([
      A.Resize(IMG_SIZE, IMG_SIZE), 
      A.HorizontalFlip(p = 0.5), 
      A.VerticalFlip(p = 0.5), 
      ], is_check_shapes=False)

def get_valid_augs():
  return A.Compose([
      A.Resize(IMG_SIZE, IMG_SIZE),
      ], is_check_shapes=False)
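
This works here because A.Resize(IMG_SIZE, IMG_SIZE) comes first and Albumentations applies spatial transforms to the image and the mask alike, so both come out at the same size even when they go in at different sizes.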

But this assumes that our data is consistent, right?

How does it work?

When you build a Compose, for instance:

    def get_augs(height, width):
        tfms = [
            A.VerticalFlip(p=0.2),
            A.HorizontalFlip(p=0.2),
            A.GaussNoise(p=0.2),
            A.RandomBrightnessContrast(p=0.3),
            A.RandomShadow(p=0.1),
            A.ElasticTransform(alpha=2, p=0.3),
            A.Resize(height, width, interpolation=cv2.INTER_NEAREST, p=1),
        ]
        return A.Compose(tfms)

you can pass is_check_shapes=False to A.Compose, i.e. A.Compose(tfms, is_check_shapes=False). Compose then skips the up-front check that the image and mask have the same height and width before applying the transforms.
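
If you want to verify the consistency first rather than just silencing the check, here is a quick sketch (assuming the CSV_FILE path and the images/masks columns from the code above):

    import cv2
    import pandas as pd

    df = pd.read_csv('/content/Human-Segmentation-Dataset-master/train.csv')

    # flag any image/mask pair whose height and width disagree
    for _, row in df.iterrows():
        img = cv2.imread(row.images)
        msk = cv2.imread(row.masks, cv2.IMREAD_GRAYSCALE)
        if img is None or msk is None:
            print("unreadable:", row.images, row.masks)
        elif img.shape[:2] != msk.shape[:2]:
            print("mismatch:", row.images, img.shape[:2], row.masks, msk.shape[:2])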