Invalid argument 0: Sizes of tensors must match except in dimension 1. Got 173 and 172 in dimension 2

I'm trying to use a U-Net for segmentation, but the error appears in the concatenation layer.

This is the down-conv and up-conv part:

import torch
from torch import nn
from torch.nn import Module
import torch.nn.functional as F

class DownConv(Module):
    def __init__(self, in_feat, out_feat, drop_rate=0.4, bn_momentum=0.1):
        super(DownConv, self).__init__()
        self.conv1 = nn.Conv2d(in_feat, out_feat, kernel_size=3, padding=1)
        self.conv1_bn = nn.BatchNorm2d(out_feat, momentum=bn_momentum)
        self.conv1_drop = nn.Dropout2d(drop_rate)

        self.conv2 = nn.Conv2d(out_feat, out_feat, kernel_size=3, padding=1)
        self.conv2_bn = nn.BatchNorm2d(out_feat, momentum=bn_momentum)
        self.conv2_drop = nn.Dropout2d(drop_rate)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.conv1_bn(x)
        x = self.conv1_drop(x)

        x = F.relu(self.conv2(x))
        x = self.conv2_bn(x)
        x = self.conv2_drop(x)
        return x


class UpConv(Module):
    def __init__(self, in_feat, out_feat, drop_rate=0.4, bn_momentum=0.1):
        super(UpConv, self).__init__()
        self.up1 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.downconv = DownConv(in_feat, out_feat, drop_rate, bn_momentum)

    def forward(self, x, y):
        x = self.up1(x)                # doubles H and W
        x = torch.cat([x, y], dim=1)   # concat with the skip connection along channels
        x = self.downconv(x)
        return x

This is the model part:

class Unet(Module):
    def __init__(self, drop_rate=0.4, bn_momentum=0.1):
        super(Unet, self).__init__()

        #Downsampling path
        self.conv1 = DownConv(1, 64, drop_rate, bn_momentum)
        self.mp1 = nn.MaxPool2d(2)

        self.conv2 = DownConv(64, 128, drop_rate, bn_momentum)
        self.mp2 = nn.MaxPool2d(2)

        self.conv3 = DownConv(128, 256, drop_rate, bn_momentum)
        self.mp3 = nn.MaxPool2d(2)

        # Bottleneck
        self.conv4 = DownConv(256, 256, drop_rate, bn_momentum)

        # Upsampling path
        self.up1 = UpConv(512, 256, drop_rate, bn_momentum)
        self.up2 = UpConv(384, 127, drop_rate, bn_momentum)
        self.up3 = UpConv(191, 64, drop_rate, bn_momentum)

        self.conv9 = nn.Conv2d(64, 1, kernel_size=3, padding=1)

    def forward(self, x):
        x1 = self.conv1(x)
        x2 = self.mp1(x1)

        x3 = self.conv2(x2)
        x4 = self.mp2(x3)

        x5 = self.conv3(x4)
        x6 = self.mp3(x5)

        # Bottom
        x7 = self.conv4(x6)

        # print(x7.size(), x5.size())
        # Up-sampling
        x8 = self.up1(x7, x5)
        print(x8.size(), x3.size())
        x9 = self.up2(x8, x3)
        x10 = self.up3(x9, x1)

        x11 = self.conv9(x10)
        preds = torch.sigmoid(x11)  # F.sigmoid is deprecated

        return preds

In the last line I have no idea why the sizes have changed like this; my input is resampled from [2, 1, 512, 512] to [2, 1, 256, 256].

torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 64, 64]) torch.Size([2, 128, 128, 128])
torch.Size([2, 256, 86, 86]) torch.Size([2, 128, 173, 173])

Error traceback:

     32     def forward(self, x, y):
     33         x = self.up1(x)
---> 34         x = torch.cat([x, y], dim=1)
     35         x = self.downconv(x)
     36         return x

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 173 and 172 in dimension 2 
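For reference, torch.cat requires every dimension except the concatenation dimension to match exactly. The shapes from the last log line reproduce the failure on their own (a minimal check, the 172 being 86 doubled by the Upsample):

import torch

x = torch.rand(2, 256, 172, 172)   # x8 after the 2x Upsample in up2 (86 -> 172)
y = torch.rand(2, 128, 173, 173)   # x3, the skip connection
torch.cat([x, y], dim=1)           # RuntimeError: ... Got 173 and 172 in dimension 2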

Can you print all of the outputs x1, x2, …?

These are the shapes of x1 through x11 from the last batch.
The second line is x before the up-convolution.
The last is x after the Upsample inside UpConv, just before the concat (it still has 256 channels, so the concatenation has not happened yet).

x1:  torch.Size([2, 64, 256, 256])
x2:  torch.Size([2, 64, 128, 128])
x3:  torch.Size([2, 128, 128, 128])
x4:  torch.Size([2, 128, 64, 64])
x5:  torch.Size([2, 256, 64, 64])
x6:  torch.Size([2, 256, 32, 32])
x7:  torch.Size([2, 256, 32, 32])
x8:  torch.Size([2, 256, 64, 64])
x9:  torch.Size([2, 127, 128, 128])
x10: torch.Size([2, 64, 256, 256])
x11: torch.Size([2, 1, 256, 256])
torch.Size([2, 256, 86, 86])
torch.Size([2, 256, 172, 172])

I see, I think the input tensor shape is the problem.

I tried to run your code.

This works fine:

rand_tensor = torch.rand(8, 1, 256, 256)
output = model(rand_tensor)

This gives the same error as yours:

rand_tensor = torch.rand(8, 1, 255, 256)
output = model(rand_tensor)

So try adjusting the input shape so that each spatial dimension is divisible by 8 (one factor of 2 for each of your three pooling levels); then every MaxPool2d halves cleanly and every Upsample doubles back to the matching skip size.
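The mismatch itself comes from MaxPool2d flooring odd sizes while Upsample doubles exactly: 173 // 2 == 86 on the way down, but 86 * 2 == 172 on the way up, hence the 172 vs. 173. If you can't control the input size, a common workaround is to pad the upsampled tensor to the skip connection's spatial size before concatenating. A minimal sketch, replacing the forward of your UpConv (not a drop-in from any library):

    def forward(self, x, y):
        x = self.up1(x)
        # y can be one pixel larger per dimension when a size was odd upstream
        dh = y.size(2) - x.size(2)
        dw = y.size(3) - x.size(3)
        # F.pad takes (left, right, top, bottom) for a 4D tensor
        x = F.pad(x, (dw // 2, dw - dw // 2, dh // 2, dh - dh // 2))
        x = torch.cat([x, y], dim=1)
        return self.downconv(x)

With that in place the model should run on odd sizes such as the 255x256 tensor above as well.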

PS: Your code has memory leaks; it won't train for very long.

Right, my input size is torch.Size([2, 1, 256, 256]).

If I resample with my own code there is no problem, but when I use the Resample from medicaltorch I get that error.

The original size is torch.Size([2, 1, 512, 512]):

train_transform = transforms.Compose([
    mt_transforms.Resample(1.385, 1.385),
    mt_transforms.ElasticTransform(alpha_range=(40.0, 60.0),
                                   sigma_range=(2.5, 4.0),
                                   p=0.3),
    mt_transforms.ToTensor(),
])

After resampling I get torch.Size([2, 1, 256, 256]).

And how do I avoid the memory leak?

Thank you.

In the forward function of Unet you are using several variables (x1, x2, …) to store the intermediate results of the layers, and they all stay alive for the whole forward pass. I would reuse a single variable like x wherever possible, as in your other classes; see the sketch below.
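Concretely, only the skip tensors have to outlive their step; everything else can reuse one name. A sketch of the same forward under that convention (keeping x1, x3, x5 for the skip connections):

    def forward(self, x):
        x1 = self.conv1(x)   # kept: skip connection for up3
        x = self.mp1(x1)

        x3 = self.conv2(x)   # kept: skip connection for up2
        x = self.mp2(x3)

        x5 = self.conv3(x)   # kept: skip connection for up1
        x = self.mp3(x5)

        x = self.conv4(x)    # bottleneck

        x = self.up1(x, x5)
        x = self.up2(x, x3)
        x = self.up3(x, x1)

        x = self.conv9(x)
        return torch.sigmoid(x)

Note that autograd still keeps whatever activations it needs for the backward pass, so the savings here are modest; wrapping validation passes in torch.no_grad() saves far more.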

Thank you, I get it. Anyway, do you have any idea why, at the concatenation, the size of y is off by one?
Here is the Resample class from the library:

import numpy as np
from PIL import Image

# MTTransform is the base class from medicaltorch.transforms
class Resample(MTTransform):
    def __init__(self, wspace, hspace,
                 interpolation=Image.BILINEAR,
                 labeled=True):
        self.hspace = hspace
        self.wspace = wspace
        self.interpolation = interpolation
        self.labeled = labeled

    def __call__(self, sample):
        rdict = {}
        input_data = sample['input']
        input_metadata = sample['input_metadata']

        # Voxel dimension in mm
        hzoom, wzoom = input_metadata["zooms"]
        hshape, wshape = input_metadata["data_shape"]

        hfactor = hzoom / self.hspace
        wfactor = wzoom / self.wspace

        # int() truncates, and the zooms vary per image, so the output
        # size varies too and need not be even (e.g. 346 instead of 256)
        hshape_new = int(hshape * hfactor)
        wshape_new = int(wshape * wfactor)

        input_data = input_data.resize((wshape_new, hshape_new),
                                       resample=self.interpolation)
        rdict['input'] = input_data

        if self.labeled:
            gt_data = sample['gt']
            gt_metadata = sample['gt_metadata']
            gt_data = gt_data.resize((wshape_new, hshape_new),
                                     resample=self.interpolation)
            np_gt_data = np.array(gt_data)
            np_gt_data[np_gt_data >= 0.5] = 1.0
            np_gt_data[np_gt_data < 0.5] = 0.0
            gt_data = Image.fromarray(np_gt_data, mode='F')
            rdict['gt'] = gt_data

        sample.update(rdict)
        return sample
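That resize is the culprit: hshape_new = int(hshape * hfactor) truncates, and zooms comes from each image's metadata, so different volumes resample to different sizes. Judging from your log (x3 is 173x173), that batch came out at roughly 346x346 rather than 256x256, and 346 is not divisible by 8, so the pool/upsample round trip loses a pixel. A minimal sketch of a workaround, snapping the resampled shape to a multiple of 8 (snap is a hypothetical helper, not part of medicaltorch):

    def snap(n, multiple=8):
        # round to the nearest multiple so three 2x poolings divide evenly
        return max(multiple, int(round(n / multiple)) * multiple)

    hshape_new = snap(hshape * hfactor)
    wshape_new = snap(wshape * wfactor)

Alternatively, keep Resample as-is and make UpConv tolerant of the off-by-one by padding before the cat, as in the earlier sketch.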