RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 42 but got size 39 for tensor number 1 in the list

Hello, I got this awkward error message and I still do not understand why there is an issue with torch.cat.
This is my code, many thanks for your support.

class DoubleConv(nn.Module):

    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.double_conv(x)

In[5]:

class DownBlock(nn.Module):

    def __init__(self, in_channels, out_channels):
        super(DownBlock, self).__init__()
        self.double_conv = DoubleConv(in_channels, out_channels)
        self.down_sample = nn.MaxPool2d(2)

    def forward(self, x):
        skip_out = self.double_conv(x)
        down_out = self.down_sample(skip_out)
        return (down_out, skip_out)

In[6]:

class UpBlock(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, up_sample_mode):
        super(UpBlock, self).__init__()
        if up_sample_mode == 'conv_transpose':
            self.up_sample = nn.ConvTranspose2d(in_channels-out_channels, in_channels-out_channels, kernel_size=2, stride=2, padding=2)
        elif up_sample_mode == 'bilinear':
            self.up_sample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            raise ValueError("Unsupported `up_sample_mode` (can take one of `conv_transpose` or `bilinear`)")
        self.double_conv = DoubleConv(in_channels, out_channels)

    def forward(self, down_input, skip_input):
        x = self.up_sample(down_input)
        x = torch.cat([x, skip_input], dim=1)
        return self.double_conv(x)

In[9]:

class Siamese_UNet_Assement(nn.Module):

    def __init__(self, out_classes, up_sample_mode='conv_transpose'):
        super(Siamese_UNet_Assement, self).__init__()
        self.up_sample_mode = up_sample_mode
        # Downsampling path
        self.down_conv1 = DownBlock(3, 64)
        self.down_conv2 = DownBlock(64, 128)
        self.down_conv3 = DownBlock(128, 256)
        self.down_conv4 = DownBlock(256, 512)
        # Bottleneck
        self.double_conv = DoubleConv(512, 1024)
        # Upsampling path
        self.up_conv4 = UpBlock(512 + 1024, 512, self.up_sample_mode)
        self.up_conv3 = UpBlock(256 + 512, 256, self.up_sample_mode)
        self.up_conv2 = UpBlock(128 + 256, 128, self.up_sample_mode)
        self.up_conv1 = UpBlock(64 + 128, 64, self.up_sample_mode)
        # Final convolution
        self.conv_last = nn.Conv2d(64, out_classes, kernel_size=1)

    def forward(self, input1, input2):

        # UNet 1

        # Encoder
        enc1_1, skip1_out1 = self.down_conv1(input1)
        enc1_2, skip2_out1 = self.down_conv2(enc1_1)
        enc1_3, skip3_out1 = self.down_conv3(enc1_2)
        enc1_4, skip4_out1 = self.down_conv4(enc1_3)
        bottleneck_1 = self.double_conv(enc1_4)

        # Decoder
        dec4_1 = self.up_conv4(bottleneck_1, skip4_out1)
        dec3_1 = self.up_conv3(dec4_1, skip3_out1)
        dec2_1 = self.up_conv2(dec3_1, skip2_out1)
        dec1_1 = self.up_conv1(dec2_1, skip1_out1)
        dec1_1 = self.conv_last(dec1_1)

        # UNet 2

        # Encoder
        enc2_1, skip1_out2 = self.down_conv1(input2)
        enc2_2, skip2_out2 = self.down_conv2(enc2_1)
        enc2_3, skip3_out2 = self.down_conv3(enc2_2)
        enc2_4, skip4_out2 = self.down_conv4(enc2_3)
        bottleneck_2 = self.double_conv(enc2_4)

        # Decoder
        dec4_2 = self.up_conv4(bottleneck_2, skip4_out2)
        dec3_2 = self.up_conv3(dec4_2, skip3_out2)
        dec2_2 = self.up_conv2(dec3_2, skip2_out2)
        dec1_2 = self.up_conv1(dec2_2, skip1_out2)
        dec1_2 = self.conv_last(dec1_2)

        # Siamese: concatenate the two decoder outputs along the channel dimension
        output = torch.cat((dec1_1, dec1_2), 1)

        return output

and the error message:

File ~/codes/Buldings_damage_assessment/models/testUNet_Assement.py:135, in Siamese_UNet_Assement.forward(self, input1, input2)
132 bottleneck_1 = self.double_conv(enc1_4)
134 #Decoder
---> 135 dec4_1= self.up_conv4(bottleneck_1, skip4_out1)
136 dec3_1_= self.up_conv3(dec4_1, skip3_out1)
137 dec2_1= self.up_conv2(dec3_1_, skip2_out1)

File ~/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don’t have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
---> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/codes/Buldings_damage_assessment/models/testUNet_Assement.py:94, in UpBlock.forward(self, down_input, skip_input)
92 def forward(self, down_input, skip_input):
93 x = self.up_sample(down_input)
---> 94 x = torch.cat([x, skip_input], dim=1)
95 return self.double_conv(x)

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 42 but got size 39 for tensor number 1 in the list.

Your code is neither executable nor properly formatted, which makes it hard to debug.
However, the error is raised in torch.cat([x, skip_input], dim=1), which expects x and skip_input to have the same shape in all dimensions except dim1, the dimension used to concatenate these tensors.
Here is a small example:

x = torch.randn(42, 10)
skip_input = torch.randn(42, 8)

# works
out = torch.cat((x, skip_input), dim=1)
print(out.shape)
# torch.Size([42, 18])

# fails, since two sizes differ
skip_input = torch.randn(39, 8)
out = torch.cat((x, skip_input), dim=1)
# RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 42 but got size 39 for tensor number 1 in the list.
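
In your UpBlock this most likely happens because the 3x3 convolutions use padding=2 (each one grows the feature map by 2 pixels) and the ConvTranspose2d also gets padding=2, so the upsampled tensor and the stored skip tensor end up with different heights and widths. A minimal sketch of one workaround, resizing the upsampled activation to the skip connection's spatial size right before the concatenation (my suggestion, not your original code):

import torch.nn.functional as F

def forward(self, down_input, skip_input):
    x = self.up_sample(down_input)
    # make the spatial dims of x match skip_input before concatenating on dim1
    if x.shape[2:] != skip_input.shape[2:]:
        x = F.interpolate(x, size=skip_input.shape[2:], mode='bilinear', align_corners=False)
    x = torch.cat([x, skip_input], dim=1)
    return self.double_conv(x)

The cleaner fix is the standard U-Net setup: padding=1 for the 3x3 convolutions and no padding on the ConvTranspose2d, which keeps the encoder and decoder feature maps the same size (for inputs whose height and width are divisible by 16), so no resizing is needed.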

Thank you for your time. The issue is that I would like it to work for any dataset without changing the sizes every time. Currently the bug is in my U-Net decoder, after the encoder, so my encoder is fine but not my decoder, which is surprising. What do you think?
I will provide all my code, sorry to disturb you and thank you for your time. The model is below:
import glob
import os
import pickle
import sys
from pathlib import Path
import random
from tqdm import tqdm

import torchvision.models as models
from torch.nn import Module, Dropout
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset, random_split, SubsetRandomSampler, ConcatDataset
import torchvision
from torchvision import transforms, datasets
import segmentation_models_pytorch as smp
from segmentation_models_pytorch import utils

#from tversky_loss import tversky_loss
from models.testUNet_Assement import Siamese_UNet_Assement

from PIL import Image
import pathlib
from typing import Tuple

from torch.autograd import Variable
from loss.DiceLoss import diceLoss
from loss.FocalTverskyLoss import focalTverskyLoss
import torchvision.transforms.functional as F

Transforms = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.RandomVerticalFlip(),
        transforms.RandomHorizontalFlip(),
        # image translate of 10 pixels
        transforms.RandomAffine(0, translate=(0, 1)),
        transforms.ToTensor()
    ]
)
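
A side note on RandomAffine above: its translate argument is a fraction of the image size rather than a pixel count, so translate=(0, 1) allows vertical shifts of up to the full image height. If roughly 10 pixels on a 224-pixel image were intended, a sketch would be:

# ~10 px on a 224-px image, expressed as a fraction of the image size
transforms.RandomAffine(0, translate=(0, 10 / 224))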

from torch.utils.data import Dataset
from torchvision import transforms

# 1. Subclass torch.utils.data.Dataset

class CustomDataset(Dataset):

    # 2. Initialize with a targ_dir and transform (optional) parameter
    def __init__(self, targ_dir: str, transform=None) -> None:
        self.paths_pre = list(pathlib.Path(targ_dir).glob("*/*.png"))
        self.paths_post = list(pathlib.Path(targ_dir).glob("*/*.png"))

    # 3. Overwrite the __len__() method (optional but recommended for subclasses of torch.utils.data.Dataset)
    def __len__(self) -> int:
        "Returns the total number of samples."
        return min(len(self.paths_pre), len(self.paths_post))

    # 4. Make function to load images
    def __getitem__(self, index: int) -> dict:
        "Opens a pre/post image pair via its paths and returns it."
        image_path_pre = self.paths_pre[index]
        image_path_post = self.paths_post[index]

        transform = transforms.ToTensor()

        img_pre = Image.open(image_path_pre).convert('RGB')
        img_pre = transform(img_pre)

        img_post = Image.open(image_path_post).convert('RGB')
        img_post = transform(img_post)

        return {'pre_image': img_pre, 'post_image': img_post}

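The DataLoaders below wrap Train and Val datasets that are not defined in the snippet; a minimal sketch of how they could be built with the class above (the directory paths are placeholders, not from the original code):

# placeholder paths for illustration only
Train = CustomDataset(targ_dir="data/train")
Val = CustomDataset(targ_dir="data/val")
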
dataloader_train=DataLoader(Train, batch_size=2, shuffle=True)
dataloader_val = DataLoader(Val, batch_size=2, shuffle=True)

# define color to label encoding dictionary

color_to_label = {
    (0, 0, 0): 0,       # background
    (255, 0, 0): 1,     # label 1 destroyed
    (127, 255, 0): 2,   # label 2 minor destroyed
    (218, 165, 32): 3,  # label 3 semi destroyed
    (0, 0, 255): 4      # label 4 destroyed
}
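
The losses used in the training loop below typically expect class-index targets (nn.CrossEntropyLoss, for instance, wants a LongTensor of shape [N, H, W] rather than an RGB mask), so the color-coded masks would first be mapped through this dictionary. A minimal sketch of that mapping (the mask_to_label helper is illustrative, it is not part of the original code):

def mask_to_label(mask_rgb):
    # mask_rgb: uint8 tensor of shape (3, H, W) holding a color-coded mask
    label = torch.zeros(mask_rgb.shape[1:], dtype=torch.long)
    for color, cls in color_to_label.items():
        color_t = torch.tensor(color, dtype=mask_rgb.dtype).view(3, 1, 1)
        label[(mask_rgb == color_t).all(dim=0)] = cls
    return label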

val_loss=[]
train_loss=[]

precision_val = []
recall_val = []
f1_score_val = []

precision_train = []
recall_train = []
f1_score_train = []

indices_train=[]
indices_val=[]

def training(epochs):

    #label= {(0, 0, 0): 0, (255, 255, 255): 1, (255, 0, 0): 2, (0, 255, 0): 3, (0, 0, 255): 4}
    Model.train()
    epoch_loss = 0.0
    running_loss_train = 0.0

    #for label,data in dataloader_train:
    for label, data in enumerate(dataloader_train):

        x_pre = data['pre_image'].to(device=device)
        x_post = data['post_image'].to(device=device)

        #label = label.to(device)
        #label = label.to(torch.float32)
        #label = label.unsqueeze(1)

        # Clear the gradients
        optimizer.zero_grad()

        output = Model(x_pre, x_post)
        #loss = sum(loss1,loss2,loss3)

        loss1 = diceLoss(output, label)
        loss2 = focalTverskyLoss(output, label)
        loss3 = torch.nn.CrossEntropyLoss()(output, label)
        total_loss = loss1 + loss2 + loss3

        total_loss.backward()

        # Update weights
        optimizer.step()

        # Calculate prediction
        prediction_train = output.argmax(dim=1)

        running_loss_train += total_loss.item()
        epoch_loss += running_loss_train / len(dataloader_train)

        #acc_train += (prediction_train == label).type(torch.float).sum().item()

        Tp = ((label == 1) & (prediction_train == 1)).sum().item()
        Fp = ((label == 0) & (prediction_train == 1)).sum().item()
        Fn = ((label == 1) & (prediction_train == 0)).sum().item()

        # Calculate precision, recall, and F1-score
        Precision_train = Tp / (Tp + Fp)
        Recall_train = Tp / (Tp + Fn)
        F1_score_train = 2 * (Precision_train * Recall_train) / (Precision_train + Recall_train)

    # Generate color-coded image from predicted label values
    color_image = torch.zeros((3, height, width))  # initialize empty image
    for color, label in color_to_label.items():
        indices_train = (prediction_train == label)
        color_image[:, indices_train] = torch.tensor(color).unsqueeze(1).float()

    Model.eval()
    epoch_val_loss = 0.0
    running_loss_val = 0.0

    with torch.no_grad():

        for label, data in enumerate(dataloader_val):

            #inputs = inputs.to(device)
            #label = label.to(device)
            #label = label.to(torch.float32)
            #label = label.unsqueeze(1)

            x_pre = data['pre_image'].to(device=device)  # move to device, e.g. GPU
            x_post = data['post_image'].to(device=device)

            output = Model(x_pre, x_post)

            loss1 = diceLoss(output, label)
            loss2 = focalTverskyLoss(output, label)
            loss3 = torch.nn.CrossEntropyLoss()(output, label)
            total_loss = loss1 + loss2 + loss3

            prediction_val = output.argmax(dim=1)

            running_loss_val += total_loss.item() * x_pre.size(0)

            # Epoch
            epoch_val_loss += running_loss_val / len(dataloader_val)

            #acc_val += (prediction_val == label).type(torch.float).sum().item()

            Tp = ((label == 1) & (prediction_val == 1)).sum().item()
            Fp = ((label == 0) & (prediction_val == 1)).sum().item()
            Fn = ((label == 1) & (prediction_val == 0)).sum().item()

            # Calculate precision, recall, and F1-score
            Precision_val = Tp / (Tp + Fp)
            Recall_val = Tp / (Tp + Fn)
            F1_score_val = 2 * (Precision_val * Recall_val) / (Precision_val + Recall_val)

        # Generate color-coded image from predicted label values
        color_image = torch.zeros((3, height, width))  # initialize empty image
        for color, label in color_to_label.items():
            indices_val = (prediction_val == label)
            color_image[:, indices_val] = torch.tensor(color).unsqueeze(1).float()

    val_loss.append(epoch_val_loss)
    train_loss.append(epoch_loss)

    precision_train.append(Precision_train)
    recall_train.append(Recall_train)
    f1_score_train.append(F1_score_train)

    precision_val.append(Precision_val)
    recall_val.append(Recall_val)
    f1_score_val.append(F1_score_val)

    indices_train.append(indices_train)
    indices_val.append(indices_val)

    torch.save(Model, 'model_UNet_Assement.pt')

    print(
        f"Epoch : {epochs+1} - train_loss : {epoch_loss:.4f} - val_loss : {epoch_val_loss:.4f} \n")

    return
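
For completeness, a minimal sketch of the setup the function above relies on, since Model, optimizer and device are not defined in the posted code (the Adam optimizer, learning rate and epoch count are placeholders):

# placeholders for illustration; adjust to the actual training setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Model = Siamese_UNet_Assement(out_classes=5).to(device)   # 5 classes as in color_to_label
optimizer = optim.Adam(Model.parameters(), lr=1e-4)

for epoch in range(10):
    training(epoch)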