Batch size mismatch

Hi. I am really new to deep learning and I am trying to build a school project using the ESNet architecture. For now I just want to test it on a sample dataset (Cats vs. Dogs), but I keep getting the error:

Expected input batch_size (4) to match target batch_size (1).

This is the architecture I used:

###################################################################################################
#ESNet: An Efficient Symmetric Network for Real-time Semantic Segmentation
#Paper-Link: https://arxiv.org/pdf/1906.09826.pdf
###################################################################################################

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torchsummary import summary

class DownsamplerBlock(nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()

        self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True)
        self.pool = nn.MaxPool2d(2, stride=2)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, input):
        x1 = self.pool(input)
        x2 = self.conv(input)

        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])

        print(x1.shape)
        output = torch.cat([x2, x1], 1)
        output = self.bn(output)
        output = self.relu(output)
        return output


class UpsamplerBlock(nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()

        self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)

    def forward(self, input):
        output = self.conv(input)
        output = self.bn(output)
        return F.relu(output)


class FCU(nn.Module):
    def __init__(self, chann, kernel_size, dropprob, dilated):
        """
        Factorized Convolution Unit
        """
        super(FCU, self).__init__()

        padding = int((kernel_size-1)//2) * dilated

        self.conv3x1_1 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(int((kernel_size-1)//2)*1,0), bias=True)
        self.conv1x3_1 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,int((kernel_size-1)//2)*1), bias=True)
        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)
        self.conv3x1_2 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(padding,0), bias=True, dilation=(dilated,1))
        self.conv1x3_2 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,padding), bias=True, dilation=(1, dilated))
        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)

        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout2d(dropprob)

    def forward(self, input):
        residual = input
        output = self.conv3x1_1(input)
        output = self.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = self.relu(output)

        output = self.conv3x1_2(output)
        output = self.relu(output)
        output = self.conv1x3_2(output)
        output = self.bn2(output)

        if (self.dropout.p != 0):
            output = self.dropout(output)

        return F.relu(residual+output, inplace=True)


class PFCU(nn.Module):
    def __init__(self, chann):
        """
        Parallel Factorized Convolution Unit
        """
        super(PFCU, self).__init__()

        self.conv3x1_1 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(1,0), bias=True)
        self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True)
        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)

        self.conv3x1_22 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(2,0), bias=True, dilation=(2,1))
        self.conv1x3_22 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,2), bias=True, dilation=(1,2))
        self.conv3x1_25 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(5,0), bias=True, dilation=(5,1))
        self.conv1x3_25 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,5), bias=True, dilation=(1,5))
        self.conv3x1_29 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(9,0), bias=True, dilation=(9,1))
        self.conv1x3_29 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,9), bias=True, dilation=(1,9))
        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)

        self.dropout = nn.Dropout2d(0.3)

    def forward(self, input):
        residual = input
        output = self.conv3x1_1(input)
        output = F.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = F.relu(output)

        output2 = self.conv3x1_22(output)
        output2 = F.relu(output2)
        output2 = self.conv1x3_22(output2)
        output2 = self.bn2(output2)
        if (self.dropout.p != 0):
            output2 = self.dropout(output2)

        output5 = self.conv3x1_25(output)
        output5 = F.relu(output5)
        output5 = self.conv1x3_25(output5)
        output5 = self.bn2(output5)
        if (self.dropout.p != 0):
            output5 = self.dropout(output5)

        output9 = self.conv3x1_29(output)
        output9 = F.relu(output9)
        output9 = self.conv1x3_29(output9)
        output9 = self.bn2(output9)
        if (self.dropout.p != 0):
            output9 = self.dropout(output9)

        return F.relu(residual+output2+output5+output9, inplace=True)


class ESNet(nn.Module):
    def __init__(self, classes):
        super().__init__()
        #-----ESNET---------#
        self.initial_block = DownsamplerBlock(1, 16)

        self.layers = nn.ModuleList()

        for x in range(0, 3):
            self.layers.append(FCU(16, 3, 0.03, 1))

        self.layers.append(DownsamplerBlock(16, 64))

        for x in range(0, 2):
            self.layers.append(FCU(64, 5, 0.03, 1))

        self.layers.append(DownsamplerBlock(64, 128))

        for x in range(0, 3):
            self.layers.append(PFCU(chann=128))

        self.layers.append(UpsamplerBlock(128, 64))
        self.layers.append(FCU(64, 5, 0, 1))
        self.layers.append(FCU(64, 5, 0, 1))

        self.layers.append(UpsamplerBlock(64, 16))
        self.layers.append(FCU(16, 3, 0, 1))
        self.layers.append(FCU(16, 3, 0, 1))

        self.output_conv = nn.ConvTranspose2d(16, classes, 2, stride=2, padding=0, output_padding=0, bias=True)

    def forward(self, input):
        output = self.initial_block(input)

        for layer in self.layers:
            output = layer(output)

        output = self.output_conv(output)
        return output


"""print layers and params of network"""
if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ESNet(classes=2).to(device)
    summary(model, (1, 100, 100))

And this is the rest of my code:

import numpy as np
from tqdm import tqdm
import torch.optim as optim

training_data = np.load("/content/thirdy/training_data_grayscale.npy", allow_pickle=True)

x = torch.Tensor([i[0] for i in training_data]).view(-1, 100, 100)
y = torch.Tensor([i[1] for i in training_data])

VAL_PCT = 0.1
val_size = int(len(x)*VAL_PCT)
print(val_size)

train_x = x[:-val_size]
train_y = y[:-val_size]
test_x = x[-val_size:]
test_y = y[-val_size:]

import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.CrossEntropyLoss()

BATCH_SIZE = 1
EPOCHS = 1

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_x), BATCH_SIZE)):
        batch_x = train_x[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
        print(batch_x.shape)
        batch_y = train_y[i:i+BATCH_SIZE]
        print(batch_y.shape)

        model.zero_grad()

        outputs = model(batch_x)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update

    print(f"Epoch: {epoch}. Loss: {loss}")

And this is the error I always get, no matter how I try to apply the suggestions I found online:

  0%|          | 0/22452 [00:00<?, ?it/s]
torch.Size([4, 1, 50, 50])
torch.Size([1, 2])
torch.Size([4, 2, 56, 56])

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-...> in <module>()
     13         outputs = model(batch_x)
     14         outputs = outputs.squeeze(0)
---> 15         loss = loss_function(outputs, batch_y)
     16         loss.backward()
     17         optimizer.step()    # Does the update

3 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2214     if input.size(0) != target.size(0):
   2215         raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
-> 2216                          .format(input.size(0), target.size(0)))
   2217     if dim == 2:
   2218         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

ValueError: Expected input batch_size (4) to match target batch_size (1).

I guess you are reshaping the intermediate tensors somewhere in your model and are reducing the batch size to 1.

Your current code is not easily readable, so please feel free to post an executable code snippet that reproduces this error by wrapping it in three backticks ``` :wink:
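As a quick illustration of how a stray `view` call can silently change the batch dimension (a minimal sketch with random data, not your actual dataset):

import torch

x = torch.randn(1, 1, 100, 100)   # one grayscale 100x100 image, batch_size = 1
x4 = x.view(-1, 1, 50, 50)        # same 10000 values reinterpreted as four 50x50 images
print(x4.shape)                    # torch.Size([4, 1, 50, 50])

If the target still has batch_size 1, the loss function will raise exactly this kind of mismatch.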

I am really new to this, sir, and I am still trying to understand how networks work; I find it quite tricky, so any helpful comments are welcome! By the way, this is my complete code, and this time the error was different and I have no idea why:


###################################################################################################
#ESNet: An Efficient Symmetric Network for Real-time Semantic Segmentation
#Paper-Link: https://arxiv.org/pdf/1906.09826.pdf
###################################################################################################

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torchsummary import summary

class DownsamplerBlock(nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()

        self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True)
        self.pool = nn.MaxPool2d(2, stride=2)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, input):
        x1 = self.pool(input)
        x2 = self.conv(input)

        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

       

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])


        output = torch.cat([x2, x1], 1)
        output = self.bn(output)
        output = self.relu(output)

        return output



class UpsamplerBlock (nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()

        self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)

    def forward(self, input):

        output = self.conv(input)
        output = self.bn(output)

        return F.relu(output)
		
class FCU(nn.Module):
    def __init__(self, chann, kernel_size,dropprob, dilated): 
        """
        Factorized Convolution Unit
        """     
        super(FCU,self).__init__()

        padding = int((kernel_size-1)//2) * dilated

        self.conv3x1_1 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(int((kernel_size-1)//2)*1,0), bias=True)

        self.conv1x3_1 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,int((kernel_size-1)//2)*1), bias=True)

        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)

        self.conv3x1_2 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(padding,0), bias=True, dilation = (dilated,1))

        self.conv1x3_2 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,padding), bias=True, dilation = (1, dilated))

        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)
        
        self.relu = nn.ReLU(inplace = True)
        self.dropout = nn.Dropout2d(dropprob)
        
    def forward(self, input):
        residual = input
        output = self.conv3x1_1(input)
        output = self.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = self.relu(output)

        output = self.conv3x1_2(output)
        output = self.relu(output)
        output = self.conv1x3_2(output)
        output = self.bn2(output)   

        if (self.dropout.p != 0):
            output = self.dropout(output)
        
        return F.relu(residual+output,inplace=True) 


class PFCU(nn.Module):
    def __init__(self,chann):
        """
        Parallel Factorized Convolution Unit
        """         
    
        super(PFCU,self).__init__()
        
        self.conv3x1_1 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(1,0), bias=True)

        self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True)

        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)

        self.conv3x1_22 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(2,0), bias=True, dilation = (2,1))
        self.conv1x3_22 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,2), bias=True, dilation = (1,2))

        self.conv3x1_25 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(5,0), bias=True, dilation = (5,1))
        self.conv1x3_25 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,5), bias=True, dilation = (1,5))

        self.conv3x1_29 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(9,0), bias=True, dilation = (9,1))
        self.conv1x3_29 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,9), bias=True, dilation = (1,9))

        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)

        self.dropout = nn.Dropout2d(0.3)

    def forward(self, input):
        residual = input
        output = self.conv3x1_1(input)
        output = F.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = F.relu(output)

        output2 = self.conv3x1_22(output)
        output2 = F.relu(output2)
        output2 = self.conv1x3_22(output2)
        output2 = self.bn2(output2)
        if (self.dropout.p != 0):
            output2 = self.dropout(output2)

        output5 = self.conv3x1_25(output)
        output5 = F.relu(output5)
        output5 = self.conv1x3_25(output5)
        output5 = self.bn2(output5)
        if (self.dropout.p != 0):
            output5 = self.dropout(output5)

        output9 = self.conv3x1_29(output)
        output9 = F.relu(output9)
        output9 = self.conv1x3_29(output9)
        output9 = self.bn2(output9)
        if (self.dropout.p != 0):
            output9 = self.dropout(output9)

        return F.relu(residual+output2+output5+output9,inplace=True)

		
class ESNet(nn.Module):
    def __init__(self, classes):
        super().__init__()
        #-----ESNET---------#
        self.initial_block = DownsamplerBlock(3, 16)

        self.layers = nn.ModuleList()
        
        for x in range(0, 3):
           self.layers.append(FCU(16, 3, 0.03, 1))  
        
        self.layers.append(DownsamplerBlock(16,64))

        for x in range(0, 2):
           self.layers.append(FCU(64, 5, 0.03, 1))  

        self.layers.append(DownsamplerBlock(64,128))

        for x in range(0, 3):   
            self.layers.append(PFCU(chann=128)) 

        self.layers.append(UpsamplerBlock(128,64))
        self.layers.append(FCU(64, 5, 0, 1))
        self.layers.append(FCU(64, 5, 0, 1))

        self.layers.append(UpsamplerBlock(64,16))
        self.layers.append(FCU(16, 3, 0, 1))
        self.layers.append(FCU(16, 3, 0, 1))

        self.output_conv = nn.ConvTranspose2d( 16, classes, 2, stride=2, padding=0, output_padding=0, bias=True)

    def forward(self, input):
        output = self.initial_block(input)

        for layer in self.layers:
            output = layer(output)

        output = self.output_conv(output)
        return output


"""print layers and params of network"""
if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ESNet(classes=2).to(device)
    summary(model,(3,100,100))

import numpy as np
from tqdm import tqdm
import torch.optim as optim

training_data = np.load("/content/thirdy/training_data.npy", allow_pickle = True)

x = torch.Tensor([i[0] for i in training_data]).view(-1, 3, 100, 100)


print(x.shape)

y = torch.Tensor([i[1] for i in training_data])

print(y.shape)

VAL_PCT = 0.1
val_size = int(len(x)*VAL_PCT)
print(val_size)

train_x = x[:-val_size]
train_y = y[:-val_size]

test_x = x[-val_size:]
test_y = y[-val_size:]

print(len(train_x))
print(len(test_x))

import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr = 0.001)
loss_function = nn.CrossEntropyLoss()

BATCH_SIZE = 10
EPOCHS = 1

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_x), BATCH_SIZE)): 
        batch_x = train_x[i:i+BATCH_SIZE].view(-1, 3, 100, 100)
        print(batch_x.shape)
        batch_y = train_y[i:i+BATCH_SIZE].squeeze(1)
        print(batch_y.shape)

        model.zero_grad()

        outputs = model(batch_x)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update

    print(f"Epoch: {epoch}. Loss: {loss}")```

Thanks for the code. Unfortunately it’s not executable, but based on the view operation I assume your input has the shape [batch_size, 3, 100, 100].
Based on this shape the output would have the shape [batch_size, 2, 104, 104] and thus the target should have the shape [batch_size, 104, 104] and contain values in [0, 1].
Using these shapes, the script works fine, so you could check the shapes of your inputs, outputs, and targets and make sure they match the shapes mentioned above.
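A minimal sketch to double-check those shapes, assuming the ESNet(classes=2) defined above and random data:

import torch
import torch.nn as nn

model = ESNet(classes=2)
x = torch.randn(10, 3, 100, 100)              # random input batch
out = model(x)
print(out.shape)                              # expected: torch.Size([10, 2, 104, 104])

target = torch.randint(0, 2, (10, 104, 104))  # spatial target with class indices in [0, 1]
loss = nn.CrossEntropyLoss()(out, target)
print(loss.item())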

Thank you so much for your response, sir. But as a newbie I don't understand why my model should have the shapes you stated above, specifically the output shape and the target shape. And I still don't fully understand what the target really is. Can you please explain? Thanks!

By the way, sir, this is my complete code (in the previous code I forgot to include the repo where I store my custom dataset):

!git clone https://gitlab.com/mariacassie/thirdy.git

###################################################################################################
#ESNet: An Efficient Symmetric Network for Real-time Semantic Segmentation
#Paper-Link: https://arxiv.org/pdf/1906.09826.pdf
###################################################################################################

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torchsummary import summary

class DownsamplerBlock(nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()

        self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True)
        self.pool = nn.MaxPool2d(2, stride=2)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, input):
        x1 = self.pool(input)
        x2 = self.conv(input)

        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

       

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])


        output = torch.cat([x2, x1], 1)
        output = self.bn(output)
        output = self.relu(output)

        return output



class UpsamplerBlock (nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()

        self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)

    def forward(self, input):

        output = self.conv(input)
        output = self.bn(output)

        return F.relu(output)
		
class FCU(nn.Module):
    def __init__(self, chann, kernel_size,dropprob, dilated): 
        """
        Factorized Convolution Unit
        """     
        super(FCU,self).__init__()

        padding = int((kernel_size-1)//2) * dilated

        self.conv3x1_1 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(int((kernel_size-1)//2)*1,0), bias=True)

        self.conv1x3_1 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,int((kernel_size-1)//2)*1), bias=True)

        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)

        self.conv3x1_2 = nn.Conv2d(chann, chann, (kernel_size,1), stride=1, padding=(padding,0), bias=True, dilation = (dilated,1))

        self.conv1x3_2 = nn.Conv2d(chann, chann, (1,kernel_size), stride=1, padding=(0,padding), bias=True, dilation = (1, dilated))

        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)
        
        self.relu = nn.ReLU(inplace = True)
        self.dropout = nn.Dropout2d(dropprob)
        
    def forward(self, input):
        residual = input
        output = self.conv3x1_1(input)
        output = self.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = self.relu(output)

        output = self.conv3x1_2(output)
        output = self.relu(output)
        output = self.conv1x3_2(output)
        output = self.bn2(output)   

        if (self.dropout.p != 0):
            output = self.dropout(output)
        
        return F.relu(residual+output,inplace=True) 


class PFCU(nn.Module):
    def __init__(self,chann):
        """
        Parallel Factorized Convolution Unit
        """         
    
        super(PFCU,self).__init__()
        
        self.conv3x1_1 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(1,0), bias=True)

        self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True)

        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)

        self.conv3x1_22 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(2,0), bias=True, dilation = (2,1))
        self.conv1x3_22 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,2), bias=True, dilation = (1,2))

        self.conv3x1_25 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(5,0), bias=True, dilation = (5,1))
        self.conv1x3_25 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,5), bias=True, dilation = (1,5))

        self.conv3x1_29 = nn.Conv2d(chann, chann, (3,1), stride=1, padding=(9,0), bias=True, dilation = (9,1))
        self.conv1x3_29 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,9), bias=True, dilation = (1,9))

        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)

        self.dropout = nn.Dropout2d(0.3)

    def forward(self, input):
        residual = input
        output = self.conv3x1_1(input)
        output = F.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = F.relu(output)

        output2 = self.conv3x1_22(output)
        output2 = F.relu(output2)
        output2 = self.conv1x3_22(output2)
        output2 = self.bn2(output2)
        if (self.dropout.p != 0):
            output2 = self.dropout(output2)

        output5 = self.conv3x1_25(output)
        output5 = F.relu(output5)
        output5 = self.conv1x3_25(output5)
        output5 = self.bn2(output5)
        if (self.dropout.p != 0):
            output5 = self.dropout(output5)

        output9 = self.conv3x1_29(output)
        output9 = F.relu(output9)
        output9 = self.conv1x3_29(output9)
        output9 = self.bn2(output9)
        if (self.dropout.p != 0):
            output9 = self.dropout(output9)

        return F.relu(residual+output2+output5+output9,inplace=True)

		
class ESNet(nn.Module):
    def __init__(self, classes):
        super().__init__()
        #-----ESNET---------#
        self.initial_block = DownsamplerBlock(3, 16)

        self.layers = nn.ModuleList()
        
        for x in range(0, 3):
           self.layers.append(FCU(16, 3, 0.03, 1))  
        
        self.layers.append(DownsamplerBlock(16,64))

        for x in range(0, 2):
           self.layers.append(FCU(64, 5, 0.03, 1))  

        self.layers.append(DownsamplerBlock(64,128))

        for x in range(0, 3):   
            self.layers.append(PFCU(chann=128)) 

        self.layers.append(UpsamplerBlock(128,64))
        self.layers.append(FCU(64, 5, 0, 1))
        self.layers.append(FCU(64, 5, 0, 1))

        self.layers.append(UpsamplerBlock(64,16))
        self.layers.append(FCU(16, 3, 0, 1))
        self.layers.append(FCU(16, 3, 0, 1))

        self.output_conv = nn.ConvTranspose2d( 16, classes, 2, stride=2, padding=0, output_padding=0, bias=True)

    def forward(self, input):
        output = self.initial_block(input)

        for layer in self.layers:
            output = layer(output)

        output = self.output_conv(output)
        return output


"""print layers and params of network"""
if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ESNet(classes=2).to(device)
    summary(model,(3,100,100))

import numpy as np
from tqdm import tqdm
import torch.optim as optim

training_data = np.load("/content/thirdy/training_data.npy", allow_pickle = True)

x = torch.Tensor([i[0] for i in training_data]).view(-1, 3, 100, 100)


print(x.shape)

y = torch.Tensor([i[1] for i in training_data])

print(y.shape)

VAL_PCT = 0.1
val_size = int(len(x)*VAL_PCT)
print(val_size)

train_x = x[:-val_size]
train_y = y[:-val_size]

test_x = x[-val_size:]
test_y = y[-val_size:]

print(len(train_x))
print(len(test_x))

import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr = 0.001)
loss_function = nn.CrossEntropyLoss()

BATCH_SIZE = 100
EPOCHS = 1

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_x), BATCH_SIZE)): 
        batch_x = train_x[i:i+BATCH_SIZE].view(-1, 3, 100, 100)
        batch_y = train_y[i:i+BATCH_SIZE]
      

        model.zero_grad()

        outputs = model(batch_x)
        print(outputs.shape)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update

    print(f"Epoch: {epoch}. Loss: {loss}")

For the input shape I used the shape defined in your script:

batch_x = train_x[i:i+BATCH_SIZE].view(-1, 3, 100, 100)

Using a random input in this shape, the model creates an output in the shape [batch_size, 2, 104, 104].

The target is the ground truth label of the sample. During the training of your model you are trying to get the model predictions as close to the target (label) as possible.

Sir, I checked the shapes of my output and target: my output has the shape [100, 3, 104, 104] and my target has the shape [100, 2]. Now I find it difficult to adjust my target shape. Can you please suggest where I should adjust it? In your previous answers you stated that my target should have the shape [batch_size, 104, 104], but that doesn't match what I got, so I get the error: RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 2

Could you explain what the target contains and what dim1 is used for in [100, 2]?
Based on the shape it should be a multi-class classification target for a sequence length of 2, but I guess you might be using a one-hot encoded target for a binary classification?
If that's the case, you would have to create a target in the shape [batch_size=100], which contains the class indices in [0, 1] (or alternatively use nn.BCEWithLogitsLoss).
Since your current model outputs a 4-dimensional tensor, you could flatten the activation and add linear layers to the model to get the desired output shape.
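If the [100, 2] target really is one-hot encoded, converting it to the expected class-index format could look like this (a sketch, assuming that layout):

import torch

one_hot = torch.tensor([[1., 0.], [0., 1.], [1., 0.]])  # e.g. cat = [1, 0], dog = [0, 1]
target = one_hot.argmax(dim=1)                          # tensor([0, 1, 0]), shape [batch_size]
print(target.shape, target)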

Have a look at this tutorial for a simple CNN.

My target contains my labels, sir. And thank you for the linked tutorial, I will check it out.

Sir, I decided to just flatten my output shape, because I see it as the best answer to my problem. But I actually don't know how.

You can flatten the activations via x = x.view(x.size(0), -1) or by using nn.Flatten.
However, I don’t think flattening the activations alone will solve the issue, since the model output should have the shape [batch_size, nb_classes] for a multi-class classification as shown in the tutorial.
Thus I would still recommend using e.g. an nn.Linear module to create the desired outputs.
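A minimal sketch of that idea, assuming the [N, 2, 104, 104] activations from the model above:

import torch
import torch.nn as nn

head = nn.Sequential(
    nn.Flatten(),               # [N, 2, 104, 104] -> [N, 2*104*104]
    nn.Linear(2*104*104, 2),    # -> [N, nb_classes]
)

activations = torch.randn(10, 2, 104, 104)
logits = head(activations)
print(logits.shape)             # torch.Size([10, 2])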

Hi! I tried using an nn.Linear module and also flattened the last layer of my network. My code finally runs without errors, but I am still not 100% sure I did it correctly. Any comments on this, sir?

class ESNet(nn.Module):
    def __init__(self, classes):
        super().__init__()
        #-----ESNET---------#
        self.initial_block = DownsamplerBlock(3, 16)

        self.layers = nn.ModuleList()
        
        for x in range(0, 3):
           self.layers.append(FCU(16, 3, 0.03, 1))  
        
        self.layers.append(DownsamplerBlock(16,64))

        for x in range(0, 2):
           self.layers.append(FCU(64, 5, 0.03, 1))  

        self.layers.append(DownsamplerBlock(64,128))

        for x in range(0, 3):   
            self.layers.append(PFCU(chann=128)) 

        self.layers.append(UpsamplerBlock(128,64))
        self.layers.append(FCU(64, 5, 0, 1))
        self.layers.append(FCU(64, 5, 0, 1))

        self.layers.append(UpsamplerBlock(64,16))
        self.layers.append(FCU(16, 3, 0, 1))
        self.layers.append(FCU(16, 3, 0, 1))

        self.output_conv = nn.ConvTranspose2d( 16, classes, 2, stride=2, padding=0, output_padding=0, bias=True)
        self.hidden = nn.Linear(2*104*104, 208)
        self.out = nn.Linear(208, 2)
        self.act = nn.ReLU()

    def forward(self, input):
        output = self.initial_block(input)
        print(input.shape)
        

        for layer in self.layers:
            output = layer(output)
           

        output = self.output_conv(output)
        output = output.view(output.size(0), -1)
        output = self.act(self.hidden(output))
        output = self.out(output)
        print(output.shape)
       
        return output

The implementation looks alright for a 2-class classification.
As said before, you could also use a single output unit and then nn.BCEWithLogitsLoss for a binary classification, but your approach should also work.
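For reference, the single-output-unit alternative could look like this (a sketch reusing the 208-dim hidden size from the post above; the sizes are illustrative):

import torch
import torch.nn as nn

out_layer = nn.Linear(208, 1)                         # one logit per sample instead of two
logits = out_layer(torch.randn(10, 208)).squeeze(1)   # shape [10]

target = torch.randint(0, 2, (10,)).float()           # binary labels as floats
loss = nn.BCEWithLogitsLoss()(logits, target)
print(loss.item())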