Adding layers to ResNet18 gives a RuntimeError

Hello,

I have added a few layers to the resnet18 model, but when I try to print the summary of the model using a dummy input, I get a RuntimeError.

Code:


import torch.nn as nn
import torch
from torchsummary import summary
import torchvision


class BasicBlock2(nn.Module):
    """Conv2d (5x5, no padding) -> BatchNorm2d -> ReLU.

    With the default stride and dilation the unpadded 5x5 kernel removes 4
    rows and 4 columns per block, so a stack of five blocks maps
    [-1, 128, 64, 192] to [-1, 128, 44, 172].

    Args:
        in_channels: channel count of the input tensor.
        out_channels: channel count produced by the convolution.
        stride: stride of the convolution.
        dilation: dilation of the convolution.
        downsample: unused; kept so the constructor signature is unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=(1, 1), dilation=(1, 1), downsample=None):
        super(BasicBlock2, self).__init__()
        # Only modules that forward() applies are registered. The former
        # conv2/batch_norm2/relu2 were never called, yet still added unused
        # parameters to the model (and conv2 was built for `in_channels`
        # inputs although it would have received `out_channels`).
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=(5, 5),
                               padding=(0, 0),
                               dilation=dilation,
                               stride=stride)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.relu1 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply convolution, batch norm and ReLU; return the activated tensor."""
        out = self.conv1(x)
        out = self.batch_norm1(out)
        out = self.relu1(out)
        return out


class BasicBlock3(nn.Module):
    """Conv2d (7x7, padding (3, 0)) -> BatchNorm2d -> ReLU.

    With the default stride and dilation the height is preserved (padding 3
    on a 7-row kernel) while the unpadded width loses 6 columns per block, so
    a stack of eight blocks maps [-1, 128, 44, 172] to [-1, 128, 44, 124].

    Args:
        in_channels: channel count of the input tensor.
        out_channels: channel count produced by the convolution.
        stride: stride of the convolution.
        dilation: dilation of the convolution.
        downsample: unused; kept so the constructor signature is unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=(1, 1), dilation=(1, 1), downsample=None):
        super(BasicBlock3, self).__init__()
        # Only modules that forward() applies are registered. The former
        # conv2/batch_norm2/relu2 were never called, yet still added unused
        # parameters to the model (and conv2 was built for `in_channels`
        # inputs although it would have received `out_channels`).
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=(7, 7),
                               padding=(3, 0),
                               dilation=dilation,
                               stride=stride)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.relu1 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply convolution, batch norm and ReLU; return the activated tensor."""
        out = self.conv1(x)
        out = self.batch_norm1(out)
        out = self.relu1(out)
        return out


class NewLayers(nn.Module):
    """Stack of BasicBlock2 and BasicBlock3 blocks followed by Conv-BN-ReLU.

    The intermediate stacks keep the channel count unchanged, so they are
    built with `last_in_channels` channels, the width the final convolution
    consumes. With layers=[5, 8] and the default stride/dilation, an input of
    shape [-1, C, 64, 192] becomes [-1, last_out_channels, 44, 120].

    Args:
        layers: two-element sequence; number of BasicBlock2 blocks and number
            of BasicBlock3 blocks (e.g. [5, 8]).
        last_in_channels: channel count of the input and of every
            intermediate block (e.g. 128).
        last_out_channels: channel count produced by the final convolution.
        stride: stride of the final convolution only.
        dilation: dilation of the final convolution only.
    """

    def __init__(self, layers, last_in_channels, last_out_channels, stride=(1, 1), dilation=(1, 1)):
        super(NewLayers, self).__init__()
        # Previously hard-coded to 128, which made any other
        # `last_in_channels` fail at the final convolution.
        self.intermediate1_layers = self._make_layer(BasicBlock2, layers[0],
                                                     in_channels=last_in_channels,
                                                     out_channels=last_in_channels)
        self.intermediate2_layers = self._make_layer(BasicBlock3, layers[1],
                                                     in_channels=last_in_channels,
                                                     out_channels=last_in_channels)

        self.lastconv = nn.Conv2d(in_channels=last_in_channels,
                                  out_channels=last_out_channels,
                                  kernel_size=(5, 5),
                                  padding=(2, 0),
                                  dilation=dilation,
                                  stride=stride)
        self.lastbatchnorm = nn.BatchNorm2d(last_out_channels)
        self.lastrelu = nn.ReLU(inplace=True)

    def _make_layer(self, block, blocks, in_channels, out_channels):
        """Return an nn.Sequential of `blocks` instances of `block`.

        Every instance is built as `block(in_channels, out_channels)`.
        """
        return nn.Sequential(*[block(in_channels, out_channels) for _ in range(blocks)])

    def forward(self, x):
        """Run both intermediate stacks, then the final Conv-BN-ReLU."""
        out = self.intermediate1_layers(x)
        out = self.intermediate2_layers(out)
        out = self.lastconv(out)
        out = self.lastbatchnorm(out)
        out = self.lastrelu(out)
        return out


# Stand-alone check: run the custom layers on a dummy feature map of the
# shape given in the block comments ([-1, 128, 64, 192]).
net = NewLayers(layers=[5, 8], last_in_channels=128, last_out_channels=128)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
x = torch.randn(1, 128, 64, 192)
output= net(x)

# Splice the custom layers into ResNet18: every child except the last four,
# then `net`, then the last four children.
resnet = torchvision.models.resnet18()
model = nn.Sequential(*list(resnet.children())[:-4])
model.add_module('custom_layer', net)
# NOTE(review): children() only yields registered modules, so an operation
# that ResNet18 applies functionally inside its forward() (the reply in this
# thread points at torch.flatten) is not part of this Sequential; that is
# what leads to the RuntimeError quoted after the code.
model.add_module('final', nn.Sequential(*(list(resnet.children())[-4:])))
print(model)
summary(model.to(device), (3, 512, 1536))

But the error I am getting is:
RuntimeError: mat1 dim 1 must match mat2 dim 0

I cannot find where the dimensions are wrong. Any help is appreciated.

Thank You.

Ah, this is a tricky one! I don’t think you have done anything obviously wrong, but the issue is that ResNet18’s forward method contains a “functional” call to torch.flatten, which is not a child module. If you just chain the child modules together, that operation is missing before the linear layer.

Can you check if changing the lines

# Original: the last four children of `resnet` are chained as they are.
model.add_module('final', nn.Sequential(*(list(resnet.children())[-4:])))
print(model)
summary(model.to(device), (3, 512, 1536))

to

rest = list(resnet.children())[-4:]
# ResNet18 flattens functionally inside its forward(), so the step has to be
# added here as an explicit module.
flatten = torch.nn.Flatten(1)
# insert(-1, ...) places the flatten directly before the last child in `rest`.
rest.insert(-1, flatten)
model.add_module('final', nn.Sequential(*rest))
print(model)
summary(model.to(device), (3, 512, 1536))

fixes the issue?

@eqy Thanks for replying.
Yes, I changed it and it works now. But I don’t understand what you did there. How did you find out that there is a “functional” use of torch.flatten in ResNet18? Also, why is it used?

@eqy Hi again!
I have slightly modified my code: I want to concatenate another image to the feature map inside my network, so I have made some changes to the forward method of the NewLayers class, as shown below:

import torch
import torch.nn as nn
import torchvision


class BasicBlock2(nn.Module):
    """Single Conv2d (5x5, no padding) followed by BatchNorm2d and ReLU.

    With the default stride and dilation each block trims 4 rows and 4
    columns, so five stacked blocks map [-1, 128, 64, 192] to
    [-1, 128, 44, 172].

    Args:
        in_channels: channel count of the input tensor.
        out_channels: channel count produced by the convolution.
        stride: stride of the convolution.
        dilation: dilation of the convolution.
        downsample: accepted but not used by this block.
    """

    def __init__(self, in_channels, out_channels, stride=(1, 1), dilation=(1, 1), downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=(5, 5),
                               stride=stride, padding=(0, 0), dilation=dilation)
        self.batch_norm1 = nn.BatchNorm2d(num_features=out_channels)
        self.relu1 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return relu(batch_norm(conv(x)))."""
        return self.relu1(self.batch_norm1(self.conv1(x)))


class BasicBlock3(nn.Module):
    """Single Conv2d (7x7, padding (3, 0)) followed by BatchNorm2d and ReLU.

    With the default stride and dilation the height is preserved and the
    width shrinks by 6 per block, so eight stacked blocks map
    [-1, 128, 44, 172] to [-1, 128, 44, 124].

    Args:
        in_channels: channel count of the input tensor.
        out_channels: channel count produced by the convolution.
        stride: stride of the convolution.
        dilation: dilation of the convolution.
        downsample: accepted but not used by this block.
    """

    def __init__(self, in_channels, out_channels, stride=(1, 1), dilation=(1, 1), downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=(7, 7),
                               stride=stride, padding=(3, 0), dilation=dilation)
        self.batch_norm1 = nn.BatchNorm2d(num_features=out_channels)
        self.relu1 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return relu(batch_norm(conv(x)))."""
        return self.relu1(self.batch_norm1(self.conv1(x)))


class NewLayers(nn.Module):
    """BasicBlock2/BasicBlock3 stacks, Conv-BN-ReLU, then fusion with `dep`.

    The image features pass through both intermediate stacks and a
    Conv-BN-ReLU stage producing `last_out_channels` channels. The `dep`
    tensor is concatenated along the channel dimension and a final 3x3
    convolution maps the result to 128 channels.

    Args:
        layers: two-element sequence; number of BasicBlock2 blocks and number
            of BasicBlock3 blocks (e.g. [5, 8]).
        last_in_channels: channel count of `img` and of every intermediate
            block (e.g. 128).
        last_out_channels: channel count after the Conv-BN-ReLU stage
            (e.g. 127).
        stride: stride of `conv` and `last_conv`.
        dilation: dilation of `conv` and `last_conv`.
        dep_channels: channel count of the `dep` tensor given to forward().
    """

    def __init__(self, layers, last_in_channels, last_out_channels, stride=(1, 1), dilation=(1, 1),
                 dep_channels=1):
        super(NewLayers, self).__init__()
        # Previously hard-coded to 128, which made any other
        # `last_in_channels` fail at `conv`.
        self.intermediate1_layers = self._make_layer(BasicBlock2, layers[0],
                                                     in_channels=last_in_channels,
                                                     out_channels=last_in_channels)
        self.intermediate2_layers = self._make_layer(BasicBlock3, layers[1],
                                                     in_channels=last_in_channels,
                                                     out_channels=last_in_channels)

        self.conv = nn.Conv2d(in_channels=last_in_channels,
                              out_channels=last_out_channels,
                              kernel_size=(5, 5),
                              padding=(2, 0),
                              dilation=dilation,
                              stride=stride)
        self.batchnorm = nn.BatchNorm2d(last_out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Consumes the concatenation built in forward(). The input width was
        # previously hard-coded to 127, which only matched
        # last_out_channels=127 with a one-channel `dep`.
        self.last_conv = nn.Conv2d(in_channels=last_out_channels + dep_channels,
                                   out_channels=128,
                                   kernel_size=(3, 3),
                                   padding=(1, 1),
                                   stride=stride,
                                   dilation=dilation)

    def _make_layer(self, block, blocks, in_channels, out_channels):
        """Return an nn.Sequential of `blocks` instances of `block`.

        Every instance is built as `block(in_channels, out_channels)`.
        """
        return nn.Sequential(*[block(in_channels, out_channels) for _ in range(blocks)])

    def forward(self, img, dep):
        """Process `img`, concatenate `dep` on dim 1, apply `last_conv`.

        `dep` must match the processed `img` features in every dimension
        except dim 1, as required by torch.cat.
        """
        out = self.intermediate1_layers(img)
        out = self.intermediate2_layers(out)
        out = self.conv(out)
        out = self.batchnorm(out)
        out = self.relu(out)
        concat = torch.cat((out, dep), dim=1)
        out = self.last_conv(concat)
        return out


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 127 output channels so that concatenating the one-channel `dep` inside
# NewLayers.forward yields the 127 input channels last_conv is built for.
net = NewLayers(layers=[5, 8], last_in_channels=128, last_out_channels=127)

# Splice the custom layers into ResNet18: every child except the last four,
# then `net`, then the last four children with an explicit flatten inserted
# before the last one.
resnet = torchvision.models.resnet18()
model = nn.Sequential(*list(resnet.children())[:-4])
model.add_module('custom_layer', net)

rest = list(resnet.children())[-4:]
flatten = torch.nn.Flatten(1)
rest.insert(-1, flatten)
model.add_module('final', nn.Sequential(*rest))

img = torch.randn(1, 3, 512, 1536)
# One-channel tensor at the 44x120 spatial size that the block comments imply
# for the custom layers' conv output.
dep = torch.randn(1, 1, 44, 120)

# NOTE(review): `model` is an nn.Sequential, whose forward() takes a single
# input and feeds each module's output to the next one. The extra `dep`
# argument is therefore rejected by the container itself (the TypeError
# quoted after the code) before NewLayers.forward(img, dep) is reached.
model(img.to(device), dep.to(device))

I have also changed the last_out_channels argument of the NewLayers class to 127; concatenating one more channel then gives 128 channels (as shown in the forward method).
But when I try to print the output of the model using a dummy input like above:
I am getting a TypeError: forward() takes 2 positional arguments but 3 were given

Any idea as to why this is the case? I have 2 inputs in the forward method of the NewLayers class and I am passing 2 inputs but am getting the error. This is confusing.