RuntimeError: The size of tensor a (7) must match the size of tensor b (14) at non-singleton dimension 3

I am currently studying ResNet-50 and ran into a problem while modifying it.

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
batch_size=4

# Load the Stanford Cars (Cars-196) train and test datasets
# https://pytorch.org/vision/main/generated/torchvision.datasets.StanfordCars.html

trainset = torchvision.datasets.StanfordCars(root='./data', split='train',
                                                 download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=4)
testset = torchvision.datasets.StanfordCars(root='./data', split='test',
                                       download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=4)

I used the Stanford Cars (Cars-196) dataset from torchvision.
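As a quick sanity check (a small sketch, not part of the original script), one batch from the loader should match the Resize transform:

images, labels = next(iter(trainloader))
print(images.shape)  # torch.Size([4, 3, 224, 224]) -- batch_size x channels x H x W
print(labels.shape)  # torch.Size([4])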

import torch.nn as nn
import torchvision.models as models

# Remove Res4_3_conv3 - Penultimate
class Penultimate_ModelA(nn.Module):
    def __init__(self, remove_conv3=False):  # Set remove_conv3 to True to remove the layer
        super(Penultimate_ModelA, self).__init__()
        resnet = models.resnet50(pretrained=True)

        self.features = nn.Sequential(*list(resnet.children()))  # Unpack resnet50's children into a Sequential
        if remove_conv3:
            self.features[7][2].conv3 = nn.Identity()  # Replace Res4_3_conv3 with Identity()
            self.features[7][0].downsample = nn.Identity()  # Remove the downsample projection on the skip connection
            self.features[7][2].bn3 = nn.Identity()
            self.features[9] = nn.Linear(512, 1000, bias=True)  # fc layer now expects 512 features

    def forward(self, x):
        x = self.features(x)
        return x

The code above is the class for modifying ResNet-50. I unpacked ResNet-50's children into an nn.Sequential and modified some of its layers.
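For reference, here is a small sketch (not part of the original script) that enumerates resnet50's children, showing which index each modified layer corresponds to:

resnet = models.resnet50(pretrained=True)
for i, child in enumerate(resnet.children()):
    print(i, type(child).__name__)
# 0 Conv2d, 1 BatchNorm2d, 2 ReLU, 3 MaxPool2d,
# 4 Sequential (layer1), 5 Sequential (layer2), 6 Sequential (layer3),
# 7 Sequential (layer4), 8 AdaptiveAvgPool2d, 9 Linear

So self.features[7][0] is the first Bottleneck of layer4 and self.features[9] is the final fc layer.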

penultimate_ModelA = Penultimate_ModelA(remove_conv3=True)
penultimate_ModelA = penultimate_ModelA.to(device)
penultimate_ModelA

So CUDA is applied. However, when I train on the Stanford Cars (Cars-196) data from PyTorch, I get a tensor-size mismatch.
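The criterion and optimizer used in the loop below were defined earlier (not shown); the exact choices in this sketch are an assumption:

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(penultimate_ModelA.parameters(), lr=0.001, momentum=0.9)  # assumed hyperparameters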

from tqdm import tqdm

for epoch in range(10):   # Loop over the dataset for 10 epochs
    penultimate_ModelA.train()
    train_loss = 0.0
    for inputs, labels in tqdm(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Zero the parameter gradients
        optimizer.zero_grad()

        # forward pass + backpropagation + optimisation step
        outputs = penultimate_ModelA(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate statistics
        train_loss += loss.item()
    train_loss = train_loss / len(trainloader)  # average per-batch loss

print('Finished Training')

Running this raises the following traceback:

RuntimeError                              Traceback (most recent call last)
Cell In[39], line 11
      8 optimizer.zero_grad()
     10 # propagation + backpropagation + optimisation
---> 11 outputs = penultimate_ModelA(inputs)
     12 loss = criterion(outputs, labels)
     13 loss.backward()

File ~/anaconda3/envs/test_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[6], line 19, in Penultimate_ModelA.forward(self, x)
     18 def forward(self, x):
---> 19     x = self.features(x)
     20     return x

File ~/anaconda3/envs/test_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
...
--> 160 out += identity
    161 out = self.relu(out)
    163 return out

RuntimeError: The size of tensor a (7) must match the size of tensor b (14) at non-singleton dimension 3

I tried many things, such as editing the stride and in-channels. Some sources said this error could be solved by using transpose, squeeze, or unsqueeze, but I couldn't find where to apply them. Is this way of modifying ResNet-50 appropriate, or are there other ways to modify it?

You are removing the downsample layer, which decreases the spatial size (and increases the channel count):

Sequential(
  (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
  (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

Without it, the identity tensor keeps its original 14x14 spatial size while the main branch is downsampled to 7x7, so the addition out += identity fails.
You could keep this layer, or add another one that decreases the spatial size accordingly, such as a pooling layer.
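For example, a minimal sketch of a replacement shortcut (the exact layers here are an assumption; anything that outputs the same shape as the main branch would work):

# The shortcut must match the main branch: halve the spatial size (14x14 -> 7x7)
# and project 1024 -> 2048 channels so that `out += identity` lines up.
penultimate_ModelA.features[7][0].downsample = nn.Sequential(
    nn.AvgPool2d(kernel_size=2, stride=2),             # 14x14 -> 7x7
    nn.Conv2d(1024, 2048, kernel_size=1, bias=False),  # 1024 -> 2048 channels
    nn.BatchNorm2d(2048),
)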

Hi, so you mean that I have to modify another layer?

(7): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Identity()
      )
      (1): Bottleneck(
        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (2): Bottleneck(
        (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Identity()
        (bn3): Identity()
        (relu): ReLU(inplace=True)
      )
    )
    (8): AdaptiveAvgPool2d(output_size=(1, 1))
    (9): Linear(in_features=512, out_features=1000, bias=True)
  )

The code above is the last block of the modified ResNet-50. So, when the downsample in Bottleneck (7)(0) is removed, should I change conv3 in Bottleneck (7)(0)?
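To show what I mean about the shapes (a standalone sketch built from the layer shapes printed above, not the actual model), the mismatch already happens inside Bottleneck (7)(0), before conv3 of (7)(2) is reached:

feat = torch.randn(1, 1024, 14, 14)  # input to Bottleneck (7)(0)
main = nn.Sequential(                # conv1 -> conv2 -> conv3 of (7)(0)
    nn.Conv2d(1024, 512, kernel_size=1, bias=False),
    nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1, bias=False),
    nn.Conv2d(512, 2048, kernel_size=1, bias=False),
)
print(main(feat).shape)  # torch.Size([1, 2048, 7, 7])
print(feat.shape)        # torch.Size([1, 1024, 14, 14]) -- cannot be added to the output above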