mat1 and mat2 shapes cannot be multiplied (32x50176 and 512x3)

I’m trying to use ResNet18, but I get an error that I can’t fix.
Can anybody help me?

import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
  def __init__(self, in_planes, planes, stride = 1):
    super(BasicBlock, self).__init__()

    self.conv1 = nn.Conv2d(in_planes, planes,
                           kernel_size = 3,
                           stride = stride,
                           padding = 1,
                           bias = False)
    self.bn1 = nn.BatchNorm2d(planes)

    self.conv2 = nn.Conv2d(planes, planes,
                           kernel_size = 3,
                           stride = 1,
                           padding = 1,
                           bias = False)
    self.bn2 = nn.BatchNorm2d(planes)

    self.shortcut = nn.Sequential()
    if stride != 1: # the case where the input and output dimensions differ
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, planes,
                    kernel_size = 1,
                    stride = stride,
                    bias = False),
          nn.BatchNorm2d(planes))
    
  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x) # the skip-connection
    out = F.relu(out)
    return out
    
class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes = 3):
    super(ResNet, self).__init__()
    self.in_planes = 64

    self.conv1 = nn.Conv2d(3, 64,
                           kernel_size = 3,
                           stride = 1,
                           padding = 1,
                           bias = False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride = 1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride = 2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride = 2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride = 2)
    self.linear = nn.Linear(512, num_classes)

  def _make_layer(self, block, planes, num_blocks, stride):
    strides = [stride] + [1] * (num_blocks - 1) # the first block of each layer reduces the height and width; its stride of 2 changes the spatial dimensions
    layers = []
    for stride in strides:
      layers.append(block(self.in_planes, planes, stride))
      self.in_planes = planes
    return nn.Sequential(*layers)
    
  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def ResNet18():
  return ResNet(BasicBlock, [2, 2, 2, 2])

When I run this and then try to train with it,
I get the following error.

[ Train epoch: 0 ]
/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
  cpuset_checked))
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-11-6cc47c45b4a3> in <module>()
      1 for epoch in range(0, 20):
      2   adjust_learning_rate(optimizer, epoch)
----> 3   train(epoch)
      4   test(epoch)

7 frames
<ipython-input-10-bb70dbb67e0f> in train(epoch)
     21     optimizer.zero_grad()
     22 
---> 23     benign_outputs = net(inputs)
     24     loss = criterion(benign_outputs, targets)
     25     loss.backward()

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch/nn/parallel/data_parallel.py in forward(self, *inputs, **kwargs)
    148         with torch.autograd.profiler.record_function("DataParallel.forward"):
    149             if not self.device_ids:
--> 150                 return self.module(*inputs, **kwargs)
    151 
    152             for t in chain(self.module.parameters(), self.module.buffers()):

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-9-7dc95e571234> in forward(self, x)
     65     out = F.avg_pool2d(out, 4)
     66     out = out.view(out.size(0), -1)
---> 67     out = self.linear(out)
     68     return out
     69 

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    101 
    102     def forward(self, input: Tensor) -> Tensor:
--> 103         return F.linear(input, self.weight, self.bias)
    104 
    105     def extra_repr(self) -> str:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1846     if has_torch_function_variadic(input, weight, bias):
   1847         return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848     return torch._C._nn.linear(input, weight, bias)
   1849 
   1850 

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x50176 and 512x3)

I would be very thankful if anybody could help me.

Hi Johny!

Note that you define self.layer4, but you never use it. Perhaps
you want, in forward():

    ...
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)   # should layer4 appear here?
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)

Regardless of whether or not using layer4 is relevant to your issue,
the 512x3 in the error message refers to the weight tensor of
self.linear. This Linear layer expects an input tensor of shape
[nBatch, 512]. But the error message is telling you that its input
has shape [32, 50176]. This means that nBatch = 32, which is
fine, but the number of “features” (in each “sample” in the batch)
is 50176, rather than the expected 512.
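
You can reproduce the mismatch in isolation (a minimal sketch, using
the shapes from your error message):

    import torch
    import torch.nn as nn

    linear = nn.Linear(512, 3)   # same as self.linear with num_classes = 3
    x = torch.randn(32, 50176)   # nBatch = 32, but 50176 features, not 512
    out = linear(x)              # RuntimeError: mat1 and mat2 shapes cannot
                                 # be multiplied (32x50176 and 512x3)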

This line:

    out = out.view(out.size(0), -1)

takes the output of avg_pool2d and “flattens” all but the nBatch
dimension together. That is, the “planes” (“channels”) dimension
together with the “height” and “width” dimensions is flattened
into one single dimension, which the error message tells us has
size 50176. (Note that the height and width of the tensor being
flattened here depend on the height and width of the image input
into your ResNet. As written, your ResNet will only work for images
with one specific height and width.)
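
For what it’s worth, 50176 = 256 * 14 * 14, which would be consistent
with 224x224 input images (an assumption on my part) flowing through
your forward() as written:

    # assumed input:      [32, 3, 224, 224]
    # conv1 / bn1 / relu: [32, 64, 224, 224]   (stride 1 keeps height / width)
    # layer1:             [32, 64, 224, 224]
    # layer2:             [32, 128, 112, 112]  (stride 2 halves height / width)
    # layer3:             [32, 256, 56, 56]
    # avg_pool2d(out, 4): [32, 256, 14, 14]
    # view:               [32, 50176]          (256 * 14 * 14 = 50176, not 512)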

To debug your issue, you should print out the shapes of the result of
self.layer3(out), the result of self.layer4(out) (should you
decide to add it to your network), the result of F.avg_pool2d(out, 4),
and the result of out.view(out.size(0), -1). Do these shapes
make sense and are they what you expect them to be? Does it make
sense that self.linear is expecting a tensor of shape [nBatch, 512]?
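
A minimal sketch of what such a debugging forward() could look like
(with temporary prints that you would remove afterward):

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        print('after layer3:', out.shape)
        # out = self.layer4(out)             # if you decide to add layer4
        # print('after layer4:', out.shape)
        out = F.avg_pool2d(out, 4)
        print('after avg_pool2d:', out.shape)
        out = out.view(out.size(0), -1)
        print('after view:', out.shape)      # self.linear expects [nBatch, 512]
        out = self.linear(out)
        return out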

Best.

K. Frank