RuntimeError: mat1 and mat2 shapes cannot be multiplied (5376x28 and 784x512)

thesekyi · July 15, 2021, 4:42pm

class Baseline(nn.Module):
    """Fully connected classifier: five linear layers ending in 3 log-probabilities.

    NOTE(review): ``forward`` hands ``x`` to ``fc1`` without reshaping it, so
    the last dimension of ``x`` must already equal ``fc1``'s ``in_features``
    (28 * 28 = 784). For the (64, 3, 28, 28) input reported in this thread
    the last dimension is 28, and the leading dimensions fold into
    64 * 3 * 28 = 5376 rows -- which matches the ``5376x28`` operand in the
    reported RuntimeError.
    """

    def __init__(self):
        super().__init__()
        # Five linear layers: four hidden layers plus a 3-unit output layer
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 3)

        # Dropout module with 0.2 probability
        self.dropout = nn.Dropout(p=0.2)
        # Log-softmax (not plain softmax) is applied to the output layer
        self.log_softmax = F.log_softmax

    def forward(self, x):
        """Run ``x`` through the hidden layers and return log-softmax over dim=1.

        Each hidden layer is followed by ReLU and then dropout. ``x`` is used
        as-is (no flattening), so its last dimension must be 784.
        """
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))

        # Output layer: no ReLU/dropout, only log-softmax along dim=1
        x = self.log_softmax(self.fc5(x), dim=1)

        return x

Error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-48-0030d9c3852c> in <module>
     18         optimizer.zero_grad()
     19         # Make predictions
---> 20         log_ps = model(images)
     21         loss = criterion(log_ps, labels)
     22         #backprop

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-46-09dd06cd0a72> in forward(self, x)
     15 
     16     def forward(self, x):
---> 17         x = self.dropout(F.relu(self.fc1(x)))
     18         x = self.dropout(F.relu(self.fc2(x)))
     19         x = self.dropout(F.relu(self.fc3(x)))

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/linear.py in forward(self, input)
     91 
     92     def forward(self, input: Tensor) -> Tensor:
---> 93         return F.linear(input, self.weight, self.bias)
     94 
     95     def extra_repr(self) -> str:

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/functional.py in linear(input, weight, bias)
   1690         ret = torch.addmm(bias, input, weight.t())
   1691     else:
-> 1692         output = input.matmul(weight.t())
   1693         if bias is not None:
   1694             output += bias

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5376x28 and 784x512)

I tried changing fc1 to `self.fc1 = nn.Linear(5376, 512)`, but I still get `RuntimeError: mat1 and mat2 shapes cannot be multiplied (5376x28 and 5376x512)`.

I then adjusted the whole architecture as follows:

class Baseline(nn.Module):
    """Second attempt at the classifier, with ``fc1`` resized to ``Linear(5376, 28)``.

    NOTE(review): ``in_features`` of ``fc1`` has to match the size of the
    *last* dimension of the input, not the number of rows. For the
    (64, 3, 28, 28) input reported in this thread the last dimension is 28,
    while 5376 = 64 * 3 * 28 is the row count (and includes the batch size
    of 64). ``forward`` still does not flatten ``x``, so the shape mismatch
    remains.
    """

    def __init__(self):
        super().__init__()
        # Five linear layers: four hidden layers plus a 3-unit output layer
        self.fc1 = nn.Linear(5376, 28)
        self.fc2 = nn.Linear(28, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 3)

        # Dropout module with 0.2 probability
        self.dropout = nn.Dropout(p=0.2)
        # Log-softmax (not plain softmax) is applied to the output layer
        self.log_softmax = F.log_softmax

    def forward(self, x):
        """Run ``x`` through the hidden layers and return log-softmax over dim=1.

        Each hidden layer is followed by ReLU and then dropout. ``x`` is used
        as-is (no flattening), so its last dimension must be 5376.
        """
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))

        # Output layer: no ReLU/dropout, only log-softmax along dim=1
        x = self.log_softmax(self.fc5(x), dim=1)

        return x

and I still get the following error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-54-0030d9c3852c> in <module>
     18         optimizer.zero_grad()
     19         # Make predictions
---> 20         log_ps = model(images)
     21         loss = criterion(log_ps, labels)
     22         #backprop

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-52-f98f89e15885> in forward(self, x)
     15 
     16     def forward(self, x):
---> 17         x = self.dropout(F.relu(self.fc1(x)))
     18         x = self.dropout(F.relu(self.fc2(x)))
     19         x = self.dropout(F.relu(self.fc3(x)))

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/modules/linear.py in forward(self, input)
     91 
     92     def forward(self, input: Tensor) -> Tensor:
---> 93         return F.linear(input, self.weight, self.bias)
     94 
     95     def extra_repr(self) -> str:

~/anaconda3/envs/torch/lib/python3.8/site-packages/torch/nn/functional.py in linear(input, weight, bias)
   1690         ret = torch.addmm(bias, input, weight.t())
   1691     else:
-> 1692         output = input.matmul(weight.t())
   1693         if bias is not None:
   1694             output += bias

RuntimeError: mat1 and mat2 shapes cannot be multiplied (5376x28 and 5376x28)

NB: The input data is of shape torch.Size([64, 3, 28, 28]).

For the purposes of replication:

# Minimal reproduction: build the model and feed it a random batch with the
# same shape as the real input, torch.Size([64, 3, 28, 28]).
model = Baseline()
X = torch.rand(64, 3, 28, 28)
# This call triggers the "mat1 and mat2 shapes cannot be multiplied" error.
model(X)

ptrblck · July 16, 2021, 6:00am

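Based on the input shape, the in_features of the first linear layer should be set to 3*28*28=2352, and you would most likely want to flatten the input first. (nn.Linear only acts on the last dimension, so an unflattened [64, 3, 28, 28] batch is treated as 64*3*28=5376 rows of 28 features, which is where the 5376x28 in the error comes from.) You can flatten via: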

    def forward(self, x):
        # Flatten everything after the first (batch) dimension: the first
        # dimension is kept and -1 lets view() infer the remaining size,
        # e.g. (64, 3, 28, 28) -> (64, 2352).
        x  = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))