It seems your input contains 31 features while the first linear layer (`self.fc1`) expects an input with 30 features. You could fix it by changing the `in_features` argument of `self.fc1` or by making sure the input contains 30 features.
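For illustration, a minimal sketch of both options (the feature and layer sizes below are just made up for this example, not taken from any specific model):

```python
import torch
import torch.nn as nn

x = torch.randn(8, 31)                            # batch of 8 samples with 31 features
fc1 = nn.Linear(in_features=30, out_features=16)
# fc1(x)  # would fail: mat1 and mat2 shapes cannot be multiplied (8x31 and 30x16)

# Option 1: adapt the layer to the data
fc1 = nn.Linear(in_features=31, out_features=16)
out = fc1(x)                                      # works

# Option 2: keep the layer and make sure the input really has 30 features
x = torch.randn(8, 30)
out = nn.Linear(in_features=30, out_features=16)(x)
print(out.shape)                                  # torch.Size([8, 16])
```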
Hi, I have the same error in the output: `RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)`. Hope you can help @ptrblck
The model notebook:
[Google Colab](https://POP model)
Your notebook doesn’t show any code besides executing a Python script from another folder.
However, the error:
```
word_embeddings = self.dropout(self.fc(word_embeddings))
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)
```
points to a shape mismatch in `self.fc`, which expects 768 input features while the activation only contains 1 feature.
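One way to narrow this down is to print the activation's shape right before the failing layer. A minimal sketch, assuming a placeholder model in which only the 768→32 `self.fc` layer is taken from the traceback above:

```python
import torch
import torch.nn as nn

class MyModel(nn.Module):
    # Placeholder model: only self.fc (768 -> 32) comes from the traceback.
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(768, 32)
        self.dropout = nn.Dropout(0.1)

    def forward(self, word_embeddings):
        # Print the shape right before the failing layer; the last dimension
        # must equal self.fc.in_features (768) for the matmul to work.
        print(word_embeddings.shape)
        return self.dropout(self.fc(word_embeddings))

model = MyModel()
out = model(torch.randn(1, 768))   # prints torch.Size([1, 768]); a (1, 1) input would fail here
```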
Here is the code:
https://github.com/HumaticsLAB/POP-Mining-POtential-Performance
How can I solve it?
Check the aforementioned layer specs and the corresponding input activation as described before.
In case you get stuck, please create a minimal and executable code snippet to reproduce the issue.
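For reference, a minimal, executable snippet reproducing this exact error could look roughly like this (the shapes are taken from the error message, not from the POP code):

```python
import torch
import torch.nn as nn

fc = nn.Linear(768, 32)   # the layer from the traceback: expects 768 input features
x = torch.randn(1, 1)     # an activation with a single feature, as in the error message

out = fc(x)  # RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1 and 768x32)
```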
Sorry, I don’t get it. Can you explain more?
Hi, I get the same runtime error with my code (`mat1 and mat2 shapes cannot be multiplied (1x25088 and 15x10)`):
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

class NetMLP(torch.nn.Module):
    def __init__(self, input_features, size_hidden, n_output):
        super(NetMLP, self).__init__()
        self.hidden1 = torch.nn.Linear(input_features, size_hidden, bias=True, device=None, dtype=None)
        self.hidden2 = torch.nn.Linear(size_hidden, size_hidden, bias=True, device=None, dtype=None)
        self.out = torch.nn.Linear(size_hidden, n_output, bias=True, device=None, dtype=None)

    def forward(self, x):
        x = x.view(-1,)
        x = self.out(x)  # linear output
        return x

input_features = 28*28
hidden_neurons = 15
num_outputs = 10
batch_size = 50
learning_rate = 0.0001

device = torch.device('cuda:0')
net = NetMLP(input_features, hidden_neurons, num_outputs)
net.to(device)

optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

loader = DataLoader(dataset=mnist_train, batch_size=32, shuffle=True)

num_epochs = 5
loss_list = []
for i in range(num_epochs):
    for x, y in loader:
        optimizer.zero_grad()
        x = x.to(device)
        y = y.to(device)
        loss = criterion(net(x), y)
        loss.backward()
        optimizer.step()
    loss_list.append(loss.data.item())
    #if i > 0 and i % 100 == 0:
    print('Epoch %d, loss = %g' % (i, loss))
```
Many thanks!
In your model you are using only the last linear layer via `self.out`, while I would guess `self.hidden1` and `self.hidden2` should also be used.
Additionally, you are flattening the input tensor and mixing the batch with the feature dimension in `x = x.view(-1)`.
Take a look at this tutorial to see how a simple neural network is implemented and used.
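As a rough sketch of what that could look like with your layer sizes (the ReLU activations are an assumption; use whatever nonlinearity you prefer):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class NetMLP(nn.Module):
    def __init__(self, input_features, size_hidden, n_output):
        super().__init__()
        self.hidden1 = nn.Linear(input_features, size_hidden)
        self.hidden2 = nn.Linear(size_hidden, size_hidden)
        self.out = nn.Linear(size_hidden, n_output)

    def forward(self, x):
        x = x.view(x.size(0), -1)      # flatten the features only, keep the batch dimension
        x = F.relu(self.hidden1(x))    # use the hidden layers (ReLU is an assumed choice)
        x = F.relu(self.hidden2(x))
        x = self.out(x)                # raw logits for nn.CrossEntropyLoss
        return x

# e.g. a batch of MNIST-sized inputs: 32 samples of 1x28x28
x = torch.randn(32, 1, 28, 28)
out = NetMLP(28*28, 15, 10)(x)
print(out.shape)  # torch.Size([32, 10])
```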
Hi, I am a newbie and I have a similar error.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class Auxiliary(nn.Module):
    def __init__(self, input_channels, n_classes):
        super(Auxiliary, self).__init__()
        self.Conv2 = nn.Conv2d(input_channels, 128, kernel_size=1)
        self.FC1 = nn.Linear(2048, 1024)
        self.FC2 = nn.Linear(1024, n_classes)

    def forward(self, x):
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.Conv2(x)
        # N x 128 x 4 x 4
        x = x.view(x.size(0), -1)
        # N x 2048
        x = F.relu(self.FC1(x), inplace=True)
        # N x 2048
        x = F.dropout(x, 0.7, training=self.training)
        # N x 2048
        x = self.FC2(x)
        # N x 1024
        return x


class Inception(nn.Module):
    def __init__(self, input_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super(Inception, self).__init__()
        # 1x1conv branch
        self.inception_1 = nn.Sequential(
            nn.Conv2d(input_channels, n1x1, kernel_size=1)
        )

        self.inception_2 = nn.Sequential(
            nn.Conv2d(input_channels, n3x3_reduce, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(n3x3_reduce, n3x3, kernel_size=1),
            nn.ReLU(inplace=True)
        )

        self.inception_3 = nn.Sequential(
            nn.Conv2d(input_channels, n5x5_reduce, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(n5x5_reduce, n5x5, kernel_size=1),
            nn.ReLU(inplace=True)
        )

        self.inception_4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(input_channels, pool_proj, kernel_size=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return torch.cat((self.inception_1(x), self.inception_2(x), self.inception_3(x), self.inception_4(x)), dim=1)


class Net(nn.Module):
    def __init__(self, n_classes):
        super(Net, self).__init__()
        self.pre_layer = nn.Sequential(
            # N x 3 x 224 x 224
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # N x 64 x 112 x 112
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            # N x 64 x 56 x 56
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # N x 64 x 56 x 56
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            # N x 192 x 56 x 56
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True),
        )

        # N x 192 x 28 x 28
        self.inception_3a = Inception(192, 64, 96, 128, 16, 32, 32)
        # N x 256 x 28 x 28
        self.inception_3b = Inception(256, 128, 128, 192, 32, 96, 64)
        # N x 480 x 28 x 28
        self.maxPool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # N x 480 x 14 x 14
        self.inception_4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.aux1 = Auxiliary(512, n_classes)
        # N x 512 x 14 x 14
        self.inception_4b = Inception(512, 160, 112, 224, 24, 64, 64)
        # N x 512 x 14 x 14
        self.inception_4c = Inception(512, 128, 128, 256, 24, 64, 64)
        # N x 512 x 14 x 14
        self.inception_4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.aux2 = Auxiliary(528, n_classes)
        # N x 528 x 14 x 14
        self.inception_4e = Inception(528, 256, 160, 320, 32, 128, 128)
        # N x 832 x 14 x 14
        self.maxPool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # N x 832 x 7 x 7
        self.inception_5a = Inception(832, 256, 160, 320, 32, 128, 128)
        # N x 832 x 7 x 7
        self.inception_5b = Inception(832, 384, 192, 384, 48, 128, 128)
        # N x 1024 x 7 x 7
        self.avgPool5 = nn.AvgPool2d(kernel_size=7, stride=1)
        # N x 1024 x 1 x 1
        self.dropout = nn.Dropout(p=0.4)
        # N x 1024
        self.linear = nn.Linear(in_features=1024, out_features=n_classes)

    def forward(self, x, mode=True):
        x = self.pre_layer(x)
        x = self.inception_3a(x)
        x = self.inception_3b(x)
        x = self.maxPool3(x)
        x = self.inception_4a(x)

        if mode:
            aux1 = self.aux1(x)
        else:
            aux1 = None

        x = self.inception_4b(x)
        x = self.inception_4c(x)
        x = self.inception_4d(x)

        if mode:
            aux2 = self.aux2(x)
        else:
            aux2 = None

        x = self.inception_4e(x)
        x = self.maxPool4(x)
        x = self.inception_5a(x)
        x = self.inception_5b(x)
        x = self.avgPool5(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.linear(x)
        x = F.softmax(x, dim=1)
        return x, aux1, aux2
```
```
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-123-6378d8f2f25e> in <module>
     11 labels=labels.to(device)
     12 total+=labels.size(0)
---> 13 output,aux1,aux2=model(images)
     14 aux1_loss=criterion(aux1, labels)
     15 aux2_loss=criterion(aux2, labels)

3 frames
/usr/local/lib/python3.9/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x16384 and 1024x2)
```
I guess `self.linear` is raising the error in `Net`, so you should also check the shape of its input activation in `Net.forward` and change the `in_features` to 16384, as mentioned in previous posts.
Also, assuming you are working on a multi-class classification use case with `nn.CrossEntropyLoss` as the criterion, remove the last `F.softmax` activation, since raw logits are expected.
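A small, self-contained sketch of both suggestions (the 16384 comes from your error message; the 1024 x 4 x 4 activation shape below is only an assumption that is consistent with it):

```python
import torch
import torch.nn as nn

n_classes = 2
linear = nn.Linear(in_features=16384, out_features=n_classes)  # was in_features=1024

x = torch.randn(32, 1024, 4, 4)   # assumed activation before flattening: 32 x 16384 once flattened
x = torch.flatten(x, 1)
print(x.shape)                    # torch.Size([32, 16384]) -> must match linear.in_features

logits = linear(x)                       # return these raw logits, no F.softmax
criterion = nn.CrossEntropyLoss()        # applies log_softmax internally
loss = criterion(logits, torch.randint(0, n_classes, (32,)))
print(loss.item())
```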