RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x13056 and 153600x2048)

Your model works fine for me:

# Smoke test: run one random tensor of shape (1, 3, 224, 224) through the
# network and print the shape of what comes out.
model = MyFirstNet()
x = torch.randn((1, 3, 224, 224))
out = model(x)
print(out.shape)
# torch.Size([1, 2])

whaaaaaaat ? no way

why do I have a shape error then ?
where is the error ?

And what happens if you call the trainer function like this:
# Run 10 training epochs, updating parameters through `optimizer_mynet`.
# NOTE(review): only the `classify` sub-module is handed to the trainer, so it
# receives whatever the dataloader yields as-is -- confirm those batches are
# already flattened features rather than raw images.
trainer(
    model=model.classify,
    dataloader=dataset_loader,
    num_epochs=10,
    optimizer=optimizer_mynet,
    train=True,
)

Your trainer function needs a dataloader argument, which is undefined as it needs real data.
Also, your code is not properly formatted. Post a minimal and executable code snippet reproducing the error in case you get stuck.

Of course, you’re right. I’m a beginner, sorry.
Here is my code:


# Per-channel mean / std applied to the image tensors.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
# Preprocessing pipeline: resize, centre-crop to 224, convert to a tensor,
# then normalise.
composed_transforms = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]
)

# data_dir = 'data_exam'
data_dir = '../datasets/dogscats/'
# One ImageFolder per split, each reading from <data_dir>/<split> and sharing
# the same preprocessing pipeline.
dataset = {
    split: datasets.ImageFolder(
        os.path.join(data_dir, split),
        transform=composed_transforms,
    )
    for split in ['train', 'valid']
}

Here is my data loader, which I forgot to include:

def to_shuffle(x):
    """Tell whether the split named ``x`` should be shuffled.

    Args:
        x: Name of the dataset split.

    Returns:
        bool: ``True`` when ``x`` equals ``'train'``, ``False`` otherwise.
    """
    # The comparison already yields the boolean; no if/else needed.
    return x == 'train'
# One DataLoader per split (batches of 4, 6 worker processes); whether a split
# is shuffled is decided by to_shuffle().
dataset_loader = {
    split: torch.utils.data.DataLoader(
        dataset[split],
        batch_size=4,
        shuffle=to_shuffle(split),
        num_workers=6,
    )
    for split in ['train', 'valid']
}

My VGG16 network:

import torch
import torch.nn as nn

class Network(nn.Module):
    """VGG16-style image classifier.

    Five convolutional blocks (3x3 convolutions with padding 1, each followed
    by ReLU, and a final 2x2 max-pool per block) feed a three-layer fully
    connected head. Every block halves the spatial size, so a 224x224 input
    reaches the head as 512 x 7 x 7 = 25088 features, which is what the first
    linear layer expects.
    """

    def __init__(self):
        super(Network, self).__init__()
        self.categories = 1000  # number of ImageNet categories
        # Layer order inside each block is kept exactly (conv, relu, ..., pool)
        # so that sub-module indices, and therefore state-dict keys, do not move.
        self.block1 = self._conv_block(3, 64, num_convs=2)
        self.block2 = self._conv_block(64, 128, num_convs=2)
        self.block3 = self._conv_block(128, 256, num_convs=3)
        self.block4 = self._conv_block(256, 512, num_convs=3)
        self.block5 = self._conv_block(512, 512, num_convs=3)
        self.classify = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(in_features=4096, out_features=self.categories),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, num_convs):
        """Build ``num_convs`` (Conv2d 3x3 + ReLU) pairs followed by a 2x2 max-pool."""
        layers = []
        for _ in range(num_convs):
            layers.append(
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    padding=1,
                )
            )
            layers.append(nn.ReLU(inplace=True))
            # Only the first convolution of a block changes the channel count.
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run a batch through the five blocks, flatten it, and classify it.

        Args:
            x: Input batch; the blocks start with a 3-input-channel Conv2d.

        Returns:
            The output of the ``classify`` head for every sample of the batch.
        """
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        # Collapse everything but the batch dimension before the linear head.
        x = x.view(x.size(0), -1)
        x = self.classify(x)
        return x

# Build the network and hand it the parameters downloaded from the VGG16
# checkpoint URL (give_params_to_model is defined elsewhere; presumably it
# copies them into mynet -- verify against its definition).
mynet = Network()
params_pre = model_zoo.load_url('https://download.pytorch.org/models/vgg16-397923af.pth')
give_params_to_model(mynet, params_pre)

# Freeze every parameter currently in the network.
for param in mynet.parameters():
    param.requires_grad = False

# Swap the last layer of the head for a new 2-output layer. It is created after
# the freeze loop, so its parameters still require gradients. Item assignment
# on nn.Sequential is the public way to replace a sub-module (no need to reach
# into the private `_modules` dict).
mynet.classify[6] = nn.Linear(in_features=4096, out_features=2)
learning_rate = 1e-4
# Only the new layer's parameters are given to the optimizer.
optimizer_mynet = torch.optim.SGD(mynet.classify[6].parameters(), lr=learning_rate)

print(mynet)

Network(
  (block1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block5): Sequential(
    (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classify): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=224, out_features=2, bias=True)
  )
)

My trainer function:

def trainer_exam(model, dataloader, num_epochs=1, optimizer=None, train=True):
    """Generic loop that either trains or evaluates ``model``.

    The same function serves for training a network and for measuring its
    performance on the validation split.

    Args:
        model: Network to train (or evaluate). May be the whole network or a
            single part of it; it is called directly on every input batch, so
            the batches must already have the shape that ``model`` expects.
        dataloader: Mapping indexed by ``'train'`` / ``'valid'`` whose values
            yield ``(inputs, classes)`` batches.
        num_epochs (int, optional): Number of passes over the split
            (default: 1).
        optimizer: Optimizer used for training, typically a ``torch.optim``
            object. Required when ``train`` is True, ignored otherwise.
        train (bool, optional): Run in train mode on the ``'train'`` split
            (True, default) or in eval mode on the ``'valid'`` split (False).

    Raises:
        ValueError: If ``train`` is True and no optimizer was given.
    """
    # Fail before touching any data instead of crashing on the first batch.
    if train and optimizer is None:
        raise ValueError('Pass optimizer for train mode')

    if train:
        model.train()
        phase = 'train'
    else:
        model.eval()
        phase = 'valid'
    print("La phase est {}".format(phase))

    # The criterion is stateless, so build it once rather than once per batch.
    loss_function = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        total = 0
        running_loss = 0.0
        running_corrects = 0
        for batch_counter, (inputs, classes) in enumerate(dataloader[phase], start=1):
            # Gradients are only tracked when training.
            with torch.set_grad_enabled(train):
                outputs = model(inputs)
                loss = loss_function(outputs, classes)

            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            preds = outputs.argmax(dim=1)
            running_loss += loss.item()
            running_corrects += (preds == classes).sum().item()
            total += classes.size(0)

            print("Cumulated loss of the " + str(batch_counter) + " first batches: {}".format(running_loss))

        epoch_loss = float(running_loss)
        # Accuracy over the samples actually seen, not a hard-coded dataset size.
        epoch_acc = running_corrects / total if total else 0.0
        print('Loss: {:}, Acc: {:}'.format(epoch_loss, epoch_acc))

# Train through the WHOLE network, not just `mynet.classify`: the dataloader
# yields raw image batches, which must pass through the convolutional blocks
# and the flatten step before reaching the linear head. Handing the 4-D image
# tensor directly to `classify` is what raises the "mat1 and mat2 shapes cannot
# be multiplied" error. Only classify[6] is updated, because those are the only
# parameters given to `optimizer_mynet`.
trainer_exam(
    model=mynet,
    dataloader=dataset_loader,
    num_epochs=10,
    optimizer=optimizer_mynet,
    train=True,
)

result :

La phase est train


RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_15252\302059662.py in
2
3
----> 4 trainer_exam(model = mynet.classify, dataloader=dataset_loader,
5 num_epochs = 10, optimizer=optimizer_mynet,train=True)
6

~\AppData\Local\Temp\ipykernel_15252\3161195043.py in trainer_exam(model, dataloader, num_epochs, optimizer, train)
32 #inputs , classes = Variable(torch.from_numpy(inputs)),Variable(torch.from_numpy(classes))
33
—> 34 outputs = model(inputs)
35 loss_function = nn.CrossEntropyLoss()
36 loss = loss_function(outputs,classes)

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = ,

~\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
215 def forward(self, input):
216 for module in self:
→ 217 input = module(input)
218 return input
219

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = ,

~\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) → Tensor:
→ 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) → str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2688x224 and 25088x4096)

Your code is unfortunately still not executable and still works using the previously posted shape:

# Rebuild the network, swap its last head layer for a 2-output one, and run a
# random batch of shape (2, 3, 224, 224) through the full model.
mynet = Network()
mynet.classify[6] = nn.Linear(4096, 2)

x = torch.randn((2, 3, 224, 224))
out = mynet(x)

Hi,

I’ve gotten the same error. Can you help me debug it?

# Define the CNN architecture
class TimeSeriesCNN(nn.Module):
    """1-D CNN: two Conv1d/ReLU/MaxPool1d stages followed by a two-layer head.

    The head works on the flattened convolutional output, so the first linear
    layer needs ``in_features`` equal to (output channels) x (length left after
    the two conv/pool stages).

    Args:
        input_channels: Number of channels of the input series.
        output_size: Number of values produced per sample.
        sequence_length (int, optional): Length of the time axis of the
            inputs. When given, the size of the first linear layer is measured
            by running a dummy tensor of that length through the convolutional
            stack, so it always matches what ``forward`` produces. When omitted,
            the legacy sizing ``28 * final_sequence_length`` is used, which
            reads the module-level ``final_sequence_length``.
    """

    def __init__(self, input_channels, output_size, sequence_length=None):
        super(TimeSeriesCNN, self).__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv1d(in_channels=input_channels, out_channels=16, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )
        if sequence_length is None:
            # Legacy path, kept so existing two-argument calls behave as before.
            # NOTE(review): the factor 28 does not match the 32 output channels
            # of the last Conv1d, and `final_sequence_length` must be the length
            # AFTER the conv/pool stages -- prefer passing `sequence_length`.
            flat_features = 28 * final_sequence_length
        else:
            flat_features = self._flattened_size(input_channels, sequence_length)
        self.fc_layer = nn.Sequential(
            nn.Linear(flat_features, 64),
            nn.ReLU(),
            nn.Linear(64, output_size)
        )

    def _flattened_size(self, input_channels, sequence_length):
        """Return the per-sample feature count that conv_layer yields for this length."""
        with torch.no_grad():
            probe = torch.zeros(1, input_channels, sequence_length)
            return self.conv_layer(probe).flatten(1).size(1)

    def forward(self, x):
        """Apply the convolutional stack, flatten per sample, then the linear head."""
        x = self.conv_layer(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc_layer(x)
        return x

Parameters

input_channels = 1  # Number of input channels (features)
output_size = 4  # Number of output classes or regression values
batch_size = 32
learning_rate = 0.001
num_epochs = 10
# Read by TimeSeriesCNN when sizing its first linear layer; it has to describe
# the length that reaches that layer (after the conv/pool stages), otherwise
# the forward pass fails with a mat1/mat2 shape error.
final_sequence_length = 10  # Adjust this according to your time series sequence length
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize the model, loss function, and optimizer
model = TimeSeriesCNN(input_channels, output_size)
criterion = nn.MSELoss()  # You can change the loss function as needed
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for batch in dataloader:
        optimizer.zero_grad()
        outputs = model(batch.unsqueeze(1))  # Add the channel dimension
        # NOTE(review): `target` is not defined in this snippet; it must be
        # supplied (e.g. by the dataset) before this line can run.
        loss = criterion(outputs, target)  # Replace 'target' with your target values
        loss.backward()
        optimizer.step()
    # Reports the loss of the last batch of the epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


RuntimeError Traceback (most recent call last)
Cell In[36], line 10
8 for batch in dataloader:
9 optimizer.zero_grad()
—> 10 outputs = model(batch.unsqueeze(1)) # Add the channel dimension
11 loss = criterion(outputs, target) # Replace ‘target’ with your target values
12 loss.backward()

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don’t have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = ,

Cell In[29], line 22, in TimeSeriesCNN.forward(self, x)
20 x = self.conv_layer(x)
21 x = x.view(x.size(0), -1) # Flatten the tensor
—> 22 x = self.fc_layer(x)
23 return x

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don’t have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = ,

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/container.py:217, in Sequential.forward(self, input)
215 def forward(self, input):
216 for module in self:
→ 217 input = module(input)
218 return input

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don’t have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = ,

File ~/anaconda3/envs/python310_pytorch/lib/python3.10/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
113 def forward(self, input: Tensor) → Tensor:
→ 114 return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x288 and 320x64)

The error is raised in the first linear layer of self.fc_layer. Change its in_features to 288 and it should work.

Thanks! I solved it by directly changing

nn.Linear(28 * final_sequence_length, 64)

to

nn.Linear(288, 64)