Losing my mind over mat1 dim1 and mat2 dim0

Really losing my cool here with this CNN. I’m not sure where I am going wrong. Another set of eyes would be super helpful

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1,int(61952/8))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


CNNModel = Net()
print(CNNModel)
def train_model(model, criterion, optimizer, scheduler, num_epochs=20):
  device = torch.device("cuda")
  model.to(device)
  since = time.time()

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    print('----------------')

    # Each epoch has a training and validation phase
    for phase in ['mytrain', 'mytest']:
      if phase == 'mytrain':
        model.train()  # Set model to training mode
      else:
        model.eval()   # Set model to evaluate mode

      running_loss = 0.0
      running_corrects = 0

      # Iterate over data
      for inputs, labels in dataloaders[phase]:
        # move data to GPU
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(phase == 'mytrain'):
          outputs = model(inputs)
          _, preds = torch.max(outputs, 1)
          loss = criterion(outputs, labels)

          # backward + optimize only if in training phase
          if phase == 'mytrain':
            loss.backward()
            optimizer.step()

        # statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

      # update learning rate with scheduler
      if phase == 'mytrain':CNNModel
      scheduler.step()

      epoch_loss = running_loss / dataset_sizes[phase]
      epoch_acc = running_corrects.double() / dataset_sizes[phase]

      print(f"{phase} loss: {epoch_loss:.4f} acc: {epoch_acc:.4f}")

      # deep copy the model with best accuracy on validation set
      if phase == 'mytest' and epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    print()

  time_elapsed = time.time() - since
  print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
  print(f"Best val acc: {best_acc:4f}")

  # load best model weights
  model.load_state_dict(best_model_wts)
  return model
loss = nn.CrossEntropyLoss()

optimizer = optim.SGD(CNNModel.parameters(), lr=0.01, momentum=0.5)

# Decay LR by a factor of 0.1 every 5 epochs

exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

train_model(CNNModel, loss, optimizer, exp_lr_scheduler, num_epochs=20)
Epoch 1/20
----------------
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-567-757004c76098> in <module>()
      6 exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
      7 
----> 8 train_model(CNNModel, loss, optimizer, exp_lr_scheduler, num_epochs=20)

5 frames
<ipython-input-566-0ce3493ed3b6> in train_model(model, criterion, optimizer, scheduler, num_epochs)
     33         # track history if only in train
     34         with torch.set_grad_enabled(phase == 'mytrain'):
---> 35           outputs = model(inputs)
     36           _, preds = torch.max(outputs, 1)
     37           loss = criterion(outputs, labels)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

<ipython-input-563-8d673c2ca45f> in forward(self, x)
     21         x = self.pool(F.relu(self.conv2(x)))
     22         x = x.view(-1,int(61952/8))
---> 23         x = F.relu(self.fc1(x))
     24         x = F.relu(self.fc2(x))
     25         x = self.fc3(x)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     92 
     93     def forward(self, input: Tensor) -> Tensor:
---> 94         return F.linear(input, self.weight, self.bias)
     95 
     96     def extra_repr(self) -> str:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1751     if has_torch_function_variadic(input, weight):
   1752         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753     return torch._C._nn.linear(input, weight, bias)
   1754 
   1755 

RuntimeError: mat1 dim 1 must match mat2 dim 0

It looks like the input to your first fully connected layer doesn’t match the requires shape. The view will yield a shape of [?, 7744] while the layer expects an input shape of [?, 400] as 400 = 16 * 5 * 5.

Thank you for taking a look. I should be inputting a batch of 8, 3-channel images with shape 100x100. I have 7 class labels. I’m not too sure where I am going wrong. I updated my x.view to (-1,8) and still get dim errors

update your x.view as x = x.view(x.size(0), -1) .

Thanks for taking a look, I still get a mat1 dim error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-139-757004c76098> in <module>()
      6 exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
      7 
----> 8 train_model(CNNModel, loss, optimizer, exp_lr_scheduler, num_epochs=20)

5 frames
<ipython-input-132-0ce3493ed3b6> in train_model(model, criterion, optimizer, scheduler, num_epochs)
     33         # track history if only in train
     34         with torch.set_grad_enabled(phase == 'mytrain'):
---> 35           outputs = model(inputs)
     36           _, preds = torch.max(outputs, 1)
     37           loss = criterion(outputs, labels)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

<ipython-input-138-f35b0d1715f8> in forward(self, x)
     21         x = self.pool(F.relu(self.conv2(x)))
     22         x = x.view(x.size(0), -1)
---> 23         x = F.relu(self.fc1(x))
     24         x = F.relu(self.fc2(x))
     25         x = self.fc3(x)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     92 
     93     def forward(self, input: Tensor) -> Tensor:
---> 94         return F.linear(input, self.weight, self.bias)
     95 
     96     def extra_repr(self) -> str:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1751     if has_torch_function_variadic(input, weight):
   1752         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753     return torch._C._nn.linear(input, weight, bias)
   1754 
   1755 

RuntimeError: mat1 dim 1 must match mat2 dim 0

With:

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 8)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


CNNModel = Net()

Do this:

class Net(nn.Module):

def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(16 * 5 * 5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 8)

def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    #x = x.view(x.size(0), -1)
    #x = F.relu(self.fc1(x))
    #x = F.relu(self.fc2(x))
    #x = self.fc3(x)
    return x

CNNModel = Net()

random_input = torch.randn(32, 3, 100, 100) (where 100x100 is your image dimension)

out = CNNModel(random_input)
out.shape

your out shape would be something like [32, C, W, H] and this C * W * H should be given as input to your self.fc1.

1 Like

Wow amazing! Thank you so much!

my output was:
torch.Size([32, 16, 22, 22])

why do I get the 22, 22?

Because of the two conv layers and the pooling you are using. Check the math.
Now, give 16x22x22 as input to self.fc1.

Apologies for my ignorance, but what numbers give me the product 22? I thought it was conv1 kernel* conv2 kernel?

No, it’s not like that. Understand the math of convolution
Check this link for conv documentation:

https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d