Losing my mind over mat1 dim1 and mat2 dim0

Jbrec · May 6, 2021, 9:12pm

Really losing my cool here with this CNN. I’m not sure where I am going wrong. Another set of eyes would be super helpful

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1,int(61952/8))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


CNNModel = Net()
print(CNNModel)

def train_model(model, criterion, optimizer, scheduler, num_epochs=20):
  device = torch.device("cuda")
  model.to(device)
  since = time.time()

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    print('----------------')

    # Each epoch has a training and validation phase
    for phase in ['mytrain', 'mytest']:
      if phase == 'mytrain':
        model.train()  # Set model to training mode
      else:
        model.eval()   # Set model to evaluate mode

      running_loss = 0.0
      running_corrects = 0

      # Iterate over data
      for inputs, labels in dataloaders[phase]:
        # move data to GPU
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(phase == 'mytrain'):
          outputs = model(inputs)
          _, preds = torch.max(outputs, 1)
          loss = criterion(outputs, labels)

          # backward + optimize only if in training phase
          if phase == 'mytrain':
            loss.backward()
            optimizer.step()

        # statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

      # update learning rate with scheduler
      if phase == 'mytrain':CNNModel
      scheduler.step()

      epoch_loss = running_loss / dataset_sizes[phase]
      epoch_acc = running_corrects.double() / dataset_sizes[phase]

      print(f"{phase} loss: {epoch_loss:.4f} acc: {epoch_acc:.4f}")

      # deep copy the model with best accuracy on validation set
      if phase == 'mytest' and epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    print()

  time_elapsed = time.time() - since
  print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
  print(f"Best val acc: {best_acc:4f}")

  # load best model weights
  model.load_state_dict(best_model_wts)
  return model

loss = nn.CrossEntropyLoss()

optimizer = optim.SGD(CNNModel.parameters(), lr=0.01, momentum=0.5)

# Decay LR by a factor of 0.1 every 5 epochs

exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

train_model(CNNModel, loss, optimizer, exp_lr_scheduler, num_epochs=20)

Epoch 1/20
----------------
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-567-757004c76098> in <module>()
      6 exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
      7 
----> 8 train_model(CNNModel, loss, optimizer, exp_lr_scheduler, num_epochs=20)

5 frames
<ipython-input-566-0ce3493ed3b6> in train_model(model, criterion, optimizer, scheduler, num_epochs)
     33         # track history if only in train
     34         with torch.set_grad_enabled(phase == 'mytrain'):
---> 35           outputs = model(inputs)
     36           _, preds = torch.max(outputs, 1)
     37           loss = criterion(outputs, labels)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

<ipython-input-563-8d673c2ca45f> in forward(self, x)
     21         x = self.pool(F.relu(self.conv2(x)))
     22         x = x.view(-1,int(61952/8))
---> 23         x = F.relu(self.fc1(x))
     24         x = F.relu(self.fc2(x))
     25         x = self.fc3(x)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     92 
     93     def forward(self, input: Tensor) -> Tensor:
---> 94         return F.linear(input, self.weight, self.bias)
     95 
     96     def extra_repr(self) -> str:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1751     if has_torch_function_variadic(input, weight):
   1752         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753     return torch._C._nn.linear(input, weight, bias)
   1754 
   1755 

RuntimeError: mat1 dim 1 must match mat2 dim 0

eqy · May 7, 2021, 6:23am

It looks like the input to your first fully connected layer doesn’t match the requires shape. The view will yield a shape of [?, 7744] while the layer expects an input shape of [?, 400] as 400 = 16 * 5 * 5.

Jbrec · May 7, 2021, 11:46am

Thank you for taking a look. I should be inputting a batch of 8, 3-channel images with shape 100x100. I have 7 class labels. I’m not too sure where I am going wrong. I updated my x.view to (-1,8) and still get dim errors

Tejan_Mehndiratta · May 7, 2021, 12:26pm

update your x.view as x = x.view(x.size(0), -1) .

Jbrec · May 7, 2021, 12:50pm

Thanks for taking a look, I still get a mat1 dim error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-139-757004c76098> in <module>()
      6 exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
      7 
----> 8 train_model(CNNModel, loss, optimizer, exp_lr_scheduler, num_epochs=20)

5 frames
<ipython-input-132-0ce3493ed3b6> in train_model(model, criterion, optimizer, scheduler, num_epochs)
     33         # track history if only in train
     34         with torch.set_grad_enabled(phase == 'mytrain'):
---> 35           outputs = model(inputs)
     36           _, preds = torch.max(outputs, 1)
     37           loss = criterion(outputs, labels)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

<ipython-input-138-f35b0d1715f8> in forward(self, x)
     21         x = self.pool(F.relu(self.conv2(x)))
     22         x = x.view(x.size(0), -1)
---> 23         x = F.relu(self.fc1(x))
     24         x = F.relu(self.fc2(x))
     25         x = self.fc3(x)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     92 
     93     def forward(self, input: Tensor) -> Tensor:
---> 94         return F.linear(input, self.weight, self.bias)
     95 
     96     def extra_repr(self) -> str:

/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1751     if has_torch_function_variadic(input, weight):
   1752         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753     return torch._C._nn.linear(input, weight, bias)
   1754 
   1755 

RuntimeError: mat1 dim 1 must match mat2 dim 0

With:

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 8)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


CNNModel = Net()

Tejan_Mehndiratta · May 7, 2021, 1:11pm

Do this:

class Net(nn.Module):

def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(16 * 5 * 5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 8)

def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    #x = x.view(x.size(0), -1)
    #x = F.relu(self.fc1(x))
    #x = F.relu(self.fc2(x))
    #x = self.fc3(x)
    return x

CNNModel = Net()

random_input = torch.randn(32, 3, 100, 100) (where 100x100 is your image dimension)

out = CNNModel(random_input)
out.shape

your out shape would be something like [32, C, W, H] and this C * W * H should be given as input to your self.fc1.

Jbrec · May 7, 2021, 1:17pm

Wow amazing! Thank you so much!

my output was:
torch.Size([32, 16, 22, 22])

why do I get the 22, 22?

Tejan_Mehndiratta · May 7, 2021, 1:22pm

Because of the two conv layers and the pooling you are using. Check the math.
Now, give 16x22x22 as input to self.fc1.

Jbrec · May 7, 2021, 1:45pm

Apologies for my ignorance, but what numbers give me the product 22? I thought it was conv1 kernel* conv2 kernel?

Tejan_Mehndiratta · May 7, 2021, 3:08pm

No, it’s not like that. Understand the math of convolution
Check this link for conv documentation:

https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d