Mat1 and mat2 shapes cannot be multiplied (1x57344 and 8192x256)

I am trying to build a simple neural network that combines a CNN and an RNN, but I am getting this error. What am I missing here?

import torch
import torch.nn as nn
from torchsummary import summary

# fit(a, b): eagerly pushes a random (1, 1, 256, 64) tensor through a CNN, a
# Linear + ReLU projection, a bidirectional LSTM and an output Linear + softmax,
# then tries to wrap the pieces in nn.Sequential and print a summary.
#   a: the alphabet; len(a) + 1 is used as the number of output classes.
#   b: the maximum label length (stored as max_str_len).
# NOTE(review): the indentation of this paste was lost; everything down to
# `return model` presumably belongs to the body of fit() -- confirm.
def fit(a, b):
# CTC loss helper. It is defined here but never called in the code shown.
def ctc_loss_func(y_pred, names, input_length, name_length):
# Drop the first two entries along dim 1 of y_pred.
y_pred = y_pred[:, 2:, :]
# transpose(0, 1) swaps the first two dims before the call to ctc_loss.
# NOTE(review): F.ctc_loss documents its first argument as log-probabilities;
# the y_pred built further down comes from softmax -- confirm what is passed in.
return nn.functional.ctc_loss(y_pred.transpose(0, 1), names, input_length.squeeze(), name_length.squeeze(), blank=0)

# CNN feature extractor. Shapes below are for the (1, 1, 256, 64) input used
# a few lines further down.
input_data = nn.Sequential(
    # 3x3 conv with padding=1 keeps 256x64; 2x2 max-pool -> (1, 32, 128, 32)
    nn.Conv2d(1, 32, kernel_size=3, padding=1),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    # same pattern again -> (1, 64, 64, 16)
    nn.Conv2d(32, 64, kernel_size=3, padding=1),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout2d(p=0.3),
    # padding=(1, 0): height stays 64, width shrinks 16 -> 14;
    # the (1, 2) max-pool then halves only the width -> (1, 128, 64, 7)
    nn.Conv2d(64, 128, kernel_size=3, padding=(1, 0)),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=(1, 2)),
    nn.Dropout2d(p=0.3),
    # flattens to (1, 128 * 64 * 7) = (1, 57344)
    nn.Flatten()
)

# Run the CNN once on random data; `inner` is (1, 57344) after this line.
inner = input_data(torch.randn(1, 1, 256, 64))

# CNN to RNN
# The Linear layer declares 64*128 = 8192 input features, but `inner` carries
# 57344, which is the "1x57344 and 8192x..." mismatch reported in the error.
# NOTE(review): in_features would have to be 128 * 64 * 7 to accept the
# flattened CNN output as it stands.

inner = nn.Sequential(
    nn.Linear(64*128, 64),
    nn.ReLU()
)(inner)

# RNN
# unsqueeze(0) adds a leading dim; with batch_first=True the LSTM reads dim 0
# as batch and dim 1 as sequence. bidirectional=True makes the last output dim
# 2 * 256. squeeze(0) removes the leading dim again.

num_features = inner.size()[1]
inner = inner.unsqueeze(0)
inner, _ = nn.LSTM(num_features, 256, bidirectional=True, batch_first=True)(inner)
inner = inner.squeeze(0)

# `alphabets` is assigned but not used again inside the code shown.
alphabets = a
num_characters = len(a) + 1
max_str_len = b

# Output
num_features = 2 * 256
batch_size = 1
# The Linear layer emits max_str_len * num_characters values per input row.
inner = nn.Linear(num_features, max_str_len * num_characters)(inner)
# NOTE(review): this target shape has num_features (512) times more elements
# than one row of the Linear output; presumably the intended shape is
# (batch_size, max_str_len, num_characters) -- confirm.
inner = inner.reshape(batch_size, max_str_len * num_characters, num_features)
y_pred = nn.functional.softmax(inner, dim=2)

# NOTE(review): `inner` and `y_pred` are tensors at this point, not nn.Module
# instances, while nn.Sequential is a container of modules.
model = nn.Sequential(input_data, inner, y_pred)

summary(model, input_size=(1, 256, 64))

return model

# NOTE(review): the quote characters in the literal below appear as typographic
# quotes, so the line is not valid Python as pasted; the original ASCII quoting
# cannot be recovered from this text.
alphabets = u’“0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ#$%'”‘"’ ()*+,-:.=?@^`~µÂ×öΩμ√_’
max_str_len = 87 # max length of input name
# Call fit with the alphabet and the maximum label length; the returned value
# is not stored.
fit(alphabets,max_str_len)

This is the error I am getting. Please help.

in fit(a, b)
34 # CNN to RNN
35
—> 36 inner = nn.Sequential(
37 nn.Linear(64*128, 256),
38 nn.ReLU()

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/container.py in forward(self, input)
202 def forward(self, input):
203 for module in self:
→ 204 input = module(input)
205 return input
206

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.9/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) → Tensor:
→ 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) → str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x57344 and 8192x256)

The error you are encountering is due to a mismatch in the input and output dimensions of the Linear layer in your nn.Sequential. The input shape is not matching the expected shape of the Linear layer.

The input tensor inner has the shape (1, 1, 256, 64), but you’re using a Linear layer with input dimension 64*128 which is 8192. This is not the correct input size for this layer.

To resolve this issue, you need to first flatten the input tensor and then use the correct input dimension for the Linear layer. Based on the shape of inner, the input dimension should be 1*256*64.

Here’s the corrected part of the code, to be used as a replacement:

import torch
import torch.nn as nn

# Random stand-in for one single-channel 256 x 64 image (batch of 1).
# Note that no convolutional layers are applied in this snippet.
input_data = torch.randn(1, 1, 256, 64)

# Collapse every dimension after the batch dimension into one feature vector.
inner = torch.flatten(input_data, start_dim=1)  # torch.Size([1, 16384])

# CNN to RNN: project the 16384 flattened features down to 256, then ReLU.
inner = nn.ReLU()(nn.Linear(inner.shape[1], 256)(inner))

import torch
import torch.nn as nn
from torchsummary import summary

# fit(a, b): second version of the function. It still defines the CNN, but then
# feeds a flattened random (1, 1, 256, 64) tensor straight into a Linear + ReLU
# projection, a bidirectional LSTM and an output Linear + softmax, and finally
# tries to wrap the results in nn.Sequential and print a summary.
#   a: the alphabet; len(a) + 1 is used as the number of output classes.
#   b: the maximum label length (stored as max_str_len).
# NOTE(review): the indentation of this paste was lost; everything down to
# `return model` presumably belongs to the body of fit() -- confirm.
def fit(a, b):
# CTC loss helper. It is defined here but never called in the code shown.
def ctc_loss_func(y_pred, names, input_length, name_length):
# Drop the first two entries along dim 1 of y_pred.
y_pred = y_pred[:, 2:, :]
# transpose(0, 1) swaps the first two dims before the call to ctc_loss.
# NOTE(review): F.ctc_loss documents its first argument as log-probabilities;
# the y_pred built further down comes from softmax -- confirm what is passed in.
return nn.functional.ctc_loss(y_pred.transpose(0, 1), names, input_length.squeeze(), name_length.squeeze(), blank=0)

# CNN feature extractor. For a (1, 1, 256, 64) input it would produce a
# flattened (1, 128 * 64 * 7) = (1, 57344) tensor.
input_data = nn.Sequential(
    nn.Conv2d(1, 32, kernel_size=3, padding=1),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Conv2d(32, 64, kernel_size=3, padding=1),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout2d(p=0.3),
    nn.Conv2d(64, 128, kernel_size=3, padding=(1, 0)),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=(1, 2)),
    nn.Dropout2d(p=0.3),
    nn.Flatten()
)

# NOTE(review): this rebinds `input_data` from the CNN defined just above to a
# raw random tensor, so the convolutional stack is never applied to anything.
input_data = torch.randn(1, 1, 256, 64)

# Flatten the input tensor
inner = input_data.view(1, -1) # torch.Size([1, 16384])

# CNN to RNN
# Linear maps the 16384 raw values to 256 features -> (1, 256).
inner = nn.Sequential(
    nn.Linear(1*256*64, 256),
    nn.ReLU()
)(inner)


# RNN
# unsqueeze(0) gives (1, 1, 256); with batch_first=True the LSTM reads dim 0 as
# batch and dim 1 as sequence. The bidirectional output is (1, 1, 2 * 256), and
# squeeze(0) leaves (1, 512).

num_features = inner.size()[1]
inner = inner.unsqueeze(0)
inner, _ = nn.LSTM(num_features, 256, bidirectional=True, batch_first=True)(inner)
inner = inner.squeeze(0)

# `alphabets` is assigned but not used again inside the code shown.
alphabets = a
num_characters = len(a) + 1
max_str_len = b

# Output
num_features = 2 * 256
batch_size = 1
# The Linear output is (1, max_str_len * num_characters).
inner = nn.Linear(num_features, max_str_len * num_characters)(inner)
# The target shape needs max_str_len * num_characters * 512 elements, but the
# tensor only holds max_str_len * num_characters, so reshape raises.
# NOTE(review): presumably the intended shape is
# (batch_size, max_str_len, num_characters) -- confirm.
inner = inner.reshape(batch_size, max_str_len * num_characters, num_features)
y_pred = nn.functional.softmax(inner, dim=2)

# NOTE(review): in this version all three arguments are tensors, not nn.Module
# instances, while nn.Sequential is a container of modules.
model = nn.Sequential(input_data, inner, y_pred)

summary(model, input_size=(1, 256, 64))

return model

# NOTE(review): the quote characters in the literal below appear as typographic
# quotes, so the line is not valid Python as pasted; the original ASCII quoting
# cannot be recovered from this text.
alphabets = u’“0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ#$%'”‘"’ ()*+,-:.=?@^`~µÂ×öΩμ√_’
max_str_len = 87 # max length of input name
# Call fit with the alphabet and the maximum label length; the returned value
# is not stored.
fit(alphabets,max_str_len)

So here is the updated code with your feedback, but now I am getting this error:

RuntimeError Traceback (most recent call last)
in <cell line: 66>()
64 alphabets = u’“0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ#$%'”‘"’ ()*+,-:.=?@^`~µÂ×öΩμ√_’
65 max_str_len = 87 # max length of input name
—> 66 fit(alphabets,max_str_len)

in fit(a, b)
53 batch_size = 1
54 inner = nn.Linear(num_features, max_str_len * num_characters)(inner)
—> 55 inner = inner.reshape(batch_size, max_str_len * num_characters, num_features)
56 y_pred = nn.functional.softmax(inner, dim=2)
57

RuntimeError: shape ‘[1, 7917, 512]’ is invalid for input of size 7917