RuntimeError: mat1 dim 1 must match mat2 dim 0 for LSTM binary classification

I have a binary classification problem out of numeric temporal series

my LSTM is as follows :

class LSTMClassifier(nn.Module): 
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim= 1 ):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embed = nn.Embedding(input_dim, hidden_dim) #shape of input 3 input_dim, batch, hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.LSTM( hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.batch_size = None
        #self.hidden = None
    
    def forward(self, x):
        batch_size = x.size(0) 

        # transpose to get the final input shape (seq_len, batch, input_size)
        x = x.t()
        x = self.embed(x)

        #h_n of shape (num_layers * num_directions, batch, hidden_size)
        h0, c0 = self.init_hidden(batch_size)
        out, (hn, cn) = self.rnn(x)
        print('out', out.size())
        #this so i can extract the last layer
        out = out[: , :, -1]
        #reshape the tensor
        out.unsqueeze_(-1)
        out = self.fc(out)
        return out

    def init_hidden(self, batch_size):
        h0 = torch.zeros(self.layer_dim, batch_size, self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, batch_size, self.hidden_dim)
        return [t.cuda() for t in (h0, c0)]

Since my data is numeric I have 17 different feature for each intrance


--------------------------------------------------------------------------------
 | f1  |  f2  |   ....                                                    | f17 |  label  |
 + ---------------------------------------------------------------------------- +
 |                                                                                               |
 +                                                                                             +
 |

the parameter are :

input_dim = 30    
hidden_dim = 365
layer_dim = 3
output_dim = 1
seq_dim = 17

code for trainning :

for epoch in range(1, n_epochs + 1): 

    for i, (x_batch, y_batch) in enumerate(trn_dl):

        model.train()

        correctt, totalt = 0, 0

        x_batch = x_batch.cuda().long()

        y_batch = y_batch.cuda()

        opt.zero_grad()

        #sched.step()

        out = model(x_batch)

        predst = F.log_softmax(out, dim=1).argmax(dim=1)

        loss = criterion(out, y_batch)

I triyed everything, still got the error RuntimeError: mat1 dim 1 must match mat2 dim 0

here is my model shape

LSTMClassifier(
  (embed): Embedding(30, 365)
  (rnn): LSTM(365, 3, batch_first=True)
  (fc): Linear(in_features=365, out_features=1, bias=True)
)

I printed out each phase shapes

input torch.Size([90, 17])

embed torch.Size([17, 90, 365])

hidden torch.Size([3, 90, 365])

out torch.Size([17, 90, 3])

out unsqueeze torch.Size([17, 90, 1])

can someone please help?

which line was the error pointing to?

the error was pointed to the fc layer

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-19-9d6ee25d2d64> in forward(self, x)
     35         print('out unsqueeze', out.size())
     36         #output of shape (seq_len, batch, num_directions * hidden_size)
---> 37         out = self.fc(out)
     38 
     39         print('out fc', out.size())

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     91 
     92     def forward(self, input: Tensor) -> Tensor:
---> 93         return F.linear(input, self.weight, self.bias)
     94 
     95     def extra_repr(self) -> str:

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1690         ret = torch.addmm(bias, input, weight.t())
   1691     else:
-> 1692         output = input.matmul(weight.t())
   1693         if bias is not None:
   1694             output += bias

RuntimeError: mat1 dim 1 must match mat2 dim 0

it was pointed to fc

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-19-9d6ee25d2d64> in forward(self, x)
    
---> 37         out = self.fc(out)
     38 
     39         print('out fc', out.size())

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     91 
     92     def forward(self, input: Tensor) -> Tensor:
---> 93         return F.linear(input, self.weight, self.bias)
     94 
     95     def extra_repr(self) -> str:

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1690         ret = torch.addmm(bias, input, weight.t())
   1691     else:
-> 1692         output = input.matmul(weight.t())
   1693         if bias is not None:
   1694             output += bias

RuntimeError: mat1 dim 1 must match mat2 dim 0

are you sure the inputs of the linear function are the right shape. Could you print them out?

the input of the linear function is of a shape : torch.Size([17, 90, 1])

that would be why. A linear layer only takes an input of that has two dimensions, the batch_size and the input features. Also the your input to the linear layer says their should be 365 input features. So you need to change the input shape and the input values when you define fc

can you please broadly explain?

The docs are here for a linear layer. Basically a linear layer only takes a shape of (batch_size, input_size) the shape of the variable you are passing it is (batch_size, input_size, 1) which has too many dimensions for your linear layers so it will output an error. Also when you defined the linear layer in your model the input to the layer was 365 which does not match the input shape you are passing it too. Even if you don’t unsqueeze the input to your linear layer and the shape is correct there will still be an error because the two sizes do not match. So you need to change the input of your linear layer to 90 and delete the unsqueeze call so your input only has two dimensions.