mat1 and mat2 shapes cannot be multiplied (128x20 and 7304x16)

Hello, I'm currently running into a matrix size mismatch during multiplication. Here I use x_train with shape (7304, 20) and y_train with shape (7304, 3). I intend to use an AE-Transformer model and feed the input through a DataLoader with batch_size = 128. Here is the model:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_dim, embed_dim, noise_level):
        super(Autoencoder, self).__init__()
        self.input_size, self.hidden_dim, self.noise_level = input_size, embed_dim, noise_level
        self.embed_dim = embed_dim
        self.fc1 = nn.Linear(self.input_size, self.hidden_dim)
        self.fc2 = nn.Linear(self.hidden_dim, self.input_size)

    def encoder(self, x):
        x = self.fc1(x)
        h1 = F.relu(x)
        return h1

    def mask(self, x):
        # randn_like returns a tensor of the same shape whose elements are sampled from a standard normal distribution
        corrupted_x = x + self.noise_level + torch.randn_like(x)
        return corrupted_x

    def decoder(self, x):
        h2 = self.fc2(x)
        return h2

    def forward(self, x):
        out = self.mask(x)  # Add noise before feeding the network
        encoder = self.encoder(out)
        decoder = self.decoder(encoder)
        return encoder, decoder


## Transformer
### Positional encoding
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.0, max_len=16):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)

        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0).transpose(0, 1)

        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(1), :].squeeze(1)
        return x


class Net(nn.Module):
    def __init__(self, feature_size, num_layers, n_head, dropout, noise_level, embed_dim):
        super(Net, self).__init__()
        self.embed_dim = embed_dim
        self.hidden_dim = 4 * embed_dim
        self.auto_hidden = int(feature_size / 2)
        input_size = self.auto_hidden
        self.pos = PositionalEncoding(d_model=input_size, max_len=input_size)
        encoder_layers = nn.TransformerEncoderLayer(d_model=input_size, nhead=n_head, dim_feedforward=self.hidden_dim, dropout=dropout)
        self.cell = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.linear = nn.Linear(input_size, 1)
        self.autoencoder = Autoencoder(input_size=feature_size, hidden_dim=self.auto_hidden, embed_dim=embed_dim, noise_level=noise_level)

    def forward(self, x):
        batch_size, feature_num, feature_size = x.shape
        encode, decode = self.autoencoder(x.view(batch_size, -1).float())  # flatten to (batch_size, seq_len)
        out = encode.reshape(batch_size, -1, self.auto_hidden)
        out = self.pos(out)
        out = out.reshape(1, batch_size, -1)  # (1, batch_size, feature_size)
        out = self.cell(out)
        out = out.reshape(batch_size, -1)
        out = self.linear(out)

        return out, decode
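
For context, a minimal sketch of the data loading described at the top of this post (the exact DataLoader code isn't shown here; TensorDataset, shuffle=True, and the random placeholders are assumptions on the editor's part):

from torch.utils.data import DataLoader, TensorDataset
import torch

x_train = torch.randn(7304, 20)   # placeholder for the real features
y_train = torch.randn(7304, 3)    # placeholder for the real targets

loader = DataLoader(TensorDataset(x_train, y_train), batch_size=128, shuffle=True)
for x, y in loader:
    print(x.shape)                # torch.Size([128, 20]); after x.unsqueeze(1): [128, 1, 20]
    break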

Here is some information about the model after passing the data:

Net(
  (pos): PositionalEncoding()
  (cell): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=3652, out_features=3652, bias=True)
        )
        (linear1): Linear(in_features=3652, out_features=64, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=64, out_features=3652, bias=True)
        (norm1): LayerNorm((3652,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((3652,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (linear): Linear(in_features=3652, out_features=1, bias=True)
  (autoencoder): Autoencoder(
    (fc1): Linear(in_features=7304, out_features=16, bias=True)
    (fc2): Linear(in_features=16, out_features=7304, bias=True)
  )
)

And I got the error:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x20 and 7304x16)

How can I modify the model so that it handles the size of the data correctly when passing it through the model?

Hi @NMInh234,

Can you share the entire stack trace, rather than just the last two errors? Those are internal PyTorch functions, not the part of your script that's causing the error. All they say is that a matrix multiplication fails, but they don't tell you which nn.Linear object fails. Although, I think it may be this one: (fc1): Linear(in_features=7304, out_features=16, bias=True).

Secondly, what exactly is the size of your input tensor (including the batch size)? Is it [128, 7304, 20]?
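
For reference, here is how that kind of error message maps onto a layer (which layer fails in your model is still a guess until we see the full trace): mat1 is the input to the nn.Linear and mat2 is its transposed weight, so (128x20 and 7304x16) means a [128, 20] tensor was fed into a Linear that expects 7304 input features. A minimal sketch that reproduces it:

import torch
import torch.nn as nn

fc1 = nn.Linear(7304, 16)   # same shape as the fc1 in your printed model
x = torch.randn(128, 20)    # a [batch, 20] tensor instead of [batch, 7304]
out = fc1(x)                # RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x20 and 7304x16)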

Hi @AlphaBetaGamma96, here is the full error stack trace:

RuntimeError                              Traceback (most recent call last)
Cell In[59], line 12
     10 #Forward
     11 x = x.unsqueeze(1)
---> 12 output = model(x)
     13 loss = criterion(output,rul)
     15 #Backward and Optimize

File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[54], line 64, in Net.forward(self, x)
     62 def forward(self,x):
     63     batch_size, feature_num, feature_size = x.shape
---> 64     encode, decode = self.autoencoder(x.view(batch_size,-1).float()) # Equals batch_size * seq_len
     65     out = encode.reshape(batch_size,-1,self.auto_hidden)
     66     out = self.pos(out)

File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[54], line 24, in Autoencoder.forward(self, x)
     22 def forward (self, x):
     23     out = self.mask(x) # Adding noise to feed the network
---> 24     encoder = self.encoder(out)
     25     decoder = self.decoder(encoder)
     26     return encoder, decoder

Cell In[54], line 10, in Autoencoder.encoder(self, x)
      9 def encoder(self,x):
---> 10     x = self.fc1(x)
     11     h1 = F.relu(x)
     12     return h1

File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/.local/lib/python3.10/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x20 and 7304x16)

And as for the size of the input tensor, it's exactly

[128, 7304, 20]

As far as I can tell, I suspect the line out = encode.reshape(batch_size, -1, self.auto_hidden) is the problem, as it uses the batch_size of 128 and an auto_hidden of 20.

Isn’t the input actually [128, 7304*20] as you called x.view(batch_size, -1)?

You could check the shape of x and the dimensions of self.fc1 within the encoder method. The rightmost dimension of x should equal the in_features of fc1.
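
For example, something along these lines inside the encoder method (just a throwaway debugging sketch, not part of the final model):

def encoder(self, x):
    # The rightmost dim of x must equal fc1.in_features for the matmul to work
    print("x:", x.shape, "| fc1 in_features:", self.fc1.in_features, "| fc1 weight:", self.fc1.weight.shape)
    x = self.fc1(x)
    h1 = F.relu(x)
    return h1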

Hi @AlphaBetaGamma96, I have changed feature_size to 20 to address this problem, but then at the step that reshapes the autoencoder output encode into the Transformer input, I got another error:


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[117], line 12
     10 #Forward
     11 x = x.unsqueeze(1)
---> 12 output = model(x)
     13 loss = criterion(output,rul)
     15 #Backward and Optimize

File ~/.local/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[103], line 65, in Net.forward(self, x)
     63 batch_size, feature_num, feature_size = x.shape
     64 encode, decode = self.autoencoder(x.view(batch_size,-1).float()) # Equals batch_size * seq_len
---> 65 out = encode.reshape(batch_size, -1, self.auto_hidden)
     66 out = self.pos(out)
     67 out = out.reshape(1,batch_size,-1)  #(1,batch_size,feature_size)

RuntimeError: shape '[128, -1, 10]' is invalid for input of size 2048

So, as I understand it, having fixed the problem above, how can I reshape the encoder output to fit the required input size of 2048 for the first out tensor?

Well, the first dim will be 128 as that's the batch size, but you'll need to figure out what the rest of the shape should be. Given that you changed the input_size, surely the reshape needs to change as well?
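
To spell out the arithmetic (assuming the only change was feature_size = 20, with embed_dim = 16 as in the printed model): fc1 outputs embed_dim features (the tuple assignment in Autoencoder.__init__ sets self.hidden_dim to embed_dim, which matches out_features=16 in the printout), so encode has 128 × 16 = 2048 elements in total, while reshape(128, -1, 10) needs a multiple of 128 × auto_hidden = 1280. A rough consistency check, just a sketch with assumed values:

batch_size, feature_size, embed_dim = 128, 20, 16   # assumed values
auto_hidden = feature_size // 2                      # 10

encode_numel = batch_size * embed_dim                # fc1 outputs embed_dim features -> 2048
# encode.reshape(batch_size, -1, auto_hidden) only works if this holds:
assert encode_numel % (batch_size * auto_hidden) == 0, \
    "encode cannot be reshaped to (batch_size, -1, auto_hidden)"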

Hi @AlphaBetaGamma96, I have a question as for the Transformer Encoder block, how should I set the max_seq_len and d_model after passing the input_size of 20 from the Autoencoder and the whole trainning set having the size of (7304,20)

I'm not an expert on this so it's best you check this yourself, but the input to a Transformer block should be a 3-dimensional tensor of shape [batch_size, seq_length, num_inputs].
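
For example, a minimal sketch (standalone, with made-up sizes, not your model) of a TransformerEncoder used with batch_first=True, so the input is [batch_size, seq_length, d_model]; without batch_first=True the default layout is [seq_length, batch_size, d_model]:

import torch
import torch.nn as nn

d_model, n_head, seq_len, batch_size = 20, 4, 16, 128   # assumed values; d_model must be divisible by n_head

layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head,
                                    dim_feedforward=64, dropout=0.1,
                                    batch_first=True)
encoder = nn.TransformerEncoder(layer, num_layers=3)

x = torch.randn(batch_size, seq_len, d_model)   # [batch_size, seq_length, num_inputs]
out = encoder(x)
print(out.shape)                                # torch.Size([128, 16, 20])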