I am getting size mismatch error. Please let me know how to fix it

samsumankumar · September 17, 2020, 3:25pm

Hello All, I am getting the below error for this code. Could anyone take a look and figure out the problem. It would be a great help.

Thank you!!!

class LSTMTagger(nn.Module):

def __init__(self, embedding_dim, char_embedding_dim, hidden_dim, char_hidden_dim, vocab_size, output_size, char_size):
    super(LSTMTagger, self).__init__()
    #self.hidden_dim = hidden_dim
    self.embedding_dim = embedding_dim
    self.char_embedding_dim = char_embedding_dim
    
    # word embedding
    self.hidden_dim = hidden_dim
    self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first = True)

    # char embedding
    self.char_hidden_dim = char_hidden_dim
    self.char_embeddings = nn.Embedding(char_size, char_embedding_dim)
    self.char_lstm = nn.LSTM(char_embedding_dim, char_hidden_dim, batch_first = True)

    self.overall_hidden_dim = hidden_dim + word_len_max * char_hidden_dim

    # The linear layer that maps from hidden state space to tag space
    self.hidden2tag = nn.Linear(self.overall_hidden_dim, output_size)
    #self.hidden = self.init_hidden()
    #self.char_hidden = self.init_hidden(isChar=True)


def forward(self, sentence, chars):
   
    embeds = self.word_embeddings(sentence)
    print(embeds.shape)
    lstm_out, _ = self.lstm(embeds)
    print(lstm_out.shape)
    embedc = self.char_embeddings(chars)
    print(embedc.shape)
    char_lstm_out, _ = self.char_lstm(embedc)
    print(char_lstm_out.shape)
    merge_out = torch.cat((lstm_out, char_lstm_out), 1)
    print(merge_out.shape)

    tag_space = self.hidden2tag(merge_out)
    tag_scores = F.log_softmax(tag_space, dim=1)

    return tag_scores

def train_model(model, patience, n_epochs):

train_losses = []
valid_losses = []
avg_train_losses = []
avg_valid_losses = []

early_stopping = EarlyStopping(patience=patience, verbose=True)

for epoch in range(n_epochs):
    print(f"Starting epoch {epoch+1}...")
    
    #Sets the model in training mode where training is set to True to utilise any 
    #regularisation/BatchNormalization if available.  
    model.train()
    for sentence, tags, chars in train_loader:
        model.zero_grad()
    
        tag_scores = model(sentence, chars)
        loss = loss_function(tag_scores, tags.flatten())
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    #Sets the model in evaluation mode where training is set to False to avoid any 
    #regularisation/BatchNormalization if available.    
    model.eval()
    for sentence, tags, chars in dev_loader:
        tag_scores = model(sentence, chars)
        loss = loss_function(tag_scores, tags.flatten())
        valid_losses.append(loss.item())

    train_loss = np.average(train_losses)
    valid_loss = np.average(valid_losses)
    avg_train_losses.append(train_loss)
    avg_valid_losses.append(valid_loss)
    
    epoch_len = len(str(n_epochs))
    
    print(f'[{epoch+1:>{epoch_len}}/{n_epochs:>{epoch_len}}]' +'-'*20+'->'+
                 f'train_loss: {train_loss:.5f}   ' +
                 f'valid_loss: {valid_loss:.5f}')
    
    # Empty lists again for the next epoch
    train_losses = []
    valid_losses = []
    
    early_stopping(valid_loss, model)
    
    if early_stopping.early_stop:
        print("Early stopping"+'-'*10 +"STOPPED")
        break
    
# loading the last best the best model
model.load_state_dict(torch.load('checkpoint.pt'))

return  model, avg_train_losses, avg_valid_losses

embedding_dim = 32
epochs = 20
hidden_dim = 32
patience = 5
char_hidden_dim = 32
char_embedding_dim = 32

model = LSTMTagger(embedding_dim, char_embedding_dim, hidden_dim, char_hidden_dim, len(word_to_ix), len(tag_to_ix), len(char_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

model, train_loss, valid_loss = train_model(model, patience, epochs)

Error:
RuntimeError Traceback (most recent call last)
in
10 optimizer = optim.Adam(model.parameters(), lr=0.01)
11
—> 12 model, train_loss, valid_loss = train_model(model, patience, epochs)

in train_model(model, patience, n_epochs)
17 model.zero_grad()
18
—> 19 tag_scores = model(sentence, chars)
20 loss = loss_function(tag_scores, tags.flatten())
21 loss.backward()

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)

in forward(self, sentence, chars)
55 print(merge_out.shape)
56 #merge_out = merge_out.view(-1, 32)
—> 57 tag_space = self.hidden2tag(merge_out)
58 tag_scores = F.log_softmax(tag_space, dim=1)
59

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
85
86 def forward(self, input):
—> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):

~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1610 ret = torch.addmm(bias, input, weight.t())
1611 else:
-> 1612 output = input.matmul(weight.t())
1613 if bias is not None:
1614 output += bias

RuntimeError: size mismatch, m1: [39712 x 32], m2: [672 x 17] at C:\cb\pytorch_1000000000000\work\aten\src\TH/generic/THTensorMath.cpp:41

ptrblck · September 19, 2020, 4:36am

The number of in_features in hidden2tag doesn’t match the features in merge_out.
Unfortunately your code isn’t executable, so I would recommend to check the shape of merge_out, make sure the number of dimensions fits your use case and adapt the in_features of the linear layer.