Hello All, I am getting the below error for this code. Could anyone take a look and figure out the problem. It would be a great help.
Thank you!!!
class LSTMTagger(nn.Module):
def __init__(self, embedding_dim, char_embedding_dim, hidden_dim, char_hidden_dim, vocab_size, output_size, char_size):
super(LSTMTagger, self).__init__()
#self.hidden_dim = hidden_dim
self.embedding_dim = embedding_dim
self.char_embedding_dim = char_embedding_dim
# word embedding
self.hidden_dim = hidden_dim
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first = True)
# char embedding
self.char_hidden_dim = char_hidden_dim
self.char_embeddings = nn.Embedding(char_size, char_embedding_dim)
self.char_lstm = nn.LSTM(char_embedding_dim, char_hidden_dim, batch_first = True)
self.overall_hidden_dim = hidden_dim + word_len_max * char_hidden_dim
# The linear layer that maps from hidden state space to tag space
self.hidden2tag = nn.Linear(self.overall_hidden_dim, output_size)
#self.hidden = self.init_hidden()
#self.char_hidden = self.init_hidden(isChar=True)
def forward(self, sentence, chars):
embeds = self.word_embeddings(sentence)
print(embeds.shape)
lstm_out, _ = self.lstm(embeds)
print(lstm_out.shape)
embedc = self.char_embeddings(chars)
print(embedc.shape)
char_lstm_out, _ = self.char_lstm(embedc)
print(char_lstm_out.shape)
merge_out = torch.cat((lstm_out, char_lstm_out), 1)
print(merge_out.shape)
tag_space = self.hidden2tag(merge_out)
tag_scores = F.log_softmax(tag_space, dim=1)
return tag_scores
def train_model(model, patience, n_epochs):
train_losses = []
valid_losses = []
avg_train_losses = []
avg_valid_losses = []
early_stopping = EarlyStopping(patience=patience, verbose=True)
for epoch in range(n_epochs):
print(f"Starting epoch {epoch+1}...")
#Sets the model in training mode where training is set to True to utilise any
#regularisation/BatchNormalization if available.
model.train()
for sentence, tags, chars in train_loader:
model.zero_grad()
tag_scores = model(sentence, chars)
loss = loss_function(tag_scores, tags.flatten())
loss.backward()
optimizer.step()
train_losses.append(loss.item())
#Sets the model in evaluation mode where training is set to False to avoid any
#regularisation/BatchNormalization if available.
model.eval()
for sentence, tags, chars in dev_loader:
tag_scores = model(sentence, chars)
loss = loss_function(tag_scores, tags.flatten())
valid_losses.append(loss.item())
train_loss = np.average(train_losses)
valid_loss = np.average(valid_losses)
avg_train_losses.append(train_loss)
avg_valid_losses.append(valid_loss)
epoch_len = len(str(n_epochs))
print(f'[{epoch+1:>{epoch_len}}/{n_epochs:>{epoch_len}}]' +'-'*20+'->'+
f'train_loss: {train_loss:.5f} ' +
f'valid_loss: {valid_loss:.5f}')
# Empty lists again for the next epoch
train_losses = []
valid_losses = []
early_stopping(valid_loss, model)
if early_stopping.early_stop:
print("Early stopping"+'-'*10 +"STOPPED")
break
# loading the last best the best model
model.load_state_dict(torch.load('checkpoint.pt'))
return model, avg_train_losses, avg_valid_losses
embedding_dim = 32
epochs = 20
hidden_dim = 32
patience = 5
char_hidden_dim = 32
char_embedding_dim = 32
model = LSTMTagger(embedding_dim, char_embedding_dim, hidden_dim, char_hidden_dim, len(word_to_ix), len(tag_to_ix), len(char_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
model, train_loss, valid_loss = train_model(model, patience, epochs)
Error:
RuntimeError Traceback (most recent call last)
in
10 optimizer = optim.Adam(model.parameters(), lr=0.01)
11
—> 12 model, train_loss, valid_loss = train_model(model, patience, epochs)
in train_model(model, patience, n_epochs)
17 model.zero_grad()
18
—> 19 tag_scores = model(sentence, chars)
20 loss = loss_function(tag_scores, tags.flatten())
21 loss.backward()
~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
in forward(self, sentence, chars)
55 print(merge_out.shape)
56 #merge_out = merge_out.view(-1, 32)
—> 57 tag_space = self.hidden2tag(merge_out)
58 tag_scores = F.log_softmax(tag_space, dim=1)
59
~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
–> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
85
86 def forward(self, input):
—> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
~\anaconda3\envs\tensorflow_gpu\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1610 ret = torch.addmm(bias, input, weight.t())
1611 else:
-> 1612 output = input.matmul(weight.t())
1613 if bias is not None:
1614 output += bias
RuntimeError: size mismatch, m1: [39712 x 32], m2: [672 x 17] at C:\cb\pytorch_1000000000000\work\aten\src\TH/generic/THTensorMath.cpp:41