Dear all.
I have a problem training an RNN LSTM network with batching. I can’t figure out why the network doesn’t backpropagate the error: the loss doesn’t go down. I hope someone can give me some advice on how to fix this. I’m using Python 3 with PyTorch 0.3.1.
Here is the relevant code from my model class:
def init_hidden(self, batch):
    """Build a fresh (h0, c0) pair for a new batch of sequences.

    Both states have shape (num_layers * num_directions, batch,
    hidden_dim // num_directions) and are drawn from a standard normal.
    """
    state_shape = (NUM_LAYERS * NUM_DIRS, batch, self.hidden_dim // NUM_DIRS)
    h0 = autograd.Variable(torch.randn(*state_shape))
    c0 = autograd.Variable(torch.randn(*state_shape))
    return (h0, c0)
def forward(self, sentence, lengths):
    """Run a padded batch through embed -> dropout -> LSTM -> tag projection.

    `sentence` is sequence-first, so its last dimension is the batch size;
    `lengths` holds the true (unpadded) length of each sequence, sorted
    descending as pack_padded_sequence requires.
    """
    batch_size = sentence.size(-1)
    self.hidden = self.init_hidden(batch_size)
    emb = self.dropout(self.word_embeddings(sentence))
    packed = pack_padded_sequence(emb, lengths)
    packed_out, (ht, ct) = self.lstm(packed, self.hidden)
    unpacked, _ = pad_packed_sequence(packed_out)
    scores = self.hidden2tag(unpacked)
    # NOTE(review): if the training loss is CrossEntropyLoss, this softmax
    # should be removed; if it is NLLLoss, self.softmax must be LogSoftmax,
    # not Softmax — a plain Softmax here is a common cause of a flat loss.
    # Verify against the definitions of self.softmax and loss_function.
    return self.softmax(scores)
And here is the training loop:
# Training loop: one pass over `dataset` per epoch, accumulating the mean
# loss and per-token predictions for later evaluation.
print('Train with', len(data), 'examples.')
for epoch in range(EPOCHS):
    print(f'Starting epoch {epoch}.')
    loss_sum = 0
    y_true = list()
    y_pred = list()
    for batch, lengths, targets, lengths2 in tqdm(dataset):
        model.zero_grad()  # clear gradients left over from the previous step
        # pack_padded_sequence requires sequences sorted by decreasing length
        batch, targets, lengths = sort_batch(batch, targets, lengths)
        pred = model(autograd.Variable(batch), lengths.cpu().numpy())
        # pred is (seq_len, batch, n_tags); flatten to (seq_len*batch, n_tags)
        # so each padded token position is one row for the loss.
        loss = loss_function(pred.view(-1, pred.size()[2]),
                             autograd.Variable(targets).view(-1, 1).squeeze(1))
        loss.backward()
        optimizer.step()
        loss_sum += loss.data[0]
        print(loss.data[0])
        # BUG FIX: the argmax must be taken over the tag dimension (dim 2),
        # not dim 1 (the batch axis) — with dim 1 the "predictions" were
        # batch indices and could never match the targets.
        pred_idx = torch.max(pred, 2)[1]
        # Flatten both sides to per-token scalars so y_true and y_pred align
        # element-for-element (targets is presumably (seq_len, batch) —
        # TODO confirm against the dataset loader).
        y_true += list(targets.view(-1).int())
        y_pred += list(pred_idx.view(-1).data.int())
    loss_total = loss_sum / len(dataset)
    print('>>> Loss:', loss_total)
Thank you for your time!