Hi,
I’m trying to learn and experiment with PyTorch, but I have run into a stack overflow exception. My code snippets are below. The last line of the code throws a “Windows fatal exception: stack overflow” at some point during training, whereas if I switch to torch.nn.RNN, everything works just fine. Any help would be appreciated.
class VanillaRNNModule(torch.nn.Module):
    """Single-layer tanh RNN followed by a linear layer that emits one logit.

    The recurrent layer is unidirectional, batch-first and has no dropout.
    Only the final hidden state of each sequence is fed to the output layer;
    the per-step outputs of the RNN are discarded.
    """

    def __init__(self, input_size, hidden_size):
        """Build the recurrent layer and the 1-unit output layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the RNN hidden state.
        """
        super(VanillaRNNModule, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(
            input_size,
            hidden_size,
            num_layers=1,
            nonlinearity='tanh',
            batch_first=True,
            dropout=0,
            bidirectional=False,
        )
        self.output_layer = torch.nn.Linear(hidden_size, 1)

    def forward(self, input, input_lengths):
        """Return one logit per sequence in a padded batch.

        Args:
            input: Padded batch, batch dimension first (it is packed with
                ``batch_first=True``).
            input_lengths: Valid length of every sequence in ``input``.
                ``pack_padded_sequence`` is called with its default settings,
                so the batch is expected to be ordered by decreasing length.

        Returns:
            The output layer applied to the RNN's final hidden state; with
            one unidirectional layer that is a tensor of shape
            ``(1, batch, 1)``.
        """
        packed_input = torch.nn.utils.rnn.pack_padded_sequence(
            input, input_lengths, batch_first=True
        )
        # No initial state is passed, so the RNN starts from its default
        # (zero) hidden state.  The explicit zero tensor that used to be
        # built here on every call was never handed to the RNN.
        _, final_hidden = self.rnn(packed_input)
        return self.output_layer(final_hidden)

    def get_init_hidden(self):
        """Return a zero tensor of shape ``(1, hidden_size)``.

        ``forward`` does not use this helper; it is kept for callers that
        want an explicit initial state.
        """
        return torch.zeros(1, self.hidden_size)
class LSTMRNNModule(torch.nn.Module):
    """One-layer, unidirectional LSTM whose last hidden state feeds a linear logit head."""

    def __init__(self, input_size, hidden_size):
        """Create the LSTM and the single-output linear layer.

        Args:
            input_size: Feature count of each time step.
            hidden_size: Size of the LSTM hidden (and cell) state.
        """
        super(LSTMRNNModule, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
            dropout=0,
            bidirectional=False,
        )
        self.output_layer = torch.nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, input, input_lengths):
        """Score a padded, batch-first batch of sequences.

        Args:
            input: Padded batch of sequences with the batch dimension first.
            input_lengths: True length of each sequence in ``input``.

        Returns:
            The linear head applied to the LSTM's final hidden state.
        """
        packed = torch.nn.utils.rnn.pack_padded_sequence(
            input, input_lengths, batch_first=True
        )
        # self.lstm returns (per_step_output, (h_n, c_n)); only h_n is used.
        lstm_result = self.lstm(packed)
        final_hidden = lstm_result[1][0]
        return self.output_layer(final_hidden)

    def get_init_hidden(self):
        """Return an all-zero tensor shaped ``(1, hidden_size)``."""
        shape = (1, self.hidden_size)
        return torch.zeros(*shape)
class RNNBinaryClassifierTrainer(object):
def __init__(self, data_set, hidden_size, batch_size=1):
    """Set up the model, loss and optimizer for binary classification.

    Args:
        data_set: Training data provider. Its ``n_words`` attribute is used
            as the model's input size.
        hidden_size: Hidden-state width passed to the recurrent model.
        batch_size: Batch size stored for later use by the training loop.
    """
    self.data_set = data_set
    self.batch_size = batch_size
    # The LSTM variant is the active model; VanillaRNNModule takes the same
    # constructor arguments and can be swapped in here for comparison.
    self.rnn = LSTMRNNModule(data_set.n_words, hidden_size)
    self.all_losses = []
    # Mean reduction is the default.  The deprecated ``size_average`` /
    # ``reduce`` flags only restated that default and trigger a deprecation
    # warning on current PyTorch, so they are not passed.
    self.criterion = torch.nn.BCEWithLogitsLoss()
    self.optimizer = torch.optim.SGD(self.rnn.parameters(), lr=0.001, momentum=0.9)
def train(self, max_iter, evl_every):
    """Train for ``max_iter`` passes over the data, reporting every ``evl_every`` passes.

    On each reporting pass the summed batch losses accumulated since the last
    report are divided by ``evl_every``, appended to ``self.all_losses``, the
    AUC is computed via ``self.evaluate_auc`` and timing/loss lines are
    printed. ``timeSince`` and ``time_start`` are module-level names defined
    outside this method.

    Args:
        max_iter: Number of passes over the training batches.
        evl_every: Reporting interval, in passes.
    """
    current_loss = 0
    for i in range(1, max_iter + 1):
        st = time.time()
        for X, L, Y in self.data_set.next_train_batch(self.batch_size):
            current_loss += self.train_iter(X, L, Y)
        ed0 = time.time()
        if i % evl_every != 0:
            continue
        avg_loss = current_loss / evl_every
        self.all_losses.append(avg_loss)
        auc = self.evaluate_auc(self.batch_size)
        ed1 = time.time()
        # Evaluation time is measured from the end of the training pass
        # (ed0), not from its start; measuring from ``st`` made "Eval Used"
        # include the whole training pass as well.
        print("Epoch Used: {0:0.3f}, Eval Used: {1:0.3f}".format(ed0 - st, ed1 - ed0))
        print("Elapsed: {3}, Iter: {0}, Loss: {1:8.3f}, AUC: {2:0.3f}".format(i, avg_loss, auc, timeSince(time_start)))
        current_loss = 0
def train_iter(self, input, input_lengths, target):
self.optimizer.zero_grad()
output = self.predict(input, input_lengths)
output = output[0]
loss = self.criterion(output, target)
loss.backward()
self.optimizer.step()
# return loss.data.item()
# return loss.item()
return float(loss)