Hi, I’m trying out the code from the awesome practical-python repository, and I have replaced the training text with a somewhat bigger one (the original is 164 KB; mine is 966 KB).
However, the loss becomes NaN after several iterations.
The model by @spro is below.
import torch
import torch.nn as nn
from torch.autograd import Variable
class RNN(nn.Module):
    """Single-step recurrent cell that emits log-probabilities.

    At each step the input and the previous hidden state are concatenated
    along dimension 1 and passed through two independent linear layers:
    ``i2h`` yields the next hidden state and ``i2o`` yields the output
    logits, which are normalised with log-softmax.

    NOTE(review): the hidden update is purely linear (no squashing
    nonlinearity such as tanh), so nothing bounds the hidden state's
    magnitude across steps. This may be related to the NaN loss seen on
    longer training text -- confirm, e.g. by monitoring the hidden-state
    norm or by clipping gradients.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the two linear layers and the output normalisation.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Number of features in the hidden state.
            output_size: Number of output classes.
        """
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        # Pin the normalised dimension explicitly. Relying on the implicit
        # choice is deprecated; for the 2-D tensors produced in forward()
        # the implicit choice was dimension 1, so results are unchanged.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one recurrent step.

        Args:
            input: Tensor of shape ``(batch, input_size)``.
            hidden: Tensor of shape ``(batch, hidden_size)``.

        Returns:
            A tuple ``(output, hidden)`` where ``output`` holds
            log-probabilities of shape ``(batch, output_size)`` and
            ``hidden`` is the next hidden state of shape
            ``(batch, hidden_size)``.
        """
        combined = torch.cat((input, hidden), 1)
        # Both heads read the same concatenated vector; the new hidden state
        # does not feed into this step's output.
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial hidden state of shape ``(1, hidden_size)``."""
        return Variable(torch.zeros(1, self.hidden_size))
Then I replaced the LogSoftmax
with Softmax followed by log(output + eps),
like this:
import torch.nn as nn
from torch.autograd import Variable
class RNN(nn.Module):
    """Single-step recurrent cell using softmax followed by ``log(p + eps)``.

    At each step the input and the previous hidden state are concatenated
    along dimension 1 and passed through two independent linear layers:
    ``i2h`` yields the next hidden state and ``i2o`` yields the output
    logits. The logits are normalised with softmax, a small constant is
    added, and the logarithm is taken.

    NOTE(review): adding ``1e-8`` only guards against ``log(0)``; it does
    not help if the logits themselves become non-finite. The hidden update
    is purely linear (no squashing nonlinearity), so its magnitude is not
    bounded across steps -- this may be the source of the NaN loss; confirm
    by monitoring the hidden-state norm or by clipping gradients.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the two linear layers and the output normalisation.

        Args:
            input_size: Number of features in each input row.
            hidden_size: Number of features in the hidden state.
            output_size: Number of output classes.
        """
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        # Pin the normalised dimension explicitly. Relying on the implicit
        # choice is deprecated; for the 2-D tensors produced in forward()
        # the implicit choice was dimension 1, so results are unchanged.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input, hidden):
        """Run one recurrent step.

        Args:
            input: Tensor of shape ``(batch, input_size)``.
            hidden: Tensor of shape ``(batch, hidden_size)``.

        Returns:
            A tuple ``(output, hidden)`` where ``output`` is
            ``log(softmax(logits) + 1e-8)`` of shape
            ``(batch, output_size)`` and ``hidden`` is the next hidden
            state of shape ``(batch, hidden_size)``.
        """
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        # Epsilon keeps log() finite when a probability underflows to zero.
        output = output.add(1e-8)
        output = output.log()
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial hidden state of shape ``(1, hidden_size)``.

        NOTE(review): ``dtypeFloat`` is not defined in the visible code;
        presumably it is a tensor type such as ``torch.FloatTensor`` set
        elsewhere -- confirm before use.
        """
        return Variable(torch.zeros(1, self.hidden_size).type(dtypeFloat))
The result is slightly better, but the loss still ends up as NaN.
Could anybody give me some suggestions on how this could happen?
Thanks.