Hi, I’m transitioning from TensorFlow and having a hard time getting an LSTM to learn a simple polynomial mapping over a sequence.
I’m just trying out a simple many-to-many model.
My x is a sequence of random floats; my y is just x * 3.24.
The loss doesn’t seem to be decreasing, and the predictions don’t look like the values they’re supposed to be: the network just ends up outputting 0.2478 for every input.
Can anyone spot any bugs here?
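For reference, the data is basically equivalent to this (just a sketch of what one x, y pair looks like; the actual SimplePolynomialMappingDataset class isn't shown):

import numpy as np

# one example: a sequence of random floats, and the same sequence scaled by 3.24
seq_len = 20  # placeholder length
x = np.random.rand(seq_len).astype(np.float32)  # shape: (seq_len,)
y = x * 3.24                                    # shape: (seq_len,)
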
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import datasets  # project module that provides SimplePolynomialMappingDataset


class TestRNN(nn.Module):

    def __init__(self, params):
        # init superclass
        super(TestRNN, self).__init__()

        # pass params
        self.hidden_size_l1 = params.hidden_size_l1

        # build model
        self.rnn = nn.LSTM(input_size=1, hidden_size=20, num_layers=3)
        self.dense_2 = nn.Linear(20, 1)
    def forward(self, input_seq):
        steps = len(input_seq)

        # track all outputs at each step
        output_per_step = Variable(torch.zeros(steps, 1, 1))

        # run the sequence through the rnn one element at a time
        # each timestep produces the activations from the hidden units
        # track the output at each timestep
        last_hidden = None
        for i, element in enumerate(input_seq):
            # add a dim of 1 for the batch size
            element = element.unsqueeze(1)

            # -----------------------------
            # RUN THROUGH THE NETWORK
            # output is an array of activations for that timestep from the rnn
            # each output is the output for that layer
            # for the rnn, the output is the output of the cell (i.e. probabilities of the next word or whatever)
            # last_hidden is the hidden state of the cell at that timestep
            output, last_hidden = self.rnn(element)
            # output = self.dense_1(output)
            output = self.dense_2(output)
            output_per_step[i] = output

        return output_per_step, last_hidden
    @classmethod
    def test(cls, model, data, nb_tests=10):
        results = []
        for i in range(nb_tests):
            x, y = data.val_x[i], data.val_y[i]
            x = Variable(torch.from_numpy(x).float())
            y_hat, last_hidden = model(x)

            # flatten
            y_hat = y_hat.squeeze(1).squeeze(1)
            print(y_hat[0:2], y[0:2])
    @classmethod
    def fit(cls, hparams):
        # dataset
        data = datasets.SimplePolynomialMappingDataset(hparams.data_path, data_limit=hparams.nb_train_pts)

        nb_epochs = 150
        nb_batches = 100

        # make model
        model = cls(hparams)
        loss_fx = nn.MSELoss()

        # optimizer
        optimizer = optim.SGD(model.parameters(), lr=0.01)

        # epoch and batch loops
        for epoch_nb in range(nb_epochs):
            for batch_nb in range(nb_batches):
                # get data
                x = data.train_x[batch_nb]
                y = data.train_y[batch_nb]
                try:
                    # format for torch
                    x = Variable(torch.from_numpy(x).float())
                    y = Variable(torch.from_numpy(y).float())

                    # forward pass
                    # y_hat is an output sequence
                    y_hat, last_hidden = model(x)

                    # zero out gradients
                    optimizer.zero_grad()

                    # compute loss
                    # in this case it's the elementwise difference between the predicted sequence and the target sequence
                    loss = loss_fx(y_hat, y)

                    # backward pass
                    loss.backward()

                    # update weights
                    optimizer.step()

                    # track loss
                    print('tng loss: {}'.format(loss.data[0]))
                except Exception as e:
                    print(e)

            # run test data
            TestRNN.test(model, data)
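
For completeness, I call it roughly like this (hparams is just a namespace holding the hyperparameters referenced above; the values and path below are placeholders, not my real config):

from argparse import Namespace

# hypothetical stand-in for the real hparams object
hparams = Namespace(
    hidden_size_l1=20,          # placeholder
    data_path='path/to/data',   # placeholder
    nb_train_pts=1000,          # placeholder
)
TestRNN.fit(hparams)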