Hi, I’m transitioning from TensorFlow and having a hard time getting an LSTM to learn a simple polynomial mapping over a sequence.
I’m just trying out a simple many-to-many model.
My x is a sequence of random floats; my y is just x * 3.24.
The loss doesn’t seem to be decreasing, and the predictions don’t look like the values they’re supposed to be: the network just ends up outputting 0.2478 for every input.
Can anyone spot any bugs here?
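For reference, the data is basically equivalent to this (just a sketch of what one x, y pair looks like; the actual SimplePolynomialMappingDataset class isn't shown):

import numpy as np

# one example: a sequence of random floats, and the same sequence scaled by 3.24
seq_len = 20  # placeholder length
x = np.random.rand(seq_len).astype(np.float32)  # shape: (seq_len,)
y = x * 3.24                                    # shape: (seq_len,)
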
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import datasets  # project module that provides SimplePolynomialMappingDataset


class TestRNN(nn.Module):

    def __init__(self, params):
        # init superclass
        super(TestRNN, self).__init__()

        # pass params
        self.hidden_size_l1 = params.hidden_size_l1

        # build model
        self.rnn = nn.LSTM(input_size=1, hidden_size=20, num_layers=3)
        self.dense_2 = nn.Linear(20, 1)
    def forward(self, input_seq):
        steps = len(input_seq)

        # track all outputs at each step
        output_per_step = Variable(torch.zeros(steps, 1, 1))

        # run the sequence through the rnn one element at a time
        # each timestep produces the activations from the hidden units
        # track the output at each timestep
        last_hidden = None
        for i, element in enumerate(input_seq):
            # add a dim of 1 for the batch size
            element = element.unsqueeze(1)

            # -----------------------------
            # RUN THROUGH THE NETWORK
            # output is an array of activations for that timestep from the rnn
            # each output is the output for that layer
            # for the rnn, the output is the output of the cell (i.e. probabilities of the next word or whatever)
            # last_hidden is the hidden state of the cell at that timestep
            output, last_hidden = self.rnn(element)
            # output = self.dense_1(output)
            output = self.dense_2(output)
            output_per_step[i] = output

        return output_per_step, last_hidden
    @classmethod
    def test(cls, model, data, nb_tests=10):
        results = []
        for i in range(nb_tests):
            x, y = data.val_x[i], data.val_y[i]
            x = Variable(torch.from_numpy(x).float())
            y_hat, last_hidden = model(x)

            # flatten
            y_hat = y_hat.squeeze(1).squeeze(1)
            print(y_hat[0:2], y[0:2])
    @classmethod
    def fit(cls, hparams):
        # dataset
        data = datasets.SimplePolynomialMappingDataset(hparams.data_path, data_limit=hparams.nb_train_pts)

        nb_epochs = 150
        nb_batches = 100

        # make model
        model = cls(hparams)
        loss_fx = nn.MSELoss()

        # optimizer
        optimizer = optim.SGD(model.parameters(), lr=0.01)

        # epoch and batch loops
        for epoch_nb in range(nb_epochs):
            for batch_nb in range(nb_batches):
                # get data
                x = data.train_x[batch_nb]
                y = data.train_y[batch_nb]
                try:
                    # format for torch
                    x = Variable(torch.from_numpy(x).float())
                    y = Variable(torch.from_numpy(y).float())

                    # forward pass
                    # y_hat is an output sequence
                    y_hat, last_hidden = model(x)

                    # zero out gradients
                    optimizer.zero_grad()

                    # compute loss
                    # in this case it's the elementwise difference between the predicted sequence and the target sequence
                    loss = loss_fx(y_hat, y)

                    # backward pass
                    loss.backward()

                    # update weights
                    optimizer.step()

                    # track loss
                    print('tng loss: {}'.format(loss.data[0]))
                except Exception as e:
                    print(e)

            # run test data
            TestRNN.test(model, data)
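
For completeness, I call it roughly like this (hparams is just a namespace holding the hyperparameters referenced above; the values and path below are placeholders, not my real config):

from argparse import Namespace

# hypothetical stand-in for the real hparams object
hparams = Namespace(
    hidden_size_l1=20,          # placeholder
    data_path='path/to/data',   # placeholder
    nb_train_pts=1000,          # placeholder
)
TestRNN.fit(hparams)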