Time sequence prediction example with new data fails

I am running the time sequence prediction example with some actual time-series data instead of sine waves and the loss function gets stuck.

The data in the example has shape (100, 1000) and my new data has shape (40, 1000), but I don’t see anywhere in the code where this difference could cause a problem.

class Sequence(nn.Module):
    """Two stacked LSTM cells that predict the next value of a scalar series.

    The first cell maps each scalar sample to ``hidden_size`` features and
    the second cell maps those features back to a single scalar.

    Args:
        hidden_size: width of the first cell's state (default 51, the value
            the example was written with).
    """

    def __init__(self, hidden_size=51):
        # The dunder names matter: without ``__init__`` the cells below are
        # never created or registered as parameters of the module.
        super(Sequence, self).__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, 1)

    def forward(self, input, future=0):
        """Run the network over ``input`` and optionally keep forecasting.

        Args:
            input: 2-D double tensor laid out as (batch, time); every column
                is fed to the network as one time step.
            future: number of extra steps to generate after the last column,
                feeding each prediction back in as the next input.

        Returns:
            Tensor of shape (batch, time + future) with one prediction per
            consumed or generated step.
        """
        batch_size = input.size(0)
        outputs = []
        # States start at zero and are created as doubles, so the module is
        # expected to have been cast with ``.double()`` before being called.
        h_t = Variable(torch.zeros(batch_size, self.hidden_size).double(), requires_grad=False)
        c_t = Variable(torch.zeros(batch_size, self.hidden_size).double(), requires_grad=False)
        h_t2 = Variable(torch.zeros(batch_size, 1).double(), requires_grad=False)
        c_t2 = Variable(torch.zeros(batch_size, 1).double(), requires_grad=False)

        # One (batch, 1) slice per time step.
        for input_t in input.chunk(input.size(1), dim=1):
            h_t, c_t = self.lstm1(input_t, (h_t, c_t))
            # NOTE: the *cell* state of the first layer feeds the second
            # layer, and the second layer's cell state is the prediction.
            # Unlike the hidden state h = o * tanh(c), the cell state is not
            # squashed into (-1, 1).
            h_t2, c_t2 = self.lstm2(c_t, (h_t2, c_t2))
            outputs += [c_t2]

        # Forecast: feed the previous prediction back in as the next input.
        for _ in range(future):
            h_t, c_t = self.lstm1(c_t2, (h_t, c_t))
            h_t2, c_t2 = self.lstm2(c_t, (h_t2, c_t2))
            outputs += [c_t2]

        # (batch, steps, 1) -> (batch, steps)
        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs

if __name__ == '__main__':
    # Set seed to 0 so weight initialisation is reproducible between runs.
    np.random.seed(0)
    torch.manual_seed(0)

    # Load data and generate training set: the input is every row from the
    # fourth onwards without its last column, the target is the same rows
    # shifted one step to the left (next-value prediction).
    # NOTE(review): the series are used as loaded, with no rescaling; if
    # their range is far from that of the original sine data, a stalled loss
    # may be a scaling issue -- TODO confirm against the data file.
    data = torch.load('traindata_EX1.pt')
    input = Variable(torch.from_numpy(data[3:, :-1]), requires_grad=False)
    target = Variable(torch.from_numpy(data[3:, 1:]), requires_grad=False)

    # Build the model
    seq = Sequence()
    seq.double()  # Casts all parameters and buffers to double datatype.
    criterion = nn.MSELoss()  # Set loss function to Mean Squared Error
    # use LBFGS as optimizer since we can load the whole data to train
    optimizer = optim.LBFGS(seq.parameters())

    # Begin to train
    for i in range(15):
        print('STEP:', i)

        def closure():
            """Recompute the loss and its gradients for the optimizer."""
            optimizer.zero_grad()  # Reset gradients each pass.
            out = seq(input)  # Predictions with the current parameters.
            loss = criterion(out, target)  # Calculate error.
            # reshape(-1) makes the indexing work whether the loss comes
            # back as a one-element or a zero-dimensional array.
            print('Loss:', loss.data.numpy().reshape(-1)[0])
            loss.backward()  # Backpropagate
            return loss

        # LBFGS may call the closure several times within a single step.
        optimizer.step(closure)

        # Predict: replay the first three training rows, then forecast.
        future = 1000
        pred = seq(input[:3], future=future)
        y = pred.data.numpy()

        # Draw the result. A new figure is opened on every training step and
        # this snippet never saves, shows or closes it.
        plt.figure()  # figsize=(30,10)
        plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
        plt.xlabel('x', fontsize=20)
        plt.ylabel('y', fontsize=20)

        def draw(yi, color):
            """Plot the fitted part solid and the forecast part dotted."""
            plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth=2.0)
            plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth=2.0)

        draw(y[0], 'r')
        draw(y[1], 'g')
        draw(y[2], 'b')

The file traindata_EX1.pt is here.

Any clues as to why my loss is getting stuck?

I wonder why this code uses c_t2 (the cell state) as the prediction. I’m a beginner, but isn’t the hidden state (in this case h_t2) conventionally taken as the prediction? After changing c_t2 to h_t2, training on the original dataset gives poor results.

I spent a lot of time reinstalling PyTorch because this example gives wrong results, both with and without a GPU (I picked this one by chance to check that my installation was OK). It would be nice to fix it, or at least to remove it from the set of examples that are expected to work.