Simple LSTM example


(jtremblay) #1

Hello, I am trying to do a simple test: I want to show the network a number at t=0 and then have it output that number k steps in the future. In the meantime, the network is shown zeros. However, I get an error when I call backward(), and I am not sure how to read the error message.

Here is the code I wrote:

import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple
import os 

import torch
import random
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
import torchvision.transforms as T
import cv2
import pickle
import glob
import time
import subprocess
from collections import namedtuple
import resource
import math

class Policy(nn.Module):
    """Small recurrent model: Linear(5, 5) -> LSTMCell(5, 2) -> Linear(2, 1)."""

    def __init__(self):
        super(Policy, self).__init__()
        # Created in this order: input projection, recurrent cell, output head.
        self.fc1 = nn.Linear(in_features=5, out_features=5)
        self.lstm = nn.LSTMCell(input_size=5, hidden_size=2)
        self.fc2 = nn.Linear(in_features=2, out_features=1)

    def forward(self, x, hidden):
        """Advance the model by one step.

        ``x`` is passed through ``fc1``; that result and ``hidden`` are fed
        to the LSTM cell; the cell's new hidden state is passed through
        ``fc2``.

        Returns a 3-tuple: (``fc2`` output, new hidden state, new cell state).
        """
        projected = self.fc1(x)
        next_h, next_c = self.lstm(projected, hidden)
        return self.fc2(next_h), next_h, next_c

# Build the model and an Adam optimizer over all of its parameters.
model = Policy()
# NOTE(review): lr=1 is unusually large for Adam; the later versions of this
# script in the thread use the optimizer's default instead -- confirm intent.
optimizer = optim.Adam(model.parameters(),lr=1)

# Number of model steps run per training iteration (inner loop length).
step = 10 

for i in range(100):
    # One prediction row and one target row per step; only the final step's
    # target is set to 1, every other target stays 0.
    yhat = Variable(torch.zeros(step,1))
    target = Variable(torch.zeros(step,1))
    target[-1,0] = 1
    # Every sequence starts from an all-zero hidden state and cell state.
    cx = Variable(torch.zeros(1,2))
    hx = Variable(torch.zeros(1,2))
    hidden= [hx,cx]

    for j in range(step):
        # The input is all ones on the first step and all zeros afterwards.
        x = Variable(torch.zeros(1,5))
        # NOTE(review): `is` tests identity, not equality; `j == 0` is the
        # robust spelling of this check.
        if j is 0:
            x += 1
        y, hx,cx = model(x,hidden)
        print (hx.data.numpy())
        # Carry the recurrent state over to the next step.
        hidden = (hx,cx)
        # NOTE(review): later in this thread the sizes are reported as
        # yhat[j] -> [1] and y -> [1, 1]; that mismatch is what the thread
        # identifies as the cause of the backward() error (fix there: y[0]).
        yhat[j] = y.clone()

    print ('done - Hoping the last value should be zero')
    
    # Learning step: mean squared error over all steps, then one Adam update.
    optimizer.zero_grad()
    error = ((yhat-target)*(yhat-target)).mean()
    error.backward()
    optimizer.step()

Here is the error I get,

RuntimeError: matrices expected, got 1D, 2D tensors at /data/users/soumith/miniconda2/conda-bld/pytorch-cuda80-0.1.10_1488756735684/work/torch/lib/TH/generic/THTensorMath.c:1224

I am sure I am just misusing something, like a silly person.


(Adam Paszke) #2

Can you show us a full stack trace? Something has an invalid size, but I don’t know where.


(jtremblay) #3
done - the last output should be one
Traceback (most recent call last):
  File "/home/jtremblay/code/Personal-git/dqn/simpleLstm.py", line 84, in <module>
    error.backward()
  File "/home/jtremblay/anaconda2/lib/python2.7/site-packages/torch/autograd/variable.py", line 146, in backward
    self._execution_engine.run_backward((self,), (gradient,), retain_variables)
  File "/home/jtremblay/anaconda2/lib/python2.7/site-packages/torch/nn/_functions/linear.py", line 22, in backward
    grad_input = torch.mm(grad_output, weight)
RuntimeError: matrices expected, got 1D, 2D tensors at /data/users/soumith/miniconda2/conda-bld/pytorch-cuda80-0.1.10_1488756735684/work/torch/lib/TH/generic/THTensorMath.c:1224
[Finished in 0.7s with exit code 1]
[cmd: ['/home/jtremblay/anaconda2/bin/python', '-u', '/home/jtremblay/code/Personal-git/dqn/simpleLstm.py']]
[dir: /home/jtremblay/code/Personal-git/dqn]
[path: /home/jtremblay/anaconda/bin]

Here is the full stack trace; sorry, I should have included the whole thing.


(Adam Paszke) #4

What version are you using?


(jtremblay) #5
>>> import torch
>>> torch.__version__
'0.1.10+ac9245a'

(Pranav Shyam) #6

I ran the code on my machine and it worked. I only removed the cv2 import; the rest of it was exactly the same. Output:

Version:

>>> torch.__version__
'0.1.10+16a133e'

OUTPUT:

[[ 0.22190067  0.113309  ]]
[[ 0.22374019  0.17195135]]
[[ 0.24251971  0.20971343]]
[[ 0.25022674  0.23256381]]
[[ 0.25297049  0.24633361]]
[[ 0.25373745  0.2547116 ]]
[[ 0.25378832  0.25985831]]
[[ 0.25362667  0.26304504]]
[[ 0.25343812  0.26503012]]
[[ 0.2532804   0.26627228]]
done - Hoping the last value should be zero
[[  7.61594176e-01  -6.59305393e-19]]
[[  9.63655114e-01  -1.15941839e-06]]
[[  9.94877398e-01  -7.77833122e-07]]
[[  9.99219239e-01  -7.20439346e-07]]
[[  9.99815702e-01  -7.12471092e-07]]
[[  9.99897778e-01  -7.11375492e-07]]
[[  9.99909043e-01  -7.11224914e-07]]
[[  9.99910653e-01  -7.11203825e-07]]
[[  9.99910831e-01  -7.11200983e-07]]
[[  9.99910891e-01  -7.11200926e-07]]
done - Hoping the last value should be zero
[[ 0.76159418 -0.        ]]
[[  9.64020252e-01  -4.48575378e-12]]
[[  9.95050907e-01  -2.20972351e-12]]
[[  9.99326706e-01  -1.97817370e-12]]
[[  9.99906898e-01  -1.94826468e-12]]
[[  9.99985516e-01  -1.94424338e-12]]
[[  9.99996126e-01  -1.94369932e-12]]
[[  9.99997556e-01  -1.94362603e-12]]
[[  9.99997735e-01  -1.94361671e-12]]
[[  9.99997795e-01  -1.94361302e-12]]
done - Hoping the last value should be zero
[[ 0.76159418 -0.        ]]
[[  9.64026868e-01  -3.94646712e-17]]
[[  9.95054364e-01  -1.57264918e-17]]
[[  9.99329090e-01  -1.36518715e-17]]
[[  9.99909043e-01  -1.33884757e-17]]
[[  9.99987543e-01  -1.33531386e-17]]
[[  9.99998152e-01  -1.33483426e-17]]
[[  9.99999583e-01  -1.33477065e-17]]
[[  9.99999762e-01  -1.33476039e-17]]
[[  9.99999821e-01  -1.33476039e-17]]
done - Hoping the last value should be zero
[[ 0.76159418  0.        ]]
[[  9.64027464e-01  -7.70289075e-22]]
[[  9.95054662e-01  -2.58641162e-22]]
[[  9.99329209e-01  -2.18775464e-22]]
[[  9.99909163e-01  -2.13788550e-22]]
[[  9.99987662e-01  -2.13120985e-22]]
[[  9.99998271e-01  -2.13030714e-22]]
[[  9.99999702e-01  -2.13018117e-22]]
[[  9.99999881e-01  -2.13016502e-22]]
[[  9.99999940e-01  -2.13016072e-22]]
done - Hoping the last value should be zero
[[ 0.76159418  0.        ]]
[[  9.64027584e-01  -3.21531385e-26]]
[[  9.95054722e-01  -9.34462056e-27]]
[[  9.99329269e-01  -7.73195391e-27]]
[[  9.99909222e-01  -7.53276268e-27]]
[[  9.99987721e-01  -7.50614017e-27]]
[[  9.99998331e-01  -7.50253251e-27]]
[[  9.99999762e-01  -7.50207491e-27]]
[[  9.99999940e-01  -7.50200250e-27]]
[[  1.00000000e+00  -7.50200250e-27]]
done - Hoping the last value should be zero
[[ 0.76159418  0.        ]]
[[  9.64027584e-01  -2.75070681e-30]]
[[  9.95054722e-01  -7.05951502e-31]]
[[  9.99329329e-01  -5.73105970e-31]]
[[  9.99909222e-01  -5.56879258e-31]]
[[  9.99987721e-01  -5.54712352e-31]]
[[  9.99998331e-01  -5.54420359e-31]]
[[  9.99999762e-01  -5.54382320e-31]]
[[  9.99999940e-01  -5.54375925e-31]]
[[  1.00000000e+00  -5.54375925e-31]]
done - Hoping the last value should be zero

(jtremblay) #7

This is interesting, it is still not working on my end even without cv2. Are you running pytorch with cuda 8.0?


(Pranav Shyam) #8

Yes, I have CUDA 8.0 and Python 2.7


(Xiaoyu Liu) #9

Why can’t I run this script for checking the version? Here’s what I got:

>>> torch.__version__
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'module' object has no attribute '__version__'

#10

@Xiaoyu_Liu Probably because you are not on the latest version of PyTorch; we introduced `torch.__version__` after 0.1.9.


(Xiaoyu Liu) #11

I see, perhaps I should re-install Pytorch to see whether it can solve my torch.save and torch.load problem as well!


(jtremblay) #12

I tried on my laptop with a clean PyTorch install (using conda) and I still get the error with the grads. It is weird: I am trying to make sense of the problem, and it seems that the last layer (the fully connected one) wants to do a backward pass with size two, but the output is of size one.

If I print the following variables in the backward function of the Linear class:

    def backward(self, grad_output):
        print (grad_output)
        input, weight, bias = self.saved_tensors

        grad_input = grad_weight = grad_bias = None
        print(self.needs_input_grad)
        
        if self.needs_input_grad[0]:
            print ('back')
            # print (self)
            print (grad_output,weight)

I get

back
(
1.00000e-02 *
 -6.6986
[torch.FloatTensor of size 1]
, 
 0.6127  0.6033
[torch.FloatTensor of size 1x2]
)

Here, the first variable is equal to the loss calculated by `error = (yhat-target).pow(2).mean()`. I am confused as to why the backward pass is expecting something of size 1x2.

Here is the code

import numpy as np
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
import torchvision.transforms as T

class Policy(nn.Module):
    """Small recurrent model: Linear(5, 5) -> LSTMCell(5, 2) -> Linear(2, 1)."""

    def __init__(self):
        super(Policy, self).__init__()
        # Created in this order: input projection, recurrent cell, output head.
        self.fc1 = nn.Linear(in_features=5, out_features=5)
        self.lstm = nn.LSTMCell(input_size=5, hidden_size=2)
        self.fc2 = nn.Linear(in_features=2, out_features=1)

    def forward(self, x, hidden):
        """Run a single step of the model.

        ``fc1`` is applied to ``x``, the LSTM cell consumes that result
        together with ``hidden``, and ``fc2`` is applied to the cell's new
        hidden state.

        Returns a 3-tuple: (``fc2`` output, new hidden state, new cell state).
        """
        cell_input = self.fc1(x)
        new_hx, new_cx = self.lstm(cell_input, hidden)
        output = self.fc2(new_hx)
        return output, new_hx, new_cx


# Build the model and an Adam optimizer (default settings) over its parameters.
model = Policy()
optimizer = optim.Adam(model.parameters())

# Number of model steps run per training iteration; reduced to a single step
# here, so the inner loop body executes once per iteration.
step = 1

for i in range(100):
    # One prediction row and one target row per step; the last (here: only)
    # target is set to 1.
    yhat = Variable(torch.zeros(step,1))
    target = Variable(torch.zeros(step,1))
    target[-1,0] = 1
    # Every sequence starts from an all-zero hidden state and cell state.
    cx = Variable(torch.zeros(1,2))
    hx = Variable(torch.zeros(1,2))
    hidden= [hx,cx]

    for j in range(step):
        # The input is all ones on the first step and all zeros afterwards.
        x = Variable(torch.zeros(1,5))
        # NOTE(review): `is` tests identity, not equality; `j == 0` is the
        # robust spelling of this check.
        if j is 0:
            x += 1
            # Re-wrap the underlying tensor in a fresh Variable.
            x = Variable(x.data)
        y, hx,cx = model(x,hidden)
        # print (hx.data.numpy())
        # Carry the recurrent state over to the next step.
        hidden = (hx,cx)
        print ('y',y)
        print ('hidden',hidden)
        # NOTE(review): the reply that follows in this thread reports
        # yhat[j] as size [1] and y as size [1, 1]; that mismatch is what it
        # identifies as the cause of the backward() error (fix there: y[0]).
        yhat[j] = y

    print ('done - the last output should be one')
    # Learning step: mean squared error over all steps, then one Adam update.
    optimizer.zero_grad()
    error = (yhat-target).pow(2).mean()
    print (error)
    error.backward()
    optimizer.step()

(Pranav Shyam) #13

The problem is when you do (quoting jtremblay, post #12):

    yhat[j] = y

>>> print 'yhat[j] size: ', yhat[j].size(), 'y size: ', y.size()
yhat[j] size:  torch.Size([1]) y size:  torch.Size([1, 1])

so this is causing issues when backpropping.

If you just change that line to:

yhat[j] = y[0]

everything works properly.

Full code (same as yours, except for that one line):

import numpy as np
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
import torchvision.transforms as T

class Policy(nn.Module):
    """Small recurrent model: Linear(5, 5) -> LSTMCell(5, 2) -> Linear(2, 1)."""

    def __init__(self):
        super(Policy, self).__init__()
        # Created in this order: input projection, recurrent cell, output head.
        self.fc1 = nn.Linear(in_features=5, out_features=5)
        self.lstm = nn.LSTMCell(input_size=5, hidden_size=2)
        self.fc2 = nn.Linear(in_features=2, out_features=1)

    def forward(self, x, hidden):
        """Compute one step of the model.

        The input ``x`` is projected by ``fc1``, the projection and
        ``hidden`` go into the LSTM cell, and ``fc2`` maps the cell's new
        hidden state to the output.

        Returns a 3-tuple: (``fc2`` output, new hidden state, new cell state).
        """
        state_h, state_c = self.lstm(self.fc1(x), hidden)
        return self.fc2(state_h), state_h, state_c


# Build the model and an Adam optimizer (default settings) over its parameters.
model = Policy()
optimizer = optim.Adam(model.parameters())

# Number of model steps run per training iteration; with a single step the
# inner loop body executes once per iteration.
step = 1

for i in range(100):
    # One prediction row and one target row per step; the last (here: only)
    # target is set to 1.
    yhat = Variable(torch.zeros(step,1))
    target = Variable(torch.zeros(step,1))
    target[-1,0] = 1
    # Every sequence starts from an all-zero hidden state and cell state.
    cx = Variable(torch.zeros(1,2))
    hx = Variable(torch.zeros(1,2))
    hidden= [hx,cx]

    for j in range(step):
        # The input is all ones on the first step and all zeros afterwards.
        x = Variable(torch.zeros(1,5))
        # NOTE(review): `is` tests identity, not equality; `j == 0` is the
        # robust spelling of this check.
        if j is 0:
            x += 1
            # Re-wrap the underlying tensor in a fresh Variable.
            x = Variable(x.data)
        y, hx,cx = model(x,hidden)
        # print (hx.data.numpy())
        # Carry the recurrent state over to the next step.
        hidden = (hx,cx)
        print ('y',y)
        print ('hidden',hidden)
        # y has size [1, 1] while yhat[j] has size [1] (sizes printed earlier
        # in this post); indexing y[0] makes the assigned value match.
        yhat[j] = y[0]

    print ('done - the last output should be one')
    # Learning step: mean squared error over all steps, then one Adam update.
    optimizer.zero_grad()
    error = (yhat-target).pow(2).mean()
    print (error)
    error.backward()
    optimizer.step()

Gives the following output:

('y', Variable containing:
-0.7611
[torch.FloatTensor of size 1x1]
)
('hidden', (Variable containing:
-0.1136  0.1655
[torch.FloatTensor of size 1x2]
, Variable containing:
-0.2690  0.3413
[torch.FloatTensor of size 1x2]
))
done - the last output should be one
Variable containing:
 3.1013
[torch.FloatTensor of size 1]

('y', Variable containing:
-0.7580
[torch.FloatTensor of size 1x1]
)
('hidden', (Variable containing:
-0.1126  0.1623
[torch.FloatTensor of size 1x2]
, Variable containing:
-0.2672  0.3351
[torch.FloatTensor of size 1x2]
))
done - the last output should be one
Variable containing:
 3.0906
[torch.FloatTensor of size 1]

('y', Variable containing:
-0.7549
[torch.FloatTensor of size 1x1]
)
('hidden', (Variable containing:
-0.1115  0.1591
[torch.FloatTensor of size 1x2]
, Variable containing:
-0.2654  0.3289
[torch.FloatTensor of size 1x2]
))
done - the last output should be one
Variable containing:
 3.0798
[torch.FloatTensor of size 1]

....

I trained it for a large number of iterations and the loss converged.


(jtremblay) #14

Thank you, I thought I was doing something silly :P