I’m trying to use the basic LSTM example, which is as follows:
import torch
import torch.nn as nn
from torch import autograd

lstm = nn.LSTM(3, 3)  # input dim is 3, output (hidden) dim is 3
inputs = [autograd.Variable(torch.randn((1, 3)))
          for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state
hidden = (autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn((1, 1, 3))))

for i in inputs:
    # step through the sequence one element at a time;
    # after each step, `hidden` contains the hidden state
    out, hidden = lstm(i.view(1, 1, -1), hidden)
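For reference, every tensor in this version is the default FloatTensor; a quick sanity check (assuming the standard nn.LSTM parameter name weight_ih_l0):

print(lstm.weight_ih_l0.data.type())  # torch.FloatTensor
print(inputs[0].data.type())          # torch.FloatTensor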
This works fine. However, when I try to use the same code inside a class, as follows:
import torch
import torch.nn as nn
import torch.optim as optim
from torch import autograd

class Seq(nn.Module):
    def __init__(self):
        super(Seq, self).__init__()
        self.lstm = nn.LSTM(3, 3)  # input dim is 3, output (hidden) dim is 3

    def forward(self, inp):
        inputs = [autograd.Variable(torch.randn((1, 3)))
                  for _ in range(5)]  # make a sequence of length 5

        # initialize the hidden state
        hidden = (autograd.Variable(torch.randn(1, 1, 3)),
                  autograd.Variable(torch.randn((1, 1, 3))))

        for i in inputs:
            # step through the sequence one element at a time;
            # after each step, `hidden` contains the hidden state
            out, hidden = self.lstm(i.view(1, 1, -1), hidden)
        return out
if __name__ == '__main__':
    ...
    seq = Seq()
    seq.double()
    criterion = nn.MSELoss()
    optimizer = optim.LBFGS(seq.parameters(), lr=0.3)

    # begin to train
    for i in range(15):
        print('STEP: ', i)

        def closure():
            optimizer.zero_grad()
            output = []
            for i in range(inp.size(0)):
                out = seq(inp[0, :])
                output.append(out)
            loss = criterion(out, target)
            print('loss:', loss.data.numpy()[0])
            loss.backward()
            return loss

        optimizer.step(closure)
I get an exception:
TypeError: torch.addmm received an invalid combination of arguments - got (int, torch.DoubleTensor, int, torch.FloatTensor, torch.DoubleTensor, out=torch.DoubleTensor), but expected one of:
* (torch.DoubleTensor source, torch.DoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
* (torch.DoubleTensor source, torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
* (float beta, torch.DoubleTensor source, torch.DoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
* (torch.DoubleTensor source, float alpha, torch.DoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
* (float beta, torch.DoubleTensor source, torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
* (torch.DoubleTensor source, float alpha, torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
* (float beta, torch.DoubleTensor source, float alpha, torch.DoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
didn't match because some of the arguments have invalid types: (int, torch.DoubleTensor, int, torch.FloatTensor, torch.DoubleTensor, out=torch.DoubleTensor)
* (float beta, torch.DoubleTensor source, float alpha, torch.SparseDoubleTensor mat1, torch.DoubleTensor mat2, *, torch.DoubleTensor out)
didn't match because some of the arguments have invalid types: (int, torch.DoubleTensor, int, torch.FloatTensor, torch.DoubleTensor, out=torch.DoubleTensor)
on the out, hidden = self.lstm(i.view(1, 1, -1), hidden) line, which is strange because it's the exact same code that works on its own. It seems to have something to do with initializing self.lstm in the __init__ method; I've been running into the same problem outside of this simple example, too.
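To narrow it down, here is a minimal sketch of what I suspect is happening (my assumption: seq.double() converts the LSTM weights to DoubleTensors, while the Variables created inside forward() with torch.randn are still FloatTensors by default):

import torch
import torch.nn as nn
from torch import autograd

lstm = nn.LSTM(3, 3).double()  # weights are now DoubleTensors
x = autograd.Variable(torch.randn(1, 1, 3))  # still a FloatTensor by default
hidden = (autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn(1, 1, 3)))
out, hidden = lstm(x, hidden)  # raises the same addmm TypeError

If that's right, casting the inputs and hidden state with .double() (or dropping seq.double() altogether) should make the types line up, but I'd like to confirm.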
Anyone have any idea what’s up?
Best,
Mark