I have problems training a simple nn.LSTM with
rnn = nn.LSTM(1, args.n_hidden, 1)
net = Mynet()
crit = nn.MSELoss()
optim_rnn = optim.Adam(rnn.parameters(), lr=1e-3)
optim_net = optim.Adam(net.parameters(), lr=1e-3)
One epoch of my training procedure is
c = torch.randn(1, args.n_batch, args.n_hidden)
h = torch.randn(1, args.n_batch, args.n_hidden)
for k in range(N):
rnn.zero_grad()
net.zero_grad()
i = torch.tensor(y_train[k]).detach().view(1, 1, 1)
o, (h, c) = rnn(i, (h.detach(), c.detach()))
p = net(o.view(1, -1))
err = crit(p.view(-1), torch.tensor(f_train[k]))
err.backward()
optim_rnn.step()
optim_net.step()
What am I doing wrong?
Is there a problem with how I use the hidden and cell states h and c?