When training an LSTM, when should I save the hidden state:
during the forward pass of the training step, or after the optimizer step has run?
Here is an example:
import torch
class neuralnet(torch.nn.Module):
    """Linear -> single-layer LSTM -> Linear -> sigmoid, with a persistent hidden state.

    The LSTM's ``(h, c)`` state is kept on the instance as ``self.hidden`` and
    is fed into every forward pass. It is only overwritten when ``forward`` is
    called with ``storehidden=True``.

    Args:
        input_size: Number of features of the raw input (last dimension).
        hidden_size: Width of the LSTM input and of its hidden/cell state.
        output_size: Number of sigmoid outputs per time step.
    """

    def __init__(self, input_size=128, hidden_size=64, output_size=2):
        super().__init__()
        self.hidden_size = hidden_size
        # Sub-modules are created in a fixed order (lstm, front, behind) so
        # that seeded parameter initialisation stays reproducible.
        self.lstm = torch.nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=False,
        )
        self.front = torch.nn.Linear(input_size, hidden_size)
        self.behind = torch.nn.Linear(hidden_size, output_size)
        self.reset_hidden()

    def reset_hidden(self):
        """Replace the stored ``(h, c)`` state with fresh uniform-random tensors.

        Each tensor has shape ``(1, 1, hidden_size)``, i.e. one layer and a
        batch of one.
        """
        shape = (1, 1, self.hidden_size)
        self.hidden = (torch.rand(*shape), torch.rand(*shape))

    def forward(self, inp, storehidden=False):
        """Run one forward pass starting from the stored hidden state.

        Args:
            inp: Tensor whose last dimension is ``input_size``. After the front
                layer it is reshaped to ``(-1, 1, hidden_size)``, so the LSTM
                sees it as a sequence with a batch of one (``batch_first=False``).
            storehidden: When true, the LSTM's final ``(h, c)`` replaces
                ``self.hidden`` so that the next call continues from it.

        Returns:
            Tensor of shape ``(seq_len, 1, output_size)`` with values in (0, 1).
        """
        # Front layer projects the raw features to the LSTM width.
        x = self.front(inp)
        x, next_hidden = self.lstm(
            x.view(-1, 1, self.hidden_size), self.hidden
        )
        if storehidden:
            # Detach before storing: keeping the graph-attached state would
            # make the next backward pass walk into a graph that has already
            # been freed (or force retain_graph=True and grow memory forever).
            self.hidden = tuple(state.detach() for state in next_hidden)
        return torch.sigmoid(self.behind(x))
# Demo: one optimisation step, then compare the prediction made with the
# initial hidden state against the prediction made after storing the hidden
# state produced by the updated network.
net = neuralnet()
optimizer = torch.optim.Adam(net.parameters(), lr=0.3)
loss_function = torch.nn.BCELoss()
# NOTE(review): `input` shadows the builtin; the name is kept so that any
# later code referring to it keeps working.
input = torch.rand(128)
target = torch.tensor([0, 1], dtype=torch.float).view(1, 1, 2)

# Clear any stale gradients before computing new ones.
optimizer.zero_grad()
output = net(input)
loss = loss_function(output, target)
# The graph is back-propagated exactly once and the training pass does not
# store the hidden state, so retain_graph=True is not needed here.
loss.backward()
optimizer.step()

with torch.no_grad():
    # Prediction of the updated weights, still using the initial hidden state.
    print("direct:", net(input))
    # Store the hidden state produced by the updated network ...
    net(input, True)
    # ... and predict again, now continuing from that stored state.
    print("after the optimizer:", net(input))
Currently I store the hidden state after the optimizer step, and the output changes only slightly compared with the output from before it was stored:
direct: tensor([[[0.0197, 0.9791]]])
after the optimizer: tensor([[[0.0092, 0.9885]]])