When to save the hidden state during training in LSTMs

When training an LSTM, when should I save the hidden state:
during the run, or after running the optimizer?
Here is an example:

import torch
class neuralnet(torch.nn.Module):
    """Linear -> LSTM -> Linear network that keeps its LSTM state between calls.

    The recurrent state is held in ``self.hidden`` as an ``(h, c)`` tuple. It
    is only replaced when ``forward`` is called with ``storehidden=True`` or
    when ``reset_hidden`` is called.

    NOTE: ``self.hidden`` is a plain attribute, not a registered buffer, so it
    is not part of ``state_dict()`` and is not moved by ``Module.to()``.
    """

    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size=64,
            hidden_size=64,
            num_layers=1,
            batch_first=False,
        )
        self.front = torch.nn.Linear(128, 64)
        self.behind = torch.nn.Linear(64, 2)
        self.reset_hidden()

    def reset_hidden(self):
        """Replace the stored state with fresh uniform-random ``(h, c)`` tensors."""
        # Shape is (num_layers, batch, hidden_size) = (1, 1, 64).
        self.hidden = (torch.rand(1, 1, 64), torch.rand(1, 1, 64))

    def forward(self, inp, storehidden=False):
        """Run one pass through the network, starting from ``self.hidden``.

        Args:
            inp: Tensor whose last dimension is 128 (input size of ``front``).
            storehidden: If true, the LSTM state produced by this pass
                replaces ``self.hidden`` and becomes the starting state of
                the next call.

        Returns:
            Sigmoid activations of the final linear layer; the last dimension
            has size 2.
        """
        x = self.front(inp)
        # batch_first=False, so the view is (seq_len, batch=1, features=64).
        x, prx = self.lstm(x.view(-1, 1, 64), self.hidden)
        if storehidden:
            # Detach before storing: otherwise the saved state keeps this
            # call's autograd graph alive, and the next backward() would try
            # to walk back through a graph that has already been freed.
            self.hidden = tuple(state.detach() for state in prx)
        return torch.sigmoid(self.behind(x))

# Build the model, take a single Adam step on one random sample, then compare
# the prediction made from the initial hidden state with the prediction made
# after the LSTM state from one extra pass has been stored.
net = neuralnet()
optimizer = torch.optim.Adam(net.parameters(), lr=0.3)
loss_function = torch.nn.BCELoss()
sample = torch.rand(128)  # named `sample` so the builtin input() is not shadowed
target = torch.tensor([0, 1], dtype=torch.float).view(1, 1, 2)

output = net(sample)
loss = loss_function(output, target)
# Only one backward pass runs over this graph, so retain_graph is not needed.
loss.backward()
optimizer.step()

# Evaluation only: no_grad keeps these passes out of autograd.
with torch.no_grad():
    print("direct:", net(sample))
    net(sample, True)  # store the LSTM state produced with the updated weights
    print("after the optimizer:", net(sample))

Currently I store it after the optimizer step, and there is little difference compared to before it.

direct: tensor([[[0.0197, 0.9791]]])
after the optimizer: tensor([[[0.0092, 0.9885]]])