Hello, I am relatively new to Pytorch and I am trying to train a stateful LSTM Autoencoder. But while training I run into the following error
“RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time”
Even after setting retain_graph=True I run into another error
“RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation.”
A solution that I found on the forum is to use detach() at the end of every forward pass. However, I am unsure if detaching the hidden state from the computation graph would only retain the hidden state from the previous time step alone. If this is the case how do I backpropagate across batches?
class Encoder(nn.Module):
def __init__(self, inputDimension, outputDimension):
super(Encoder, self).__init__()
self.inputDimension = inputDimension
self.outputDimension = outputDimension
#batch size (numLayers, batchSize, hidden_layer size)
self.hidden1 = (torch.zeros(1,1,64), torch.zeros(1,1,64))
self.hidden2 = (torch.zeros(1,1,self.outputDimension), torch.zeros(1,1, self.outputDimension))
self.lstm1 = nn.LSTM(self.inputDimension, 64, num_layers=1)
self.lstm2 = nn.LSTM(64, self.outputDimension, num_layers=1)
def forward(self, inputs):
x,self.hidden1= self.lstm1(inputs, self.hidden1)
x,self.hidden2= self.lstm2(x, self.hidden2)
return x
class Decoder(nn.Module):
def __init__(self, inputDimension, outputDimension):
super(Decoder, self).__init__()
self.inputDimension = inputDimension
self.outputDimension = outputDimension
self.hidden3 = (torch.zeros(1, 1, 64), torch.zeros(1, 1, 64))
self.lstm1 = nn.LSTM(self.inputDimension, 64, num_layers=1)
self.lin1 = nn.Linear(64, self.outputDimension)
def forward(self, inputs):
#no need to reshape input as the output of Encoder is a 3D tensor
x,self.hidden3= self.lstm1(inputs, self.hidden3)
x = x.view(-1, torch.prod(torch.tensor(x.shape)[1:]))
x = self.lin1(x)
return x
class LSTM_Autoencoder(nn.Module):
def __init__(self,inputDimension, outputDimension):
super(LSTM_Autoencoder, self).__init__()
self.inputDimension = inputDimension
self.outputDimension = outputDimension
self.encoder = Encoder(inputDimension, outputDimension)
self.decoder = Decoder(outputDimension, inputDimension)
def forward(self, inputs):
inputs = inputs.view(len(inputs), 1, -1)
x = self.encoder(inputs)
x = self.decoder(x)
return x
Any clarification provided is much appreciated.