Using detach() while training a stateful LSTM

Hello, I am relatively new to PyTorch and I am trying to train a stateful LSTM autoencoder. While training, I run into the following error:

“RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time”

Even after setting retain_graph=True, I run into another error:

“RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation.”
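For context, my training loop follows the usual pattern; here is a simplified sketch (the data loader and dimension names are just illustrative):

    import torch
    import torch.nn as nn

    # simplified sketch of my training loop (names are illustrative)
    model = LSTM_Autoencoder(inputDimension, outputDimension)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())

    for sequence in dataLoader:
        optimizer.zero_grad()
        reconstruction = model(sequence)
        loss = criterion(reconstruction, sequence.view(len(sequence), -1))
        loss.backward()   # raises the first RuntimeError above on the second batch
        optimizer.step()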

A solution I found on the forum is to call detach() on the hidden state at the end of every forward pass. However, I am unsure whether detaching the hidden state from the computation graph means that only the values from the previous time step are kept, with the gradient history cut off entirely. If that is the case, how do I backpropagate across batches?
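If I understand the suggestion, it amounts to a small helper like the one below (the name detach_hidden is mine; I believe the PyTorch word_language_model example calls the same idea repackage_hidden), applied to each stored state at the end of forward():

    def detach_hidden(hidden):
        # return the LSTM state tuple (h, c) with the same values but with
        # the autograd history removed, so gradients stop flowing here
        return tuple(state.detach() for state in hidden)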

import torch
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, inputDimension, outputDimension):
        super(Encoder, self).__init__()
        self.inputDimension = inputDimension
        self.outputDimension = outputDimension
        # hidden states stored on the module: shape (num_layers, batch_size, hidden_size)
        self.hidden1 = (torch.zeros(1, 1, 64), torch.zeros(1, 1, 64))
        self.hidden2 = (torch.zeros(1, 1, self.outputDimension), torch.zeros(1, 1, self.outputDimension))
        self.lstm1 = nn.LSTM(self.inputDimension, 64, num_layers=1)
        self.lstm2 = nn.LSTM(64, self.outputDimension, num_layers=1)

    def forward(self, inputs):
        # the returned states are stored on the module, so the graph keeps
        # growing across batches
        x, self.hidden1 = self.lstm1(inputs, self.hidden1)
        x, self.hidden2 = self.lstm2(x, self.hidden2)
        return x

class Decoder(nn.Module):
    def __init__(self, inputDimension, outputDimension):
        super(Decoder, self).__init__()
        self.inputDimension = inputDimension
        self.outputDimension = outputDimension
        self.hidden3 = (torch.zeros(1, 1, 64), torch.zeros(1, 1, 64))
        self.lstm1 = nn.LSTM(self.inputDimension, 64, num_layers=1)
        self.lin1 = nn.Linear(64, self.outputDimension)

    def forward(self, inputs):
        # no need to reshape: the output of Encoder is already a 3D tensor
        x, self.hidden3 = self.lstm1(inputs, self.hidden3)
        # flatten (seq_len, batch, hidden) to (seq_len, batch * hidden) for the linear layer
        x = x.view(x.size(0), -1)
        x = self.lin1(x)
        return x

class LSTM_Autoencoder(nn.Module):
    def __init__(self, inputDimension, outputDimension):
        super(LSTM_Autoencoder, self).__init__()
        self.inputDimension = inputDimension
        self.outputDimension = outputDimension
        self.encoder = Encoder(inputDimension, outputDimension)
        self.decoder = Decoder(outputDimension, inputDimension)

    def forward(self, inputs):
        # reshape to (seq_len, batch_size=1, input_size) as expected by nn.LSTM
        inputs = inputs.view(len(inputs), 1, -1)
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x
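Concretely, I think the suggested fix would change Encoder.forward as follows, with self.hidden3 in Decoder handled the same way. This is just my reading of the forum advice, not tested code:

    def forward(self, inputs):
        x, self.hidden1 = self.lstm1(inputs, self.hidden1)
        x, self.hidden2 = self.lstm2(x, self.hidden2)
        # keep the state values for the next batch, but cut them out of the
        # current graph so the next backward() does not revisit this batch
        self.hidden1 = tuple(s.detach() for s in self.hidden1)
        self.hidden2 = tuple(s.detach() for s in self.hidden2)
        return x

My understanding is that this makes each backward() see only the current batch, i.e. truncated backpropagation through time, which is exactly what prompts my question above.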

Any clarification would be much appreciated.