Hi,
In my LSTM model, I found that the tensor variables must be re-allocate each iteration(each backward() calling), or the error
RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.```
will be raised.
Sample code:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 1
class LSTM(nn.Module):
"""
LSTM extractor for Peptide
"""
def __init__(self, input_channels, lstm_hidden_size=100, lstm_num_layers=2):
super(LSTM, self).__init__()
self.lstm = nn.LSTM(input_channels,
lstm_hidden_size,
lstm_num_layers,
bias=False,
bidirectional=True)
self.lstm_num_layers = lstm_num_layers
self.lstm_hidden_size = lstm_hidden_size
self.lstm_states = self.int_hidden_states()
def int_hidden_states(self):
return (
torch.zeros((self.lstm_num_layers*2, BATCH_SIZE, self.lstm_hidden_size)).to(device),
torch.zeros((self.lstm_num_layers*2, BATCH_SIZE, self.lstm_hidden_size)).to(device),
)
def forward(self, peptide):
lstm_out, self.lstm_states = self.lstm(peptide, self.lstm_states)
return lstm_out[-1]
def main():
model = LSTM(10)
for _ in range(5):
a = model.forward(torch.rand(15, BATCH_SIZE, 10, dtype=torch.float).to(device))
print(a.shape)
## This is required
model.lstm_states = model.int_hidden_states()
a.backward(torch.rand(a.shape))
print('done')
if __name__ == '__main__':
main()
What’s the logical behind this? Why parameter tensors of a module/layer do not require these actions?
Ii is good to use retain_graph=True
here?