While trying to extend word_language_model example, I’m hitting an error:

`Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.`

I guess I’m missing something obvious here, but why running the model again doesn’t refill the buffers? I was going to implement training in a similar loop.

Code to reproduce:

```
import torch as th
import torch.nn as nn
from torch.autograd import Variable
# borrowed from `word_language_model`
class RNNModel(nn.Module):
"""Container module with an encoder, a recurrent module, and a decoder."""
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers):
super(RNNModel, self).__init__()
self.encoder = nn.Embedding(ntoken, ninp)
self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, bias=False)
self.decoder = nn.Linear(nhid, ntoken)
self.init_weights()
self.rnn_type = rnn_type
self.nhid = nhid
self.nlayers = nlayers
def init_weights(self):
initrange = 0.1
self.encoder.weight.data.uniform_(-initrange, initrange)
self.decoder.bias.data.fill_(0)
self.decoder.weight.data.uniform_(-initrange, initrange)
def forward(self, input, hidden):
emb = self.encoder(input)
output, hidden = self.rnn(emb, hidden)
decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2)))
return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
def init_hidden(self, bsz):
weight = next(self.parameters()).data
if self.rnn_type == 'LSTM':
return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
else:
return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())
vocab_size = 256
batch_size = 64
model = RNNModel('GRU', vocab_size, 100, 100, 3)
model.train()
optimizer = th.optim.Adam(model.parameters(), lr=1e-2)
criterion = nn.CrossEntropyLoss()
x = Variable(th.LongTensor(50, 64))
x[:] = 1
state = model.init_hidden(batch_size)
print('first pass')
optimizer.zero_grad()
logits, state = model(x, state)
loss = criterion(logits.view(-1, vocab_size), x.view(-1))
loss.backward()
optimizer.step()
print('second pass')
optimizer.zero_grad()
logits, state = model(x, state)
loss = criterion(logits.view(-1, vocab_size), x.view(-1))
loss.backward() # <-- error occurs here
optimizer.step()
```