Hi, I am trying to implement a sequence-to-sequence (seq2seq) model in PyTorch, and while training it I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-12-cd1866fff827> in <module>()
----> 1 train(data1[0:10],data2[0:10],128,1,128,128,10000)
<ipython-input-6-2bf5208cf775> in train(data1, data2, embedding_size, n_layers, input_size, hidden_size, num_epochs)
33 enc.zero_grad()
34 dec.zero_grad()
---> 35 l.backward()
36 optimizer.step()
37
/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.pyc in backward(self, gradient, retain_variables)
144 'or with gradient w.r.t. the variable')
145 gradient = self.data.new().resize_as_(self.data).fill_(1)
--> 146 self._execution_engine.run_backward((self,), (gradient,), retain_variables)
147
148 def register_hook(self, hook):
/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/thnn/auto.pyc in backward(self, grad_output)
43
44 def backward(self, grad_output):
---> 45 input, target = self.saved_tensors
46 grad_input = grad_output.new().resize_as_(input).zero_()
47 getattr(self._backend, update_grad_input.name)(self._backend.library_state, input, target,
RuntimeError: Trying to backward through the graph second time, but the buffers have already been freed. Please specify retain_variables=True when calling backward for the first time.
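From what I understand, this error appears whenever .backward() is called a second time through a graph whose intermediate buffers have already been freed. Here is a minimal, standalone sketch (not my actual code) of the pattern that seems to trigger it:

import torch
from torch.autograd import Variable

x = Variable(torch.ones(1), requires_grad=True)
y = x * x             # a node that saves its inputs for the backward pass
y.backward()          # first backward works; the saved buffers are then freed
y.backward()          # second backward through the same graph raises this RuntimeError
# calling y.backward(retain_variables=True) the first time keeps the buffers
# alive and avoids the error (the flag is named retain_graph=True in newer releases)

Is that what is happening somewhere in my training loop below?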
Code:
import torch
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
class Encoder(nn.Module):
    def __init__(self, vocab_size, embedding_size, n_layers, hidden_size):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, n_layers)
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size

    def init_hidden_cell(self):
        # fresh (h0, c0) pair for the LSTM
        hidden = (Variable(torch.randn(1, 1, self.hidden_size)),
                  Variable(torch.randn(1, 1, self.hidden_size)))
        return hidden

    def forward(self, x):
        # x is a list of one-hot Variables; recover each word index and embed it
        vect = []
        for i in xrange(len(x)):
            vect.append(self.embedding(x[i].max(1)[1]))
        hidden = self.init_hidden_cell()
        output, hidden = self.lstm(torch.cat(vect), hidden)
        return hidden
class Decoder(nn.Module):
    def __init__(self, vocab_size, hidden_size, input_size, n_layers):
        super(Decoder, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, n_layers)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.fc1 = nn.Linear(hidden_size, vocab_size)

    def forward(self, hidden):
        # feed a zero vector as input; only the hidden state carries information
        output, hidden = self.lstm(Variable(torch.zeros(1, 1, self.input_size)), hidden)
        return F.softmax(self.fc1(hidden[0].view(-1, self.hidden_size))), hidden
def make_corpus(data):
    # map each word to an integer id; "#" is the end-of-sentence token
    corpa = {"#": 0}
    for i in data:
        for j in i.split(" "):
            if j not in corpa.keys():
                corpa[j] = len(corpa)
    return corpa

def make_vect(word, corpa):
    # one-hot vector of size len(corpa) for the given word
    temp = torch.FloatTensor(1, len(corpa)).zero_()
    temp[0][corpa[word]] = 1.0
    return temp
def train(data1, data2, embedding_size, n_layers, input_size, hidden_size, num_epochs):
    corpa_lang1 = make_corpus(data1)
    corpa_lang2 = make_corpus(data2)
    #print corpa_lang1
    enc = Encoder(len(corpa_lang1), embedding_size, n_layers, hidden_size)
    dec = Decoder(len(corpa_lang2), hidden_size, input_size, n_layers)
    l = 0
    loss = nn.CrossEntropyLoss()
    params = list(enc.parameters()) + list(dec.parameters())
    optimizer = optim.SGD(params, lr=0.01)
    for i in xrange(num_epochs):
        for j in xrange(len(data1)):
            print data1[j].split(" ")
            # encode the source sentence (plus the "#" end token) as one-hot vectors
            ip_vec = [Variable(make_vect(k, corpa_lang1), requires_grad=True) for k in data1[j].split(" ")]
            ip_vec = ip_vec + [Variable(make_vect("#", corpa_lang1), requires_grad=True)]
            op1, op2 = dec(enc(ip_vec))
            # decode one target word at a time, accumulating the loss in l
            for m in xrange(len(data2[j].split(" ")) + 1):
                if m == len(data2[j].split(" ")):
                    op_vec = Variable(torch.FloatTensor([corpa_lang2["#"]]))
                    op_vec.data = op_vec.data.long()
                    op1, op2 = dec(op2)
                    l = l + loss(op1, op_vec)
                else:
                    op_vec = Variable(torch.FloatTensor([corpa_lang2[data2[j].split(" ")[m]]]))
                    op_vec.data = op_vec.data.long()
                    if m == 0:
                        l = l + loss(op1, op_vec)
                    else:
                        op1, op2 = dec(op2)
                        l = l + loss(op1, op_vec)
            enc.zero_grad()
            dec.zero_grad()
            l.backward()
            optimizer.step()
    return enc, dec
lines = open('data/eng-fra.txt').read().strip()
data1 = []
data2 = []
for i in lines.split("\n"):
    #print i.split("\t")
    if len(i.split("\t")) == 2:
        data1.append(i.split("\t")[0])
        data2.append(i.split("\t")[1])
for i in xrange(len(data2)):
    data2[i] = unicode(data2[i], encoding='utf-8')
train(data1[0:10], data2[0:10], 128, 1, 128, 128, 10000)
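I wonder whether the problem is that l is only set to 0 once, before the loops, so every l.backward() after the first one re-traverses graphs from earlier iterations whose buffers were already freed. This toy loop (hypothetical names, unrelated to my model) seems to show the same failure:

import torch
from torch.autograd import Variable

w = Variable(torch.ones(1), requires_grad=True)
l = 0                   # initialized once, like l in train()
for step in xrange(2):
    l = l + w * w       # the new loss still references step 0's subgraph
    l.backward()        # step 0 succeeds; step 1 raises the RuntimeError,
                        # since step 0's buffers were freed by the first backward
# resetting l = 0 at the top of the loop body (or passing retain_variables=True
# to backward) appears to avoid this in the toy case

If that is indeed the cause, is moving l = 0 inside the loop the right fix for my training code, or should I be using retain_variables=True instead?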
Can someone please help me debug this problem? Thank you.