I have a tree lstm code, and am trying to train it but after each epoch my loss is staying the exact same.
I have been stuck for over 10 days, can anyone help me with this please.
Trying different learning rates has also not helped
class treeEncoder(nn.Module):
def __init__(self, cuda,in_dim, mem_dim,emb,labels,labelMap,criterion,device):
super(treeEncoder, self).__init__()
self.cudaFlag = cuda
self.in_dim = in_dim
self.mem_dim = mem_dim
self.device = device
self.labels = labels
self.labelMap = labelMap
self.criterion = criterion
self.ix = nn.Linear(self.in_dim,self.mem_dim)
self.ih = nn.Linear(self.mem_dim,self.mem_dim)
self.fx = nn.Linear(self.in_dim,self.mem_dim)
self.fh = nn.Linear(self.mem_dim, self.mem_dim)
self.ux = nn.Linear(self.in_dim,self.mem_dim)
self.uh = nn.Linear(self.mem_dim,self.mem_dim)
self.ox = nn.Linear(self.in_dim,self.mem_dim)
self.oh = nn.Linear(self.mem_dim,self.mem_dim)
self.emb = emb
self.outputModule = OutputModule(self.cudaFlag,mem_dim,4,self.device,dropout=False)
def predict(self,node):
loss = Variable(torch.zeros(1))
for i in range(node.num_children):
_, _ = self.forward(node.childrenList[i])
child_c, child_h = self.getChildStates(node)
node.state = self.nodeForward(self.emb[node.idx].to(self.device),child_c,child_h)
output = self.outputModule.forward(node.state[1], False)
return output
def forward(self,node):
loss = Variable(torch.zeros(1))
if self.cudaFlag:
loss = loss.to(self.device)
for i in range(node.num_children):
_, child_loss = self.forward(node.childrenList[i])
loss = loss + child_loss
child_c, child_h = self.getChildStates(node)
node.state = self.nodeForward(self.emb[node.uid].to(self.device),child_c,child_h)
output = self.outputModule.forward(node.state[1], True)
node.output = output
label = Variable(torch.tensor(self.labelMap[node.label]))
loss = loss + self.criterion(output.reshape(-1,4), label.reshape(-1))
return node.state, loss
def nodeForward(self,x,child_c,child_h):
child_h_sum = torch.sum(child_h,0)
i = torch.sigmoid(self.ix(x) + self.ih(child_h_sum))
o = torch.sigmoid(self.ox(x)+self.oh(child_h_sum))
u = torch.tanh(self.ux(x)+self.uh(child_h_sum))
fx = self.fx(x)
f = torch.cat([self.fh(child_hi)+fx for child_hi in child_h], 0)
fc = torch.sigmoid(f)
c = i*u + torch.sum(fc,0)
h = o*torch.tanh(c)
return c,h
def getChildStates(self,node):
if node.num_children==0:
child_c = Variable(torch.zeros(1,self.mem_dim))
child_h = Variable(torch.zeros(1,self.mem_dim))
if self.cudaFlag:
child_c, child_h = child_c.to(self.device), child_h.to(self.device)
else:
child_c = Variable(torch.Tensor(node.num_children,self.mem_dim))
child_h = Variable(torch.Tensor(node.num_children,self.mem_dim))
if self.cudaFlag:
child_c, child_h = child_c.to(self.device), child_h.to(self.device)
for idx in range(node.num_children):
child_c[idx] = node.childrenList[idx].state[0]
child_h[idx] = node.childrenList[idx].state[1]
return child_c, child_h
for i in range(epochs):
train_losses = []
val_losses = []
for tree in tqdm_notebook(x_train):
count += 1
optimizer.zero_grad()
(h,c),loss = model(tree.root)
label = Variable(torch.tensor(labelMap[tree.root.label]))
if torch.cuda.is_available():
label.to(device)
loss.backward()
train_losses.append(loss.item())
optimizer.step()