Hi all,
I am trying to make a model which will produce an integer value from two sequences. But my model weights are not updating. The loss is constant. I tried all the debugging mentioned in the forum for related issues. list(model.parameters())[0].grad is None . Can anyone please help me to find out the mistake. I am giving the code below.
class my_model(nn.Module):
def __init__(self):
super(my_model,self).__init__()
self.lstm1=nn.LSTM(1024,1024,num_layers=1)
self.lstm2=nn.LSTM(2048,1024,num_layers=1)
self.v=nn.Parameter(torch.randn(1024))
self.w1=nn.Parameter(torch.randn(1024,1024))
self.w2=nn.Parameter(torch.randn(1024,2048))
self.tanh=nn.Tanh()
self.relu=nn.ReLU()
self.softmax=nn.Softmax()
self.drop=nn.Dropout(0.5)
self.fc1=nn.Linear(1024,512)
self.fc2=nn.Linear(512,1)
def forward(self,data):
#data[0]= sequence1 features, data[1]=sequence2 features
score1,score2=self.score(data[0],data[1])
sf1,sf2=get_scored(data[0],data[1],score1,score2) # multiply the score with the sequence feature vectors
n_s=sf1.size()[1] #sequence1 length
rep1,_=self.lstm2(sf1.detach().view(n_s,n_batches,2048))
s_rep=rep1[-1]
nf2=sf2.size()[1] # sequence2 length
rep2=self.lstm1(sf2.detach().view(n_f2,n_batches,1024))
w_rep=rep2[-1]
c=s_rep*w_rep # multiplying lstm outputs
outputs=[] # finding target float value for all the sequence in the batch, size= n_batches
for i in range(n_batches):
out1=self.fc1(c[i])
out1=self.drop(out1)
out1=self.relu(out1)
out2=self.fc2(out1)
out2=self.relu(out2)
outputs.append(out2)
outputs=torch.Tensor(outputs).to(device)
outputs.requires_grad=True
return outputs
def score(self,sf,wf):
nbatches=sf.size()[0]
nf=sf.size()[1]
nr=sf.size()[2]
nw=wf.size()[1]
s=np.zeros((nbatches,nf,nr,nw),dtype='float32')
for b in range(nbatches):
for f in range(nf):
for r in range(nr):
for w in range(nw):
w_p=torch.mv(self.w1,wf[b][w])
r_p=torch.mv(self.w2,sf[b][f][r])
s[b][f][r][w]=torch.dot(self.v,self.tanh(w_p+r_p))
s=torch.from_numpy(s).to(device)
rs=self.softmax(s.sum(dim=3))
ws=self.softmax(s.sum(dim=2).sum(dim=1))
return rs,ws
model=my_model.to(device)
# training
criterian=nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# data is a tuple of 2 tensors which contain features of the 2 sequences
for data,target in train_dataset_loader: # target is a tensor of size n_batches and target[i] is an integer.
b_cnt=b_cnt+1
data=list(data)
data[0]=data[0].to(device)
data[1]=data[1].to(device)
target=target.to(device).float()
optimizer.zero_grad()
pred=model(data).to(device)
loss=criterian(pred,target)
loss.backward()
optimizer.step()
Thanks