Weights are not updating. list(model.parameters())[0].grad is None

Gkv · June 29, 2018, 7:13pm

Hi all,
I am trying to make a model which will produce an integer value from two sequences. But my model weights are not updating. The loss is constant. I tried all the debugging mentioned in the forum for related issues. list(model.parameters())[0].grad is None . Can anyone please help me to find out the mistake. I am giving the code below.

class my_model(nn.Module):
    def __init__(self):
        super(my_model,self).__init__()
        self.lstm1=nn.LSTM(1024,1024,num_layers=1)
        self.lstm2=nn.LSTM(2048,1024,num_layers=1)
        self.v=nn.Parameter(torch.randn(1024))
        self.w1=nn.Parameter(torch.randn(1024,1024))
        self.w2=nn.Parameter(torch.randn(1024,2048))
        self.tanh=nn.Tanh()
        self.relu=nn.ReLU()
        self.softmax=nn.Softmax()
        self.drop=nn.Dropout(0.5)
        self.fc1=nn.Linear(1024,512)
        self.fc2=nn.Linear(512,1)
    def forward(self,data):
        #data[0]= sequence1 features, data[1]=sequence2 features
        score1,score2=self.score(data[0],data[1])
        sf1,sf2=get_scored(data[0],data[1],score1,score2) # multiply the score with the sequence feature vectors 
        n_s=sf1.size()[1] #sequence1 length
        rep1,_=self.lstm2(sf1.detach().view(n_s,n_batches,2048))
        s_rep=rep1[-1]
        nf2=sf2.size()[1] # sequence2 length
        rep2=self.lstm1(sf2.detach().view(n_f2,n_batches,1024))
        w_rep=rep2[-1]
        c=s_rep*w_rep # multiplying lstm outputs
        
        outputs=[] # finding target float value for all the sequence in the batch, size= n_batches
        for i in range(n_batches):
            out1=self.fc1(c[i])
            out1=self.drop(out1)
            out1=self.relu(out1)
            out2=self.fc2(out1)
            out2=self.relu(out2)
            outputs.append(out2)
        outputs=torch.Tensor(outputs).to(device)
        outputs.requires_grad=True
        return outputs 
    
    def score(self,sf,wf): 
        nbatches=sf.size()[0]
        nf=sf.size()[1]
        nr=sf.size()[2]
        nw=wf.size()[1]
        s=np.zeros((nbatches,nf,nr,nw),dtype='float32')
        for b in range(nbatches):
            for f in range(nf):
                for r in range(nr):
                    for w in range(nw):
                        w_p=torch.mv(self.w1,wf[b][w])
                        r_p=torch.mv(self.w2,sf[b][f][r])
                        s[b][f][r][w]=torch.dot(self.v,self.tanh(w_p+r_p))
        s=torch.from_numpy(s).to(device)
        rs=self.softmax(s.sum(dim=3))
        ws=self.softmax(s.sum(dim=2).sum(dim=1))  
        return rs,ws 


model=my_model.to(device)
# training
criterian=nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# data is a tuple of 2 tensors which contain features of the 2 sequences
for data,target in train_dataset_loader: # target is a tensor of size n_batches and target[i] is an integer. 
    b_cnt=b_cnt+1
    data=list(data)
    data[0]=data[0].to(device)
    data[1]=data[1].to(device)
    target=target.to(device).float()      
    optimizer.zero_grad()
    pred=model(data).to(device)
    loss=criterian(pred,target)
    loss.backward()
    optimizer.step()

Thanks

Prashant_Kalikotay · September 17, 2019, 10:22am

Hey Can you tell me if you could fix this bug and how?. Thanks in advance.