Have gradients but net weight do dot change

wentao · May 3, 2019, 6:28am

Hello,

I made a model but loss do not change, no matter how large learningrate I set, so I checked the gradients and the model weights, I found I have the gradients but the model weights are not updated.
Here is my model:

class Net(torch.nn.Module):
    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden1 = torch.nn.Linear(n_features, n_hidden)
        self.dropout1 = torch.nn.Dropout(p=0.2)
        self.hidden2 = torch.nn.Linear(n_hidden, n_hidden)
        self.dropout2 = torch.nn.Dropout(p=0.2)
        self.hidden3 = torch.nn.Linear(n_hidden, n_hidden)
        self.dropout3 = torch.nn.Dropout(p=0.2)
        self.predict = torch.nn.Linear(n_hidden, n_output)


    def forward(self, x):
        x = torch.sigmoid(self.dropout1(self.hidden1(x)))
        x = torch.sigmoid(self.dropout1(self.hidden2(x)))
        x = torch.sigmoid(self.dropout1(self.hidden3(x)))


        x = self.predict(x)
        return (x)

if __name__ == '__main__':


    lr = 0.95
    netaudio = Net(13, 100, 1)
    netaudio.cuda()
    netvideo = Net(34, 100, 1)
    netvideo.cuda()
    params = list(netaudio.parameters()) + list(netvideo.parameters())

    optimizer = torch.optim.Adam(params, lr=lr, betas=(0.9, 0.99))
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15000, gamma=0.5)
    loss_func = torch.nn.MSELoss()

    optimizer.zero_grad()

feataudio and featbvideo are input for models, and audio, video are some data both have size(frame_number, 3480)

               prediction=torch.exp((netaudio(feataudio).mul(torch.log(audio))+netvideo(featvideo).mul(torch.log(video)))).cuda()
               targetmatrix = targetmatrix.type(torch.cuda.FloatTensor)
                loss = (prediction - targetmatrix).cuda()

                unit = torch.ones(loss.size()).cuda()
                a = list(netaudio.parameters())[0].clone()
                loss.backward(unit)
                scheduler.step()
                b = list(netaudio.parameters())[0].clone()
                print(torch.equal(a.data, b.data))

         
                for p in netaudio.parameters():
                    print(p.grad.data.sum())

And I got these results:

loss: 51118341757649438113792.0000
True
tensor(1.8778e+16, device='cuda:0')
tensor(3.6852e+14, device='cuda:0')
tensor(7.5647e+17, device='cuda:0')
tensor(1.7120e+16, device='cuda:0')
tensor(9.7500e+18, device='cuda:0')
tensor(1.9442e+17, device='cuda:0')
tensor(-3.8174e+19, device='cuda:0')
tensor(-7.6594e+17, device='cuda:0')
loss: 7608380647529762521088.0000
True
tensor(2.0884e+16, device='cuda:0')
tensor(4.2580e+14, device='cuda:0')
tensor(1.1888e+18, device='cuda:0')
tensor(2.6893e+16, device='cuda:0')
tensor(1.6915e+19, device='cuda:0')
tensor(3.3719e+17, device='cuda:0')
tensor(-6.8886e+19, device='cuda:0')
tensor(-1.3813e+18, device='cuda:0')

Is there something wrong with my code? Please help me, thank you

wentao · May 3, 2019, 7:27am

Have found the problem, now it works