Weight values not updating in PyTorch after .backward()

Hi,

I want to design a network that estimates the next step of a signal, so I've started with a simple sine wave. While inspecting the network I noticed that the weights are not changing. What could be the reason? I can't find an explanation.

import numpy as np
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self,in_dim,hidden_dim,num_classes):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(in_dim,hidden_dim)
        self.layer2 = nn.Linear(hidden_dim,hidden_dim)
        self.layer3 = nn.Linear(hidden_dim,num_classes)
        self.relu = nn.ReLU()
        self.out = nn.LogSoftmax(dim=0)  # defined but never used in forward

    def forward(self,x):
        a = self.relu(self.layer1(x))
        a = self.relu(self.layer2(a))
        return self.relu(self.layer3(a))

def train(epoch,L,depth):  # depth is not used below
    criteria = nn.MSELoss()
    learning_rate = 1e-3
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    t = np.linspace(0,2,L+1)       
    fs = L+1
    
    trn_loss = list()
        
    for f in range(0,epoch):
        phase = f/np.pi      
        x = np.sin(2*np.pi*t*fs+phase)   
        x = torch.from_numpy(x).float()
        
        optimizer.zero_grad()
        
        x_hat = model(x[:-1])
            
        currentCost = criteria(x_hat,x[-1])
        trn_loss.append(currentCost.item())
        print(model.layer1.weight.data.clone())  # weights before the update
        currentCost.backward()
        optimizer.step()
        print(model.layer1.weight.data.clone())  # weights after the update

and the outputs are:

tensor([[-0.1715, -0.1696,  0.0424,  ...,  0.0154,  0.1450, -0.0544],
        [ 0.0368,  0.1427, -0.1419,  ...,  0.0966,  0.0298, -0.0659],
        [-0.1641, -0.1551,  0.0570,  ..., -0.0227, -0.1426, -0.0648],
        ...,
        [-0.0684, -0.1707, -0.0711,  ...,  0.0788,  0.1386,  0.1546],
        [ 0.1401, -0.0922, -0.0104,  ..., -0.0490,  0.0404,  0.1038],
        [-0.0604, -0.0517,  0.0715,  ..., -0.1200,  0.0014,  0.0215]])
tensor([[-0.1715, -0.1696,  0.0424,  ...,  0.0154,  0.1450, -0.0544],
        [ 0.0368,  0.1427, -0.1419,  ...,  0.0966,  0.0298, -0.0659],
        [-0.1641, -0.1551,  0.0570,  ..., -0.0227, -0.1426, -0.0648],
        ...,
        [-0.0684, -0.1707, -0.0711,  ...,  0.0788,  0.1386,  0.1546],
        [ 0.1401, -0.0922, -0.0104,  ..., -0.0490,  0.0404,  0.1038],
        [-0.0604, -0.0517,  0.0715,  ..., -0.1200,  0.0014,  0.0215]])

I’m not sure how you’ve initialized your model, but you should check the shapes of your input and target.
nn.Linear layers expect an input of [batch_size, *, nb_features].
Based on your code snippet, I assume you are using 99 features and a single sample.
If that’s the case, you should unsqueeze the batch dimension for the input and target:

x = x.unsqueeze(0)
x_hat = model(x[:, :-1])
...
loss = criteria(x_hat, x[:, -1].unsqueeze(1))
...

Also, check your gradients after calling .backward():

print(model.layer3.weight.grad)
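
If you want to check all parameters at once, here is a minimal sketch (assuming the model variable from your snippet) that loops over everything:

# Inspect every parameter's gradient after calling backward()
for name, param in model.named_parameters():
    if param.grad is None:
        print(f"{name}: no gradient")
    else:
        print(f"{name}: grad norm = {param.grad.norm().item():.6f}")

If one of these norms stays at exactly zero for every epoch, the gradient is being blocked somewhere between the loss and that layer.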

This is my full script:

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self,in_dim,hidden_dim,num_classes):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(in_dim,hidden_dim)
        self.layer2 = nn.Linear(hidden_dim,hidden_dim)
        self.layer3 = nn.Linear(hidden_dim,num_classes)
        self.relu = nn.ReLU()

    def forward(self,x):
        a = self.relu(self.layer1(x))
        a = self.relu(self.layer2(a))
        return self.relu(self.layer3(a))  

model = Model(32,32,1)

def train(epoch,L):
    criteria = nn.MSELoss()
    learning_rate = 0.1
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    t = np.linspace(0,2,L+2)       
    fs = L+2
    
    trn_loss = list()
        
    for f in range(0,epoch):
        phase = f/np.pi      
        x = np.sin(2*np.pi*t*fs+phase)
        
        x_train = torch.from_numpy(x[:-2]).float()  # input window of L samples
        x_train = x_train.unsqueeze(0)              # add the batch dimension

        x_test = torch.tensor(x[-2]).float()        # target: the next sample
        x_test = torch.reshape(x_test,(1,1))        # shape [1, 1] to match x_hat
        
        optimizer.zero_grad()
        
        x_hat = model(x_train)
            
        currentCost = criteria(x_hat,x_test)
        
        trn_loss.append(currentCost.item())
        currentCost.backward()
        print(model.layer3.weight.grad)  # gradients of the last layer after backward()
        optimizer.step()
                
    plt.plot(range(1,epoch+1),trn_loss,c='orangered')
    plt.show()

train(100,32)

and the output is now, for every epoch:

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.]])
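
One plausible cause (just an assumption, it isn't confirmed here): the final self.relu(self.layer3(a)) clamps every negative pre-activation to zero, and ReLU's gradient is zero in that region, so once the output unit's pre-activation goes negative, no gradient flows back to any weight (the "dying ReLU" problem). Since the target is a sine value, which is often negative, a sketch of a possible fix is to drop the activation on the output layer:

def forward(self,x):
    a = self.relu(self.layer1(x))
    a = self.relu(self.layer2(a))
    return self.layer3(a)  # linear output: can be negative and keeps gradients alive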