Derivative = 0 with higher learning rates

Hello guys,

I’m quite new to PyTorch, so I’ve created a dummy dataset based on the function y = x_1 + 2*x_2 + 3*x_3 + 4*x_4 to get used to autograd (a minimal sketch of the dataset is at the end of this post). Training works with small learning rates, but with a higher one, e.g. 0.1, the printed derivative collapses to zero and the loss stops decreasing. Does anybody know why?
Thanks for your help!

import torch.nn as nn
import torch.nn.functional as f

class deriv_net(nn.Module):

    def __init__(self):
        super(deriv_net, self).__init__()
        # 4 inputs -> 20 -> 40 -> 1 output
        self.fc1 = nn.Linear(4, 20)
        self.fc2 = nn.Linear(20, 40)
        self.last = nn.Linear(40, 1)

    def forward(self, x):
        out = f.relu(self.fc1(x))
        out = f.relu(self.fc2(out))
        out = self.last(out)
        return out

from autograd_dataset import autograd_dataset
from deriv_nn import deriv_net
import torch
import torch.nn.functional as f
import torch.optim as optim
from torch.utils.data import DataLoader

def train(deriv_model, epoch):
    optimizer = optim.SGD(deriv_model.parameters(), lr=0.1, momentum=0.8)
    for batch_id, (data, target) in enumerate(train_data):
        data.requires_grad = True
        out = deriv_model(data.float())
        # derivative of the network output w.r.t. the inputs
        deriv = torch.autograd.grad(outputs=[out], inputs=[data],
                                    grad_outputs=torch.ones_like(out),
                                    allow_unused=True, retain_graph=True,
                                    create_graph=True)[0]
        print(deriv)
        optimizer.zero_grad()
        loss = f.mse_loss(out, target.float())
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_id * len(data), len(train_data.dataset),
            100. * batch_id / len(train_data), loss.item()))

if __name__ == '__main__':
    # define train_data
    dataset = autograd_dataset('test_autograd')
    train_data = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

    deriv_model = deriv_net()
    for epoch in range(1, 100):
        train(deriv_model, epoch)
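
For completeness, here is a minimal stand-in for my autograd_dataset module so the snippet above is runnable end to end. The random input sampling, the sample count, and the unused name argument are simplified placeholders, not the exact original implementation:

import torch
from torch.utils.data import Dataset

class autograd_dataset(Dataset):
    def __init__(self, name, n_samples=1000):
        self.name = name                           # dataset identifier, unused in this sketch
        self.x = torch.rand(n_samples, 4)          # random inputs in [0, 1)
        w = torch.tensor([1.0, 2.0, 3.0, 4.0])
        # target: y = x_1 + 2*x_2 + 3*x_3 + 4*x_4, shape (n_samples, 1)
        self.y = (self.x * w).sum(dim=1, keepdim=True)

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

Since the function is linear, I would expect the printed derivative to converge towards [1, 2, 3, 4] for every input, which is what happens with smaller learning rates.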