# Derivative = 0 with higher learning rates

Hello guys,

I'm quite new to PyTorch, so I've created a dummy dataset with the function y = x_1 + 2*x_2 + 3*x_3 + 4*x_4 to get used to autograd. The code below doesn't work for higher learning rates, e.g. 0.1 — the loss diverges. Does anybody know why?
Thanks for your help!

import torch.nn as nn
import torch.nn.functional as f

class deriv_net(nn.Module):
    """Small fully connected regressor: 4 -> 20 -> 40 -> 1 with ReLU activations.

    Intended to fit y = x_1 + 2*x_2 + 3*x_3 + 4*x_4 from 4-dimensional inputs.
    """

    def __init__(self):
        # BUG FIX: the post had `def init(self)` (markdown stripped the double
        # underscores). Without `__init__`, nn.Module's constructor never runs
        # and no layers are registered, so parameters() is empty.
        super(deriv_net, self).__init__()
        self.fc1 = nn.Linear(4, 20)
        self.fc2 = nn.Linear(20, 40)
        self.last = nn.Linear(40, 1)

    def forward(self, x):
        """Forward pass. Expects x of shape (batch, 4); returns (batch, 1)."""
        out = f.relu(self.fc1(x))
        out = f.relu(self.fc2(out))
        # No activation on the output layer: unbounded regression target.
        out = self.last(out)
        return out

from deriv_nn import deriv_net
import torch
import torch.nn.functional as f
import torch.optim as optim
from torch.utils.data import DataLoader

def train(deriv_model, epoch):
    """Run one training epoch of `deriv_model` over the global `train_data` loader.

    Prints the input-gradient of the model output for each batch and a
    progress line with the current MSE loss.

    NOTE(review): creating the SGD optimizer inside `train` resets the
    momentum buffers every epoch; consider constructing it once outside and
    passing it in. Kept here to preserve the original call signature.
    """
    optimizer = optim.SGD(deriv_model.parameters(), lr=0.1, momentum=0.8)
    for batch_id, (data, target) in enumerate(train_data):
        # Input must require grad so torch.autograd.grad can differentiate
        # the output w.r.t. the input.
        data = data.float().requires_grad_(True)
        out = deriv_model(data)
        # Reconstructed from the truncated post: d(out)/d(data). Summing the
        # output gives per-sample input gradients in one backward pass.
        # TODO confirm this matches the original (truncated) grad call.
        deriv = torch.autograd.grad(out.sum(), data, allow_unused=True,
                                    retain_graph=True, create_graph=True)[0]
        print(deriv)
        loss = f.mse_loss(out, target)
        # BUG FIX: the original never called zero_grad(), so gradients from
        # every previous batch accumulated. Combined with momentum=0.8 this
        # makes the effective step grow without bound — exactly the observed
        # divergence at larger learning rates such as 0.1.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Straight quotes restored (the post had smart quotes, a SyntaxError).
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.06f}'.format(
            epoch, batch_id * len(data), len(train_data.dataset),
            100. * batch_id / len(train_data), loss.data.item()))

if name == ‘main’:
#define train_data