Hey,
I’m trying to implement the optimization algorithems by myself and to compare them . However, not sure why, but the backwards flow doesnt update the gradients and they left as None values.
My optimizer :
class MyOptimizer:
def __init__(self, parameters,lr=0.001, momentum=0.9):
self.momentum = momentum
self.layers_data_list=[]
for layer_params in list(parameters):
layer_dict = dict()
layer_dict['params']=layer_params
layer_dict['momentum']=momentum
layer_dict['velocity']=None
layer_dict['lr']=lr
self.layers_data_list.append(layer_dict)
def step_sgd(self):
for layer_data in self.layers_data_list:
for p in layer_data['params']:
if p.data.grad is None: #Update : tried if p.grad is None as suggested in the comments
print("grad is None")
continue
lr=layer_data['lr']
d_p=p.grad.data
p.data.add_(-lr,d_p)
def zero_grad(self):
for layer_data in self.layers_data_list:
for p in layer_data['params']:
if p.grad is not None:
p.grad.zero_()
the training part isnt so unusual, and it worked on different notebooks that I used :
def train_and_eval(optimizer,net,optimizer_step,GPU=False):
loss_function = nn.CrossEntropyLoss()
epochs=100
train_loss_per_epoch=[]
test_loss_per_epoch=[]
for epoch in range(epochs) :
print("[train]-----epoch "+str(epoch+1)+" -----")
train_loss = 0.0
test_loss = 0.0
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
# get inputs from data
inputs, labels = data
if GPU:
inputs = inputs.cuda() # -- For GPU
labels = labels.cuda() # -- For GPU
optimizer.zero_grad()
outputs = net(inputs)
loss = loss_function(outputs, labels)
loss.backward()
if optimizer_step == "sgd":
optimizer.step_sgd()
train_loss += loss.item()
# print statistics
running_loss += loss.item()
if (i + 1) % 200 == 0:
print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
running_loss = 0.0
train_loss_per_epoch.append(train_loss / len(trainloader))
print('[%d] train loss: %.3f' %
(epoch + 1, train_loss / len(trainloader)))
# test
print("[test]-----epoch "+str(epoch+1)+" -----")
for i, data in enumerate(testloader, 0):
# get inputs from data
inputs, labels = data
if GPU:
inputs = inputs.cuda() # -- For GPU
labels = labels.cuda() # -- For GPU
outputs = net(inputs)
loss = loss_function(outputs, labels)
test_loss += loss.item()
test_loss_per_epoch.append(test_loss / len(testloader))
print('[%d] test loss: %.3f' %
(epoch + 1, test_loss / len(testloader)))
return train_loss_per_epoch,test_loss_per_epoch
def runOptimizerTest(optimizer_step,GPU):
if GPU :
net=CNN().cuda()
else :
net=CNN()
optimizer=MyOptimizer(net.parameters())
return train_and_eval(optimizer,net,optimizer_step,GPU)
optimizer_step="sgd"
train_loss_per_epoch_sgd,test_loss_per_epoch_sgd=runOptimizerTest(optimizer_step,GPU=False)
thanks