Hey,
I’m trying to use the foolowing optimizer that I implemented (rmsprop) but after the first step of the optimizer the loss that is calculated in my main is nan.
def __init__(self, parameters,lr=0.001,beta=0.999,epsilon=sys.float_info.epsilon):
self.layers_data_list=[]
self.epsilon=epsilon
self.beta=beta
for layer_params in list(parameters):
layer_dict = dict()
layer_dict['params']=layer_params
layer_dict['lr']=lr
layer_dict['average_derrevative']=None
self.layers_data_list.append(layer_dict)
def step(self):
for layer_data in self.layers_data_list:
p=layer_data['params']
lr=layer_data['lr']
d_p=p.grad.data
if layer_data['average_derrevative'] is None:
layer_data['average_derrevative'] = torch.clone(d_p).detach()
else :
layer_data['average_derrevative'].mul_(self.beta).addcmul_(1-self.beta,d_p,d_p)
avg_root = torch.clone(layer_data['average_derrevative']).detach().sqrt().add_(self.epsilon) # tried to add .clone().detach() didnt help..
layer_data['params'].data.addcdiv_(-lr,d_p, avg_root)