Hi,
I am new to pytorch and implementing A3C.
For this I have got a code for SharedAdam optimizer which is of pytorch version 0.4 which is producing above error in pytorch 1.6.0
Can someone please help to let me know what should be correct implementations of the functionality of above 3 functions in pytorch 1.6.0
Below is the code for pytorch 0.4
class SharedAdam(optim.Adam):
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): super(SharedAdam, self).__init__(params, lr, betas, eps, weight_decay) for group in self.param_groups: for p in group['params']: state = self.state[p] state['step'] = torch.zeros(1) state['exp_avg'] = p.data.new().resize_as_(p.data).zero_() state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_() def share_memory(self): for group in self.param_groups: for p in group['params']: state = self.state[p] state['step'].share_memory_() state['exp_avg'].share_memory_() state['exp_avg_sq'].share_memory_() def step(self, closure=None): """Performs a single optimization step. Arguments: closure (callable, optional): A closure that reevaluates the model and returns the loss. """ loss = None if closure is not None: loss = closure() for group in self.param_groups: for p in group['params']: if p.grad is None: continue grad = p.grad.data state = self.state[p] exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] beta1, beta2 = group['betas'] state['step'] += 1 if group['weight_decay'] != 0: grad = grad.add(group['weight_decay'], p.data) # Decay the first and second moment running average coefficient exp_avg.mul_(beta1).add_(1 - beta1, grad) exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) denom = exp_avg_sq.sqrt().add_(group['eps']) bias_correction1 = 1 - beta1 ** state['step'][0] bias_correction2 = 1 - beta2 ** state['step'][0] step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 p.data.addcdiv_(-step_size, exp_avg, denom) return loss
Is this the correct implementation-
exp_avg.mul_(beta1).add_(grad, alpha = 1 - beta1)
exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value = 1 - beta2)
p.data.addcdiv_(exp_avg, denom, value = -step_size)