So I’m trying to write my own optimizer and I found out that the parameters are’t being updated. I tried following the example of optim.SGD https://github.com/pytorch/pytorch/blob/master/torch/optim/sgd.py .
class AGCD(Optimizer):
def init(self, params, theta=1):
defaults = dict(theta=theta)
super(AGCD, self).init(params, defaults)
self.x_update_params = deepcopy(self.param_groups)
self.z_update_params = deepcopy(self.param_groups)
self.theta_update_params = deepcopy(self.param_groups)
for group in self.theta_update_params:
for i in range(len(group[‘params’])):
group[‘params’][i] = torch.ones(1)
def __setstate__(self, state):
super(AGCD, self).__setstate__(state)
@torch.no_grad()
def step(self, closure=None):
loss = None
if closure is not None:
with torch.enable_grad():
loss=closure()
for group_y, group_x, group_z, group_theta in zip(self.param_groups,
self.x_update_params,
self.z_update_params,
self.theta_update_params):
for y, x, z, theta in zip(group_y['params'],
group_x['params'],
group_z['params'],
group_theta['params']):
if y.grad is None:
continue
d_y = y.grad
j1 = j2 = my_argmax(d_y)
y = (x*(1-theta).expand_as(x)).add(z*theta.expand_as(z)) #use pytorch functions for math
temp = torch.zeros(x.size())
#print(temp.size())
#print(x.size())
#print(d_y.size())
temp[j1] = 1*0.2*x[j1]
x = y.sub(temp)
temp = torch.zeros(z.size())
temp[j2] = 5*0.2*z[j2]
#print(theta)
theta = update_theta(theta)
#print(theta)
return loss
BTW this is my first time posting on a forum so please excuse any sins.
Edit: To be more specific, the parameters used to update aren’t themselves updated (theta stays at the value of 1 at all times).
Edit2: I think it’s because I have to use pytorch operations instead of using “=” so that the object gets modified, not just the copy of that object