As you can see below, I print the value of `scale.grad` as well as the value of `scale` before and after `optimizer.step()`. My learning rate is 0.01 with the Adam optimizer, so with the printed gradient of roughly 13 I expected `scale` to become 1 - 0.01 * 13 = 0.87, but it becomes 0.99 instead. What could be going on here? I know the Adam optimizer applies some other tweaks to this update equation, but I didn't expect the result to differ by about 0.12, which relative to 1 is more than a 10% difference! Thanks.
Below is the photo and the code.
Screen Shot 2020-02-27 at 11.03.06 AM|690x308
def _load_rgb_tensor(path):
    """Open the image at ``path``, force RGB mode, and convert it with ``TF.to_tensor``."""
    rgb_pixels = np.array(Image.open(path).convert('RGB'))
    return TF.to_tensor(rgb_pixels)


# The two images used by the script, loaded the same way.
cat = _load_rgb_tensor("just_cat_above_dog.png")
cat_dog = _load_rgb_tensor("cat_above_dog.png")

# Raw learnable parameters, stored as column tensors: a (2, 1) translation
# starting at zero and a (1, 1) scale starting at one.
translated_params = torch.zeros(2, 1, requires_grad=True)
scale = torch.ones(1, 1, requires_grad=True)
def forward2(x, dxdy, the_scale):
    """Resample image ``x`` through a scaled-and-translated affine grid.

    Both raw parameters are squashed with ``tanh`` before use, so the
    transform that is actually applied uses ``tanh(dxdy)`` and
    ``tanh(the_scale)``, each of which lies in (-1, 1).

    Args:
        x: Image tensor. It is indexed as ``x[None, :, :, :]``, so it must be
            3-D (presumably (C, H, W) -- confirm against the caller).
        dxdy: Raw translation parameters. They are concatenated as the third
            column of the 2x3 affine matrix, so the expected shape is (2, 1).
        the_scale: Raw isotropic scale parameter. It is multiplied into
            ``torch.eye(2)``, so it must broadcast against a 2x2 matrix.

    Returns:
        The bilinearly resampled image, with the same shape as ``x``.
    """
    dxdy = torch.tanh(dxdy)
    the_scale = torch.tanh(the_scale)

    # 2x3 affine matrix [s * I | t], with a leading batch dimension of 1.
    theta = torch.cat((torch.eye(2) * the_scale, dxdy), dim=1)[None, :, :]

    # align_corners is passed explicitly: it matches the default used since
    # PyTorch 1.3 and avoids the warning emitted when it is left unspecified.
    grid = F.affine_grid(theta, [1] + list(x.shape), align_corners=False)
    resampled = F.grid_sample(
        x[None, :, :, :], grid, mode='bilinear', align_corners=False
    )[0]
    return resampled
# Optimize the raw scale and translation so that the warped `cat_dog` image
# matches `cat` under the frequency-domain loss below.
optimizer = torch.optim.Adam([scale, translated_params], lr=0.01)

for i in range(3000):
    optimizer.zero_grad()
    predicted = forward2(cat_dog, translated_params, scale)

    # 3-D real-to-complex FFT of both images. Index 0 of the last dimension
    # is taken to be the real part (legacy torch.rfft layout -- confirm
    # against the installed PyTorch version).
    fft_predicted = torch.rfft(predicted, 3)
    fft_gt = torch.rfft(cat, 3)

    # L1 distance between the selected FFT components, minus a small reward
    # for the total magnitude of the predicted spectrum.
    # Alternative losses tried earlier: LapLoss on the batched images, and
    # the mean absolute pixel difference over rows 100:300.
    loss = (
        torch.nn.functional.l1_loss(fft_gt[:, :, :, 0], fft_predicted[:, :, :, 0])
        - 0.0001 * torch.sum(torch.abs(fft_predicted))
    )
    loss.backward()

    # Print the same loss and gradient before and after the step, so only
    # `scale` differs between the two lines.
    print(loss, scale.grad, scale)
    # Adam divides the bias-corrected first moment by the square root of the
    # second moment, so on the first step the update magnitude is roughly
    # lr (0.01) regardless of how large the raw gradient is.
    optimizer.step()
    print(loss, scale.grad, scale)

    if i % 300 == 0:
        # Move the channel axis last for imshow; detach() drops autograd
        # tracking before the NumPy conversion.
        img = predicted.detach().cpu().numpy().transpose((1, 2, 0))
        actualimg = cat.cpu().numpy().transpose((1, 2, 0))
        plt.figure()
        plt.imshow(img)
        plt.figure()
        plt.imshow(actualimg)