Optimizer.step() not updating correctly

As you can see below, I print the value of scale.grad, as well as the value of scale before and after optimizer.step(). My learning rate is 0.01 with the Adam optimizer, but 1 - 0.01*13 = 0.87, not 0.99. What could be going on here? I know the Adam optimizer applies some other tweaks to this equation, but I didn't expect the result to differ by a magnitude of 0.11, which relative to 1 is roughly a 10% difference! Thanks.

Below are the screenshot and the code.

Screen Shot 2020-02-27 at 11.03.06 AM|690x308

def _load_rgb_tensor(path):
    """Open the image at ``path``, convert it to RGB and pass it to ``TF.to_tensor``."""
    pixels = np.array(Image.open(path).convert('RGB'))
    return TF.to_tensor(pixels)


# Target image and the image that gets warped towards it.
cat = _load_rgb_tensor("just_cat_above_dog.png")
cat_dog = _load_rgb_tensor("cat_above_dog.png")

# Learnable translation: a (2, 1) column initialised to zero.
translated_params = torch.tensor([0.0, 0.0]).unsqueeze(1).requires_grad_(True)

# Learnable scale: a (1, 1) tensor initialised to one.
scale = torch.tensor([1.0]).unsqueeze(1).requires_grad_(True)

def forward2(x, dxdy, the_scale):
    """Resample ``x`` through a tanh-bounded scale-and-translate affine warp.

    Both parameters are squashed with ``tanh`` before use. The squashed scale
    multiplies a 2x2 identity, and the squashed translation is appended as the
    third column, giving a single 2x3 affine matrix.

    Args:
        x: Image tensor. A leading batch axis of size 1 is added before
            sampling (assumes a 3-D tensor such as (C, H, W) — confirm
            against the caller).
        dxdy: Raw translation parameters; must be concatenable with a 2x2
            matrix along dim 1 (e.g. shape (2, 1)).
        the_scale: Raw scale parameter, broadcast against the 2x2 identity.

    Returns:
        The bilinearly resampled tensor with the batch axis removed again.
    """
    bounded_shift = torch.tanh(dxdy)
    bounded_scale = torch.tanh(the_scale)

    # Assemble the [scale * I | shift] matrix and give it a batch axis.
    linear_part = torch.eye(2) * bounded_scale
    theta = torch.cat((linear_part, bounded_shift), dim=1).unsqueeze(0)

    sampling_grid = F.affine_grid(theta, [1, *x.shape])
    warped = F.grid_sample(x[None, :, :, :], sampling_grid, mode='bilinear')
    return warped[0]

# Optimise the scale and translation parameters jointly with Adam.
# NOTE(review): Adam rescales each gradient by a running estimate of its
# magnitude, so a single step is expected to move a parameter by roughly lr,
# not lr * grad — confirm against the torch.optim.Adam documentation.
optimizer = torch.optim.Adam([scale, translated_params], lr=0.01)

for i in range(3000):
    optimizer.zero_grad()
    predicted = forward2(cat_dog, translated_params, scale)

    # NOTE(review): torch.rfft belongs to the legacy FFT API and is absent
    # from newer PyTorch releases — confirm the installed version.
    fft_predicted = torch.rfft(predicted, 3)
    fft_gt = torch.rfft(cat, 3)

    # L1 distance on index 0 of the trailing axis (presumably the real
    # component — verify), minus a small multiple of the summed absolute
    # values of the predicted spectrum.
    spectral_l1 = torch.nn.functional.l1_loss(
        fft_gt[:, :, :, 0], fft_predicted[:, :, :, 0]
    )
    spectrum_mass = torch.sum(torch.abs(fft_predicted))
    loss = spectral_l1 - 0.0001 * spectrum_mass

    loss.backward()

    # Print on both sides of the step so the change in `scale` can be
    # compared against its gradient.
    print(loss, scale.grad.data, scale)
    optimizer.step()
    print(loss, scale.grad.data, scale)

    # Only visualise every 300th iteration.
    if i % 300 != 0:
        continue

    # Reorder axes with transpose((1, 2, 0)) before handing the arrays to imshow.
    img = predicted.data.cpu().numpy().transpose((1, 2, 0))
    actualimg = cat.cpu().numpy().transpose((1, 2, 0))
    for picture in (img, actualimg):
        plt.figure()
        plt.imshow(picture)

I’m not sure I understand the posted numbers correctly.
Is SGD yielding the expected result?
If so, your guess about Adam's internal state might be right.

Sorry, I rephrased my question in a new post; hopefully it's clearer now… Thank you.