Hi everyone, I am running a unit test with the code below. The issue is that the 'test2' and 'test3' variants produce slightly different values for the gradient of x (and, after a few optimizer steps, for x itself). 'test2' gives exactly the same results as 'test1', but 'test3''s results diverge from both. Could the slicing be changing something in the computational graph? Any suggestion is appreciated.
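The differences are tiny: the first one appears at iter 13, in roughly the 8th significant digit, which is on the order of float32 machine precision. My guess is that cat/slice changes the order of the floating-point operations in the backward pass, and float32 addition is not associative, so regrouping the same numbers can flip the last bits. A minimal, self-contained illustration of that non-associativity (unrelated to the model itself):

import torch

# float32 addition is not associative: the same three numbers,
# grouped differently, round to different results.
t = torch.tensor([1e8, -1e8, 1.0], dtype=torch.float32)
left = (t[0] + t[1]) + t[2]   # 0.0 + 1.0 -> 1.0
right = t[0] + (t[1] + t[2])  # -1e8 + 1.0 rounds back to -1e8, so 1e8 + (-1e8) -> 0.0
print(left.item(), right.item())  # prints: 1.0 0.0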
result of ‘test2’:
iter: 0, x: 0.001123535679653287
x_after: 0.14787426590919495
iter: 1, x: 0.001123535679653287
x_after: 0.14719876646995544
iter: 2, x: 0.0011176818516105413
x_after: 0.1464969515800476
iter: 3, x: 0.0011090511688962579
x_after: 0.14573749899864197
iter: 4, x: 0.0010979033540934324
x_after: 0.14489953219890594
iter: 5, x: 0.0010852295672520995
x_after: 0.14401070773601532
iter: 6, x: 0.00107166962698102
x_after: 0.14312008023262024
iter: 7, x: 0.0010575008345767856
x_after: 0.14225336909294128
iter: 8, x: 0.001042835763655603
x_after: 0.14140984416007996
iter: 9, x: 0.001027711434289813
x_after: 0.1405780017375946
iter: 10, x: 0.0010121342493221164
x_after: 0.1397448629140854
iter: 11, x: 0.0009961080504581332
x_after: 0.13889974355697632
iter: 12, x: 0.000979656120762229
x_after: 0.13803447782993317
iter: 13, x: 0.0009628300904296339
x_after: 0.1371440291404724
iter: 14, x: 0.0009457063279114664
x_after: 0.13622605800628662
iter: 15, x: 0.0009283693507313728
x_after: 0.13528119027614594
iter: 16, x: 0.0009108937229029834
x_after: 0.13431242108345032
iter: 17, x: 0.000893332646228373
x_after: 0.13332459330558777
iter: 18, x: 0.0008757139439694583
x_after: 0.1323237121105194
iter: 19, x: 0.0008580424473620951
x_after: 0.1313161700963974
iter: 20, x: 0.0008403086685575545
x_after: 0.13030840456485748
iter: 21, x: 0.0008224951452575624
x_after: 0.12930648028850555
iter: 22, x: 0.000804582261480391
x_after: 0.12831594049930573
iter: 23, x: 0.0007865555817261338
x_after: 0.1273418664932251
iter: 24, x: 0.000768406200222671
x_after: 0.12638860940933228
iter: 25, x: 0.0007501322543248534
x_after: 0.12546001374721527
iter: 26, x: 0.0007317414856515825
x_after: 0.12455948442220688
iter: 27, x: 0.0007132487371563911
x_after: 0.12368995696306229
iter: 28, x: 0.0006946753128431737
x_after: 0.12285391241312027
iter: 29, x: 0.000676048279274255
x_after: 0.12205348908901215
result of ‘test3’:
iter: 0, x: 0.001123535679653287
x_after: 0.14787426590919495
iter: 1, x: 0.001123535679653287
x_after: 0.14719876646995544
iter: 2, x: 0.0011176818516105413
x_after: 0.1464969515800476
iter: 3, x: 0.0011090511688962579
x_after: 0.14573749899864197
iter: 4, x: 0.0010979033540934324
x_after: 0.14489953219890594
iter: 5, x: 0.0010852295672520995
x_after: 0.14401070773601532
iter: 6, x: 0.00107166962698102
x_after: 0.14312008023262024
iter: 7, x: 0.0010575008345767856
x_after: 0.14225336909294128
iter: 8, x: 0.001042835763655603
x_after: 0.14140984416007996
iter: 9, x: 0.001027711434289813
x_after: 0.1405780017375946
iter: 10, x: 0.0010121342493221164
x_after: 0.1397448629140854
iter: 11, x: 0.0009961080504581332
x_after: 0.13889974355697632
iter: 12, x: 0.000979656120762229
x_after: 0.13803447782993317
iter: 13, x: 0.0009628300322219729
x_after: 0.1371440291404724
iter: 14, x: 0.0009457063279114664
x_after: 0.13622605800628662
iter: 15, x: 0.0009283691761083901
x_after: 0.13528119027614594
iter: 16, x: 0.0009108937229029834
x_after: 0.13431242108345032
iter: 17, x: 0.000893332646228373
x_after: 0.13332459330558777
iter: 18, x: 0.0008757137693464756
x_after: 0.1323237121105194
iter: 19, x: 0.0008580424473620951
x_after: 0.1313161700963974
iter: 20, x: 0.0008403086685575545
x_after: 0.13030840456485748
iter: 21, x: 0.0008224949124269187
x_after: 0.12930648028850555
iter: 22, x: 0.000804582261480391
x_after: 0.12831594049930573
iter: 23, x: 0.0007865555817261338
x_after: 0.1273418664932251
iter: 24, x: 0.000768406200222671
x_after: 0.12638860940933228
iter: 25, x: 0.0007501322543248534
x_after: 0.12546001374721527
iter: 26, x: 0.0007317413692362607
x_after: 0.12455948442220688
iter: 27, x: 0.0007132486207410693
x_after: 0.12368995696306229
iter: 28, x: 0.0006946754292584956
x_after: 0.12285391241312027
iter: 29, x: 0.0006760482210665941
x_after: 0.12205347418785095
import torch
import torch.nn as nn
import random
import os
import numpy as np

# seed everything for reproducibility
random.seed(0)
os.environ['PYTHONHASHSEED'] = str(0)
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()

    def forward(self, x, y):
        out = x * y
        return out

class test():
    def __init__(self):
        self.x = torch.randn(30, 3, requires_grad=True)
        self.y = torch.randn(30, 1, requires_grad=True)
        self.model = Model()
        l = [
            {'params': [self.x], 'lr': 0.01, "name": "x"},
            {'params': [self.y], 'lr': 0.01, "name": "y"},
            {'params': self.model.parameters(), 'lr': 0.01, "name": "model"},
        ]
        self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15)

gs = test()
gt = torch.randn(30, 3, requires_grad=False)

for i in range(50):
    means3D = gs.x
    means3D_next = gs.x  # unused in this test

    # uncomment exactly one of the three test variants
    '''test 1'''
    # features = torch.cat([means3D], dim=1)
    # out = gs.model(features, gs.y)
    # out_slice1 = out[:, :]

    '''test 2'''
    features = torch.cat([means3D, means3D[:, :2]], dim=1)
    out = gs.model(features, gs.y)
    out_slice1 = out[:, :3]

    '''test 3'''
    # features = torch.cat([means3D[:, :2], means3D], dim=1)
    # out = gs.model(features, gs.y)
    # out_slice1 = out[:, 2:]

    loss = (out_slice1 - gt).mean()
    loss.backward()
    print(f"iter: {i}, x: {gs.x.grad.mean()}")
    gs.optimizer.step()
    gs.optimizer.zero_grad()
    print(f"x_after: {gs.x.mean()}")