@albanD @JuanFMontesinos Yeah, I should have looked at the source code! One more question to ask:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.bn2 = nn.BatchNorm2d(64)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
model = Net()

# Group the parameters so that every three consecutive parameters share
# one learning rate, divided by another factor of 10 per block.
layer_collector = []
for idx, param in enumerate(model.parameters()):
    block = idx // 3
    lr = args.lr / (10 ** block)  # args.lr is the base learning rate
    layer_collector += [{"params": param, "lr": lr}]

optimizer = optim.SGD(layer_collector, momentum=0.85, weight_decay=1e-5)
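As a sanity check (just a sketch, assuming the model and args.lr above; the print format is my own), printing each group's learning rate shows the intended per-block decay:

# Sketch: confirm each parameter group got the intended learning rate.
# Each group here holds a single parameter tensor, so params[0] is it.
for i, group in enumerate(optimizer.param_groups):
    print(i, group["lr"], group["params"][0].shape)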
The code above is the model and how I set up the optimizer.
Then I tried to do this:
for group_param in optimizer.param_groups:
    for param in group_param["params"]:
        for each_set in param:
            print(each_set)
The result gives something like:
tensor([-0.0048, 0.0155, -0.0403, -0.0363, 0.0766, 0.0209, -0.0341, 0.0499,
0.0590, 0.0267, -0.0719, -0.0199, -0.0585, -0.0632, 0.0872, 0.0512,
-0.0528, -0.0671, -0.0278, -0.0009, 0.0570, 0.0187, 0.0411, -0.0460,
-0.0667, -0.0144, -0.0228, -0.0181, -0.0388, 0.0235, 0.0766, -0.0179,
0.0664, 0.0092, -0.0698, -0.0850, 0.0578, 0.0419, -0.0034, 0.0127,
0.0263, -0.0060, -0.0120, -0.0356, -0.0429, 0.0169, 0.0010, -0.0227,
-0.0736, -0.0817, 0.0567, 0.0305, 0.0839, -0.0474, -0.0297, 0.0020,
-0.0025, 0.0800, -0.0758, -0.0859, 0.0695, 0.0839, -0.0218, 0.0392,
-0.0795, -0.0634, 0.0108, -0.0785, -0.0103, 0.0450, -0.0252, -0.0361,
-0.0161, 0.0106, 0.0832, -0.0706, -0.0487, -0.0190, 0.0364, -0.0272,
-0.0750, 0.0834, 0.0687, -0.0866, -0.0550, -0.0435, -0.0807, -0.0501,
0.0329, 0.0694, 0.0330, 0.0613, -0.0204, -0.0302, 0.0559, 0.0098,
-0.0497, -0.0153, -0.0212, 0.0377, 0.0154, 0.0512, -0.0469, -0.0496,
0.0218, -0.0187, 0.0294, -0.0480, 0.0234, 0.0040, -0.0665, -0.0831,
-0.0799, -0.0845, 0.0759, 0.0807, 0.0492, 0.0455, -0.0201, -0.0427,
0.0037, -0.0168, 0.0463, 0.0782, 0.0066, 0.0759, -0.0852, 0.0401],
grad_fn=<SelectBackward>)
What does grad_fn mean here? And as another try:
for group_param in optimizer.param_groups:
    for param in group_param["params"]:
        print(param.grad_fn)
It just returns None. Why does this happen? Thanks!
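For what it's worth, here is a minimal standalone version of the two behaviors I see (a sketch using a plain tensor in place of a parameter):

import torch

# A leaf tensor created directly (like an nn.Parameter) was not produced
# by any autograd operation, so its grad_fn is None.
w = torch.randn(3, 4, requires_grad=True)
print(w.grad_fn)     # None

# Indexing w creates a *new* tensor whose grad_fn records the indexing
# operation, which is why iterating over a parameter prints tensors
# with grad_fn=<SelectBackward>.
print(w[0].grad_fn)  # <SelectBackward object at ...>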