Using your code snippet, I get valid gradients for all of the weight parameters.
The bias entries print as None because bias is set to None by default in your code:
gc1.weight tensor([[ 0.0000e+00, 0.0000e+00, 5.7626e+12, 0.0000e+00, 2.0992e+12,
-4.3610e+12, 0.0000e+00, 1.7043e+12, 0.0000e+00, 0.0000e+00,
1.0510e+12, 5.1097e+11, 1.1546e+12, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, -1.9490e+12, -5.2885e+11, -2.5631e+11,
0.0000e+00, 1.9785e+12, 2.1556e+11, 3.4624e+11, 3.0468e+12,
-1.4528e+12, 2.2929e+12, 2.6372e+12, 1.0251e+12, 0.0000e+00,
0.0000e+00, -1.3199e+12, -1.1152e+11, -3.9257e+11, 1.8377e+12,
0.0000e+00, 0.0000e+00, -5.2774e+11, 7.5787e+12, 6.4663e+11,
9.5647e+12, 0.0000e+00, -2.4952e+12, 0.0000e+00, 1.3465e+11,
0.0000e+00, 0.0000e+00, 0.0000e+00, 8.5702e+11, 4.1229e+11,
8.5888e+11, 0.0000e+00, 1.6762e+12, 0.0000e+00, -8.6959e+11,
2.0165e+12, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.6999e+12,
0.0000e+00, 9.5645e+11, 1.3904e+12, -2.0582e+12, -1.1083e+12,
0.0000e+00, 2.0443e+12, 0.0000e+00, 0.0000e+00, 9.8510e+11,
-1.0981e+12, 0.0000e+00, 0.0000e+00, -2.1311e+12, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 2.0415e+12, -3.1155e+11,
0.0000e+00, -1.2381e+12, -1.4073e+11, 0.0000e+00, 0.0000e+00,
-1.0034e+11, 3.8488e+12, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 4.3684e+11, -1.7028e+12,
-2.5951e+12, -2.4032e+12, 0.0000e+00, 3.5148e+12, -3.6833e+12]])
gc1.bias None
gc2.weight tensor([[ 0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00,
0.0000e+00, 0.0000e+00],
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00,
0.0000e+00, 0.0000e+00],
[ 4.6452e+10, 0.0000e+00, 0.0000e+00, ..., 8.7185e+11,
-1.2338e+11, 2.8303e+11],
...,
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00,
0.0000e+00, 0.0000e+00],
[ 2.2199e+10, 0.0000e+00, 0.0000e+00, ..., 5.8348e+11,
-7.5911e+10, 1.6448e+11],
[ 9.0184e+10, 0.0000e+00, 0.0000e+00, ..., 1.3702e+12,
-1.8119e+11, 5.0495e+11]])
gc2.bias None
gc3.weight tensor([[1.3418e+09, 1.0558e+09, 3.0883e+09, 2.7453e+09, 8.3871e+08, 2.9497e+09,
0.0000e+00, 0.0000e+00, 1.8982e+09, 2.8017e+09],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[5.4088e+11, 4.6983e+11, 7.4464e+11, 4.8750e+11, 8.1482e+10, 6.1323e+11,
0.0000e+00, 0.0000e+00, 4.3579e+11, 4.8442e+11],
[6.1316e+09, 4.8247e+09, 1.4112e+10, 1.2545e+10, 3.8326e+09, 1.3479e+10,
0.0000e+00, 0.0000e+00, 8.6741e+09, 1.2803e+10],
[1.5149e+10, 1.0378e+10, 1.8953e+10, 1.3017e+10, 2.6907e+09, 1.9303e+10,
0.0000e+00, 0.0000e+00, 1.4705e+10, 1.6191e+10],
[8.3224e+09, 7.1178e+09, 1.7066e+10, 1.2104e+10, 1.7453e+09, 1.2392e+10,
0.0000e+00, 0.0000e+00, 7.6211e+09, 1.2210e+10],
[5.1466e+11, 4.4102e+11, 6.7929e+11, 4.3283e+11, 5.6717e+10, 5.3794e+11,
0.0000e+00, 0.0000e+00, 3.8280e+11, 4.3141e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[2.7988e+10, 2.3244e+10, 3.1372e+10, 1.6246e+10, 1.4151e+09, 2.5873e+10,
0.0000e+00, 0.0000e+00, 2.0801e+10, 1.7842e+10],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[1.2529e+11, 1.0351e+11, 2.0384e+11, 1.5329e+11, 3.9726e+10, 1.7360e+11,
0.0000e+00, 0.0000e+00, 1.1277e+11, 1.6391e+11],
[1.9331e+11, 1.7053e+11, 2.3887e+11, 1.4136e+11, 2.3857e+10, 1.8999e+11,
0.0000e+00, 0.0000e+00, 1.3634e+11, 1.5548e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[2.6648e+11, 2.1048e+11, 3.6879e+11, 2.1916e+11, 3.8415e+10, 3.3483e+11,
0.0000e+00, 0.0000e+00, 2.6191e+11, 2.2762e+11],
[4.8012e+11, 4.0491e+11, 6.6682e+11, 4.4386e+11, 6.6911e+10, 5.6540e+11,
0.0000e+00, 0.0000e+00, 4.0385e+11, 4.2403e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[9.6287e+10, 8.5866e+10, 1.2923e+11, 8.1884e+10, 1.1501e+10, 1.0275e+11,
0.0000e+00, 0.0000e+00, 7.2406e+10, 7.9410e+10],
[3.8502e+11, 3.2341e+11, 5.5472e+11, 3.8425e+11, 6.7005e+10, 4.7505e+11,
0.0000e+00, 0.0000e+00, 3.3311e+11, 3.7214e+11],
[1.2536e+11, 9.0180e+10, 1.4233e+11, 8.8746e+10, 1.3844e+10, 1.3854e+11,
0.0000e+00, 0.0000e+00, 1.0551e+11, 1.0729e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[1.6826e+11, 1.3820e+11, 2.1675e+11, 1.4432e+11, 1.7525e+10, 1.8229e+11,
0.0000e+00, 0.0000e+00, 1.3090e+11, 1.4944e+11],
[1.7524e+11, 1.4594e+11, 2.3257e+11, 1.6048e+11, 3.3265e+10, 1.9856e+11,
0.0000e+00, 0.0000e+00, 1.3721e+11, 1.7323e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[1.9308e+11, 2.0435e+11, 2.8336e+11, 1.8084e+11, 4.6458e+10, 1.8524e+11,
0.0000e+00, 0.0000e+00, 1.1171e+11, 1.8113e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[4.9807e+11, 4.4118e+11, 5.8472e+11, 3.5972e+11, 1.0318e+10, 4.5462e+11,
0.0000e+00, 0.0000e+00, 3.2011e+11, 3.2889e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[6.5498e+11, 5.1708e+11, 8.7470e+11, 5.8653e+11, 8.7341e+10, 7.9434e+11,
0.0000e+00, 0.0000e+00, 5.8149e+11, 5.9509e+11],
[7.4386e+11, 6.4328e+11, 1.0640e+12, 7.3333e+11, 1.3503e+11, 8.8885e+11,
0.0000e+00, 0.0000e+00, 6.1150e+11, 7.2601e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[4.2528e+10, 3.4561e+10, 6.0354e+10, 3.6555e+10, 1.0606e+10, 5.1470e+10,
0.0000e+00, 0.0000e+00, 4.1024e+10, 4.1764e+10],
[2.0269e+11, 1.5970e+11, 2.6420e+11, 1.6884e+11, 3.2350e+10, 2.3451e+11,
0.0000e+00, 0.0000e+00, 1.7871e+11, 1.8206e+11],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[1.1283e+10, 1.0017e+10, 2.2469e+10, 1.8430e+10, 5.4394e+09, 1.9605e+10,
0.0000e+00, 0.0000e+00, 1.2453e+10, 1.8734e+10],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[2.6133e+11, 2.2275e+11, 3.8679e+11, 2.5941e+11, 6.0402e+10, 3.2680e+11,
0.0000e+00, 0.0000e+00, 2.3434e+11, 2.6632e+11],
[3.3256e+11, 2.7949e+11, 5.0830e+11, 3.3410e+11, 7.3282e+10, 4.2524e+11,
0.0000e+00, 0.0000e+00, 3.0468e+11, 3.5502e+11],
[1.1428e+11, 1.0545e+11, 1.6758e+11, 1.1861e+11, 1.9412e+10, 1.3258e+11,
0.0000e+00, 0.0000e+00, 8.3063e+10, 1.1005e+11],
[2.5735e+11, 2.1977e+11, 3.7276e+11, 2.6437e+11, 3.3609e+10, 3.1184e+11,
0.0000e+00, 0.0000e+00, 2.1148e+11, 2.4916e+11]])
gc3.bias None