Hello. With torch.autograd.set_detect_anomaly(True) enabled, PyTorch raises the following error. I am using a CNN in my code.
RuntimeError Traceback (most recent call last)
<ipython-input-64-d65b1c842b33> in <module>
40
41 # Backward pass
---> 42 loss.backward()
43 # Optimize the weights
44 optimizer.step()
~\anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
243 create_graph=create_graph,
244 inputs=inputs)
--> 245 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
246
247 def register_hook(self, hook):
~\anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
143 retain_graph = create_graph
144
--> 145 Variable._execution_engine.run_backward(
146 tensors, grad_tensors_, retain_graph, create_graph, inputs,
147 allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
RuntimeError: Function 'AddmmBackward' returned nan values in its 2th output.
After adding print(model.fc1.weight.grad) to the code, the output I get is:
tensor([[[[ 1.0000e+00, 2.0121e+16, 2.1112e+03, -1.2587e+02, 1.1975e+02, 9.1882e+01, 1.4300e+02, 1.4685e+02,
5.6806e+01, 6.2351e+01, 1.3990e+02, 1.0970e+02, 1.6538e+02, 1.3725e+02, 1.2693e+02, 2.7573e+01,
1.1658e+02, -1.7453e+01, 5.7874e+01, -7.4747e+01, 5.4523e-01, 9.1882e+01, -5.6166e+01, 2.1318e+01,
1.2021e+02, -1.5615e+02, -1.2587e+02, -1.2144e+02, 2.5129e+01, 1.1975e+02, -5.0621e+01, -1.0840e+02,
-1.6882e+02, -1.2074e+02, -4.0773e+01, 1.0225e+02, -1.6588e+02, -4.7247e+01, 8.4561e+01, 1.7495e+02,
-2.0543e+01, 1.4607e+02, -1.0414e+02, -4.2305e+01, 9.7643e+01, -1.5160e+02, 1.1463e+02, -4.9136e+01,
2.0322e+01, 2.2924e+01, 4.8046e-01, 8.7566e-01, 5.6928e-01, 1.8828e+00, 2.6070e+00, 6.8450e-01,
1.0887e+00, 8.5651e-01, 1.3067e+00, 9.8682e-01, 3.0889e+00, 2.0896e+01, 4.3667e+01, 1.0537e+00,
1.1503e+00, 1.8539e+00, 1.3042e+01, 5.0504e-01, 5.6928e-01, 1.1076e+00, 1.8138e+00, 2.6676e+00,
nan, 4.8046e-01, 3.5797e-01, 7.3913e-01, 8.7566e-01, 1.4186e+00, 1.4761e+00, nan,
5.5842e-01, nan, 8.8135e-01, nan, 1.0344e+00, 1.1153e+00, 1.2731e+00, 5.4525e+00,
2.7410e+00, 3.8250e+00, 1.9446e+01, 2.8108e+01, 5.8824e+01, 5.1819e+01, 4.9466e+01, 4.6651e+01]]]],
dtype=torch.float64)
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[-0., -0., -0., ..., -0., -0., -0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[-0., -0., -0., ..., -0., -0., -0.]])
tensor([[[[2.0000e+00, 2.0121e+16, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan]]]], dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
tensor([[[[ 2.0000e+00, 2.0121e+16, 2.1799e+03, -1.2642e+02, 1.1888e+02, 9.2102e+01, 1.3392e+02, 1.5826e+02,
5.6402e+01, 6.0114e+01, 1.3267e+02, 1.3073e+02, 1.6000e+02, 1.3671e+02, 1.3763e+02, 1.4622e+02,
9.7603e+01, -2.1623e+01, 4.7637e+01, -1.0052e+02, -2.0893e+00, 9.2102e+01, -8.4090e+01, 3.2523e+00,
1.1199e+02, -1.4152e+02, -1.2642e+02, -1.0328e+02, 2.2450e+01, 1.1888e+02, -5.3248e+01, -1.0527e+02,
-1.3257e+02, -1.2736e+02, -5.6363e+01, 1.2717e+02, -1.3012e+02, -4.9418e+01, 9.6423e+01, -1.3353e+02,
-2.4414e+01, 1.4379e+02, -1.0854e+02, -6.4658e+01, 9.7735e+01, -1.4628e+02, -7.0982e+01, 7.2506e+01,
-6.3233e+01, 2.2006e+01, 4.1336e+00, 3.9871e+00, 3.7840e+00, 1.1910e+01, 7.4683e+00, 4.5036e+00,
1.9909e+00, 1.1300e+01, 2.6995e+01, 8.0197e+00, 1.2381e+01, 2.6532e+01, 3.8079e+01, 5.4645e+01,
4.8356e+00, 1.5947e+01, 1.0069e+02, 1.8042e+00, 3.7840e+00, 1.3642e+01, 4.0943e+00, 8.5131e+00,
8.3134e+01, 4.1336e+00, 1.9227e+01, 4.8255e+00, 3.9871e+00, 9.3339e+00, 8.9132e+00, 1.0333e+02,
5.4045e+00, 1.4110e+01, 2.7794e+01, 6.7948e+01, 1.5307e+01, 2.0399e+01, 1.0004e+02, 1.6973e+01,
4.8058e+00, 1.2377e+01, 3.2879e+01, 1.1396e+02, 2.7245e+01, 5.0165e+01, 1.0444e+02, 1.4360e+02]]]],
dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
tensor([[[[ 2.0000e+00, 2.0121e+16, 2.1468e+03, -1.1650e+02, 1.0681e+02, 9.2336e+01, 1.6192e+02, 1.5636e+02,
6.8639e+01, 6.9372e+01, 1.2871e+02, 1.2097e+02, 1.6926e+02, 9.7851e+01, 9.9558e+01, 1.6124e+02,
1.0247e+02, -8.7555e+00, 7.4999e+01, -1.4355e+02, -4.5936e+00, 9.2336e+01, -6.6639e+01, 9.2131e+00,
1.1063e+02, -1.6793e+02, -1.1650e+02, -1.0717e+02, 1.3600e+01, 1.0681e+02, -4.9655e+01, -1.1447e+02,
-1.4637e+02, -1.1646e+02, -8.0738e+01, 1.3582e+02, -1.6939e+02, -8.9483e+01, 1.0139e+02, -1.6476e+02,
-5.4197e+01, 6.2026e+01, -9.1667e+01, -6.2413e+01, -5.6123e+01, -7.2598e+01, -5.5944e+01, -4.4062e+01,
-7.8543e+01, 1.9982e+01, 4.2698e+00, 6.2515e+00, 9.2737e-01, 8.4033e+00, 4.7989e+00, 5.4807e+00,
4.5356e+00, 6.7761e+00, 6.4291e+00, 2.3736e+01, 3.4663e+01, 3.6859e+01, 1.9547e+01, 6.6953e+01,
4.2942e+00, 8.7091e+00, 3.5222e+01, 3.6455e+00, 9.2737e-01, 1.5033e+01, 4.8080e+00, 4.2720e+00,
3.4953e+01, 4.2698e+00, 7.1668e+00, 1.5076e+01, 6.2515e+00, 3.2291e+01, 1.2008e+01, 7.9383e+01,
3.2539e+00, 1.8213e+01, 7.3228e+01, 1.9818e+01, 6.4666e+00, 1.1351e+02, 5.2506e+01, 4.9471e+01,
1.2409e+02, 8.9141e+01, 8.0393e+01, 1.1634e+02, 1.1175e+02, 8.0143e+01, 1.2202e+02, 1.1002e+02]]]],
dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
tensor([[[[ 3.0000e+00, 2.0121e+16, 2.1307e+03, -1.1836e+02, 1.1960e+02, 9.3335e+01, 1.7614e+02, 1.6423e+02,
6.0497e+01, 6.4295e+01, 1.4674e+02, 1.0307e+02, 1.3393e+02, 1.3606e+02, 8.8506e+01, 1.6990e+02,
-6.3892e+01, 3.0301e+00, 9.6623e+01, -4.6353e+01, 3.2749e+00, 9.3335e+01, -3.5689e+01, 1.6018e+01,
1.0254e+02, -1.6364e+02, -1.1836e+02, -1.1859e+02, 2.3843e+01, 1.1960e+02, -5.1990e+01, -1.3287e+02,
-1.5137e+02, -1.2043e+02, -4.7107e+01, 1.1174e+02, -1.6050e+02, -9.1735e+01, -1.8729e+01, -1.6668e+02,
-1.9044e+01, 1.4821e+02, -1.0359e+02, -8.7890e+01, -8.5127e+01, -1.3848e+02, -8.3747e+01, -8.5320e+01,
-1.2180e+02, 2.6135e+01, 1.9769e+00, 2.6045e+00, 2.0390e+00, 1.4901e+00, 5.6421e+00, 3.7508e+00,
3.1743e+00, 3.1110e+00, 1.0373e+01, 2.7232e+01, 7.2261e+00, 2.2325e+01, 2.9433e+01, 2.2821e+01,
3.0788e+00, 2.6993e+00, 2.5304e+01, 2.2994e+00, 2.0390e+00, 1.1612e+01, 3.0532e+00, 6.5961e+00,
5.8219e-01, 1.9769e+00, 1.8826e+00, 2.6724e+00, 2.6045e+00, 5.0856e+00, 2.8424e+00, 1.9730e+00,
5.0156e-01, 1.5276e+01, 8.1453e+00, 4.4028e+00, 1.6706e+01, 1.3667e+02, 2.1477e+01, 1.6028e+01,
8.0510e+00, 1.0984e+01, 7.1106e+01, 4.7512e+01, 8.9697e+01, 7.9041e+01, 4.4746e+01, 1.0752e+02]]]],
dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
tensor([[[[1.0000e+00, 2.0121e+16, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan, nan, nan, nan,
nan, nan, nan, nan, nan, nan]]]], dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
tensor([[[[ 3.0000e+00, 2.0121e+16, 2.1919e+03, -1.2222e+02, 1.2178e+02, 9.2645e+01, 1.4630e+02, 1.6849e+02,
5.7381e+01, 5.9252e+01, 1.2452e+02, 1.6066e+02, 1.6036e+02, 1.3954e+02, 8.3678e+01, 1.6861e+02,
9.9969e+01, -6.3340e+00, 5.9333e+01, -8.1033e+01, 3.7224e-01, 9.2645e+01, -8.6560e+01, 7.4431e-01,
1.0392e+02, 1.5384e+02, -1.2222e+02, -8.0856e+01, 1.9358e+01, 1.2178e+02, -6.0104e+01, -1.1192e+02,
-1.6483e+02, -1.2422e+02, -9.5145e+01, -1.2774e+02, -1.3558e+02, -7.7553e+01, 6.8804e+01, -1.4478e+02,
-2.5068e+01, 1.4691e+02, -1.0733e+02, 4.9468e+01, 5.9000e+01, -1.4433e+01, 4.2396e+01, 5.9469e+01,
-1.6240e+01, 2.1173e+01, 2.1348e+00, 4.0060e+00, 9.0399e-01, 1.3519e+01, 2.0945e+00, 2.4240e+00,
4.3001e+00, 3.8076e+00, 1.4390e+01, 1.5359e+01, 5.6460e+00, 7.0027e+01, 1.0411e+01, 5.0637e+00,
4.0639e+00, 1.3297e+01, 3.0445e+01, 1.4768e+00, 9.0399e-01, 8.4628e+00, 1.8631e+00, 2.0581e+00,
8.1748e+01, 2.1348e+00, 6.9908e+00, 4.6871e+00, 4.0060e+00, 9.0634e+00, 6.4780e+00, 3.3827e+00,
2.7371e+00, 1.3227e+01, 1.1238e+02, 3.6789e+01, 2.0156e+01, 1.4099e+02, 2.8025e+01, 1.3357e+01,
5.5910e+00, 9.2154e+00, 1.0142e+02, 8.0994e+01, 1.0139e+02, 1.0297e+02, 8.7059e+01, 1.0224e+02]]]],
dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
tensor([[[[ 1.0000e+00, 2.0121e+16, 2.0871e+03, -1.2150e+02, 1.1584e+02, 9.4614e+01, 1.7655e+02, 1.4968e+02,
5.9186e+01, 6.4225e+01, 1.4681e+02, 1.2555e+02, 1.4587e+02, 1.3124e+02, 1.0625e+02, 1.6970e+02,
-8.9481e+01, -1.2804e-01, 9.7902e+01, -8.7950e+01, -4.4465e-02, 9.4614e+01, -6.9190e+01, 1.3636e+01,
1.2268e+02, -1.7465e+02, -1.2150e+02, -9.8691e+01, 2.3403e+01, 1.1584e+02, -4.8239e+01, 4.2988e+00,
-1.5434e+02, -9.0322e+01, -2.7524e+01, 5.5696e+01, -1.2056e+02, -3.2478e+01, 5.6493e+01, 8.9425e+00,
9.4711e-01, 1.5983e+02, -8.9396e+01, -8.8004e+01, -1.0813e+02, -1.5779e+02, -8.9520e+01, -9.0743e+01,
-1.4404e+02, 3.7885e+01, 8.5067e-01, 8.7955e-01, 1.7108e+00, 1.2885e+00, 3.4127e+00, 1.1691e+00,
1.7282e+00, 2.7701e+00, 2.3123e+01, 2.0280e+01, 4.5535e+00, 1.8832e+01, 1.4191e+01, 1.2135e+01,
1.4857e+00, 2.4388e+00, 1.4344e+01, 1.0912e+00, 1.7108e+00, 2.5853e+00, 1.3156e+00, 3.0321e+00,
1.2381e+00, 8.5067e-01, 2.3047e+00, 1.2403e+00, 8.7955e-01, 1.6901e+00, 1.7304e+02, 2.8295e+00,
3.8194e+00, 8.0889e+01, 1.4319e+02, 2.3998e+01, 6.4587e+01, 1.0627e+02, 1.4835e+02, 1.4839e+01,
1.9887e+01, 4.6248e+00, 4.1587e+01, 9.4676e+01, 2.1845e+01, 4.2808e+01, 1.1131e+02, 6.9814e+01]]]],
dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
tensor([[[[ 3.0000e+00, 2.0121e+16, 2.2382e+03, -1.1457e+02, 1.0945e+02, 9.8255e+01, 1.7610e+02, 1.4711e+02,
7.2049e+01, 6.3489e+01, 1.4008e+02, 9.7981e+01, 1.6307e+02, 1.2685e+02, 1.1018e+02, 1.0374e+02,
-1.1611e+02, -5.8163e+00, 1.0192e+02, -1.3040e+02, -5.9148e+00, 9.8255e+01, -7.4477e+01, 1.1937e+01,
1.2674e+02, -1.7553e+02, -1.1457e+02, -9.9373e+01, 2.5755e+01, 1.0945e+02, -3.6399e+01, -1.4544e+02,
-1.5179e+02, -1.0904e+02, -4.7341e+01, 1.1076e+02, -1.6068e+02, -4.8402e+01, 8.7140e+01, 1.7231e+02,
-7.3906e+00, 1.5414e+02, -9.3966e+01, -4.5671e+01, 4.2261e+01, -1.1964e+02, 1.8086e+01, -1.1372e+01,
-3.0424e+01, 1.9088e+01, 1.6262e+00, 1.1555e+00, 5.5998e+00, 1.0427e+00, 1.9490e+00, 3.7436e+00,
3.2090e+00, 4.3002e+00, 8.4366e+00, 1.7938e+00, 8.7047e+00, 2.4796e+01, 6.9884e+01, 1.2647e+01,
2.6064e+00, 5.7478e+00, 1.4113e+01, 1.9710e+00, 5.5998e+00, 5.5197e+00, 4.9480e+00, 2.4867e+00,
2.1224e+00, 1.6262e+00, 4.3539e+00, 2.6674e+00, 1.1555e+00, 4.2607e+00, 1.3345e+01, 2.0322e+01,
5.7152e+00, 4.2881e+00, 2.0370e+00, 2.4950e+00, 1.8078e+00, 1.7292e+00, 4.2053e+01, 3.0696e+01,
1.9527e+01, 1.5438e+01, 5.5741e+01, 1.0291e+02, 9.3203e+01, 1.0503e+02, 9.0168e+01, 1.1326e+02]]]],
dtype=torch.float64)
tensor([[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
...,
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]])
And so on. Any help would be appreciated.