Sure!
# gradient of the sum of losses
tensor([[[[-8.3106e-06, 1.8759e-05, -7.7942e-06, ..., 7.4618e-05,
-4.7180e-05, 3.6800e-05],
[ 2.1606e-05, -5.4137e-05, 2.1183e-04, ..., 7.9589e-05,
4.3138e-05, -3.3451e-05],
[-3.4337e-05, 2.2183e-05, -2.5751e-04, ..., -4.9273e-05,
5.3253e-05, -8.3178e-05],
...,
[ 6.1050e-05, -2.1775e-05, -4.6475e-05, ..., 8.3546e-05,
5.8822e-05, -5.5404e-05],
[-5.5288e-05, 2.3810e-05, 5.7758e-05, ..., -1.2546e-05,
2.4010e-05, 1.6447e-05],
[ 9.5190e-05, -1.1252e-04, 2.3661e-05, ..., 1.8137e-05,
-2.1645e-05, -1.6138e-05]],
[[-1.4543e-05, 1.3344e-06, 3.2785e-05, ..., -3.4169e-05,
-7.0121e-05, 1.0172e-04],
[-4.3257e-05, 9.6162e-05, -1.1047e-04, ..., -2.2850e-05,
5.5712e-05, -3.1813e-05],
[-1.4116e-05, 5.1198e-05, 3.5880e-06, ..., 4.6499e-05,
-5.2294e-05, -2.6736e-05],
...,
[-8.7832e-05, -4.6133e-05, -5.2025e-05, ..., -5.7989e-05,
2.9164e-05, 3.3786e-06],
[-4.2051e-06, 6.9220e-05, 1.5171e-05, ..., 8.5499e-05,
-6.0512e-05, 1.9598e-05],
[ 4.8490e-05, -6.6214e-05, 6.1356e-06, ..., -1.9254e-05,
4.6471e-05, -3.8032e-05]],
[[-3.3499e-05, -6.2552e-06, 2.0268e-05, ..., -3.1183e-05,
6.9439e-05, -8.9930e-05],
[ 5.2075e-05, -4.3663e-06, -6.5345e-05, ..., 4.7974e-05,
-5.2119e-05, 4.9723e-05],
[-7.0753e-05, -2.4573e-06, 1.4305e-04, ..., -5.9918e-05,
5.7893e-05, -1.8903e-05],
...,
[ 9.8170e-06, -1.0143e-04, 3.8614e-05, ..., 3.4707e-07,
-3.9175e-05, 3.2880e-05],
[ 2.2249e-05, 1.7025e-05, -5.5440e-05, ..., -9.5285e-05,
6.6389e-05, 2.5453e-05],
[-3.7884e-05, 9.5156e-05, 1.4506e-05, ..., 2.4268e-05,
-1.5537e-05, -2.1148e-05]]]], device='cuda:0')
# sum of the gradients of each individual loss
tensor([[[[-8.3106e-06, 1.8759e-05, -7.7942e-06, ..., 7.4618e-05,
-4.7180e-05, 3.6800e-05],
[ 2.1606e-05, -5.4137e-05, 2.1183e-04, ..., 7.9589e-05,
4.3138e-05, -3.3451e-05],
[-3.4337e-05, 2.2183e-05, -2.5751e-04, ..., -4.9273e-05,
5.3253e-05, -8.3178e-05],
...,
[ 6.1050e-05, -2.1775e-05, -4.6475e-05, ..., 8.3546e-05,
5.8822e-05, -5.5404e-05],
[-5.5288e-05, 2.3810e-05, 5.7758e-05, ..., -1.2546e-05,
2.4010e-05, 1.6447e-05],
[ 9.5190e-05, -1.1252e-04, 2.3661e-05, ..., 1.8137e-05,
-2.1645e-05, -1.6138e-05]],
[[-1.4543e-05, 1.3344e-06, 3.2785e-05, ..., -3.4169e-05,
-7.0121e-05, 1.0172e-04],
[-4.3257e-05, 9.6162e-05, -1.1047e-04, ..., -2.2850e-05,
5.5712e-05, -3.1813e-05],
[-1.4116e-05, 5.1198e-05, 3.5880e-06, ..., 4.6499e-05,
-5.2294e-05, -2.6736e-05],
...,
[-8.7832e-05, -4.6133e-05, -5.2025e-05, ..., -5.7989e-05,
2.9164e-05, 3.3786e-06],
[-4.2051e-06, 6.9220e-05, 1.5171e-05, ..., 8.5499e-05,
-6.0512e-05, 1.9598e-05],
[ 4.8490e-05, -6.6214e-05, 6.1356e-06, ..., -1.9254e-05,
4.6471e-05, -3.8032e-05]],
[[-3.3499e-05, -6.2552e-06, 2.0268e-05, ..., -3.1183e-05,
6.9439e-05, -8.9930e-05],
[ 5.2075e-05, -4.3663e-06, -6.5345e-05, ..., 4.7974e-05,
-5.2119e-05, 4.9723e-05],
[-7.0753e-05, -2.4573e-06, 1.4305e-04, ..., -5.9918e-05,
5.7893e-05, -1.8903e-05],
...,
[ 9.8170e-06, -1.0143e-04, 3.8614e-05, ..., 3.4707e-07,
-3.9175e-05, 3.2880e-05],
[ 2.2249e-05, 1.7025e-05, -5.5440e-05, ..., -9.5285e-05,
6.6389e-05, 2.5453e-05],
[-3.7884e-05, 9.5156e-05, 1.4506e-05, ..., 2.4268e-05,
-1.5537e-05, -2.1148e-05]]]], device='cuda:0')
# diff
tensor([[[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]],
[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]],
[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]]], device='cuda:0')
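They really are the same, which makes sense because differentiation is linear: the gradient of a sum of losses equals the sum of the gradients of each loss. Looks like I got it wrong XD.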