How does conv2d calculate the gradient with respect to its input?

Given the following code, I want to understand how the gradient printed by the `conv1. x.register_hook:` line is being calculated.

``````try_grad = nn.Conv2d(1,4,kernel_size=1)  # separate, randomly initialized 1x1 conv — only used at the end for print(try_grad(img)); unrelated to conv_model

class try_conv_model(nn.Module):
    """Minimal model: 1x1 conv (no bias) -> global average pool -> flatten.

    Used to trace how gradients flow backward through Conv2d via tensor
    and module hooks.
    """

    def __init__(self):
        super(try_conv_model, self).__init__()
        self.conv1 = nn.Conv2d(1, 4, kernel_size=1, bias=False)
        # `gap` was missing from the pasted code but is called in forward();
        # the printed output shows it is AdaptiveAvgPool2d(output_size=(1, 1)).
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        #self.conv2 = nn.Conv2d(1, 4, kernel_size=1, bias=False)

    def forward(self, x):
        print("-"*150)
        print("before conv1: ",x)
        x = self.conv1(x)
        # Tensor hook: fires during backward with the gradient of the loss
        # w.r.t. conv1's OUTPUT (renamed lambda arg to avoid shadowing x).
        x.register_hook(lambda grad: print("conv1. x.register_hook: ",grad))
        #print("before conv2: ",x)
        #x = self.conv2(x)
        x = self.gap(x)
        x = x.view(-1, self.num_flat_features(x))
        return(x)

    def num_flat_features(self, x):
        """Product of all dimensions of x except the batch dimension."""
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

conv_model = try_conv_model()  # module-level instance used by the hooks, train(), and the final prediction below

def my_func(z):
    """Tensor-hook that rewrites the gradient flowing out of `outputs`.

    z is the (1, 4) gradient of the loss w.r.t. `outputs`. The two
    `z[0]` indexings each drop the batch dim and broadcasting restores it,
    so the net effect is adding 2 to the gradient of logit index 2
    (e.g. -0.75 becomes the 1.25 seen in the printed backward output).
    """
    print("z[0]: ",z[0])
    z = z[0] + torch.tensor([[0,0,1,0]])
    z = z[0] - torch.tensor([[0,0,-1,0]])
    return z

# Module hooks: forward hooks receive (module, input, output); backward hooks
# receive (module, grad_input, grad_output).
# NOTE(review): Module.register_backward_hook is deprecated and its docs warn
# it can report incorrect gradients — prefer register_full_backward_hook.
conv_model.conv1.register_forward_hook(lambda x,y,z: print(" \n conv_model.conv1.register_forward_hook: ",x,y,z))
conv_model.conv1.register_backward_hook(lambda x,y,z: print(" \n conv_model.conv1.register_backward_hook: ",x,y,z))
#conv_model.conv1.register_hook(lambda x: print("conv_model.conv1.register_hook: ",conv_model.conv1.register_hook))
#conv_model.conv2.register_forward_hook(lambda x,y,z: print(" \n conv_model.conv2.register_forward_hook: ",x,y,z))
#conv_model.conv2.register_backward_hook(lambda x,y,z: print(" \n conv_model.conv2.register_backward_hook: ",x,y,z))

conv_model.gap.register_forward_hook(lambda x,y,z: print(" \n conv_model.gap.register_forward_hook: ",x,y,z))
conv_model.gap.register_backward_hook(lambda x,y,z: print(" \n conv_model.gap.register_backward_hook: ",x,y,z))
#conv_model.gap.register_hook(lambda x: print("conv_model.gap.register_hook: ",x))

# Force every 1x1 kernel to weight 1 so the forward pass is the identity per channel.
conv_model.conv1.weight.data = torch.tensor([[[[1]]], [[[1]]], [[[1]]], [[[1]]]], dtype = torch.float)
img = torch.ones(1,1,5,5, dtype = torch.float)
print(img)

labels = torch.tensor([2], dtype = torch.long)
print(labels)

criterion = F.cross_entropy
#criterion.register_hook(lambda x: print("babasjbdkajskjs"))
optimizer = optim.SGD(conv_model.parameters(), lr=0.01, momentum=0.9)
epoch = 1

def train(epoch):
    """Run `epoch` SGD steps on the module-level conv_model/img/labels,
    printing the parameters before and after each step."""
    conv_model.train()
    for i in range(epoch):
        print("&"*300)
        print("*"*25)
        for param in conv_model.parameters():
            print(param)
        print("^"*25)
        conv_model.train()
        outputs = conv_model(img)
        #outputs.register_hook(lambda x: print("outputs.register_hook: ", x))
        # my_func replaces the gradient flowing out of `outputs` during backward.
        outputs.register_hook(my_func)
        print("outputs: ",outputs)
        loss = criterion(outputs, labels)
        loss.register_hook(lambda grad: print(" \n before backward loss hook: ",grad))

        # NOTE(review): no optimizer.zero_grad() here — harmless with epoch=1,
        # but gradients would accumulate across iterations for epoch > 1.
        loss.backward()
        # NOTE(review): registered AFTER backward() already ran, so this hook
        # never fires — only the "before" hook prints in the output.
        loss.register_hook(lambda grad: print(" \n after backward loss hook: ",grad))

        optimizer.step()
        print("*"*25)
        for param in conv_model.parameters():
            print(param)
        print("^"*25)

train(epoch)  # runs one forward/backward/step, triggering all the hooks above

# argmax over the 4 logits of the (updated) model's output for img
print("model prediction ",conv_model(img).max(1, keepdim=True)[1])
``````

result:

``````tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]])
tensor([2])
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
*************************
Parameter containing:
tensor([[[[1.]]],

[[[1.]]],

[[[1.]]],

^^^^^^^^^^^^^^^^^^^^^^^^^
------------------------------------------------------------------------------------------------------------------------------------------------------
before conv1:  tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]])

conv_model.conv1.register_forward_hook:  Conv2d(1, 4, kernel_size=(1, 1), stride=(1, 1), bias=False) (tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]]),) tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]], grad_fn=<MkldnnConvolutionBackward>)
before AdaptiveAvgPool2D:  tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]], grad_fn=<MkldnnConvolutionBackward>)

conv_model.gap.register_forward_hook:  AdaptiveAvgPool2d(output_size=(1, 1)) (tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]],

[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]], grad_fn=<MkldnnConvolutionBackward>),) tensor([[[[1.]],

[[1.]],

[[1.]],

[[1.]],

[[1.]],

outputs:  tensor([[1., 1., 1., 1.]], grad_fn=<ViewBackward>)

before backward loss hook:  tensor(1.)
z[0]:  tensor([ 0.2500,  0.2500, -0.7500,  0.2500])

conv_model.gap.register_backward_hook:  AdaptiveAvgPool2d(output_size=(1, 1)) (tensor([0.2500, 0.2500, 1.2500, 0.2500]),) (tensor([[[[0.2500]],

[[0.2500]],

[[1.2500]],

[[0.2500]]]]),)
conv1. x.register_hook:  tensor([[[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]],

[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]],

[[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500]],

[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]]]])

conv_model.conv1.register_backward_hook:  Conv2d(1, 4, kernel_size=(1, 1), stride=(1, 1), bias=False) (None, tensor([[[[0.2500]]],

[[[0.2500]]],

[[[1.2500]]],

[[[0.2500]]]]), None) (tensor([[[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]],

[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]],

[[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500]],

[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]]]]),)

[[[0.2500]]],

[[[1.2500]]],

[[[0.2500]]]])
*************************
Parameter containing:
tensor([[[[0.9975]]],

[[[0.9975]]],

[[[0.9875]]],

^^^^^^^^^^^^^^^^^^^^^^^^^
------------------------------------------------------------------------------------------------------------------------------------------------------
before conv1:  tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]])

conv_model.conv1.register_forward_hook:  Conv2d(1, 4, kernel_size=(1, 1), stride=(1, 1), bias=False) (tensor([[[[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]]]]),) tensor([[[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]],

[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]],

[[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875]],

[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]]]],
before AdaptiveAvgPool2D:  tensor([[[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]],

[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]],

[[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875]],

[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]]]],

conv_model.gap.register_forward_hook:  AdaptiveAvgPool2d(output_size=(1, 1)) (tensor([[[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]],

[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]],

[[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875],
[0.9875, 0.9875, 0.9875, 0.9875, 0.9875]],

[[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975],
[0.9975, 0.9975, 0.9975, 0.9975, 0.9975]]]],

[[0.9975]],

[[0.9875]],

[[0.9975]],

[[0.9875]],

model prediction  tensor([[3]])
``````

If the gradient being back-propagated by the avg2d layer is

``````(tensor([[[[0.2500]],

[[0.2500]],

[[1.2500]],

[[0.2500]]]]),)
``````

what is it being convolved with so that it gives:

``````tensor([[[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]],

[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]],

[[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500],
[0.0500, 0.0500, 0.0500, 0.0500, 0.0500]],

[[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100],
[0.0100, 0.0100, 0.0100, 0.0100, 0.0100]]]])
``````

By contrast, if `img` is passed through the separate, randomly initialized `try_grad` layer via `print(try_grad(img))`, it gives the following result:

``````tensor([[[[-0.1372, -0.1372, -0.1372, -0.1372, -0.1372],
[-0.1372, -0.1372, -0.1372, -0.1372, -0.1372],
[-0.1372, -0.1372, -0.1372, -0.1372, -0.1372],
[-0.1372, -0.1372, -0.1372, -0.1372, -0.1372],
[-0.1372, -0.1372, -0.1372, -0.1372, -0.1372]],

[[ 0.3811,  0.3811,  0.3811,  0.3811,  0.3811],
[ 0.3811,  0.3811,  0.3811,  0.3811,  0.3811],
[ 0.3811,  0.3811,  0.3811,  0.3811,  0.3811],
[ 0.3811,  0.3811,  0.3811,  0.3811,  0.3811],
[ 0.3811,  0.3811,  0.3811,  0.3811,  0.3811]],

[[-1.4801, -1.4801, -1.4801, -1.4801, -1.4801],
[-1.4801, -1.4801, -1.4801, -1.4801, -1.4801],
[-1.4801, -1.4801, -1.4801, -1.4801, -1.4801],
[-1.4801, -1.4801, -1.4801, -1.4801, -1.4801],
[-1.4801, -1.4801, -1.4801, -1.4801, -1.4801]],

[[-0.2196, -0.2196, -0.2196, -0.2196, -0.2196],
[-0.2196, -0.2196, -0.2196, -0.2196, -0.2196],
[-0.2196, -0.2196, -0.2196, -0.2196, -0.2196],
[-0.2196, -0.2196, -0.2196, -0.2196, -0.2196],
[-0.2196, -0.2196, -0.2196, -0.2196, -0.2196]]]],
First, be careful not to rely on the result of `register_backward_hook`: its documentation currently carries a warning stating that it can return incorrect results.