I am debugging my model and need to inspect the gradients for the inputs and outputs of its modules via `register_backward_hook`. I found that in some cases `register_backward_hook` does not report the gradients of all of a module's inputs. I was able to write a small script that reproduces the scenario.
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as torchF
from torch.autograd import Variable
def BHook(self, input, output):
    """Backward hook that dumps the gradients it receives to stdout.

    Meant to be attached with ``register_backward_hook``, which invokes it as
    ``hook(module, grad_input, grad_output)``.

    Args:
        self: Module the hook fired for; only its class name is printed.
        input: Gradient tuple received in the ``grad_input`` position.
        output: Gradient tuple received in the ``grad_output`` position.

    Returns:
        None, so the gradients flowing through the module are not replaced.
    """
    print('BHook')
    print(self.__class__.__name__)
    # Each gradient tuple is printed under its own label line.
    for label, grads in (('input', input), ('output', output)):
        print(label)
        print(grads)
def BHook2(self, input, output):
    """Backward hook that prints each gradient tuple along with its length.

    Meant to be attached with ``register_backward_hook``, which invokes it as
    ``hook(module, grad_input, grad_output)``. The printed lengths make it
    easy to see how many gradients were actually delivered.

    Args:
        self: Module the hook fired for; only its class name is printed.
        input: Gradient tuple received in the ``grad_input`` position.
        output: Gradient tuple received in the ``grad_output`` position.

    Returns:
        None, so the gradients flowing through the module are not replaced.
    """
    print('BHook2')
    print(self.__class__.__name__)
    # For each tuple: label, number of entries, then the entries themselves.
    for label, grads in (('input', input), ('output', output)):
        print(label)
        print(len(grads))
        print(grads)
class SubModule(nn.Module):
    """Parameter-free module that combines two tensors element-wise.

    It defines no layers of its own, so the constructor inherited from
    ``nn.Module`` is sufficient.
    """

    def forward(self, x1, x2):
        """Return ``x1**2 + x2**2 + 1.`` computed element-wise.

        Args:
            x1: First input tensor.
            x2: Second input tensor, combined element-wise with ``x1``.

        Returns:
            Tensor holding the sum of both squared inputs plus one.
        """
        # this line results in only single element in grad_input tuple.
        y = x1**2 + x2**2 + 1.
        # however, this line results in two elements in grad_input tuple as expected.
        # y = x1**2+x2**2
        return y
class Net(nn.Module):
    """Two parallel convolutions over the same input, merged by ``SubModule``.

    Both convolutions map 3 channels to 3 channels with a 3x3 kernel,
    padding 1 and stride 1.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Created in this order: conv1, conv2, then the merging module.
        self.conv1 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1)
        self.SubModule = SubModule()

    def forward(self, x):
        """Feed ``x`` through both convolutions and merge the two results.

        Args:
            x: Input tensor with 3 channels.

        Returns:
            Output of ``SubModule`` applied to the conv1 and conv2 results.
        """
        return self.SubModule(self.conv1(x), self.conv2(x))
# Build the network and move its parameters to the GPU (CUDA is required).
net = Net()
net.cuda()

# Attach the printing hooks: BHook on both convolutions, BHook2 on the
# module that merges their outputs.
net.conv1.register_backward_hook(BHook)
net.conv2.register_backward_hook(BHook)
net.SubModule.register_backward_hook(BHook2)

# Forward pass on a random 1x3x7x7 input.
x = Variable(torch.cuda.FloatTensor(np.random.rand(1, 3, 7, 7)))
out_x = net(x)

# L1 loss against a random, non-positive target of the same shape.
target = Variable(torch.cuda.FloatTensor(-np.random.rand(1, 3, 7, 7)))
out = torchF.l1_loss(out_x, target)

# Clear stale gradients, then backpropagate; the hooks print during this call.
net.zero_grad()
out.backward()
For example, when the `y = x1**2+x2**2` line is enabled, BHook2() shows two elements in the input tuple; when the `y = x1**2+x2**2+1.` line is enabled instead, it shows only a single element. The gradient w.r.t. the second input is missing, and that is the one I need to inspect while debugging my actual module.