Backward hook with return value gives error, without return value doesn't

I am trying to implement the DeepLIFT algorithm using the gradient interpretation provided by Ancona et al. This requires saving the forward activations before and after the non-linearities and using their ratios in the backward pass in place of the ReLU gradients. However, when I return values from my backward hooks I get errors that I don't understand (I get no errors if I don't return anything from the backward hooks).
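For context, the rule I am trying to implement, as I understand Ancona et al.: with a zero baseline and a non-linearity f with f(0) = 0 (such as ReLU), the local derivative f'(z) is replaced by the average gradient f(z)/z, i.e. the ratio of the saved output to the saved input. A minimal sketch of that idea (not the code I actually run):

# Sketch only: modified backward rule for one non-linearity, assuming a
# zero baseline and f(0) = 0. 'i' is the saved input to the non-linearity,
# 'o' the saved output.
def modified_grad(grad_output, i, o):
    return grad_output * o / i  # o/i replaces the local derivative f'(i)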

My small test network:

import torch
import torch.nn as nn

class HookNet(nn.Module):
    def __init__(self):
        super(HookNet, self).__init__()
        self.fc1 = nn.Linear(2, 2)
        self.s1 = nn.Sigmoid()
        self.fc2 = nn.Linear(2, 1)
        self.s2 = nn.Sigmoid()
        # Fixed weights and biases so the numbers below are reproducible
        self.fc1.weight = torch.nn.Parameter(torch.Tensor([[1, 2], [-1, 2]]))
        self.fc1.bias = torch.nn.Parameter(torch.Tensor([0, 0]))  # fc1 has two outputs
        self.fc2.weight = torch.nn.Parameter(torch.Tensor([[1, 2]]))
        self.fc2.bias = torch.nn.Parameter(torch.Tensor([0]))

    def forward(self, x):
        x = self.fc1(x)
        x = self.s1(x)
        x = self.fc2(x)
        x = self.s2(x)
        return x


My hook functions:

saved_activations = {}

def forward_save_act(name, module, input, output):
    # Save the input to and the output of the non-linearity
    saved_activations[name] = (input[0], output)

def backward_test(name, module, grad_input, grad_output):
    i, o = saved_activations[name]
    print('i: ' + str(i))
    print('o: ' + str(o))
    # Ratio of saved output to saved input, to be used in place of grad_input
    new_grad_input = o / i
    print('Name: ' + str(name))
    print('Original grad_input: ' + str(grad_input))
    print('Original grad_output: ' + str(grad_output))
    print('New grad_input: ' + str(new_grad_input))
    grad_tuple = (new_grad_input,)
    return grad_tuple
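As a quick sanity check that the hook should actually change something (my own back-of-the-envelope numbers, using the s2 values that show up in the output below):

# Not part of the failing script: for s2, i is about 2.4147 and
# o = sigmoid(i) is about 0.9179, so the ratio o/i is about 0.3801, while
# the true local sigmoid derivative o*(1-o) is about 0.0753.
import torch
i = torch.tensor([2.4147])
o = torch.sigmoid(i)
print(o / i)        # approx. tensor([0.3801])
print(o * (1 - o))  # approx. tensor([0.0753])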

Registering the hooks:

from functools import partial

hooknet = HookNet()

handle_list = []
for name, m in hooknet.named_modules():
    if type(m) == nn.Sigmoid:
        handle_list.append(m.register_forward_hook(partial(forward_save_act, name)))
        handle_list.append(m.register_backward_hook(partial(backward_test, name)))
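As far as I understand, partial just binds name as the first argument, so the registered callables end up with exactly the signatures PyTorch expects: (module, input, output) for forward hooks and (module, grad_input, grad_output) for backward hooks. A tiny illustration of that assumption:

# Illustration only: 'bound' has the (module, grad_input, grad_output)
# signature that register_backward_hook expects, with 'name' pre-filled.
from functools import partial

def demo_hook(name, module, grad_input, grad_output):
    print(name, type(module).__name__)

bound = partial(demo_hook, 's1')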

Running one forward and backward pass:

inp = torch.Tensor([1, 1])
out = hooknet(inp)
out.backward()
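For reference, the forward activations can be reproduced by hand (my own check; the values match the printed output below):

# Hand computation of the forward pass with the fixed weights above
import torch
x = torch.tensor([1., 1.])
z1 = torch.tensor([[1., 2.], [-1., 2.]]) @ x  # -> tensor([3., 1.])
a1 = torch.sigmoid(z1)                        # -> tensor([0.9526, 0.7311])
z2 = torch.tensor([[1., 2.]]) @ a1            # -> tensor([2.4147])
a2 = torch.sigmoid(z2)                        # -> tensor([0.9179])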

Output (including errors):

(tensor([3., 1.], grad_fn=<AddBackward0>),)
tensor([0.9526, 0.7311], grad_fn=<SigmoidBackward>)
(tensor([2.4147], grad_fn=<AddBackward0>),)
tensor([0.9179], grad_fn=<SigmoidBackward>)
i: tensor([2.4147])
o: tensor([0.9179])
Name: s2
Original grad_input:
Original grad_output:
New grad_input:
Traceback (most recent call last):
  File "<ipython-input-1415-deeda1acb2f1>", line 4, in <module>
  File "/Users/Eigil/opt/anaconda3/lib/python3.7/site-packages/torch/", line 195, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/Users/Eigil/opt/anaconda3/lib/python3.7/site-packages/torch/autograd/", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
  File "<ipython-input-1412-a390d2cba2e8>", line 4, in backward_test
    print('i: '+str(i))
  File "/Users/Eigil/opt/anaconda3/lib/python3.7/site-packages/torch/", line 159, in __repr__
    return torch._tensor_str._str(self)
  File "/Users/Eigil/opt/anaconda3/lib/python3.7/site-packages/torch/", line 249, in _str
    indent = len(prefix)
SystemError: <built-in function len> returned a result with an error set

So the hook works fine for the first layer reached in the backward pass (s2) but raises an error for s1. What I don't understand is that the error seems to stem from the internals of the backward-hook machinery, yet if I use the same backward hook without the return statement (and just print everything), it works like a charm!