Hi everyone,

I’m trying to implement a method to compute Hessian vector products (HVPs) using PyTorch, specifically using gradients obtained through hooks in a custom GradCAM class. However, I’m encountering an issue where the gradients obtained via hooks seem not to propagate correctly when computing the HVPs.

Here’s a simplified version of my code:

```
import torch
import torch.nn as nn
import torch.nn.functional as F
# Define a simple convolutional neural network model
class SimpleModel(nn.Module):
def __init__(self):
super(SimpleModel, self).__init__()
self.conv1 = nn.Conv2d(3, 16, 3) # Output size: (224 - 3 + 1) = 222
self.conv2 = nn.Conv2d(16, 32, 3) # Output size: (222 - 3 + 1) = 220
self.pool = nn.AdaptiveAvgPool2d((1, 1)) # Average pooling to (1, 1)
self.fc = nn.Linear(32, 10) # Adjusted to 32 to match the output after pooling
def forward(self, x):
x = F.relu(self.conv1(x)) # 224x224 -> 222x222
x = F.relu(self.conv2(x)) # 222x222 -> 220x220
x = self.pool(x) # 220x220 -> 1x1
x = x.view(x.size(0), -1) # Flatten tensor to (batch_size, 32)
x = self.fc(x) # Fully connected layer
return x
# Define GradCAM class
class GradCAM(nn.Module):
def __init__(self, model, target_layer):
super(GradCAM, self).__init__()
self.model = model
self.target_layer = target_layer
self.gradients = None
self.activation = None
# Register forward hook
self.target_layer.register_forward_hook(self.forward_hook)
def forward_hook(self, module, input, output):
self.activation = output
output.register_hook(self.backward_hook)
def backward_hook(self, grad):
self.gradients = grad
def forward(self, x):
return self.model(x)
# Instantiate model and GradCAM
model = SimpleModel()
target_layer = model.conv2
gradcam = GradCAM(model, target_layer)
# Input tensor
input_tensor = torch.randn(1, 3, 224, 224, requires_grad=True)
# Forward pass
output = gradcam(input_tensor)
# Compute loss and perform backward pass
loss = output.sum()
gradcam.model.zero_grad()
loss.backward(retain_graph=True)
# Get gradients and activation
gradients = gradcam.gradients
activation = gradcam.activation
# Compute Hessian-Vector Product
# Ensure activation has requires_grad=True
# Ensure gradients have requires_grad=True
activation.requires_grad_(True)
gradients.requires_grad_(True)
# Compute Hessian-Vector Product
hvp = torch.autograd.grad(
outputs=gradients,
inputs=activation,
grad_outputs=activation,
retain_graph=True
)
print("Hessian-Vector Product:", hvp)
```

When attempting to compute the Hessian vector product using `torch.autograd.grad`

, I encounter the following error:

```
Traceback (most recent call last):
File "F:\code\torch-cam\torchcam\methods\try2.py", line 71, in <module>
hvp = torch.autograd.grad(
File "D:\program\anaconda3\envs\cfr\lib\site-packages\torch\autograd\__init__.py", line 412, in grad
result = _engine_run_backward(
File "D:\program\anaconda3\envs\cfr\lib\site-packages\torch\autograd\graph.py", line 744, in _engine_run_backward
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.
```

I’ve ensured that both activation and gradients have requires_grad=True, but the issue persists. How can I correctly compute the Hessian vector product using gradients obtained via hooks in PyTorch?

Any insights or suggestions would be greatly appreciated! Thanks in advance!