Gradient extraction for Conv heatmap

I’m trying to extract the gradients out of the last conv layer of a trained NN in order to create a heatmap to visualize the parts of the image the NN is giving importance to in order to make its decisions.
The result should be something similar to “GradCAM”.
I am following this article: https://medium.com/@stepanulyanin/implementing-grad-cam-in-pytorch-ea0937c31e82. I replicated the code and adjusted it to my needs: instead of a pretrained VGG, I use a VGG16 trained on my own dataset, which is divided into 3 classes. I then pass a single image to the model and try to get the heatmap, but I’m stuck at the pred.backward() step of the article. (I also don’t understand why, in the article, pooled_gradients is computed with “dim=[0,2,3]”.)
This is my code:

class VGG(nn.Module):
    """Wrap the trained ``vgg16`` model so Grad-CAM gradients can be captured.

    The forward pass is split after ``features[:30]`` so that a backward hook
    can be attached to that intermediate activation. After a backward pass,
    the gradient that flowed into that activation is available through
    :meth:`get_activations_gradient`.
    """

    def __init__(self):
        super().__init__()

        # the already-trained model; ``vgg16`` is expected to exist at module level
        self.vgg = vgg16
        # every feature layer except the final max-pool, which is re-added below
        self.features_conv = self.vgg.features[:30]
        # the max-pool that was cut off by the slice above
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        self.avgpool2d = nn.AdaptiveAvgPool2d(7)
        # reuse the classifier head of the trained model
        self.classifier = self.vgg.classifier
        # filled by ``activations_hook`` during the backward pass
        self.gradients = None

    def activations_hook(self, grad):
        """Store the gradient received by the hooked activation."""
        # must be the same attribute that ``get_activations_gradient`` returns
        self.gradients = grad

    def forward(self, x):
        """Run the full network and return the classifier output for ``x``."""
        x = self.features_conv(x)

        # A hook can only be registered on a tensor that requires grad;
        # skipping it otherwise keeps plain inference (e.g. under
        # ``torch.no_grad()``) working.
        if x.requires_grad:
            x.register_hook(self.activations_hook)

        # complete the remaining pooling
        x = self.max_pool(x)
        x = self.avgpool2d(x)
        # flatten to (batch, features) for the classifier
        x = x.view(x.size(0), -1)
        return self.classifier(x)

    def get_activations_gradient(self):
        """Return the gradient captured by the hook, or ``None`` if no backward pass ran yet."""
        return self.gradients

    def get_activations(self, x):
        """Return the activation of ``features_conv`` for the input ``x``."""
        return self.features_conv(x)
   
from PIL import Image
def image_loader(image_name):
    """Load an image file and return it as a batched float tensor.

    The image is opened with PIL, passed through ``transformation_test``,
    cast to float, marked as requiring gradients and given a leading batch
    dimension. The tensor is NOT moved to the GPU here; the caller is
    responsible for placing it on the right device.

    Args:
        image_name: path of the image file to load.

    Returns:
        The transformed image with an extra dimension inserted at position 0.
    """
    # the context manager closes the underlying file once the transform has
    # consumed the pixel data
    with Image.open(image_name) as raw:
        # assumes ``transformation_test`` returns a torch tensor -- TODO confirm
        image = transformation_test(raw).float()
    # replaces the deprecated ``Variable(image, requires_grad=True)`` wrapper
    image = image.requires_grad_(True)
    # add the batch dimension the model expects
    return image.unsqueeze(0)
import cv2

vgg_heatmap = VGG()
torch.set_grad_enabled(True)
vgg_heatmap.eval()

img = image_loader("/content/drive/My Drive/Colab Notebooks/progetto/X-Ray Image DataSet/3classes/Covid-19/covid-19-pneumonia-rapidly-progressive-12-hours.jpg")
# put the image on whatever device the model lives on, then make it a fresh
# leaf tensor that requires grad (replaces the deprecated ``Variable`` wrapper)
device = next(vgg_heatmap.parameters()).device
img = img.to(device).detach().requires_grad_(True)

# forward pass: keep the raw class scores, they are what we differentiate
logits = vgg_heatmap(img)
pred = logits.argmax(dim=1)
print(pred)

# ``argmax`` returns integer indices that have no grad_fn, so calling
# ``backward()`` on ``pred`` raises "element 0 of tensors does not require
# grad and does not have a grad_fn". Backpropagate the score of the
# predicted class instead.
logits[0, pred[0]].backward()

# pull the gradients out of the model
gradients = vgg_heatmap.get_activations_gradient()
print(gradients)
# the gradients are indexed as (batch, channel, height, width); averaging over
# dims 0, 2 and 3 leaves a single weight per channel
pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])

# get the activations of the last convolutional layer
activations = vgg_heatmap.get_activations(img).detach()

# weight every channel by its pooled gradient (broadcast over batch, H, W)
activations *= pooled_gradients.view(1, -1, 1, 1)

# average the channels of the activations
heatmap = torch.mean(activations, dim=1).squeeze()

# relu on the heatmap (done in torch so it also works for CUDA tensors)
heatmap = torch.clamp(heatmap, min=0)
# normalize the heatmap; skip the division when it is all zeros to avoid NaNs
heatmap_max = torch.max(heatmap)
if heatmap_max > 0:
    heatmap /= heatmap_max

# draw heatmap (matplotlib needs the data on the CPU)
plt.matshow(heatmap.cpu().numpy())

As I said, I’m largely copying what is written in the article above.
However, I’m not able to run the backpropagation and get the gradients. I always get this error:

RuntimeError                              Traceback (most recent call last)
<ipython-input-218-bddb7047f0a9> in <module>()
      1 print(pred)
----> 2 pred.backward()
      3 #pullthe gradients out of the model
      4 gradients = vgg_heatmap.get_activations_gradient()
      5 print(gradients)

1 frames
/usr/local/lib/python3.6/dist-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
    183                 products. Defaults to ``False``.
    184         """
--> 185         torch.autograd.backward(self, gradient, retain_graph, create_graph)
    186 
    187     def register_hook(self, hook):

/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
    125     Variable._execution_engine.run_backward(
    126         tensors, grad_tensors, retain_graph, create_graph,
--> 127         allow_unreachable=True)  # allow_unreachable flag
    128 
    129 

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Thanks

The argmax operation returns integer indices and is not differentiable, so its output is detached from the computation graph; calling backward() on it yields this error.
I’m not familiar with this particular blog post, but @halahup is also active here in the forum, so he might correct me. :slight_smile: