GradCam implementation error with vgg16

Gabriel_Duran · April 7, 2025, 4:28am

Hello guys, Im trying to create a GradCam implementation with a vgg16 architecture. Im using BinaryCrossEntropy to train my model for binary classification.

I adapted the code from this links to make my implementation: link1, link2.

Im extracting the last conv layer from the feature part in the vgg16 architecture, this layer have the “model.18” name in my implementation(thats only one part from the whole architecture).

def get_last_conv_block(self):
        layers_name = "model.28"
        for name, layer in self.named_modules():
            if name == layers_name:
                return layer

(model): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )

Code that I used:

def compute_gradcam(model, img_tensor):
    model.eval()
    conv_layer = model.get_last_conv_block()

    activations = None
    def forward_hook(module, input, output):
        nonlocal activations
        activations = output

    hook = conv_layer.register_forward_hook(forward_hook)

    # Compute gradients
    img_tensor.requires_grad_(True)
    preds = model(img_tensor)
    preds.backward()
    print(preds)
    model.zero_grad()

    # Get gradients
    grads = img_tensor.grad.cpu().numpy()
    pooled_grads = np.mean(grads, axis=(0, 2, 3))

    # Remove the hook
    hook.remove()

    activations = activations.detach().cpu().numpy()[0]
    for i in range(pooled_grads.shape[0]):
        activations[i, ...] *= pooled_grads[i]

    heatmap = np.mean(activations, axis=0)
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)
    model.zero_grad()
    return heatmap

def overlay_heatmap(img_path, heatmap, alpha=0.4):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

    superimposed_img = cv2.addWeighted(img, alpha, heatmap, 1 - alpha, 0)
    return superimposed_img

heatmap = compute_gradcam(lightning_model.model, img_tensor)
output_img = overlay_heatmap(IMAGE_ROOT_EXAMPLE, heatmap)

The heatmap doesnt seems have sense after running this.

Also I tried with other implementation:

gradients = None
activations = None

def backward_hook(module, grad_input, grad_output):
  global gradients # refers to the variable in the global scope
  print('Backward hook running...')
  gradients = grad_output
  # In this case, we expect it to be torch.Size([batch size, 1024, 8, 8])
  print(f'Gradients size: {gradients[0].size()}') 
  # We need the 0 index because the tensor containing the gradients comes
  # inside a one element tuple.

def forward_hook(module, args, output):
  global activations # refers to the variable in the global scope
  print('Forward hook running...')
  activations = output
  # In this case, we expect it to be torch.Size([batch size, 1024, 8, 8])
  print(f'Activations size: {activations.size()}')

model = lightning_model.model 

conv_layer = model.model[28]
backward_hook = conv_layer.register_full_backward_hook(backward_hook, prepend=False)
forward_hook = conv_layer.register_forward_hook(forward_hook, prepend=False)

model.eval()
preds = model(img_tensor)

But Im getting this error: Output 0 of BackwardHookFunctionBackward is a view and is being modified inplace.

My first implementation is correct? Is right to omit the ReLu function when I extract the last conv layer? Which layers or block I should extract in other implementations?

ptrblck · April 7, 2025, 7:18pm

Could you try to disable the inplace=True ops in your model?

Gabriel_Duran · April 7, 2025, 10:48pm

Yes It fixed the compilation error. But I made some changes and the resulting plot seems no have sense. Do you have any recommendations?