Hello everyone, I'm trying to create a Grad-CAM implementation with a VGG16 architecture. I'm using binary cross-entropy (BCE) loss to train my model for binary classification.
I adapted the code from these links for my implementation: link1, link2.
I'm extracting the last conv layer from the feature part of the VGG16 architecture; this layer is named "model.28" in my implementation (that's only one part of the whole architecture).
def get_last_conv_block(self):
    # Return the module named "model.28" (the last Conv2d of the feature block)
    layers_name = "model.28"
    for name, layer in self.named_modules():
        if name == layers_name:
            return layer
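Calling it should return the last Conv2d of the feature block (quick sanity check):

conv_layer = lightning_model.model.get_last_conv_block()
print(conv_layer)  # expect: Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

For reference, this is the printed feature part of the model: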
(model): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Code that I used:
import numpy as np
import cv2

def compute_gradcam(model, img_tensor):
    model.eval()
    conv_layer = model.get_last_conv_block()
    activations = None

    # Forward hook to capture the feature maps of the last conv layer
    def forward_hook(module, input, output):
        nonlocal activations
        activations = output

    hook = conv_layer.register_forward_hook(forward_hook)

    # Compute gradients of the prediction w.r.t. the input image
    img_tensor.requires_grad_(True)
    preds = model(img_tensor)
    preds.backward()
    print(preds)
    model.zero_grad()

    # Get the input gradients and average them over the batch and spatial dims
    grads = img_tensor.grad.cpu().numpy()
    pooled_grads = np.mean(grads, axis=(0, 2, 3))

    # Remove the hook
    hook.remove()

    # Weight the conv activations by the pooled gradients, then average over channels
    activations = activations.detach().cpu().numpy()[0]
    for i in range(pooled_grads.shape[0]):
        activations[i, ...] *= pooled_grads[i]
    heatmap = np.mean(activations, axis=0)
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)

    model.zero_grad()
    return heatmap
def overlay_heatmap(img_path, heatmap, alpha=0.4):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    superimposed_img = cv2.addWeighted(img, alpha, heatmap, 1 - alpha, 0)
    return superimposed_img
heatmap = compute_gradcam(lightning_model.model, img_tensor)
output_img = overlay_heatmap(IMAGE_ROOT_EXAMPLE, heatmap)
The heatmap doesn't seem to make sense after running this.
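In case it matters, img_tensor is built roughly like this (simplified sketch; the exact resize and normalization values may differ in my actual code):

from PIL import Image
from torchvision import transforms

# Simplified preprocessing: one RGB image, resized and normalized with
# ImageNet statistics, with a batch dimension added.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
img = Image.open(IMAGE_ROOT_EXAMPLE).convert("RGB")
img_tensor = preprocess(img).unsqueeze(0)  # shape [1, 3, 224, 224]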
I also tried another implementation:
gradients = None
activations = None

def backward_hook(module, grad_input, grad_output):
    global gradients  # refers to the variable in the global scope
    print('Backward hook running...')
    gradients = grad_output
    # The gradients come inside a one-element tuple, hence the [0] index.
    # The conv layer at index 28 has 512 output channels, so this should be
    # torch.Size([batch size, 512, H, W]).
    print(f'Gradients size: {gradients[0].size()}')

def forward_hook(module, args, output):
    global activations  # refers to the variable in the global scope
    print('Forward hook running...')
    activations = output
    # Same expected shape as the gradients above.
    print(f'Activations size: {activations.size()}')

model = lightning_model.model
conv_layer = model.model[28]
backward_handle = conv_layer.register_full_backward_hook(backward_hook, prepend=False)
forward_handle = conv_layer.register_forward_hook(forward_hook, prepend=False)

model.eval()
preds = model(img_tensor)
But I'm getting this error: Output 0 of BackwardHookFunctionBackward is a view and is being modified inplace.
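I suspect this comes from the ReLU(inplace=True) right after the hooked conv layer modifying its output in place. Would turning off the in-place ReLUs be the right workaround, something like this?

import torch.nn as nn

# Possible workaround (not sure it's correct): make every ReLU non-inplace so
# the hooked conv output is no longer modified in place.
for module in model.modules():
    if isinstance(module, nn.ReLU):
        module.inplace = False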
Is my first implementation correct? Is it right to omit the ReLU when I extract the last conv layer? Which layers or blocks should I extract in other implementations?