GradCAM batch parallel on GPU

Hi guys, good morning! Would you please help me?

I’m trying to compute Grad-CAM in parallel over a whole batch.

I’m currently using this network:

import torch
import torch.nn as nn
import torchvision


class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=12, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=12),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=12, out_channels=36, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=36),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=36, out_channels=50, kernel_size=3, stride=1, padding=1),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(50 * 49, 100),
            nn.Linear(100, 10),
        )
        self.gradient = None          # filled by the backward hook
        self.relu = nn.ReLU()
        self.resize = torchvision.transforms.Resize(28)

    def get_activations(self, x):
        # activations of the last conv layer, no graph needed
        with torch.no_grad():
            return self.cnn(x)

    def hook_activation(self, grad):
        # stores the gradient w.r.t. the last conv activations during backward
        self.gradient = grad

    def forward(self, images):
        x = self.cnn(images)
        x.register_hook(self.hook_activation)
        x = self.classifier(x)
        return x
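
As a sanity check of the shapes (assuming 1×28×28 grayscale inputs, e.g. MNIST, which is what the Linear(50*49, 100) layer and Resize(28) suggest), a dummy forward/backward looks like this:

model = CNN()
dummy = torch.randn(8, 1, 28, 28)   # batch of 8 grayscale 28x28 images
logits = model(dummy)               # forward pass registers the hook
print(logits.shape)                 # torch.Size([8, 10])

logits.sum().backward()             # any scalar backward fires the hook
print(model.gradient.shape)         # torch.Size([8, 50, 7, 7]), gradient w.r.t. the last conv activations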

And this function to obtain the Grad-CAM:

def get_grad_cam(model, image, label, size):
    output = model(image)  # forward step
    _, predicted = torch.max(output.data, 1)
    output[:, predicted].backward()  # backward with respect to prediction
    activations = model.get_activations(image)
    weights = torch.sum(model.gradient, axis=[0, 2, 3])
    scalar_prod = torch.tensordot(weights, activations, dims=([0], [1]))
    scalar_prod = model.relu(scalar_prod)
    scalar_prod /= torch.max(scalar_prod)
    return model.resize(scalar_prod)

Apparently the main problem here is running the backward pass on the predictions so that the gradients get stored.

I’m stuck on this error: RuntimeError: grad can be implicitly created only for scalar outputs

Thanks a lot for your time!

The error is most likely raised in:

output[:,predicted].backward() 

Assuming output[:, predicted] contains multiple values, you would either need to reduce it to a scalar or pass an explicit gradient of the same shape.
Here is an example:

output = torch.randn(10, 10, requires_grad=True)
predicted = torch.tensor([0, 3, 5, 7])

# fails
output[:,predicted].backward() 
# RuntimeError: grad can be implicitly created only for scalar outputs

# works
output[:,predicted].mean().backward() 
output[:,predicted].backward(gradient=torch.ones_like(output[:, predicted])) 
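
For your batched use case (where predicted comes from torch.max(output, 1) and thus has one entry per sample) you most likely also want to pick each sample's own predicted logit instead of output[:, predicted], which indexes every row with every predicted class. A possible sketch:

# select each sample's own predicted logit -> shape [batch_size]
selected = output[torch.arange(output.size(0)), predicted]
# summing is fine here: sample i's logit does not depend on sample j's
# activations, so the hooked gradient stays per-sample
selected.sum().backward()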

I just can’t figure out how to backpropagate through the whole batch. Maybe I could save the gradient of each image in the batch separately (but that could create a memory problem). Thank you anyway…
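
In case it is useful, here is a minimal sketch of how the pieces above could be combined into a single batched call (one possible approach, not the only one): backward once from each sample's own predicted logit, then keep the batch dimension when pooling the gradients, i.e. sum only over the spatial dims instead of [0, 2, 3]. It assumes model.eval() has been called so the extra get_activations pass matches the hooked forward pass, and it keeps the sum-based channel weighting and max normalization from the code above, applied per sample:

def get_grad_cam_batch(model, images):
    output = model(images)                                   # forward pass registers the hook
    predicted = output.argmax(dim=1)                         # [B]

    # backward from each sample's own predicted logit in one call
    selected = output[torch.arange(images.size(0)), predicted]
    model.zero_grad()
    selected.sum().backward()

    activations = model.get_activations(images)              # [B, 50, 7, 7]
    weights = model.gradient.sum(dim=[2, 3])                 # [B, 50], batch dim is kept

    cam = (weights[:, :, None, None] * activations).sum(dim=1)      # [B, 7, 7]
    cam = torch.relu(cam)
    cam = cam / cam.amax(dim=(1, 2), keepdim=True).clamp(min=1e-8)  # per-sample normalization
    return model.resize(cam)                                 # [B, 28, 28]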