Heatmap in ensemble method

I want to run Grad-CAM on an ensemble model. Here is my code:

import torch
import matplotlib.pyplot as plt
from torch.autograd import Function
from torchvision import models
from torchvision import utils
import cv2
import sys
from collections import OrderedDict
import numpy as np
import argparse
import os
import torch.nn as nn
import torch.nn.functional as F  # F.relu is used in MyEnsemble.forward
i=0

image = []
num_classes = 3
batch_size =8
feature_extract = True
sm = nn.Softmax(dim = 1)
class FeatureExtractor():
    """ Class for extracting activations and 
    registering gradients from targetted intermediate layers """
    def __init__(self, model, target_layers):
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        self.gradients.append(grad)

    def __call__(self, x):
        outputs = []
        self.gradients = []
        for name, module in self.model._modules.items():
            x = module(x)
            if name in self.target_layers:
                x.register_hook(self.save_gradient)
                outputs += [x]
        return outputs, x

class ModelOutputs():
    """ Class for making a forward pass, and getting:
    1. The network output.
    2. Activations from intermeddiate targetted layers.
    3. Gradients from intermeddiate targetted layers. """
    def __init__(self, model, target_layers,use_cuda):
        self.model = model
        self.feature_extractor = FeatureExtractor(self.model, target_layers)
        self.cuda = use_cuda
    def get_gradients(self):
        return self.feature_extractor.gradients

    def __call__(self, x):
        target_activations, output  = self.feature_extractor(x)
        output = output.view(output.size(0), -1)
        #print('classfier=',output.size())
        if self.cuda:
            output = output.cpu()
            output = model.fc(output).cuda()
        else:
            output = model.fc(output)
        return target_activations, output

def preprocess_image(img):
    means=[0.485, 0.456, 0.406]
    stds=[0.229, 0.224, 0.225]

    preprocessed_img = img.copy()[: , :, ::-1]
    for i in range(3):
        preprocessed_img[:, :, i] = preprocessed_img[:, :, i] - means[i]
        preprocessed_img[:, :, i] = preprocessed_img[:, :, i] / stds[i]
    preprocessed_img = \
        np.ascontiguousarray(np.transpose(preprocessed_img, (2, 0, 1)))
    preprocessed_img = torch.from_numpy(preprocessed_img)
    preprocessed_img.unsqueeze_(0)
    input = preprocessed_img
    input.requires_grad = True
    return input

def show_cam_on_image(img, mask,name):
    heatmap = cv2.applyColorMap(np.uint8(255*mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    cv2.imwrite("cam/cam_{}.jpg".format(name), np.uint8(255 * cam))
    
class GradCam:
    def __init__(self, model, target_layer_names, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        self.extractor = ModelOutputs(self.model, target_layer_names, use_cuda)

    def forward(self, input):
        return self.model(input) 

    def __call__(self, input, index = None):
        if self.cuda:
            features, output = self.extractor(input.cuda())
        else:
            features, output = self.extractor(input)

        if index is None:
            index = np.argmax(output.cpu().data.numpy())

        one_hot = np.zeros((1, output.size()[-1]), dtype = np.float32)
        one_hot[0][index] = 1
        one_hot = torch.Tensor(torch.from_numpy(one_hot))
        one_hot.requires_grad = True
        
        if self.cuda:
            one_hot = torch.sum(one_hot.cuda() * output)
        else:
            one_hot = torch.sum(one_hot * output)

        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy()
        #print('grads_val',grads_val.shape)
        target = features[-1]
        target = target.cpu().data.numpy()[0, :]

        weights = np.mean(grads_val, axis = (2, 3))[0, :]
        #print('weights',weights.shape)
        cam = np.zeros(target.shape[1 : ], dtype = np.float32)
        #print('cam',cam.shape)
        #print('features',features[-1].shape)
        #print('target',target.shape)
        for i, w in enumerate(weights):
            cam += w * target[i, :, :]

        cam = np.maximum(cam, 0)
        cam = cv2.resize(cam, (224, 224))
        cam = cam - np.min(cam)
        cam = cam / np.max(cam)
        return cam
class GuidedBackpropReLUModel:
    def __init__(self, model, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        for module in self.model.named_modules():
            module[1].register_backward_hook(self.bp_relu)

    def bp_relu(self, module, grad_in, grad_out):
        if isinstance(module, nn.ReLU):
            return (torch.clamp(grad_in[0], min=0.0),)
    def forward(self, input):
        return self.model(input)

    def __call__(self, input, index = None):
        if self.cuda:
            output = self.forward(input.cuda())
        else:
            output = self.forward(input)
        if index is None:
            index = np.argmax(output.cpu().data.numpy())
        #print(input.grad)
        one_hot = np.zeros((1, output.size()[-1]), dtype = np.float32)
        one_hot[0][index] = 1
        one_hot = torch.from_numpy(one_hot)
        one_hot.requires_grad = True
        if self.cuda:
            one_hot = torch.sum(one_hot.cuda() * output)
        else:
            one_hot = torch.sum(one_hot * output)
        #self.model.classifier.zero_grad()
        one_hot.backward(retain_graph=True)
        output = input.grad.cpu().data.numpy()
        output = output[0,:,:,:]

        return output

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cuda', action='store_true', default=False,
                        help='Use NVIDIA GPU acceleration')
    parser.add_argument('--image-path', type=str, default='./examples/',
                        help='Input image path')
    args = parser.parse_args()
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print("Using GPU for acceleration")
    else:
        print("Using CPU for computation")

    return args

class MyEnsemble(nn.Module):
    def __init__(self, modelA, modelB, modelC, nb_classes=3):
        super(MyEnsemble, self).__init__()
        self.modelA= modelA
        self.modelB = modelB
        self.modelC = modelC
        # Remove last linear layer
        self.modelA.fc = nn.Identity()
        self.modelB.fc =nn.Identity()
        self.modelC.fc =nn.Identity()
        # Create new classifier
        self.classifier = nn.Linear(1536, nb_classes)
        
    def forward(self, x):
        x1 = self.modelA(x.clone())  # clone to make sure x is not changed by inplace methods
        x1 = x1.view(x1.size(0), -1)
        x2 = self.modelB(x)
        x2 = x2.view(x2.size(0), -1)
        x3 = self.modelC(x)
        x3 = x3.view(x3.size(0), -1)
        x = torch.cat((x1, x2, x3), dim=1)
        x = self.classifier(F.relu(x))
        return x

# Train your separate models
# ...
# We use pretrained torchvision models here
modelA = models.resnet18(pretrained=True)
num_ftrs = modelA.fc.in_features
modelA.fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(num_ftrs, 2))
modelB = models.resnet18(pretrained=True)
num_ftrs = modelB.fc.in_features
modelB.fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(num_ftrs, 2))
modelC = models.resnet18(pretrained=True)
num_ftrs = modelC.fc.in_features
modelC.fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(num_ftrs, 2))
modelA.load_state_dict(torch.load('checkpoint1.pt'))
modelB.load_state_dict(torch.load('checkpoint2.pt'))
modelC.load_state_dict(torch.load('checkpoint3.pt'))
model = MyEnsemble(modelA, modelB, modelC)
model.load_state_dict(torch.load('checkpoint_ensemble.pt'))
model.eval()

if __name__ == '__main__':


    """ python grad_cam.py <path_to_image>
   . """
    args = get_args()    
    model.load_state_dict(torch.load('checkpoint_ensembleP.pt'))
    
    model.eval()
    print(model)
    grad_cam = GradCam(model , target_layer_names = ["layer4"], use_cuda=args.use_cuda)
    x = os.walk(args.image_path)
    for root, dirs, filename in x:
        print(filename)
    for s in filename:
        image.append(cv2.imread(args.image_path + s, 1))
    for img in image:
        img = np.float32(cv2.resize(img, (224, 224))) / 255
        input = preprocess_image(img)
        input.requires_grad = True
        print('input.size()=',input.size())
        target_index =None

        mask = grad_cam(input, target_index)
        i=i+1 
        show_cam_on_image(img, mask,i)

But I am getting: RuntimeError: Expected 4-dimensional input for 4-dimensional weight 64 3 7 7, but got 2-dimensional input of size [1, 512] instead

Which line of code throws this error?

Also note that you are removing the .fc layers inside MyEnsemble.__init__ for all submodules.
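
As a hint, here is a quick sketch of what FeatureExtractor.__call__ actually iterates over (illustrative snippet, assuming model is the MyEnsemble instance built above):

# Hypothetical diagnostic snippet: list the ensemble's top-level children.
for name, module in model._modules.items():
    print(name, type(module).__name__)
# prints:
#   modelA ResNet
#   modelB ResNet
#   modelC ResNet
#   classifier Linear

FeatureExtractor.__call__ applies x = module(x) to each of these children in turn, so modelA's flattened [1, 512] output is fed straight into modelB, whose conv1 expects a 4-dimensional image batch.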

I am getting an error like this:
input.size()= torch.Size([1, 3, 224, 224])
Traceback (most recent call last):
File "c:\Users\Sampa\Downloads\test\test.py", line 270, in <module>
mask = grad_cam(input, target_index)
File "c:\Users\Sampa\Downloads\test\test.py", line 104, in __call__
features, output = self.extractor(input)
File "c:\Users\Sampa\Downloads\test\test.py", line 54, in __call__
target_activations, output = self.feature_extractor(x)
File "c:\Users\Sampa\Downloads\test\test.py", line 35, in __call__
x = module(x)
File "C:\Users\Sampa\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "C:\Users\Sampa\Anaconda3\lib\site-packages\torchvision\models\resnet.py", line 216, in forward
return self._forward_impl(x)
File "C:\Users\Sampa\Anaconda3\lib\site-packages\torchvision\models\resnet.py", line 199, in _forward_impl
x = self.conv1(x)
File "C:\Users\Sampa\Anaconda3\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "C:\Users\Sampa\Anaconda3\lib\site-packages\torch\nn\modules\conv.py", line 345, in forward
return self.conv2d_forward(input, self.weight)
File "C:\Users\Sampa\Anaconda3\lib\site-packages\torch\nn\modules\conv.py", line 342, in conv2d_forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight 64 3 7 7, but got 2-dimensional input of size [1, 512] instead
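
One possible way around this error is to hook the last conv block of a single submodel directly and run the full ensemble forward pass, instead of iterating the ensemble's children. A rough sketch (not the GradCam class above; it assumes model is the MyEnsemble instance, input is a preprocessed [1, 3, 224, 224] tensor from preprocess_image, and modelA is chosen arbitrarily):

import numpy as np
import cv2

activations, gradients = [], []

def save_activation(module, inp, out):
    # store the layer4 feature map and hook its gradient, like FeatureExtractor does
    activations.append(out)
    out.register_hook(lambda grad: gradients.append(grad))

handle = model.modelA.layer4.register_forward_hook(save_activation)

model.zero_grad()
output = model(input)               # full ensemble forward pass, shape [1, 3]
idx = output.argmax(dim=1).item()   # top-1 predicted class
output[0, idx].backward()

acts = activations[0].detach().cpu().numpy()[0]   # [512, 7, 7]
grads = gradients[0].detach().cpu().numpy()[0]    # [512, 7, 7]
weights = grads.mean(axis=(1, 2))                 # channel-wise importance
cam = np.maximum((weights[:, None, None] * acts).sum(axis=0), 0)
cam = cv2.resize(cam, (224, 224))
cam = (cam - cam.min()) / (cam.max() + 1e-8)
# cam can now be overlaid on the input image with show_cam_on_image(img, cam, i)

handle.remove()

Hooking the activation tensor inside the forward hook mirrors what FeatureExtractor already does, but avoids feeding modelA's flattened output into the other submodels.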

@ptrblck I modified this code for my ensemble approach as follows:

import io
import requests
from PIL import Image
from torchvision import models, transforms
from torch.autograd import Variable
from torch.nn import functional as F
import numpy as np
import cv2
import pdb
import torch
from torch import nn

class MyEnsemble(nn.Module):
    def __init__(self, modelA, modelB, modelC, nb_classes=3):
        super(MyEnsemble, self).__init__()
        self.modelA= modelA
        self.modelB = modelB
        self.modelC = modelC
        # Remove last linear layer
        self.modelA.fc = nn.Identity()
        self.modelB.fc =nn.Identity()
        self.modelC.fc =nn.Identity()
        # Create new classifier
        self.classifier = nn.Linear(1536, nb_classes)
        
    def forward(self, x):
        x1 = self.modelA(x.clone())  # clone to make sure x is not changed by inplace methods
        x1 = x1.view(x1.size(0), -1)
        x2 = self.modelB(x)
        x2 = x2.view(x2.size(0), -1)
        x3 = self.modelC(x)
        x3 = x3.view(x3.size(0), -1)
        x = torch.cat((x1, x2, x3), dim=1)
        x = self.classifier(F.relu(x))
        return x

# Train your separate models
# ...
# We use pretrained torchvision models here
modelA = models.resnet18(pretrained=True)
num_ftrs = modelA.fc.in_features
modelA.fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(num_ftrs, 2))
modelB = models.resnet18(pretrained=True)
num_ftrs = modelB.fc.in_features
modelB.fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(num_ftrs, 2))
modelC = models.resnet18(pretrained=True)
num_ftrs = modelC.fc.in_features
modelC.fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(num_ftrs, 2))
modelA.load_state_dict(torch.load('checkpoint1.pt'))
modelB.load_state_dict(torch.load('checkpoint2.pt'))
modelC.load_state_dict(torch.load('checkpoint3.pt'))
model = MyEnsemble(modelA, modelB, modelC)
model.load_state_dict(torch.load('checkpoint_ensemble.pt'))

finalconv_name = 'layer4'
print(model)
model.eval()

# hook the feature extractor
features_blobs = []
def hook_feature(module, input, output):
    features_blobs.append(output.data.cpu().numpy())

model._modules.get(finalconv_name).register_forward_hook(hook_feature)

# get the softmax weight
params = list(model.parameters())
weight_softmax = np.squeeze(params[-2].data.numpy())

def returnCAM(feature_conv, weight_softmax, class_idx):
    # generate the class activation maps upsample to 256x256
    size_upsample = (256, 256)
    bz, nc, h, w = feature_conv.shape
    output_cam = []
    for idx in class_idx:
        cam = weight_softmax[idx].dot(feature_conv.reshape((nc, h*w)))
        cam = cam.reshape(h, w)
        cam = cam - np.min(cam)
        cam_img = cam / np.max(cam)
        cam_img = np.uint8(255 * cam_img)
        output_cam.append(cv2.resize(cam_img, size_upsample))
    return output_cam


normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
preprocess = transforms.Compose([
   transforms.Resize((224,224)),
   transforms.ToTensor(),
   normalize
])

img_pil=Image.open('1.jpeg')
#img_pil.save('test.jpg')
img_tensor = preprocess(img_pil)
img_variable = Variable(img_tensor.unsqueeze(0))
logit = model(img_variable)

# download the imagenet category list
# classes = {int(key):value for (key, value)
#           in requests.get(LABELS_URL).json().items()}

h_x = F.softmax(logit, dim=1).data.squeeze()
probs, idx = h_x.sort(0, True)
probs = probs.numpy()
idx = idx.numpy()

# output the prediction
# for i in range(0, 5):
#     print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

# generate class activation mapping for the top1 prediction
CAMs = returnCAM(features_blobs[0], weight_softmax, [idx[0]])

# render the CAM and output
#print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])
img = cv2.imread('1.jpeg')
height, width, _ = img.shape
heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)
result = heatmap * 0.3 + img * 0.5
cv2.imwrite('CAM1.jpeg', result)

The error is:

  model._modules.get(finalconv_name).register_forward_hook(hook_feature)

AttributeError: 'NoneType' object has no attribute 'register_forward_hook'
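
The lookup fails because 'layer4' is not a direct child of MyEnsemble, so model._modules.get(finalconv_name) returns None. A rough sketch of one way to attach the hook instead (assuming the MyEnsemble instance above; choosing modelA and the [:, :512] slice are assumptions based on the torch.cat order in forward):

print(list(model._modules.keys()))
# -> ['modelA', 'modelB', 'modelC', 'classifier']   (no 'layer4' at the top level)

# register the hook on the submodel that actually owns layer4, e.g. modelA
model.modelA.layer4.register_forward_hook(hook_feature)

# Assumption based on the torch.cat order in MyEnsemble.forward: params[-2] is the
# ensemble classifier weight of shape [3, 1536], and its first 512 columns multiply
# modelA's pooled features, so returnCAM would need only that slice when using
# modelA's layer4 activations.
weight_softmax = np.squeeze(params[-2].data.numpy())[:, :512]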