I want to run Grad-CAM on an ensemble model. Here is my code:
import torch
import matplotlib.pyplot as plt
from torch.autograd import Function
from torchvision import models
from torchvision import utils
import cv2
import sys
from collections import OrderedDict
import numpy as np
import argparse
import os
import torch.nn as nn
# Model / data-loading settings.
num_classes, batch_size = 3, 8
feature_extract = True

# Script state: `i` numbers the CAM images that get written out and
# `image` collects the images read from disk.
i, image = 0, []

# Softmax taken along dimension 1.
sm = nn.Softmax(dim=1)
class FeatureExtractor():
    """Run a model child-by-child and capture selected intermediate results.

    The input is pushed through every *direct* child of ``model`` in
    registration order, each child receiving the previous child's output.
    For children whose name is listed in ``target_layers`` the activation is
    recorded and a tensor hook is attached so that the gradient reaching that
    activation during a later backward pass is stored in ``self.gradients``.

    Because the children are simply chained, the result only matches the
    model's own ``forward`` when that forward is a plain sequential pipeline;
    a model with parallel branches will hand each branch the previous
    branch's output instead of the original input.
    """

    def __init__(self, model, target_layers):
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Tensor hook: remember the gradient of a captured activation."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Return ``(captured_activations, final_output)`` for input ``x``."""
        # Start every forward pass with a clean gradient record.
        self.gradients = []
        activations = []
        for layer_name, layer in self.model._modules.items():
            x = layer(x)
            if layer_name not in self.target_layers:
                continue
            x.register_hook(self.save_gradient)
            activations.append(x)
        return activations, x
class ModelOutputs():
    """Make a forward pass and expose:

    1. The network output.
    2. Activations from the targeted intermediate layers.
    3. Gradients from the targeted intermediate layers.

    The feature extractor produces the pre-classifier features; they are
    flattened per sample and passed through ``model.fc`` to obtain the
    network output.
    """

    def __init__(self, model, target_layers, use_cuda):
        self.model = model
        self.feature_extractor = FeatureExtractor(self.model, target_layers)
        self.cuda = use_cuda

    def get_gradients(self):
        """Return the gradients recorded by the feature extractor."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Return ``(target_activations, output)`` for input ``x``."""
        target_activations, output = self.feature_extractor(x)
        output = output.view(output.size(0), -1)

        # Use the head of the wrapped model, not whatever global happens to
        # be called `model`.
        head = self.model.fc
        # Feed the head on the device its weights live on; a parameter-free
        # head (e.g. nn.Identity) accepts the tensor wherever it is.
        head_param = next(head.parameters(), None)
        if head_param is not None:
            output = output.to(head_param.device)
        output = head(output)

        if self.cuda:
            output = output.cuda()
        return target_activations, output
def preprocess_image(img):
    """Turn an H x W x 3 image array into a normalised 1 x 3 x H x W tensor.

    The array is copied, its last axis is reversed, each of the three
    channels is shifted by a fixed mean and divided by a fixed standard
    deviation, and the result is moved to channel-first layout with a leading
    batch axis.  The returned tensor has ``requires_grad`` enabled.

    NOTE(review): the constants look like the usual ImageNet RGB statistics
    and the axis flip like a BGR -> RGB conversion for OpenCV images, which
    would require ``img`` to be float data in [0, 1] -- confirm with callers.
    """
    channel_means = (0.485, 0.456, 0.406)
    channel_stds = (0.229, 0.224, 0.225)

    # Copy first so the caller's array is never modified.
    normalized = img.copy()[:, :, ::-1]
    for channel, (mean, std) in enumerate(zip(channel_means, channel_stds)):
        normalized[:, :, channel] = normalized[:, :, channel] - mean
        normalized[:, :, channel] = normalized[:, :, channel] / std

    # from_numpy needs positive strides, hence the contiguous copy.
    chw = np.ascontiguousarray(normalized.transpose(2, 0, 1))
    tensor = torch.from_numpy(chw).unsqueeze_(0)
    tensor.requires_grad = True
    return tensor
def show_cam_on_image(img, mask, name):
    """Overlay a CAM mask on an image and save it as ``cam/cam_<name>.jpg``.

    Args:
        img: image array that is added to the colour-mapped mask; it must be
            broadcast-compatible with the H x W x 3 heatmap.
        mask: 2-D array; it is scaled by 255 and cast to uint8 before the JET
            colour map is applied.
        name: value formatted into the output file name.
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    # cv2.imwrite does not create missing directories; without this the
    # image is silently not written when "cam/" does not exist yet.
    os.makedirs("cam", exist_ok=True)
    cv2.imwrite("cam/cam_{}.jpg".format(name), np.uint8(255 * cam))
class GradCam:
    """Compute a Grad-CAM map for one input from the last captured layer.

    The model is switched to eval mode (and moved to the GPU when
    ``use_cuda`` is true).  Activations and gradients come from a
    ``ModelOutputs`` wrapper built on ``target_layer_names``.
    """

    def __init__(self, model, target_layer_names, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.extractor = ModelOutputs(self.model, target_layer_names, use_cuda)

    def forward(self, input):
        """Plain forward pass through the wrapped model."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Return a 224 x 224 float32 map scaled to the range [0, 1].

        Args:
            input: input tensor for the network (moved to the GPU when the
                instance was created with ``use_cuda``).
            index: class index to explain; when ``None`` the arg-max of the
                network output is used.
        """
        if self.cuda:
            features, output = self.extractor(input.cuda())
        else:
            features, output = self.extractor(input)

        if index is None:
            index = np.argmax(output.cpu().data.numpy())

        # Select the score of the requested class with a one-hot mask.
        one_hot = torch.zeros((1, output.size(-1)), dtype=torch.float32)
        one_hot[0, int(index)] = 1
        score = torch.sum(one_hot.to(output.device) * output)

        self.model.zero_grad()
        score.backward(retain_graph=True)

        grads_val = self.extractor.get_gradients()[-1].cpu().data.numpy()
        target = features[-1].cpu().data.numpy()[0, :]

        # Channel weights: spatial mean of the gradients of the first sample.
        weights = np.mean(grads_val, axis=(2, 3))[0, :]
        # Weighted sum of the activation channels -> one H x W map.
        cam = np.tensordot(weights, target, axes=1).astype(np.float32)

        cam = np.maximum(cam, 0)
        cam = cv2.resize(cam, (224, 224))
        cam = cam - np.min(cam)
        # A constant map has a zero peak; dividing would fill it with NaNs.
        peak = np.max(cam)
        if peak > 0:
            cam = cam / peak
        return cam
class GuidedBackpropReLUModel:
    """Guided backpropagation: gradient of a class score w.r.t. the input,
    with negative gradients clamped to zero at every ``nn.ReLU`` module.

    The model is switched to eval mode (and moved to the GPU when
    ``use_cuda`` is true).
    """

    def __init__(self, model, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        # Only ReLU modules need the hook; bp_relu leaves every other module
        # untouched anyway, so hooking them all just adds overhead.
        # NOTE(review): the non-full hook API is kept on purpose -- the full
        # backward hook is reported to reject in-place ReLUs; confirm before
        # migrating.
        for module in self.model.modules():
            if isinstance(module, nn.ReLU):
                module.register_backward_hook(self.bp_relu)

    def bp_relu(self, module, grad_in, grad_out):
        """Backward hook: clamp the gradient entering a ReLU to be >= 0."""
        if isinstance(module, nn.ReLU):
            return (torch.clamp(grad_in[0], min=0.0),)

    def forward(self, input):
        """Plain forward pass through the wrapped model."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Return the guided gradient of the first sample as a numpy array.

        Args:
            input: 4-D leaf tensor with ``requires_grad`` enabled; its
                ``.grad`` is overwritten by this call.
            index: class index to explain; when ``None`` the arg-max of the
                network output is used.
        """
        # backward() accumulates into .grad; clear leftovers from an earlier
        # call so the result reflects this call only.
        if input.grad is not None:
            input.grad = None

        output = self.forward(input.cuda() if self.cuda else input)

        if index is None:
            index = np.argmax(output.cpu().data.numpy())

        # Select the score of the requested class with a one-hot mask.
        one_hot = torch.zeros((1, output.size(-1)), dtype=torch.float32)
        one_hot[0, int(index)] = 1
        score = torch.sum(one_hot.to(output.device) * output)

        score.backward(retain_graph=True)
        return input.grad.cpu().data.numpy()[0, :, :, :]
def get_args():
    """Parse the command line and report which device will be used.

    Recognised options are ``--use-cuda`` (flag) and ``--image-path``
    (string, default ``./examples/``).  ``use_cuda`` is forced to false when
    CUDA is not available.  Returns the parsed namespace.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--use-cuda', action='store_true', default=False,
                     help='Use NVIDIA GPU acceleration')
    cli.add_argument('--image-path', type=str, default='./examples/',
                     help='Input image path')
    parsed = cli.parse_args()

    # Honour the flag only when a GPU is actually usable.
    if parsed.use_cuda and not torch.cuda.is_available():
        parsed.use_cuda = False

    device_note = ("Using GPU for acceleration" if parsed.use_cuda
                   else "Using CPU for computation")
    print(device_note)
    return parsed
class MyEnsemble(nn.Module):
    """Three-branch ensemble with a shared linear classifier.

    Each branch's ``fc`` layer is replaced by ``nn.Identity`` so the branch
    returns its features.  The flattened features of the three branches are
    concatenated, passed through a ReLU and classified by one linear layer
    that expects 1536 input features in total.

    Args:
        modelA, modelB, modelC: branch networks exposing an ``fc`` attribute.
        nb_classes: number of outputs of the new classifier.
    """

    def __init__(self, modelA, modelB, modelC, nb_classes=3):
        super(MyEnsemble, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        self.modelC = modelC
        # Remove last linear layer
        self.modelA.fc = nn.Identity()
        self.modelB.fc = nn.Identity()
        self.modelC.fc = nn.Identity()
        # Create new classifier
        self.classifier = nn.Linear(1536, nb_classes)

    def forward(self, x):
        """Return the classifier output for a batch ``x``."""
        branch_features = [
            # clone to make sure x is not changed by inplace methods
            self.modelA(x.clone()),
            self.modelB(x),
            self.modelC(x),
        ]
        flattened = [feat.view(feat.size(0), -1) for feat in branch_features]
        x = torch.cat(flattened, dim=1)
        # torch.relu replaces F.relu: `F` is never imported in this file.
        return self.classifier(torch.relu(x))
# Train your separate models
# ...
# We use pretrained torchvision models here
#
# Every branch is a ResNet-18 whose final layer is replaced by
# Dropout(0.5) + Linear(num_ftrs, 2) and whose weights are then restored from
# its own checkpoint.
# NOTE(review): load_state_dict is strict by default, so the checkpoint
# presumably overwrites all of the pretrained weights -- confirm whether the
# pretrained download is needed at all.
_branch_checkpoints = ('checkpoint1.pt', 'checkpoint2.pt', 'checkpoint3.pt')
_branches = []
for _checkpoint in _branch_checkpoints:
    _branch = models.resnet18(pretrained=True)
    num_ftrs = _branch.fc.in_features
    _branch.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_ftrs, 2))
    # map_location lets checkpoints saved on a GPU load on CPU-only hosts.
    _branch.load_state_dict(torch.load(_checkpoint, map_location='cpu'))
    _branches.append(_branch)
modelA, modelB, modelC = _branches

model = MyEnsemble(modelA, modelB, modelC)
model.load_state_dict(torch.load('checkpoint_ensemble.pt', map_location='cpu'))
model.eval()
if __name__ == '__main__':
    # Usage: python grad_cam.py --image-path <image_dir> [--use-cuda]
    #
    # Reads every image below --image-path, computes a Grad-CAM map for
    # "layer4" and writes one overlay per image, numbered from 1.
    args = get_args()
    model.load_state_dict(
        torch.load('checkpoint_ensembleP.pt', map_location='cpu'))
    model.eval()
    print(model)
    grad_cam = GradCam(model, target_layer_names=["layer4"],
                       use_cuda=args.use_cuda)

    for root, dirs, filename in os.walk(args.image_path):
        print(filename)
        for s in filename:
            # Join with the directory os.walk is currently in, so files in
            # sub-directories (or a path without trailing slash) resolve.
            path = os.path.join(root, s)
            loaded = cv2.imread(path, 1)
            if loaded is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping unreadable image: {}".format(path))
                continue
            image.append(loaded)

    for i, img in enumerate(image, start=1):
        img = np.float32(cv2.resize(img, (224, 224))) / 255
        input_tensor = preprocess_image(img)
        # The original set a misspelled attribute ("required_grad"), which
        # had no effect on autograd.
        input_tensor.requires_grad_(True)
        print('input.size()=', input_tensor.size())
        target_index = None
        mask = grad_cam(input_tensor, target_index)
        show_cam_on_image(img, mask, i)
But I am getting this error: RuntimeError: Expected 4-dimensional input for 4-dimensional weight 64 3 7 7, but got 2-dimensional input of size [1, 512] instead