Hello,
I am trying to write some Class Activation Maps to TensorBoard. To obtain the CAMs I am basing my code on utkuozbulak's Score-CAM implementation.
I believe it is producing the desired results, but I am having problems writing them to TensorBoard.
I get the following error:
AssertionError Traceback (most recent call last)
<ipython-input-24-f63c18ac42f9> in <module>
9 custom_model.train(loss_func = criterion, optimizer = optimizer, lr_scheduler = lr_scheduler,
10 learning_rate = learning_rate, epochs = 10, trainloader = train_loader,
---> 11 valloader = val_loader, eval_period = 2)
<ipython-input-3-eb7a0260a28f> in train(self, loss_func, optimizer, lr_scheduler, learning_rate, epochs, trainloader, valloader, eval_period)
262 all_layers_maps = np.hstack(all_layers_maps)
263
--> 264 self.writer.add_image("Class Activation Maps, Layers 1-4", torch.from_numpy(all_layers_maps), global_step = epoch)
265
266
~/miniconda3/lib/python3.6/site-packages/torch/utils/tensorboard/writer.py in add_image(self, tag, img_tensor, global_step, walltime, dataformats)
538 img_tensor = workspace.FetchBlob(img_tensor)
539 self._get_file_writer().add_summary(
--> 540 image(tag, img_tensor, dataformats=dataformats), global_step, walltime)
541
542 def add_images(self, tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW'):
~/miniconda3/lib/python3.6/site-packages/torch/utils/tensorboard/summary.py in image(tag, tensor, rescale, dataformats)
300 """
301 tensor = make_np(tensor)
--> 302 tensor = convert_to_HWC(tensor, dataformats)
303 # Do not assume that user passes in values in [0, 255], use data type to detect
304 scale_factor = _calc_scale_factor(tensor)
~/miniconda3/lib/python3.6/site-packages/torch/utils/tensorboard/_utils.py in convert_to_HWC(tensor, input_format)
99 input_format: {}".format(input_format)
100 assert(len(tensor.shape) == len(input_format)), "size of input tensor and input format are different. \
--> 101 tensor shape: {}, input_format: {}".format(tensor.shape, input_format)
102 input_format = input_format.upper()
103
AssertionError: size of input tensor and input format are different. tensor shape: (4, 1200, 300, 4), input_format: CHW
There are 4 images of 300x300 stacked horizontally (hence the 1200), but I am not entirely certain where the 4 in the last dimension came from (unless that is correct and I simply have to permute the axes to obtain the desired input format?).
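My current suspicion is that the 4 in the last dimension is an alpha channel: matplotlib colormaps return RGBA, and apply_colormap_on_image composites RGBA images, so np.array(heatmap_image) should come out as (H, W, 4). A quick standalone check of that suspicion (dummy data, purely for illustration):

import numpy as np
import matplotlib.cm as mpl_color_map

# A dummy 300x300 activation map scaled to 0-1, as in generate_cam
activation = np.random.rand(300, 300)

# Apply a colormap the same way apply_colormap_on_image does
heatmap = mpl_color_map.get_cmap("gnuplot2")(activation)
print(heatmap.shape)  # (300, 300, 4) -- RGB plus an alpha channel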
The only other idea that comes to mind is that it could have been added by:
image = torch.unsqueeze(image, 0).cuda()
I had to add the batch dimension because previously I was getting an error without it.
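Although, if I understand unsqueeze correctly, it only inserts a size-1 batch dimension at the front rather than a trailing channel, so it is probably not the culprit:

import torch

image = torch.rand(3, 300, 300)    # C, H, W
image = torch.unsqueeze(image, 0)  # inserts a new dimension at position 0
print(image.shape)                 # torch.Size([1, 3, 300, 300])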
The Score-CAM class looks as follows:
import copy
import numpy as np
from PIL import Image
import matplotlib.cm as mpl_color_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CamExtractor():
    """
    Extracts CAM features from the model
    """
    def __init__(self, model, target_layer, device):
        self.model = model.to(device)
        self.target_layer = target_layer

    def forward_pass_on_convolutions(self, x):
        """
        Does a forward pass on convolutions, hooks the function at given layer
        """
        conv_output = None
        for module_pos, module in self.model._modules.items():
            x = module(x)  # Forward
            if module_pos == self.target_layer:
                conv_output = x  # Save the convolution output on that layer
        return conv_output, x

    def forward_pass(self, x):
        # Forward pass on the convolutions
        conv_output, x = self.forward_pass_on_convolutions(x)
        # Forward pass on the classifier
        x = self.model.avgpool(x)
        # Redefine the FC to match the conv layer and the number of classes
        fc_in_features = x.shape[1]
        self.model.fc = nn.Linear(fc_in_features, 65).cuda()
        x = x.view(x.size(0), -1)
        x = self.model.fc(x)
        return conv_output, x


class ScoreCam():
    """
    Produces class activation maps
    """
    def __init__(self, model, target_layer, device):
        self.model = model.to(device)
        self.model.eval()
        # Define extractor
        self.extractor = CamExtractor(self.model, target_layer, device)

    def apply_colormap_on_image(self, filename, activation, input_image, colormap_name="gnuplot2"):
        """
        Apply heatmap on image
        Args:
            filename (str): Path to the original image
            activation (numpy arr): Activation map (grayscale), 0-1
            input_image (tensor): Input image, used only for its spatial size
            colormap_name (str): Name of the colormap
        """
        map_size = input_image.shape[2:]
        org_im = Image.open(filename).convert('RGB')
        org_im = org_im.resize(map_size)
        # Get colormap
        color_map = mpl_color_map.get_cmap(colormap_name)
        no_trans_heatmap = color_map(activation)
        # Change alpha channel in colormap to make sure original image is displayed
        heatmap = copy.copy(no_trans_heatmap)
        heatmap[:, :, 3] = 0.65
        heatmap = Image.fromarray((heatmap * 255).astype(np.uint8))
        no_trans_heatmap = Image.fromarray((no_trans_heatmap * 255).astype(np.uint8))
        # Apply heatmap on image
        heatmap_on_image = Image.new("RGBA", map_size)
        heatmap_on_image = Image.alpha_composite(heatmap_on_image, org_im.convert('RGBA'))
        heatmap_on_image = Image.alpha_composite(heatmap_on_image, heatmap)
        return no_trans_heatmap, heatmap_on_image

    def generate_cam(self, input_image, filename, target_class=None):
        # Full forward pass:
        # conv_output is the output of convolutions at the specified layer,
        # model_output is the final output of the model (1, num_classes)
        conv_output, model_output = self.extractor.forward_pass(input_image)
        if target_class is None:
            target_class = np.argmax(model_output.data.cpu().clone().numpy())
        # Get convolution outputs
        target = conv_output[0]
        # Create empty numpy array for cam
        cam = np.ones(target.shape[1:], dtype=np.float32)
        # Multiply each weight with its conv output and then sum
        for i in range(len(target)):
            # Unsqueeze to 4D
            saliency_map = torch.unsqueeze(torch.unsqueeze(target[i, :, :], 0), 0)
            # Upsample to input size
            input_size = input_image.shape[2:]
            saliency_map = F.interpolate(saliency_map, size=(input_size[0], input_size[0]),
                                         mode='bilinear', align_corners=False)
            if saliency_map.max() == saliency_map.min():
                continue
            # Scale between 0-1
            norm_saliency_map = (saliency_map - saliency_map.min()) / (saliency_map.max() - saliency_map.min())
            # Get the target score for the masked input
            w = F.softmax(self.extractor.forward_pass(input_image * norm_saliency_map)[1], dim=1)[0][target_class]
            cam += w.data.detach().cpu().numpy() * target[i, :, :].data.detach().cpu().clone().numpy()
        cam = np.maximum(cam, 0)
        cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam))  # Normalize between 0-1
        cam = np.uint8(cam * 255)  # Scale between 0-255 to visualize
        cam = np.uint8(Image.fromarray(cam).resize((input_image.shape[2],
                       input_image.shape[3]), Image.ANTIALIAS)) / 255
        no_trans_heatmap, heatmap_on_image = self.apply_colormap_on_image(filename, cam, input_image)
        return no_trans_heatmap, heatmap_on_image
The validation code is as follows:
all_layers_maps = []
print("Preparing CAM")
for i in range(1, 5):
    print(f"Layer {i}")
    score_cam = ScoreCam(self.model, f"layer{i}", self.device)
    eval_count += 1
    images_list = []
    images = data["image"]
    names = data["file_name"]
    for idx, (image, name) in enumerate(zip(images, names), 1):
        # Add the batch dimension before the forward pass
        image = torch.unsqueeze(image, 0).cuda()
        no_trans, heatmap_image = score_cam.generate_cam(input_image=image,
                                                         filename=name)
        images_list.append(np.array(heatmap_image))
    all_layers_maps.append(images_list)
all_layers_maps = np.hstack(all_layers_maps)
self.writer.add_image("Class Activation Maps, Layers 1-4", torch.from_numpy(all_layers_maps), global_step=epoch)
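For what it is worth, since add_image defaults to dataformats='CHW' and expects a single 3D image while my stacked array is 4D (N, H, W, C), the two workarounds I am considering are sketched below. I have not verified either yet, so I may be off here:

# I believe the 4th channel is alpha; dropping it gives plain RGB
rgb_maps = all_layers_maps[..., :3]  # (4, 1200, 300, 4) -> (4, 1200, 300, 3)

# Option 1: keep the batch dimension and declare the NHWC layout
self.writer.add_images("Class Activation Maps, Layers 1-4",
                       torch.from_numpy(rgb_maps),
                       global_step=epoch, dataformats="NHWC")

# Option 2: stack the batch into one tall image and log it as HWC
single_image = np.vstack(rgb_maps)  # (4800, 300, 3)
self.writer.add_image("Class Activation Maps, Layers 1-4",
                      torch.from_numpy(single_image),
                      global_step=epoch, dataformats="HWC")

Would either of these be the right approach?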
I am not sure how to go about solving this issue, feeling quite brain-dead at 5 am…
Regards and thank you in advance!