Weird memory usage drop when extracting class activation maps

I’ve adapted Haofanwang’s Score-CAM package to work with my model.

However, I am noticing weird memory usage drops when I start extracting the CAMs.
It goes from ~8.2 GB down to ~3.2 GB and jumps back up to ~8.2 GB with the next training epoch.

I find it weird that this occurs only when performing visualisations!

Any idea what it could be?
Is there anything to worry about?
I am worried that it could be:

a) using the wrong weights for CAM extraction
b) corrupting the learning process and the weights themselves (see the quick check sketched below).
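
To rule out (b), my plan is to snapshot the parameters right before extracting the CAMs and compare them afterwards. A minimal sketch (the extract_cams callable and the names in the usage comment are just placeholders for my own generate_cam call):

import torch

def weights_changed(model, extract_cams):
    # Snapshot every parameter/buffer, run the CAM extraction, then list anything that changed
    before = {k: v.detach().clone() for k, v in model.state_dict().items()}
    extract_cams()
    after = model.state_dict()
    return [k for k in before if k not in after or not torch.equal(before[k], after[k])]

# Usage: changed = weights_changed(model, lambda: score_cam.generate_cam(img, fname, classes_len))
# An empty list would mean the extraction left the weights untouched.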

Please let me know and I will add more information!

Thank you!

Adapted code

import copy

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
import as mpl_color_map


class CamExtractor():
    """Extracts CAM features from the model."""

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer

    def forward_pass_on_convolutions(self, x):
        """Does a forward pass on the convolutions, capturing the output at the given layer."""
        conv_output = None
        # Iterate through all modules and children until reaching the target layer
        for module_pos, module in self.model._modules.items():
            x = module(x)  # Forward
            if module_pos == self.target_layer:
                conv_output = x  # Save the convolution output on that layer
                return conv_output, x

    def forward_pass(self, x, classes_len):
        # Forward pass on the convolutions
        conv_output, x = self.forward_pass_on_convolutions(x)

        # Forward pass on the classifier
        x = self.model.avgpool(x)
        x = torch.flatten(x, 1)  # Flatten (N, C, 1, 1) -> (N, C) before the FC layer

        # Redefine the FC to match the conv layer and the number of classes
        fc_in_features = x.shape[1]
        self.model.fc = nn.Linear(fc_in_features, classes_len).cuda()

        x = self.model.fc(x)
        return conv_output, x

class ScoreCam():
    """Produces class activation maps."""

    def __init__(self, model, target_layer):
        self.model = model
        # Define extractor
        self.extractor = CamExtractor(self.model, target_layer)

    def apply_colormap_on_image(self, filename, activation, input_image, colormap_name="gnuplot2"):
        """Apply the heatmap to the original image.

            filename (str): Path to the original image
            activation (numpy arr): Activation map (grayscale), scaled 0-1
            input_image (tensor): Model input, used only for its spatial size
            colormap_name (str): Name of the colormap
        map_size = input_image.shape[2:]
        org_im ='RGB')
        org_im = org_im.resize(map_size)

        # Get colormap
        color_map = mpl_color_map.get_cmap(colormap_name)
        no_trans_heatmap = color_map(activation)

        # Change alpha channel in colormap to make sure original image is displayed
        heatmap = copy.copy(no_trans_heatmap)
        heatmap[:, :, 3] = 0.65
        heatmap = Image.fromarray((heatmap * 255).astype(np.uint8))
        no_trans_heatmap = Image.fromarray((no_trans_heatmap * 255).astype(np.uint8))

        # Apply heatmap on image
        heatmap_on_image ="RGBA", map_size)
        heatmap_on_image = Image.alpha_composite(heatmap_on_image, org_im.convert('RGBA'))
        heatmap_on_image = Image.alpha_composite(heatmap_on_image, heatmap)
        return no_trans_heatmap, heatmap_on_image

    def generate_cam(self, input_image, filename, classes_len, target_class=None):
        # Full forward pass
        # conv_output is the output of the convolutions at the specified layer
        # model_output is the final output of the model (1, classes_len)
        conv_output, model_output = self.extractor.forward_pass(input_image, classes_len=classes_len)
        if target_class is None:
            target_class = np.argmax(
        # Get convolution outputs
        target = conv_output[0]

        # Create empty numpy array for cam
        cam = np.ones(target.shape[1:], dtype=np.float32)
        # Multiply each weight with its conv output and then sum
        for i in range(len(target)):
            # Unsqueeze to 4D
            saliency_map = torch.unsqueeze(torch.unsqueeze(target[i, :, :], 0), 0)
            # Upsample to the input size
            input_size = input_image.shape[2:]
            saliency_map = F.interpolate(saliency_map, size=(input_size[0], input_size[1]),
                                         mode='bilinear', align_corners=False)
            if saliency_map.max() == saliency_map.min():
            # Scale between 0-1
            norm_saliency_map = (saliency_map - saliency_map.min()) / (saliency_map.max() - saliency_map.min())
            # Get the target score
            w = F.softmax(self.extractor.forward_pass(input_image * norm_saliency_map,
                                                      classes_len=classes_len)[1], dim=1)[0][target_class]
            cam += w.item() * target[i, :, :].detach().cpu().numpy()
        cam = np.maximum(cam, 0)
        cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam))  # Normalize between 0-1
        cam = np.uint8(cam * 255)  # Scale between 0-255 to visualize
        cam = np.uint8(Image.fromarray(cam).resize((input_image.shape[2],
                       input_image.shape[3]), Image.ANTIALIAS)) / 255

        no_trans_heatmap, heatmap_on_image = self.apply_colormap_on_image(filename, cam, input_image)

        return no_trans_heatmap, heatmap_on_image

You might be hitting out-of-memory errors, in which case PyTorch will automatically try to clear the memory cache and retry the last memory allocation.
torch.cuda.memory_summary() should give you more information if you are indeed running into OOM issues.
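
To see whether that is what’s happening here, you could log the allocated vs. reserved memory right around the CAM extraction. A minimal sketch (the tags and the generate_cam call site are just placeholders for your own code):

import torch

def log_cuda_memory(tag):
    # memory_allocated: memory currently occupied by live tensors
    # memory_reserved: memory held by PyTorch's caching allocator (roughly what nvidia-smi shows)
    alloc = torch.cuda.memory_allocated() / 1024 ** 3
    reserved = torch.cuda.memory_reserved() / 1024 ** 3
    print(f"{tag}: allocated {alloc:.2f} GB, reserved {reserved:.2f} GB")

log_cuda_memory("before CAM extraction")
# ... score_cam.generate_cam(...) here ...
log_cuda_memory("after CAM extraction")
print(torch.cuda.memory_summary())  # detailed breakdown of the caching allocator state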