How to register a backward hook at the last convolutional layer of resnet50

I have this code to fine-tune a resnet50 model for my task:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms, utils
from torchvision.models import resnet50

class resnet(nn.Module):
    def __init__(self):
        super(resnet, self).__init__()

        self.resnet = resnet50(pretrained=True)
        self.features_conv = self.resnet.features[:48]
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        self.classifier = self.resnet.classifier
        self.gradients = None

        # freeze the backbone
        for param in self.resnet.parameters():
            param.requires_grad = False

    def activations_hook(self, grad):
        self.gradients = grad

    def forward(self, x):
        x = self.features_conv(x)
        h = x.register_hook(self.activations_hook)

        x = self.max_pool(x)
        x = x.view((1, -1))
        x = self.classifier(x)
        return x

    def get_activations_gradient(self):
        return self.gradients

    def get_activations(self, x):
        return self.features_conv(x)

    def set_parameter_requires_grad(self, model):
        for param in model.parameters():
            param.requires_grad = True

but I am getting this error:

AttributeError: 'ResNet' object has no attribute 'features'

due to this line of code: self.features_conv = self.resnet.features[:48]

What I am trying to do is to register a hook at the last convolutional layer of resnet50. Any suggestions are welcome. Thank you in advance!

The error is expected, since resnet50 does not use a self.features attribute.
You can check its definition here and select the desired layer based on the module creation.
Based on your description I would guess you want to register the hook for resnet.layer4[2].conv3.
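
For reference, a minimal sketch of grabbing that layer's gradient with a module hook (assuming a recent PyTorch release that provides register_full_backward_hook; the save_grad helper and the grads dict are just for illustration):

import torch
from torchvision.models import resnet50

model = resnet50(pretrained=True)
grads = {}

def save_grad(module, grad_input, grad_output):
    # grad_output[0] is the gradient w.r.t. the output of conv3
    grads['layer4.2.conv3'] = grad_output[0]

handle = model.layer4[2].conv3.register_full_backward_hook(save_grad)

x = torch.randn(1, 3, 224, 224)
out = model(x)
out[0, out.argmax()].backward()
print(grads['layer4.2.conv3'].shape)  # torch.Size([1, 2048, 7, 7])
handle.remove()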


Hello @ptrblck, I have updated my code:

class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()

        self.resnet = resnet50(pretrained=True)

        # isolate the feature blocks
        self.features = nn.Sequential(self.resnet.conv1,
                                      self.resnet.bn1,
                                      nn.ReLU(),
                                      nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False),
                                      self.resnet.layer1,
                                      self.resnet.layer2,
                                      self.resnet.layer3,
                                      self.resnet.layer4)

        for param in self.resnet.layer4[2].parameters():
            param.requires_grad_(True)

        # average pooling layer
        self.avgpool = self.resnet.avgpool

        # classifier
        self.classifier = self.resnet.fc

        # gradient placeholder
        self.gradient = None

    def activations_hook(self, grad):
        self.gradients = grad

    def get_gradient(self):
        return self.gradient

    def get_activations(self, x):
        return self.features(x)

    def forward(self, x):
        x = self.features(x)
        h = x.register_hook(self.activations_hook)

        # complete the forward pass
        x = self.avgpool(x)
        x = x.view((1, -1))
        x = self.classifier(x)
        return x
But I am getting:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x65536 and 2048x1000)

so I updated my classifier (I have 20 classes):

self.classifier = nn.Sequential(nn.Linear(65536, 20), nn.Dropout())

but I am getting this error:

ValueError: Target size (torch.Size([32, 20])) must be the same as input size (torch.Size([1, 20]))

I tried self.classifier = nn.Sequential(nn.Linear(2048, 20), nn.Dropout())

and now this error appears:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x65536 and 2048x20)

Any advice for me? Thank you in advance

This line of code:

x = x.view((1, -1))

looks wrong since you are explicitly creating a single sample and are thus changing the batch size.
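
A batch-size-preserving alternative would be, e.g.:

x = x.view(x.size(0), -1)  # or: x = torch.flatten(x, 1)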


I took this out, but I get a similar error:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (65536x1 and 2048x20)

This is part of my training code:

    n_classes = 20

    model = ResNet()

    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, n_classes)
    model.to(device)

    optimizer = construct_optimizer(model, base_lr=base_lr)
    loss_criterion = nn.BCEWithLogitsLoss()

    ckpt = Checkpointer(model, optimizer)
    checkpoint_path = (Manager_checkpoint_name
            .find_last_checkpoint(rundir))

    avg_loss = Averager()
    avg_acc = Averager()

    start_epoch = ckpt.restore_model_magic(checkpoint_path)

    # Train loop
    for i_epoch in range(start_epoch, MAX_EPOCH):
        epoch_seed = i_epoch + initial_seed
        enforce_all_seeds(epoch_seed)

        model.train()
        for i_batch, (data, target, meta) in enumerate(dataloader_train):
            data, target = map(lambda x: x.to(device), (data, target))

            # Set appropriate learning rate
            total_batches = len(dataloader_train)
            lr = lr_func_steps_with_relative_lrs(
                    solver_steps, MAX_EPOCH, solver_lrs, base_lr,
                    i_epoch + float(i_batch)/total_batches)
            set_lr(optimizer, lr)

            # Output
            output = model(data)
            loss = loss_criterion(output, target)

            # Measure params
            with torch.no_grad():
                acc = qacc_sigmoid(output, target)
            avg_acc.update(acc)
            avg_loss.update(loss.data.item(), target.size(0))

            # Gradient and step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Log train stats
            if i_batch % 25 == 0:
                Nb = len(dataloader_train)
                loss_str = (f'loss(all/last):{avg_loss.avg:.4f}/{avg_loss.last:.4f}')
                acc_str = (f'acc(all/last):{avg_acc.avg:.2f}/{avg_acc.last:.2f}')
                log.info(f'i_epoch={i_epoch}, i_batch={i_batch}/{Nb}; '
                        f'lr={lr}; TRAIN: {loss_str} {acc_str}')

        ckpt.save_epoch(rundir, i_epoch)
        model.eval()

        targets = []
        outputs = []
        count = 0
        for i_batch, (data, target, meta) in enumerate(dataloader_test):
            data, target = map(lambda x: x.to(device), (data, target))
            with torch.no_grad():
                output = model(data)
                output_sigm = torch.sigmoid(output)
                output_np = output_sigm.detach().cpu().numpy()
                targets.append(target.cpu())
                outputs.append(output_np)

For my model, I updated the definition of my classifier:

 self.fc = self.resnet.fc

I am guessing the problem could be from the avgpool layer, as its output_size is (1, 1) by default, while the classifier layer has in_features of 2048 (see the shape check below). Any ideas for me? Thank you very much @ptrblck
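
For reference, a quick shape check (a sketch assuming 224x224 inputs and the batch size of 32 from the error above) shows where the 65536 comes from:

x = torch.randn(32, 3, 224, 224)
feats = model.features(x)      # [32, 2048, 7, 7]
pooled = model.avgpool(feats)  # [32, 2048, 1, 1]
# x.view((1, -1)) then merges batch and channels: 32 * 2048 = 65536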

Flatten the activation via x = x.view(x.size(0), -1) in case you have now completely removed the view operation.
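
i.e. a sketch of the corrected forward pass, reusing the attribute names from your wrapper:

    def forward(self, x):
        x = self.features(x)                 # [N, 2048, 7, 7] for 224x224 inputs
        h = x.register_hook(self.activations_hook)

        x = self.avgpool(x)                  # [N, 2048, 1, 1]
        x = x.view(x.size(0), -1)            # [N, 2048], batch size preserved
        x = self.classifier(x)               # [N, n_classes]
        return x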

Hello @ptrblck

I am getting the following error:

pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])

TypeError: mean() received an invalid combination of arguments - got (NoneType, dim=list), but expected one of:

  • (Tensor input, *, torch.dtype dtype)

I have removed this part of the code:

with torch.no_grad():

but I am still getting the error.

I also tried adding:

data = data.clone().requires_grad_(True)

but I still get the same error.

Here is the related training code:

for i_epoch in range(start_epoch, MAX_EPOCH):
        epoch_seed = i_epoch + initial_seed
        enforce_all_seeds(epoch_seed)

        model.train()
        for i_batch, (data, target, meta) in enumerate(dataloader_train):
            data, target = map(lambda x: x.to(device), (data, target))

            # Set appropriate learning rate
            total_batches = len(dataloader_train)
            lr = lr_func_steps_with_relative_lrs(
                    solver_steps, MAX_EPOCH, solver_lrs, base_lr,
                    i_epoch + float(i_batch)/total_batches)
            set_lr(optimizer, lr)

            # Output
            output = model(data)
            loss = loss_criterion(output, target)

            # Measure params
            with torch.no_grad():
                acc = qacc_sigmoid(output, target)
            avg_acc.update(acc)
            avg_loss.update(loss.data.item(), target.size(0))

            # Gradient and step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Log train stats
            if i_batch % 25 == 0:
                Nb = len(dataloader_train)
                loss_str = (f'loss(all/last):{avg_loss.avg:.4f}/{avg_loss.last:.4f}')
                acc_str = (f'acc(all/last):{avg_acc.avg:.2f}/{avg_acc.last:.2f}')
                log.info(f'i_epoch={i_epoch}, i_batch={i_batch}/{Nb}; '
                        f'lr={lr}; TRAIN: {loss_str} {acc_str}')

        ckpt.save_epoch(rundir, i_epoch)
        model.eval()

        targets = []
        outputs = []
        activation = {}
        count = 0
for i_batch, (data, target, meta) in enumerate(dataloader_test):
            data, target = map(lambda x: x.to(device), (data, target))
            data = data.clone().requires_grad_(True)

            output = model(data)
            gradients = model.get_gradient()
            pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])
            activations = model.get_activations(data).detach()
            # weight each activation channel by its averaged gradient
            for i in range(2048):
                activations[:, i, :, :] *= pooled_gradients[i]
            heatmap = torch.mean(activations, dim=1).squeeze()
            heatmap = torch.clamp(heatmap, min=0)
            heatmap /= torch.max(heatmap)
            plt.matshow(heatmap.squeeze().cpu())
            plt.savefig(f'{count}.png')
            count = count + 1

Any advice for me? Thank you very much @ptrblck

Based on the error message it seems model.get_gradient() returns None, which is the default value of the internal self.gradient attribute.
I also guess you have a typo, since you are returning and initializing self.gradient while activations_hook sets self.gradients (note the additional s).
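
A minimal sketch of the fix, using one consistent attribute name:

    def activations_hook(self, grad):
        self.gradient = grad  # was: self.gradients

    def get_gradient(self):
        return self.gradient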

Thank you so much @ptrblck for pointing out that typo. Now it works. Though I have some errors with imshow not accepting 7 for its 3rd dimension. I will try to solve this problem myself; I think I just have to change heatmap = torch.mean(activations, dim=1).squeeze() to heatmap = torch.mean(activations, dim=-1).squeeze().