Visualize feature map

I assume you want to visualize the output of the self.relu in the forward, as it returns self.x and not the output of self.fc3_mu/logvar. In this case, create a new nn.ReLU module for this particular layer (e.g. self.relu_out = nn.ReLU()), use it in the forward via:

        self.x = self.relu_out(self.x)
        # x = F.dropout(x, p=self.drop_p,
        mu, logvar = self.fc3_mu(self.x), self.fc3_logvar(self.x)
        return mu, logvar

and register the forward hook to model.relu_out.

@ptrblck thanks for the response. Yes, I want to visualize the output of the last layer of the encoder in the VAE.
I am getting the error below at output = resnet_vae(data). I pushed the input data to the GPU because all the parameters are on the GPU.

import numpy as np
from PIL import Image
from torch.utils import data
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.autograd import Variable
import torchvision.transforms as transforms

class Dataset(data.Dataset):
    "Characterizes a dataset for PyTorch"
    def __init__(self, filenames, labels, transform=None):
        self.filenames = filenames
        self.labels = labels
        self.transform = transform

    def __len__(self):
        "Denotes the total number of samples"
        return len(self.filenames)

    def __getitem__(self, index):
        "Generates one sample of data"
        # Select sample
        filename = self.filenames[index]
        X =

        if self.transform:
            X = self.transform(X)     # transform

        y = torch.LongTensor([self.labels[index]])
        return X, y

## ---------------------- end of Dataloaders ---------------------- ##

def conv2D_output_size(img_size, padding, kernel_size, stride):
    """Return the (height, width) produced by a 2D convolution.

    Each argument is a 2-element sequence indexed as (dim0, dim1). For every
    axis the result is ``floor((size + 2*pad - (kernel - 1) - 1) / stride + 1)``,
    returned as a NumPy integer.
    """
    dims = []
    for axis in (0, 1):
        # Extent left over once padding is added and the kernel footprint removed.
        span = img_size[axis] + 2 * padding[axis] - (kernel_size[axis] - 1) - 1
        dims.append(np.floor(span / stride[axis] + 1).astype(int))
    return tuple(dims)

def convtrans2D_output_size(img_size, padding, kernel_size, stride):
    """Return the (height, width) produced by a 2D transposed convolution.

    Each argument is a 2-element sequence indexed as (dim0, dim1). For every
    axis the result is ``(size - 1) * stride - 2 * pad + kernel``.
    """
    return tuple(
        (img_size[axis] - 1) * stride[axis] - 2 * padding[axis] + kernel_size[axis]
        for axis in (0, 1)
    )

## ---------------------- ResNet VAE ---------------------- ##

class ResNet_VAE(nn.Module):
    def __init__(self, fc_hidden1=1024, fc_hidden2=768, drop_p=0.3, CNN_embed_dim=256):
        super(ResNet_VAE, self).__init__()

        self.fc_hidden1, self.fc_hidden2, self.CNN_embed_dim = fc_hidden1, fc_hidden2, CNN_embed_dim

        # CNN architechtures
        self.ch1, self.ch2, self.ch3, self.ch4 = 16, 32, 64, 128
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)      # 2d kernal size
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)      # 2d strides
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)  # 2d padding

        # encoding components
        resnet = models.resnet152(pretrained=True)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.resnet = nn.Sequential(*modules)
        self.fc1 = nn.Linear(resnet.fc.in_features, self.fc_hidden1)
        self.bn1 = nn.BatchNorm1d(self.fc_hidden1, momentum=0.01)
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.bn2 = nn.BatchNorm1d(self.fc_hidden2, momentum=0.01)
        # Latent vectors mu and sigma
        self.fc3_mu = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)      # output = CNN embedding latent variables
        self.fc3_logvar = nn.Linear(self.fc_hidden2, self.CNN_embed_dim)  # output = CNN embedding latent variables

        # Sampling vector
        self.fc4 = nn.Linear(self.CNN_embed_dim, self.fc_hidden2)
        self.fc_bn4 = nn.BatchNorm1d(self.fc_hidden2)
        self.fc5 = nn.Linear(self.fc_hidden2, 64 * 4 * 4)
        self.fc_bn5 = nn.BatchNorm1d(64 * 4 * 4)
        self.relu = nn.ReLU(inplace=True)

        # Decoder
        self.convTrans6 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=self.k4, stride=self.s4,
            nn.BatchNorm2d(32, momentum=0.01),
        self.convTrans7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=32, out_channels=8, kernel_size=self.k3, stride=self.s3,
            nn.BatchNorm2d(8, momentum=0.01),

        self.convTrans8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=8, out_channels=3, kernel_size=self.k2, stride=self.s2,
            nn.BatchNorm2d(3, momentum=0.01),
            nn.Sigmoid()    # y = (y1, y2, y3) \in [0 ,1]^3

    def encode(self, x):
        """Run the encoder and return the pair ``(mu, logvar)``.

        The input goes through ``self.resnet``, is flattened to one row per
        sample, then passes through two Linear -> BatchNorm1d -> ReLU stages
        before the two output heads ``fc3_mu`` and ``fc3_logvar``.
        """
        x = self.resnet(x)  # ResNet
        x = x.view(x.size(0), -1)  # flatten output of conv

        # FC layers
        x = self.bn1(self.fc1(x))
        # self.relu is a single shared nn.ReLU(inplace=True); it is reused for
        # every activation in the model, so a forward hook on it fires at each use.
        x = self.relu(x)
        x = self.bn2(self.fc2(x))
        x = self.relu(x)
        # Dropout is disabled (the commented call below is truncated in this paste).
        # NOTE(review): the visible __init__ never assigns self.drop_p -- confirm
        # before re-enabling.
        # x = F.dropout(x, p=self.drop_p,
        mu, logvar = self.fc3_mu(x), self.fc3_logvar(x)
        return mu, logvar

    def reparameterize(self, mu, logvar):
            std = logvar.mul(0.5).exp_()
            eps = Variable(
            return eps.mul(std).add_(mu)
            return mu

    def decode(self, z):
        """Map a latent tensor ``z`` back to an image-shaped tensor.

        ``z`` passes through two Linear -> BatchNorm1d -> ReLU stages, is
        reshaped to ``(-1, 64, 4, 4)``, run through the three ``convTrans``
        stages, and finally resized to 224x224 with bilinear interpolation.
        """
        x = self.relu(self.fc_bn4(self.fc4(z)))
        # fc5 emits 64 * 4 * 4 features per sample, reshaped here into a 64-channel 4x4 map.
        x = self.relu(self.fc_bn5(self.fc5(x))).view(-1, 64, 4, 4)
        x = self.convTrans6(x)
        x = self.convTrans7(x)
        x = self.convTrans8(x)
        # Resize to a fixed 224x224 output regardless of the transposed-conv output size.
        x = F.interpolate(x, size=(224, 224), mode='bilinear')
        return x

    def forward(self, x):
        #self.x = self.relu(self.x)
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        x_reconst = self.decode(z)

        return x_reconst, z, mu, logvar```
# EncoderCNN architecture
CNN_fc_hidden1, CNN_fc_hidden2 = 1024, 1024
CNN_embed_dim = 256     # latent dim extracted by 2D CNN
res_size = 224        # ResNet image size
dropout_p = 0.2       # dropout probability

# training parameters
epochs = 1  # training epochs
batch_size = 64
learning_rate = 1e-3
log_interval = 10   # interval for displaying training info

# save model
save_model_path = './results_MNIST'

def check_mkdir(dir_name):
    if not os.path.exists(dir_name):

def loss_function(recon_x, x, mu, logvar):
    """Return the summed VAE loss: reconstruction term plus KL divergence.

    The reconstruction term is binary cross-entropy between ``recon_x`` and
    ``x`` with ``reduction='sum'`` (a summed ``F.mse_loss`` was the alternative
    left commented out in the original). The KL term is
    ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))``.
    """
    reconstruction = F.binary_cross_entropy(recon_x, x, reduction='sum')
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    divergence = -0.5 * torch.sum(kl_terms)
    return reconstruction + divergence

def train(log_interval, model, device, train_loader, optimizer, epoch):
    # set model as training mode

    losses = []
    all_y, all_z, all_mu, all_logvar = [], [], [], []
    N_count = 0   # counting total trained sample in one epoch
    for batch_idx, (X, y) in enumerate(train_loader):
        # distribute data to device
        X, y =,, )
        N_count += X.size(0)

        X_reconst, z, mu, logvar  = model(X)  # VAE
        loss = loss_function(X_reconst, X, mu, logvar)


        # show information
        if (batch_idx + 1) % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, N_count, len(train_loader.dataset), 100. * (batch_idx + 1) / len(train_loader), loss.item()))

    all_y = np.stack(all_y, axis=0)
    all_z = np.stack(all_z, axis=0)
    all_mu = np.stack(all_mu, axis=0)
    all_logvar = np.stack(all_logvar, axis=0)

    # save Pytorch models of best record, os.path.join(save_model_path, 'model_epoch{}.pth'.format(epoch + 1)))  # save motion_encoder, os.path.join(save_model_path, 'optimizer_epoch{}.pth'.format(epoch + 1)))      # save optimizer
    print("Epoch {} model saved!".format(epoch + 1))

    return, all_y, all_z, all_mu, all_logvar, losses

def validation(model, device, optimizer, test_loader):
    # set model as testing mode

    test_loss = 0
    all_y, all_z, all_mu, all_logvar = [], [], [], []
    with torch.no_grad():
        for X, y in test_loader:
            # distribute data to device
            X, y =,, )
            X_reconst, z, mu, logvar = model(X)

            loss = loss_function(X_reconst, X, mu, logvar)
            test_loss += loss.item()  # sum up batch loss


    test_loss /= len(test_loader.dataset)
    all_y = np.stack(all_y, axis=0)
    all_z = np.stack(all_z, axis=0)
    all_mu = np.stack(all_mu, axis=0)
    all_logvar = np.stack(all_logvar, axis=0)

    # show information
    print('\nTest set ({:d} samples): Average loss: {:.4f}\n'.format(len(test_loader.dataset), test_loss))
    return, all_y, all_z, all_mu, all_logvar, test_loss

# Detect devices
use_cuda = torch.cuda.is_available()                   # check if GPU exists
device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU

# Data loading parameters
params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 4, 'pin_memory': True} if use_cuda else {}
transform = transforms.Compose([transforms.Resize([res_size, res_size]),
                                transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # gray -> GRB 3 channel (lambda function)
                                transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0])])  # for grayscale images

# MNIST dataset (images and labels)
MNIST_train_dataset = torchvision.datasets.MNIST(root='/content', train=True, transform=transform, download=True)
MNIST_test_dataset = torchvision.datasets.MNIST(root='/content', train=False, transform=transform)

# Data loader (input pipeline)
train_loader =, batch_size=batch_size, shuffle=True)
valid_loader =, batch_size=batch_size, shuffle=False)

# Create model
resnet_vae = ResNet_VAE(fc_hidden1=CNN_fc_hidden1, fc_hidden2=CNN_fc_hidden2, drop_p=dropout_p, CNN_embed_dim=CNN_embed_dim).to(device)
print("Using", torch.cuda.device_count(), "GPU!")
model_params = list(resnet_vae.parameters())
optimizer = torch.optim.Adam(model_params, lr=learning_rate)

# record training process
epoch_train_losses = []
epoch_test_losses = []

# start training
for epoch in range(epochs):

    # train, test model
    X_train, y_train, z_train, mu_train, logvar_train, train_losses = train(log_interval, resnet_vae, device, train_loader, optimizer, epoch)
    X_test, y_test, z_test, mu_test, logvar_test, epoch_test_loss = validation(resnet_vae, device, optimizer, valid_loader)

    # save results

    # save all train test results
    A = np.array(epoch_train_losses)
    C = np.array(epoch_test_losses), 'ResNet_VAE_training_loss.npy'), A), 'X_MNIST_train_epoch{}.npy'.format(epoch + 1)), X_train) #save last batch, 'y_MNIST_train_epoch{}.npy'.format(epoch + 1)), y_train), 'z_MNIST_train_epoch{}.npy'.format(epoch + 1)), z_train)

# Visualize feature maps
activation = {}
def get_activation(name):
    """Build a forward hook that stores a layer's output in ``activation[name]``."""
    def _store_output(module, inputs, output):
        # Keep a detached copy so the cached tensor is cut from the autograd graph.
        detached = output.detach()
        activation[name] = detached
    return _store_output

data, _ = MNIST_train_dataset[0]

data = data.cuda
output = resnet_vae(data)

act = activation['relu'].squeeze()
fig, axarr = plt.subplots(act.size(0))
for idx in range(act.size(0)):
    axarr[idx].imshow(act[idx]) ```

TypeError: conv2d() received an invalid combination of arguments - got (builtin_function_or_method, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!builtin_function_or_method!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!builtin_function_or_method!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)

This line of code is wrong and you would have to call cuda():

data = data.cuda

so replace it with:

data = data.cuda()

@ptrblck thank you for correcting me. I am still getting the error below. I have already added relu_out to the encoder, and I used the code above with data.cuda changed to data.cuda(). ---------------------------------------------------------------------------

KeyError Traceback (most recent call last)

in ()
204 output = resnet_vae(data)
→ 206 act = activation['relu_out'].squeeze()
207 fig, axarr = plt.subplots(act.size(0))
208 for idx in range(act.size(0)):

KeyError: 'relu_out'

Could you post/update the previous code to reproduce the issue?
Based on the error activation doesn’t contain the 'relu_out' key, so the forward hook seemed to fail.

@ptrblck thanks a lot for the suggestion. As per the comment above, I updated the forward method. I am now facing the issue below, even though I have already moved the parameters to the GPU. What else do I have to do?

## ---------------------- end of Dataloaders ---------------------- ##

## ---------------------- ResNet VAE ---------------------- ##

# EncoderCNN architecture
CNN_fc_hidden1, CNN_fc_hidden2 = 1024, 1024
CNN_embed_dim = 256     # latent dim extracted by 2D CNN
res_size = 224        # ResNet image size
dropout_p = 0.2       # dropout probability

# training parameters
epochs = 1  # training epochs
batch_size = 64
learning_rate = 1e-3
log_interval = 10   # interval for displaying training info

# save model
save_model_path = './results_MNIST'

def check_mkdir(dir_name):
    if not os.path.exists(dir_name):

# Visualize feature maps
activation = {}
def get_activation(name):
    """Build a forward hook that stores a layer's output in ``activation[name]``."""
    def _store_output(module, inputs, output):
        # Keep a detached copy so the cached tensor is cut from the autograd graph.
        detached = output.detach()
        activation[name] = detached
    return _store_output

data, _ = MNIST_train_dataset[0]

data = data.cuda()
output = resnet_vae(data)

act = activation['relu_out'].squeeze()
fig, axarr = plt.subplots(act.size(0))
for idx in range(act.size(0)):
    axarr[idx].imshow(act[idx]) .
error:TypeError                                 Traceback (most recent call last)
<ipython-input-6-8bf6b27e0dcf> in <module>()
    208 for idx in range(act.size(0)):
    209     print(act.size(0))
--> 210     axarr[idx].imshow(act[idx])
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.


Thanks for the update. I’m not sure how you’ve executed the code, but the last forward operation of the model yields an error:

data = data.cuda()
output = resnet_vae(data)
> ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 1024])

which would explain why the activation dict was never filled. I would generally recommend to execute the posted code snippet once manually to make sure the actual error is visible and not hidden by another one.
If you call model.eval() or use more than a single sample, the forward execution will work.
Afterwards you’ll face an error in trying to use CUDATensors in matplotlib, which also won’t work, so you would need to use cpu().numpy() in the imshow call.

@ptrblck thanks i will run again

Thanks for code to visualize feature map.

I have one question: why is the output of the above code showing only the "5" digit?

Thank you @ptrblck for providing the code snippet. I am trying to use it with the following code, but I get an error for the dataset. Could you please guide me?


data, _ = image_datasets[0]


output = model_ft(data)

act = activation['conv1'].squeeze()

fig, axarr = plt.subplots(act.size(0))

for idx in range(act.size(0)):


error is

KeyError: 0
KeyError                                  Traceback (most recent call last)
/tmp/ipykernel_6428/ in <module>
      1 model_ft.conv1.register_forward_hook(get_activation('conv1'))
----> 2 data, _ = image_datasets[0]
      3 data.unsqueeze_(0)
      4 output = model_ft(data)

KeyError: 0

Here’s the code for loading the dataset (ants and bees) from the PyTorch transfer learning tutorial:

# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    'val': transforms.Compose([
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

data_dir = 'hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                  for x in ['train', 'val']}
dataloaders = {x:[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

image_datasets is defined as a dict, so you would need to index the dict first before indexing the Dataset: data, _ = image_datasets['train'][0].

Thanks a lot for your response.

I got this error:

activation = {}
def get_activation(name):
    """Return a forward hook that records the hooked module's output under ``name``."""
    def _record(module, inputs, output):
        # detach() drops the autograd history from the cached tensor.
        activation.update({name: output.detach()})
    return _record


data, _ = image_datasets['train'][0]


output = model_ft(data)

act = activation['conv1'].squeeze()

fig, axarr = plt.subplots(act.size(0))

for idx in range(act.size(0)):


RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_30848/ in <module>
     11 data.unsqueeze_(0)
---> 13 output = model_ft(data)
     15 act = activation['conv1'].squeeze()

/scratch3/venv/fashcomp/lib/python3.8/site-packages/torch/nn/modules/ in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/scratch3/research/code/fashion/fashion-compatibility/ in forward(self, x)
     80     def forward(self, x):
---> 81         x = self.conv1(x)
     82         x = self.bn1(x)
     83         x = self.relu(x)

/scratch3/venv/fashcomp/lib/python3.8/site-packages/torch/nn/modules/ in _call_impl(self, *input, **kwargs)
   1069             input = bw_hook.setup_input_hook(input)
-> 1071         result = forward_call(*input, **kwargs)
   1072         if _global_forward_hooks or self._forward_hooks:
   1073             for hook in itertools.chain(

/scratch3/venv/fashcomp/lib/python3.8/site-packages/torch/nn/modules/ in forward(self, input)
    442     def forward(self, input: Tensor) -> Tensor:
--> 443         return self._conv_forward(input, self.weight, self.bias)
    445 class Conv3d(_ConvNd):

/scratch3/venv/fashcomp/lib/python3.8/site-packages/torch/nn/modules/ in _conv_forward(self, input, weight, bias)
    437                             weight, bias, self.stride,
    438                             _pair(0), self.dilation, self.groups)
--> 439         return F.conv2d(input, weight, bias, self.stride,
    440                         self.padding, self.dilation, self.groups)

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same ...

You are apparently using a CPUTensor as the input for a model pushed onto a GPU, so you would need to push the data to the GPU as well.

You are a saint!

I have a question about this code: when you iterate over kernels.size(0), are you plotting the kernels for each batch?

I have 6 conv layers, and the shapes of the filters for each layer are:
torch.Size([8, 1, 5, 5])
torch.Size([16, 8, 5, 5])
torch.Size([24, 16, 3, 3])
torch.Size([32, 24, 5, 5])
torch.Size([40, 32, 3, 3])
torch.Size([48, 40, 3, 3])

If a kernel has a shape like (16, 8, 5, 5), does that mean it has 16 filters, 8 channels, and a 5x5 spatial size?
My question is: would kernel plotting make more sense if we averaged across channels and plotted one image per filter? If I understand correctly, your code only works for one channel?

Yes, that’s correct. The conv kernels are defined as [out_channels=nb_kernels, in_channels, height, width].

I don’t know if this would make more sense than plotting each channel for each kernel separately. Usually I just flatten the first two dimensions and plot the 5x5 filters unless the in_channels are set to 3 and one could plot RGB filters.

If I use pretrained model parameters, how can I use the stride of this model's conv layers in conv_trans?

Could you please help me visualize the feature maps of the following model? I’m getting a KeyError at the activation line.

import torch

import torch.nn as nn

class conv_block(nn.Module):

def __init__(self, in_ch, out_ch):

    super(conv_block, self).__init__()

    self.conv = nn.Sequential(

        nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding = 1, bias=True),



        nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding = 1, bias=True),



def forward(self, x):

    x = self.conv(x)

    return x

class UNet(nn.Module):

def __init__(self, in_ch=1, out_ch=1):

    super(UNet, self).__init__()

    n1 = 64

    filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16]

    self.Maxpool1 = nn.MaxPool2d(kernel_size=3, stride=1, padding = 1)

    self.Maxpool2 = nn.MaxPool2d(kernel_size=3, stride=1,  padding = 1)

    self.Maxpool3 = nn.MaxPool2d(kernel_size=3, stride=1,  padding = 1)

    self.Maxpool4 = nn.MaxPool2d(kernel_size=3, stride=1,  padding = 1)

    self.Conv1 = conv_block(in_ch, filters[0])

    self.Conv2 = conv_block(filters[0], filters[1])

    self.Conv3 = conv_block(filters[1], filters[2])

    self.Conv4 = conv_block(filters[2], filters[3])

    self.Conv5 = conv_block(filters[3], filters[4])

def forward(self, x):

    print("Original Shape",  x.shape)

    e1 = self.Conv1(x)

    print("Conv", e1.shape)

    e2 = self.Maxpool1(e1)

    print("Max pool ", e2.shape)

    e2 = self.Conv2(e2)

    print("Conv", e2.shape)

    e3 = self.Maxpool2(e2)

    print("Max pool", e3.shape)

    e3 = self.Conv3(e3)

    print("Conv", e3.shape)

    e4 = self.Maxpool3(e3)

    print("Max pool", e4.shape)

    e4 = self.Conv4(e4)

    print("Conv", e4.shape)

    e5 = self.Maxpool4(e4)

    print("Max Pool", e5.shape)

    e5 = self.Conv5(e5)

    print("Conv", e5.shape)

    return e5

I’m getting this error. Please help

You cannot visualize a tensor in the shape [128, 128, 128] as image-like arrays are expected in plt.imshow, i.e. arrays with a channel size of 3 (RGB), 1 (grayscale), or without a channel dimension (grayscale or a random matrix).
If you want to visualize each channel separately, create subplots and call imshow on each of them using the corresponding channel of the tensor.

Thank you! will do so