How do I print output of each layer in sequential?

How do I print the output of each layer in this network?

 model = nn.Sequential(
        # Stage 1: stride-2 convolution (2x downsample)
        nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=2),
        nn.BatchNorm2d(num_features=32),
        nn.ReLU(),
        # Stage 2: stride-2 convolution (4x downsample)
        nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=2),
        nn.BatchNorm2d(num_features=32),
        nn.ReLU(),
        # Stage 3: stride-2 convolution (8x downsample)
        nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=2),
        nn.BatchNorm2d(num_features=32),
        nn.ReLU(),
        # Stages 4-6: three stride-1 3x3 convolutions, each followed by BatchNorm + ReLU
        nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1),
        nn.BatchNorm2d(num_features=32),
        nn.ReLU(),
        nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1),
        nn.BatchNorm2d(num_features=32),
        nn.ReLU(),
        nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1),
        nn.BatchNorm2d(num_features=32),
        nn.ReLU(),
        # Head: single-channel 6x6 convolution, no normalisation or activation
        nn.Conv2d(in_channels=32, out_channels=1, kernel_size=6, stride=1),
        )
2 Likes

I got the same question

do you have any solutions?

1 Like

Don’t use the sequential method for that.

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Two-convolution network written as an explicit ``nn.Module``.

    The layers are stored as attributes and applied by hand in ``forward``,
    so every intermediate result is an ordinary local that can be printed
    or inspected.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU to ``x`` and return the result."""
        # A print(x) placed after any of these steps shows that layer's output.
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Use this method for printing output of each layer.

1 Like

You could create your own layer for that:

class PrintLayer(nn.Module):
    """Identity module for debugging: prints its input and passes it on unchanged."""

    # No custom __init__ is required; nn.Module's own constructor does all the setup.

    def forward(self, x):
        # Put any print / debug statements here
        print(x)
        return x
    
# Small demo network with a PrintLayer spliced in after the first Linear layer.
model = nn.Sequential(
        nn.Linear(1, 5),
        PrintLayer(), # Add Print layer for debug
        nn.ReLU(),
        nn.Linear(5,1),
        nn.LogSigmoid(),
        )

# A plain tensor can be passed to the model directly; the deprecated
# Variable wrapper is no longer needed.
x = torch.randn(10, 1)
output = model(x)

I hope this helps!

28 Likes

I am also looking for a way to print model summary in PyTorch same as Keras.
`print(model)` in PyTorch only prints the layers defined in the `__init__` function of the class, not the model architecture defined in the `forward` function. Keras' `model.summary()` actually prints the model architecture with input and output shapes along with trainable and non-trainable parameters.
I haven’t found anything like that in PyTorch. I end up writing a bunch of print statements in the forward function to determine the input and output shapes.

!pip install torchsummary

from torchsummary import summary
model.to('cuda')  # always need to send the model to cuda before running this command
# (channels, H, W) is the shape of the input tensor fed to the model;
# summary uses it to create a random tensor of that shape and run the model.
summary(model, input_size=(channels, H, W))

Hi @ptrblck,

I used your method as follows:

import os
import random

import torch
import torch.nn as nn
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

from utils import load_state_dict_from_url

######## To print layer outputs ########
class PrintLayer(nn.Module):
    """Pass-through module that prints the tensor it receives.

    Intended to be placed between layers of an ``nn.Sequential`` so the
    output of the preceding layer is shown during the forward pass.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Debug output goes here; the input itself is returned untouched.
        print(x)
        return x

########################################


######## AlexNet model ########

# Names exported by a star-import of this module.
__all__ = ['AlexNet', 'alexnet']

# Download location of the pretrained checkpoint, keyed by model name;
# read by alexnet() when pretrained=True.
model_urls = { 'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth', }

class AlexNet(nn.Module):
    """AlexNet with a ``PrintLayer`` inserted after every convolution.

    NOTE: each ``PrintLayer`` occupies a slot in ``self.features``, so the
    convolutions sit at indices 0, 4, 8, 11 and 14 of that ``nn.Sequential``,
    and their parameter names (``features.<index>.weight`` / ``.bias``)
    follow those indices.

    Args:
        num_classes (int): Size of the final ``nn.Linear`` output.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # Convolutional trunk; a PrintLayer follows each Conv2d to show its output.
        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            PrintLayer(),  # layer output
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            PrintLayer(),  # layer output
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            PrintLayer(),  # layer output
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            PrintLayer(),  # layer output
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            PrintLayer(),  # layer output
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Pools the feature map to 6x6 so the classifier input is 256 * 6 * 6.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected head.
        classifier_layers = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run features -> avgpool -> flatten (from dim 1) -> classifier."""
        feature_map = self.features(x)
        pooled = self.avgpool(feature_map)
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)


def alexnet(pretrained=False, progress=True, **kwargs):
    r"""Build the AlexNet model from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
            (weights fetched from ``model_urls['alexnet']``)
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to the ``AlexNet`` constructor

    Returns:
        The constructed ``AlexNet`` instance.
    """
    net = AlexNet(**kwargs)
    if not pretrained:
        return net

    # Fetch the published checkpoint and copy its parameters into the model.
    weights = load_state_dict_from_url(model_urls['alexnet'], progress=progress)
    net.load_state_dict(weights)
    return net

###############################


############# Load Data ##############

# DataLoader settings
workers = 0
batchsize = 256

# Directory holding the validation images.
valdir = os.path.join( '/stor2/gakadam/GPU_RESEARCH/PyTorchFI/data/' + '/imagenet/', 'val')

# Per-channel mean / std applied after conversion to a tensor.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Preprocessing pipeline: resize, centre crop, to tensor, normalise.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

val_dataset = datasets.ImageFolder(valdir, preprocess)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batchsize,
    shuffle=False,
    num_workers=workers,
)

# Build the network and load the pretrained weights.
model = alexnet(pretrained=True)

######################################


########### Classification ############

# Take the first batch from the validation loader and split it into inputs and targets.
images, labels = batch = next(iter(val_loader))

# Inference only: no gradients are needed.
with torch.no_grad():
    output = model(images)
    # Softmax over dim 1, then print the arg-max index for every row.
    out_soft = output.softmax(dim=1)
    for probs in out_soft:
        print(probs.argmax())

#######################################


But I get following error:

Traceback (most recent call last):
  File "inference_print_layer_output.py", line 104, in <module>
    model = alexnet(pretrained=True)
  File "inference_print_layer_output.py", line 85, in alexnet
    model.load_state_dict(state_dict)
  File "/home/USER/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1045, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for AlexNet:
        Missing key(s) in state_dict: "features.4.weight", "features.4.bias", "features.11.weight", "features.11.bias", "features.14.weight", "features.14.bias".
        Unexpected key(s) in state_dict: "features.3.weight", "features.3.bias", "features.6.weight", "features.6.bias", "features.10.weight", "features.10.bias".
        size mismatch for features.8.weight: copying a param with shape torch.Size([256, 384, 3, 3]) from checkpoint, the shape in current model is torch.Size([384, 192, 3, 3]).
        size mismatch for features.8.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([384]).

There is size mismatch even though PrintLayer does not change the tensor.

How to rectify this?

PrintLayer doesn’t change the tensor shapes, but changes the model architecture, i.e. in an nn.Sequential block the number of modules is different and thus you would run into this error.
You could use forward hooks to print the shape in this use case if you want to load a pretrained state_dict.

1 Like

Sorry mistakenly deleted – I wanted to add that I found the problem. I changed the model config – and the pretrained model has a different architecture,

I want the layer output. For shape, I think most people suggest torchsummary, which I am not looking for. Thanks though.

This can be achieved by forward hooks.

1 Like

Can you please briefly explain the parameters channels, H and W used in the summary() function?

These parameters represent the input shape of a tensor to this model and are used by summary to create a random tensor in this shape in the model execution and graph creation.

Screenshot from 2022-09-11 19-26-35
Can you explain the output size in the table? What does -1 signify?

The -1 is used as a placeholder to indicate that the size of this dimension is variable. In your example, the batch dimension is shown as variable because the actual batch size can be changed.

1 Like

Found this thread after a web search to see if there was already something available.

Got to a solution that I think is a slight improvement over previous answers.

Combine a Printer layer with a SummarySequential.

class Printer(torch.nn.Module):
    """Pass-through module that prints the shape of its input.

    Args:
        layer: The layer this printer reports on; it is stored only so it
            can be shown next to the shape in the printed line.
    """

    def __init__(self, layer):
        super(Printer, self).__init__()
        self.layer = layer

    def forward(self, x):
        # Report the shape of the incoming tensor and the layer it is attributed to.
        report = f'shape: {x.shape}, layer: {self.layer}'
        print(report)
        return x

class SummarySequential(torch.nn.Module):
    """Chain the given layers in a ``torch.nn.Sequential``, adding a ``Printer`` after each.

    Args:
        *args: The layers, in the order they should be applied.
    """

    def __init__(self, *args):
        super().__init__()
        # Interleave: layer_0, Printer(layer_0), layer_1, Printer(layer_1), ...
        interleaved = []
        for layer in args:
            interleaved.append(layer)
            interleaved.append(Printer(layer))
        self.layer = torch.nn.Sequential(*interleaved)

    def forward(self, x):
        """Run ``x`` through the interleaved chain and return the final output."""
        return self.layer(x)

This lets you change a torch.nn.Sequential to a SummarySequential without having to modify any other lines (such as weaving Print layers in between each of the layers that you pass as arguments to torch.nn.Sequential).

 self.model = SummarySequential(
            torch.nn.Conv2d(...),
            torch.nn.MaxPool2d(...),
            torch.nn.ReLU(),  # pass a module instance, not the class itself
            torch.nn.Linear(...)
        )

rather than

 self.model = torch.nn.Sequential(
            torch.nn.Conv2d(...),
            Print(...),
            torch.nn.MaxPool2d(...),
            Print(...),
            torch.nn.ReLU(),  # pass a module instance, not the class itself
            Print(...),
            torch.nn.Linear(...)
        )

Output:

shape: torch.Size([8, 16, 256, 256]), layer: Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
shape: torch.Size([8, 16, 86, 86]), layer: MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=1, dilation=1, ceil_mode=False)
shape: torch.Size([8, 16, 86, 86]), layer: ReLU()
shape: torch.Size([8, 32, 43, 43]), layer: Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
shape: torch.Size([8, 32, 15, 15]), layer: MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=1, dilation=1, ceil_mode=False)
shape: torch.Size([8, 32, 15, 15]), layer: ReLU()
...
1 Like