Hey there! I’m currently trying to extract the feature maps of my convolutional layers but am having trouble doing so. As for what I’m doing: I have a UNET that looks like this when printing out a summary with torchinfo:
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
UNET [1, 1, 512, 512] --
├─ModuleList: 1-10 -- (recursive)
│ └─DoubleConv: 2-1 [1, 64, 512, 512] --
│ │ └─Sequential: 3-1 [1, 64, 512, 512] 40,000
├─MaxPool2d: 1-2 [1, 64, 256, 256] --
├─Dropout: 1-3 [1, 64, 256, 256] --
├─ModuleList: 1-10 -- (recursive)
│ └─DoubleConv: 2-2 [1, 128, 256, 256] --
│ │ └─Sequential: 3-2 [1, 128, 256, 256] 221,696
├─MaxPool2d: 1-5 [1, 128, 128, 128] --
├─Dropout: 1-6 [1, 128, 128, 128] --
├─ModuleList: 1-10 -- (recursive)
│ └─DoubleConv: 2-3 [1, 256, 128, 128] --
│ │ └─Sequential: 3-3 [1, 256, 128, 128] 885,760
├─MaxPool2d: 1-8 [1, 256, 64, 64] --
├─Dropout: 1-9 [1, 256, 64, 64] --
├─ModuleList: 1-10 -- (recursive)
│ └─DoubleConv: 2-4 [1, 512, 64, 64] --
│ │ └─Sequential: 3-4 [1, 512, 64, 64] 3,540,992
├─MaxPool2d: 1-11 [1, 512, 32, 32] --
├─Dropout: 1-12 [1, 512, 32, 32] --
├─DoubleConv: 1-13 [1, 1024, 32, 32] --
│ └─Sequential: 2-5 [1, 1024, 32, 32] --
│ │ └─Conv2d: 3-5 [1, 1024, 32, 32] 4,718,592
│ │ └─BatchNorm2d: 3-6 [1, 1024, 32, 32] 2,048
│ │ └─ReLU: 3-7 [1, 1024, 32, 32] --
│ │ └─Conv2d: 3-8 [1, 1024, 32, 32] 9,437,184
│ │ └─BatchNorm2d: 3-9 [1, 1024, 32, 32] 2,048
│ │ └─ReLU: 3-10 [1, 1024, 32, 32] --
├─ModuleList: 1-22 -- (recursive)
│ └─ConvTranspose2d: 2-6 [1, 512, 64, 64] 2,097,664
├─Dropout: 1-15 [1, 512, 64, 64] --
├─ModuleList: 1-22 -- (recursive)
│ └─DoubleConv: 2-7 [1, 512, 64, 64] --
│ │ └─Sequential: 3-11 [1, 512, 64, 64] 7,079,936
│ └─ConvTranspose2d: 2-8 [1, 256, 128, 128] 524,544
├─Dropout: 1-17 [1, 256, 128, 128] --
├─ModuleList: 1-22 -- (recursive)
│ └─DoubleConv: 2-9 [1, 256, 128, 128] --
│ │ └─Sequential: 3-12 [1, 256, 128, 128] 1,770,496
│ └─ConvTranspose2d: 2-10 [1, 128, 256, 256] 131,200
├─Dropout: 1-19 [1, 128, 256, 256] --
├─ModuleList: 1-22 -- (recursive)
│ └─DoubleConv: 2-11 [1, 128, 256, 256] --
│ │ └─Sequential: 3-13 [1, 128, 256, 256] 442,880
│ └─ConvTranspose2d: 2-12 [1, 64, 512, 512] 32,832
├─Dropout: 1-21 [1, 64, 512, 512] --
├─ModuleList: 1-22 -- (recursive)
│ └─DoubleConv: 2-13 [1, 64, 512, 512] --
│ │ └─Sequential: 3-14 [1, 64, 512, 512] 110,848
├─Conv2d: 1-23 [1, 1, 512, 512] 65
Now, after training my model for n epochs, I want to get the outputs (feature maps) of the convolutional layers, as described in e.g. Visualizing Feature Maps using PyTorch | by Ravi vaishnav | Medium.
When I check for my convolution layers by looking at the list of model.children(), I get the following output:
[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, padding_mode=reflect),
Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1))]
Total convolutional layers: 19
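For reference, the same 19 layers can also be collected with a quick filter over model.modules(); this is just a sketch (conv_layers_sketch is a made-up name), not what my actual script below does, and it returns the layers in registration order rather than the order the forward pass applies them:

import torch.nn as nn

# `model` here is the loaded UNET from the script further down.
conv_layers_sketch = [m for m in model.modules() if isinstance(m, nn.Conv2d)]
print(f"Total convolutional layers: {len(conv_layers_sketch)}")  # 19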
This looks as expected to me. Then I load an image in PIL Image format, apply the transformations (resize, to-tensor, and normalize), unsqueeze it since I have a batch size of 1, and send it to the device.
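Roughly, the preprocessing looks like this (a simplified, self-contained sketch with a dummy image; the exact transform and file paths are in the full script at the end):

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

dummy = Image.fromarray(np.zeros((600, 600, 3), dtype=np.uint8))  # stand-in for my TIFF tile
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0., 0., 0.], std=[1., 1., 1.]),
])
x = transform(dummy).unsqueeze(0)  # [1, 3, 512, 512] after adding the batch dimension
x = x.to("mps" if torch.backends.mps.is_available() else "cpu")
print(x.shape)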
Now, when I try to put my image through those convolution layers with

outputs = []
names = []
for layer in conv_layers[0:]:
    image = layer(image)
    outputs.append(image)
    names.append(str(layer))
print(len(outputs))

# print feature maps
for feature_map in outputs:
    print(feature_map.shape)
I always get feature maps of shape [1, n_features, 512, 512], whereas I’d expect the spatial resolution to shrink until I reach the bottleneck of my UNET, as shown in the torchinfo.summary() output above.
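Here is a minimal reproduction of the shapes I’m seeing, using a dummy tensor instead of my image and a single conv layer configured like the ones printed above:

import torch
import torch.nn as nn

x = torch.randn(1, 3, 512, 512)  # dummy input with the same shape as my preprocessed image
conv = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False, padding_mode="reflect")
print(conv(x).shape)  # torch.Size([1, 64, 512, 512]) -- the spatial size stays 512x512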
I don’t exactly understand why this is happening, since the person in the tutorial doesn’t apply any pooling layers either, which is why I’m asking for help. This is what my code looks like up to that point:
import torch
import torch.nn as nn
from tifffile import imread
from torchvision import transforms
import matplotlib.pyplot as plt
from model import UNET
from pprint import pprint
from PIL import Image
IMG_CHANNELS = 3
device = None
if torch.backends.mps.is_available():
    device = torch.device("mps")

weight_path = "epoch_18.pth.tar"

def initialize_model(weight_path: str, device: str, im_channels):
    weights = torch.load(weight_path, map_location=torch.device("mps"))
    weights = weights["state_dict"]
    model = UNET(in_chnl = im_channels)
    model.load_state_dict(weights)
    model.to(device)
    return model
# Load weights and model
model = initialize_model(weight_path, device, im_channels=IMG_CHANNELS)
# Having to transform the image to convert to tensor at a later point
transform = transforms.Compose([
transforms.Resize((512,512)),
transforms.ToTensor(),
transforms.Normalize(mean=0., std=1.)
])
# Load model parameters
model_weights = []
conv_layers = []
model_children = list(model.children())
counter = 0
for i in range(len(model_children)):
    if type(model_children[i]) == nn.modules.container.ModuleList:
        for j in range(len(model_children[i])):
            for child in model_children[i][j].children():
                if type(child) == nn.Sequential:
                    for grandchild in child:
                        if type(grandchild) == nn.Conv2d:
                            counter += 1
                            model_weights.append(grandchild.weight)
                            conv_layers.append(grandchild)
    elif type(model_children[i]) == nn.Conv2d:
        counter += 1
        model_weights.append(model_children[i].weight)
        conv_layers.append(model_children[i])

# hardcoded for bottleneck because there was no other option?
for i in model_children[-2].children():
    for child in i:
        if type(child) == nn.Conv2d:
            counter += 1
            model_weights.append(child.weight)
            conv_layers.append(child)
# Having to re-order due to appending in wrong order
conv_layers = [
conv_layers[8], # 3 - 64
conv_layers[9], # 64-64
conv_layers[10], # 64 - 128
conv_layers[11], # 128 - 128
conv_layers[12], # 128 - 256
conv_layers[13], # 256 - 256
conv_layers[14], # 256 - 512
conv_layers[15], # 512 - 512
conv_layers[17], # 512 - 1024
conv_layers[18], # 1024 - 1024
conv_layers[0], # 1024 - 512
conv_layers[1], # 512 - 512
conv_layers[2], # 512 - 256
conv_layers[3], # 256 - 256
conv_layers[4], # 256 - 128
conv_layers[5], # 128 - 128
conv_layers[6], # 128 - 64
conv_layers[7], # 64 - 64
conv_layers[16] # 64 - 1
]
pprint(conv_layers)
print(f"Total convolutional layers: {counter}")
# # ################################################################################
if __name__ == "__main__":
    image = imread("tile2560_5632.tif")
    print(f"Image shape: {image.shape}")
    image = image[:,:,:3]
    image = Image.fromarray(image)
    print(f"Image shape: {image.size}")
    image = transform(image)  # convert to tensor and normalize
    image = image.unsqueeze(0)  # add batch dimension (1)
    print(f"Image shape after unsqueezing: {image.size()}. \nSending image to device: {device}.")
    image = image.to(device)  # send image to device

    outputs = []
    names = []
    # with torch.no_grad():
    for layer in conv_layers[0:]:
        image = layer(image)
        outputs.append(image)
        names.append(str(layer))

    print(f"Processed image in {len(outputs)} layers.\n\n")
    print("Printing feature map shapes:")
    for feature_map in outputs:
        print(feature_map.shape)

    processed = []
    for feature_map in outputs:
        feature_map = feature_map.squeeze(0)
        gray_scale = torch.sum(feature_map, 0)
        gray_scale = gray_scale / feature_map.shape[0]
        processed.append(gray_scale.data.cpu().numpy())

    for fm in processed:
        print(fm.shape)
If something is not clear, please let me know and I’ll try to elaborate as best I can. Thanks in advance!