Hi Everyone -
I created the following simple module to turn any block into a ResNet block:
import torch.nn as nn

RES_MULTIPLIER = 1.0  # assumed default for the identity multiplier (defined elsewhere in my code)

class ResBlock(nn.Module):
    r""" ResBlock

    Args:
        block: block or list of layers
        in_ch <int>: number of input channels
        out_ch <int>: number of output channels
        multiplier <float [RES_MULTIPLIER]>: identity multiplier
        crop: <int|bool>
            if <int> cropping=crop
            else if True calculate cropping
            else no cropping

    Links:
        TODO: I THINK I GOT THE IDEA FROM FASTAI SOMEWHERE
    """
    def __init__(self,
                 block,
                 in_ch,
                 out_ch,
                 multiplier=RES_MULTIPLIER,
                 crop=True):
        super(ResBlock, self).__init__()
        self.block = self._process_block(block)
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.multiplier = multiplier
        self.crop = crop
        if self.in_ch != self.out_ch:
            # 1x1 conv so the identity path matches the block's output channels
            self.ident_conv = nn.Conv2d(
                in_channels=self.in_ch,
                out_channels=self.out_ch,
                kernel_size=1)
        else:
            self.ident_conv = False

    def forward(self, x):
        block_out = self.block(x)
        if self.crop:
            x = self._crop(x, block_out)
        if self.ident_conv:
            x = self.ident_conv(x)
        return (self.multiplier * x) + block_out

    def _process_block(self, block):
        # accept either a module or a list of layers
        if isinstance(block, list):
            return nn.Sequential(*block)
        else:
            return block

    def _crop(self, x, layers_out):
        # bool is a subclass of int, so check for bool explicitly:
        # crop=True means "calculate the cropping from the size difference"
        if isinstance(self.crop, bool):
            out_size = layers_out.size()[-1]
            x_size = x.size()[-1]
            self.crop = (x_size - out_size) // 2
        if not self.crop:
            return x
        return x[:, :, self.crop:-self.crop, self.crop:-self.crop]
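For context, usage looks something like this (a minimal sketch; the layers and channel sizes are just made-up examples):

import torch

conv_block = nn.Sequential(
    nn.Conv2d(16, 32, kernel_size=3),
    nn.ReLU(),
    nn.Conv2d(32, 32, kernel_size=3),
    nn.ReLU())

res = ResBlock(conv_block, in_ch=16, out_ch=32)
out = res(torch.randn(1, 16, 64, 64))  # block output is 60x60, so the identity path gets cropped
print(out.shape)                       # torch.Size([1, 32, 60, 60])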
Using it, I found that the number of parameters in my model was almost doubling! After playing around a bit, I realized it was because the conv blocks in my model were being assigned as module attributes before being passed into ResBlock.
In case that isn’t clear, there is an oversimplified example below where ResBlock has been replaced with PassThrough and the model is a single Conv2d layer.
My question is: are Net2 and Net3 below really different, or is this a bug in torchsummary? I acknowledge that the Net3 version is probably just bad practice, but I wanted to understand what PyTorch is doing and whether these really are different networks.
Here’s the oversimplified code:
import torch.nn as nn
from torchsummary import summary

SIZE = 32

class PassThrough(nn.Module):
    """ return the original block/layer without any changes

    This module does nothing. In a real-world scenario it would be replaced with something more
    interesting, like a resnet wrapper for a conv_block.
    """
    def __init__(self, block):
        super(PassThrough, self).__init__()
        self.block = block

    def forward(self, x):
        return self.block(x)

class Net1(nn.Module):
    """ a dumb test
    """
    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)

    def forward(self, x):
        return self.conv1(x)

print('\n'*3, 'Net1')
summary(Net1(), input_size=(3, SIZE, SIZE))

class Net2(nn.Module):
    """ This network, as expected, is the same as Net1, but you see the PassThrough wrapper in the summary
    """
    def __init__(self):
        super(Net2, self).__init__()
        c1 = nn.Conv2d(3, 6, 5)         # local variable: only registered through the wrapper
        self.conv1 = PassThrough(c1)

    def forward(self, x):
        return self.conv1(x)

print('\n'*3, 'Net2')
summary(Net2(), input_size=(3, SIZE, SIZE))

class Net3(nn.Module):
    """ This network shows each conv block twice. Is this real or a bug in torchsummary?
    """
    def __init__(self):
        super(Net3, self).__init__()
        self.c1 = nn.Conv2d(3, 6, 5)        # registered directly as an attribute...
        self.conv1 = PassThrough(self.c1)   # ...and again inside the wrapper

    def forward(self, x):
        return self.conv1(x)

print('\n'*3, 'Net3')
summary(Net3(), input_size=(3, SIZE, SIZE))
Here’s the output:
Net1
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 456
================================================================
Total params: 456
Trainable params: 456
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.04
Params size (MB): 0.00
Estimated Total Size (MB): 0.05
----------------------------------------------------------------
Net2
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 456
PassThrough-2 [-1, 6, 28, 28] 0
================================================================
Total params: 456
Trainable params: 456
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.07
Params size (MB): 0.00
Estimated Total Size (MB): 0.09
----------------------------------------------------------------
Net3
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 456
Conv2d-2 [-1, 6, 28, 28] 456
PassThrough-3 [-1, 6, 28, 28] 0
================================================================
Total params: 912
Trainable params: 912
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.11
Params size (MB): 0.00
Estimated Total Size (MB): 0.12
----------------------------------------------------------------
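For what it’s worth, one way to sanity-check this would be to count the parameters directly and test whether the two references in Net3 point at the same weight tensor (a minimal sketch using the Net2/Net3 definitions above):

def count_params(model):
    # model.parameters() de-duplicates shared tensors, so each weight is counted once
    return sum(p.numel() for p in model.parameters())

net2, net3 = Net2(), Net3()
print('Net2 params:', count_params(net2))
print('Net3 params:', count_params(net3))

# does Net3 hold two copies of the conv weights, or two references to the same tensor?
print(net3.c1.weight is net3.conv1.block.weight)

I’m not sure whether torchsummary de-duplicates modules the same way, which is part of what I’m asking.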
Thanks!