Hi Everyone -
I created the following simple module to turn any block into a resnet block
class ResBlock(nn.Module):
    r""" Wrap any block (or list of layers) into a residual block.

    Computes ``multiplier * identity(x) + block(x)``. The identity path is
    optionally center-cropped so spatial sizes match the block's output, and
    passed through a 1x1 conv when the channel counts differ.

    Args:
        block: nn.Module, or list of layers (wrapped in nn.Sequential)
        in_ch (int): number of input channels
        out_ch (int): number of output channels produced by ``block``
        multiplier (float): scale applied to the identity path
        crop (int|bool):
            if <int>: crop that many pixels from each spatial edge
            elif True: compute the crop from the block's output size
            else: no cropping

    Links:
        TODO: I THINK I GOT THE IDEA FROM FASTAI SOMEWHERE
    """
    def __init__(self,
                 block,
                 in_ch,
                 out_ch,
                 multiplier=RES_MULTIPLIER,
                 crop=True):
        super(ResBlock, self).__init__()
        self.block = self._process_block(block)
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.multiplier = multiplier
        self.crop = crop
        if self.in_ch != self.out_ch:
            # 1x1 conv so the identity path matches the block's out channels
            self.ident_conv = nn.Conv2d(
                in_channels=self.in_ch,
                out_channels=self.out_ch,
                kernel_size=1)
        else:
            # None (not False) is the conventional "no module" sentinel
            self.ident_conv = None

    def forward(self, x):
        block_out = self.block(x)
        if self.crop:
            x = self._crop(x, block_out)
        if self.ident_conv is not None:
            x = self.ident_conv(x)
        return (self.multiplier * x) + block_out

    def _process_block(self, block):
        """ Wrap a list of layers in nn.Sequential; pass modules through. """
        if isinstance(block, list):
            return nn.Sequential(*block)
        return block

    def _crop(self, x, layers_out):
        """ Center-crop x to the spatial size of layers_out.

        When ``crop`` was passed as True the crop amount is computed from the
        size difference on the first call and cached as an int.
        NOTE(review): assumes square feature maps — only the last dim is used.
        """
        # BUGFIX: the original tested `not isinstance(self.crop, int)`, but
        # bool is a subclass of int, so with crop=True the amount was never
        # computed and the slice `x[:,:,True:-True]` silently cropped 1 pixel.
        if self.crop is True:
            out_size = layers_out.size()[-1]
            x_size = x.size()[-1]
            self.crop = (x_size - out_size) // 2
        if not self.crop:
            # BUGFIX: a crop of 0 must be a no-op; slicing with [0:-0]
            # (i.e. [0:0]) would return an empty tensor.
            return x
        return x[:, :, self.crop:-self.crop, self.crop:-self.crop]
Using it I found that the number of parameters in my model was almost doubling! After playing around a bit I realized it was because the conv-blocks in my model were being set as model properties before being passed into ResBlock.
In case that isn’t clear there is an oversimplified example below where ResBlock
has been replaced with PassThrough
and the model is a single Conv2d
layer.
My question, then, is: are Net2 and Net3 below really different? Or is this a bug in torchsummary? I acknowledge that the Net3 version is probably just bad practice, but I wanted to understand what PyTorch is doing and whether these really are different networks.
Here’s the oversimplified code:
# Spatial size of the dummy input image used by all the summary() calls below.
SIZE=32
import torch.nn as nn
from torchsummary import summary
class PassThrough(nn.Module):
    """ Identity wrapper: forwards the input straight through the wrapped block.

    Does nothing on its own — it stands in for a more interesting wrapper
    (e.g. a resnet wrapper around a conv_block) to isolate how wrapping a
    layer affects module registration.
    """
    def __init__(self, block):
        super(PassThrough, self).__init__()
        self.block = block

    def forward(self, x):
        out = self.block(x)
        return out
class Net1(nn.Module):
    """ Baseline: a single Conv2d registered directly as a submodule. """
    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)

    def forward(self, x):
        out = self.conv1(x)
        return out
# Baseline summary: a single Conv2d, 456 params (see output below).
print('\n'*3,'Net1')
summary(Net1(),input_size=(3,SIZE,SIZE))
class Net2(nn.Module):
    """ Same single conv as Net1, but built in a local variable and wrapped
    in PassThrough — so the conv is registered only under the wrapper, and
    the summary shows the wrapper plus one conv (456 params total).
    """
    def __init__(self):
        super(Net2, self).__init__()
        conv = nn.Conv2d(3, 6, 5)
        self.conv1 = PassThrough(conv)

    def forward(self, x):
        out = self.conv1(x)
        return out
# Wrapped conv, built from a local variable: still 456 params (see output below).
print('\n'*3,'Net2')
summary(Net2(),input_size=(3,SIZE,SIZE))
class Net3(nn.Module):
    """ Registers the SAME Conv2d object twice: once directly as ``self.c1``
    and again inside the PassThrough wrapper. The summary then lists the
    conv twice — the question is whether the parameters are duplicated or
    shared between the two registrations.
    """
    def __init__(self):
        super(Net3, self).__init__()
        self.c1 = nn.Conv2d(3, 6, 5)
        self.conv1 = PassThrough(self.c1)

    def forward(self, x):
        out = self.conv1(x)
        return out
# Conv registered both directly and via the wrapper: torchsummary reports
# it twice and counts 912 params (see output below).
print('\n'*3,'Net3')
summary(Net3(),input_size=(3,SIZE,SIZE))
Here’s the output
Net1
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 456
================================================================
Total params: 456
Trainable params: 456
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.04
Params size (MB): 0.00
Estimated Total Size (MB): 0.05
----------------------------------------------------------------
Net2
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 456
PassThrough-2 [-1, 6, 28, 28] 0
================================================================
Total params: 456
Trainable params: 456
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.07
Params size (MB): 0.00
Estimated Total Size (MB): 0.09
----------------------------------------------------------------
Net3
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 6, 28, 28] 456
Conv2d-2 [-1, 6, 28, 28] 456
PassThrough-3 [-1, 6, 28, 28] 0
================================================================
Total params: 912
Trainable params: 912
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.11
Params size (MB): 0.00
Estimated Total Size (MB): 0.12
----------------------------------------------------------------
Thanks!