Avoid parameter sharing when calling modules multiple times!

Hi everyone,
I am new to PyTorch. I am trying to build a classifier network that contains a mix of plain and residual modules. However, I got the following error:

ValueError: TracedModules don’t support parameter sharing between modules

when trying to visualize it with TensorBoard. Can you please help me understand where the shared parameters are, and how to avoid them?

My net:

import torch
import torch.nn as nn
import torch.nn.functional as F

# DCR block
class DCRBlock(nn.Module):

    def __init__(self, in_channel, out_channel):
        super(DCRBlock, self).__init__()
        
        self.in_channel = in_channel
        self.out_channel = out_channel

        self.conv1 = nn.Conv2d(self.in_channel, self.out_channel, 1, 1, 0, bias=False)
        self.conv2 = nn.Conv2d(self.out_channel, self.out_channel, 3, 1, 1, bias=False)
        self.conv3 = nn.Conv2d(self.out_channel, self.out_channel, 1, 1, 0, bias=False)
        self.conv4 = nn.Conv2d(self.out_channel, self.out_channel, 1, 1, 0, bias=False)
        self.conv5 = nn.Conv2d(self.out_channel, self.out_channel, 3, 1, 1, bias=False)
        self.conv6 = nn.Conv2d(self.out_channel, self.out_channel, 1, 1, 0, bias=False)
        
        self.conv_1 = nn.Sequential(
            self.conv1,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU()
            )
        
        self.conv_2 = nn.Sequential(
            self.conv2,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU()
            )
        
        self.conv_3 = nn.Sequential(
            self.conv3,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU()
            )
        
        self.conv_4 = nn.Sequential(
            self.conv4,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU()
            )
        
        self.conv_5 = nn.Sequential(
            self.conv5,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU()
            )
        
        self.conv_6 = nn.Sequential(
            self.conv6,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU()
            )
        
    def forward(self, x):
        
        out_1 = self.conv_1(x)
        cc_1 = out_1
        out_2 = self.conv_2(out_1)
        cc_2 = out_2
        out_3 = self.conv_3(out_2)
        cc_3 = out_3
        out = self.conv_4(out_3)
        out += cc_1
        out = self.conv_5(out)
        out += cc_2
        out = self.conv_6(out)
        
        return out + cc_3
        

# convolutional block  
class ConvBlock(nn.Module):

    def __init__(self, in_channel, out_channel):
        super(ConvBlock, self).__init__()
        
        self.in_channel = in_channel
        self.out_channel = out_channel
        
        self.conv1 = nn.Conv2d(self.in_channel, self.out_channel, 3, 1, 1, bias=False)
        self.conv2 = nn.Conv2d(self.out_channel, self.out_channel, 3, 1, 1, bias=False)
        self.conv3 = nn.Conv2d(self.out_channel, self.out_channel, 3, 1, 1, bias=False)
        
        self.convblock = nn.Sequential(
            self.conv1,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU(),           
            self.conv2,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU(),         
            self.conv3,
            nn.BatchNorm2d(self.out_channel),
            nn.PReLU(),     
            )
        
    def forward(self, x):        
        return self.convblock(x)


# create model architecture
class DCR_NET(nn.Module):

    def __init__(self, num_classes=6, channels=1):
        super(DCR_NET, self).__init__()
        self.classes = num_classes
        self.channels = channels
        self.convblock1 = ConvBlock(self.channels, 16)
        self.convblock2 = ConvBlock(16, 32)
        
        self.dcrblock1 = DCRBlock(32, 64)
        self.dcrblock2 = DCRBlock(64, 128) 
        self.dcrblock3 = DCRBlock(128, 256)
        
        self.fc1 = nn.Linear(4*4*256, 512, bias=True)
        self.fc2 = nn.Linear(512, 512, bias=True)
        self.fc3 = nn.Linear(512, self.classes, bias=True)
        #self.softmax = nn.LogSoftmax(dim=1)
        
        self.model = nn.Sequential(
            self.convblock1,
            nn.MaxPool2d(kernel_size=2, stride=2),
            self.convblock2,
            nn.MaxPool2d(kernel_size=2, stride=2),
            self.dcrblock1,
            nn.MaxPool2d(kernel_size=2, stride=2),
            self.dcrblock2,
            nn.MaxPool2d(kernel_size=2, stride=2),
            self.dcrblock3,
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(1),
            self.fc1,
            nn.Dropout(0.5, inplace=True),
            self.fc2,
            nn.Dropout(0.5, inplace=True),
            self.fc3
            )
        
        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.model(x)
        return out

Hi,

The problem is that self.conv1 and self.conv_1[0] are the same Module, so the model is effectively doing parameter sharing.
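You can verify this quickly: the same nn.Conv2d instance is registered under two names, so tools that walk the module tree see it twice. A minimal check (the duplicate-listing loop assumes a recent PyTorch where named_modules accepts remove_duplicate):

from collections import defaultdict

block = DCRBlock(3, 16)

# Both attributes reference the very same module object.
print(block.conv1 is block.conv_1[0])  # True

# List every name each submodule is registered under; more than
# one alias for the same id() reveals the sharing.
names = defaultdict(list)
for name, m in block.named_modules(remove_duplicate=False):
    names[id(m)].append(name)
for aliases in names.values():
    if len(aliases) > 1:
        print("shared:", aliases)  # e.g. shared: ['conv1', 'conv_1.0']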
If you only use self.conv_1 in the forward, you can simply avoid registering the conv layer a second time by not making it an attribute of self, like this (do the same for all the convs):

# Note that there is no self. here!
conv1 = nn.Conv2d(self.in_channel, self.out_channel, 1, 1, 0, bias=False)
self.conv_1 = nn.Sequential(
    conv1,
    nn.BatchNorm2d(self.out_channel),
    nn.PReLU(),
)
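Applied to all six convolutions, the DCRBlock.__init__ could look like the sketch below; the helper name conv_bn_prelu is just illustrative, and the forward stays exactly as it was. Each conv is now created locally and registered only once, inside its Sequential:

import torch.nn as nn

class DCRBlock(nn.Module):

    def __init__(self, in_channel, out_channel):
        super(DCRBlock, self).__init__()

        # Hypothetical helper: build conv -> BN -> PReLU so each conv
        # module is registered exactly once.
        def conv_bn_prelu(in_ch, out_ch, kernel, stride, pad):
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel, stride, pad, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.PReLU(),
            )

        self.conv_1 = conv_bn_prelu(in_channel, out_channel, 1, 1, 0)
        self.conv_2 = conv_bn_prelu(out_channel, out_channel, 3, 1, 1)
        self.conv_3 = conv_bn_prelu(out_channel, out_channel, 1, 1, 0)
        self.conv_4 = conv_bn_prelu(out_channel, out_channel, 1, 1, 0)
        self.conv_5 = conv_bn_prelu(out_channel, out_channel, 3, 1, 1)
        self.conv_6 = conv_bn_prelu(out_channel, out_channel, 1, 1, 0)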

Thank you very much for your swift reply. Indeed, that works, and the reported number of parameters dropped by half!
Still, I have one question: what is better practice when constructing a network architecture, building it with nn.Sequential in the constructor, or building it in the forward method?

Thanks,

You should always build the Modules in the __init__ and use them in the forward.
Just make sure to register each Module only once!
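To see why: a module created inside forward gets fresh, untrained parameters on every call, so the optimizer never sees them. A minimal sketch of the two patterns (Good and Bad are illustrative names):

import torch.nn as nn

class Good(nn.Module):
    def __init__(self):
        super(Good, self).__init__()
        self.fc = nn.Linear(8, 8)  # created once; parameters get trained

    def forward(self, x):
        return self.fc(x)

class Bad(nn.Module):
    def forward(self, x):
        # A brand-new Linear with random weights on every call;
        # its parameters are never registered or optimized.
        return nn.Linear(8, 8)(x)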
