Can I insert new layers into a ResNet model without changing the names of the original ResNet layers?

I ask because, after inserting new layers between the original ResNet layers, the names of all subsequent layers of the ResNet model change.

See the self.sc_norms layers in the code below. The names of all layers after the first inserted sc_norm layer get changed. Please answer @ptrblck


class ResNet(nn.Module):
    """ResNet backbone with SCNorm layers and an optional style-biased branch.

    The content network is the usual stem (``conv1``/``bn1``/``relu``/
    ``maxpool``) followed by ``layer1`` .. ``layer4``, global average pooling,
    dropout and ``fc``.  An ``SCNorm`` is applied after each of the first
    three stages.  Those SCNorm modules are registered under the separate
    attribute ``sc_norms``, so the attribute names of the stem, the four
    stages and ``fc`` are not affected by them.

    When ``sagnet`` is true a second ("style") branch is built from copies of
    the stages ``style_stage`` .. 4, with an ``SCNorm`` appended after every
    copied stage except stage 4.  That branch is an ``nn.Sequential``, which
    names its children by position ("0", "1", ...), so each SCNorm entry takes
    an index of its own inside ``style_net``.

    Args:
        block: residual block class; must expose ``expansion`` and be
            accepted by ``self._make_layer`` (not defined in this snippet).
        layers: number of blocks for each of the four stages.
        num_classes: output size of both classifier heads.
        drop: dropout probability used before each classifier head.
        sagnet: build and run the style-biased branch when true.
        style_stage: 1-based index of the stage at which the style branch
            forks off the content network.

    Raises:
        ValueError: if ``sagnet`` is true and ``style_stage`` is not in 1..4.
    """

    def __init__(self, block, layers, num_classes=1000, drop=0, sagnet=True, style_stage=3):
        super().__init__()

        # forward() only creates the style input when the stage loop reaches
        # `style_stage`; any other value would fail much later with an
        # unbound `x_style`, so reject it up front.
        if sagnet and style_stage not in (1, 2, 3, 4):
            raise ValueError(
                "style_stage must be 1, 2, 3 or 4 when sagnet is enabled, "
                "got {!r}".format(style_stage)
            )

        self.drop = drop
        self.sagnet = sagnet
        self.style_stage = style_stage

        # Stem
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages of the content network
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Content classifier head
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(self.drop)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # SCNorm modules applied after layer1, layer2 and layer3 in forward().
        # NOTE(review): they are sized with the base widths 64/128/256; if
        # block.expansion != 1 the stage outputs presumably have more
        # channels than that -- confirm against SCNorm's constructor.
        self.sc_norms = nn.ModuleList([SCNorm(c) for c in [64, 128, 256]])

        if self.sagnet:
            # randomizations
            self.style_randomization = StyleRandomization()
            self.content_randomization = ContentRandomization()

            # style-biased network: copies of stages style_stage..4, with an
            # SCNorm after every copied stage except stage 4.  self.inplanes
            # is reset before each _make_layer call so the copied stage gets
            # the input width of the corresponding content stage.
            style_layers = []
            if style_stage == 1:
                self.inplanes = 64
                style_layers += [self._make_layer(block, 64, layers[0])]
                style_layers.append(SCNorm(64))                             # scnorm layer
            if style_stage <= 2:
                self.inplanes = 64 * block.expansion
                style_layers += [self._make_layer(block, 128, layers[1], stride=2)]
                style_layers.append(SCNorm(128))                             # scnorm layer
            if style_stage <= 3:
                self.inplanes = 128 * block.expansion
                style_layers += [self._make_layer(block, 256, layers[2], stride=2)]
                style_layers.append(SCNorm(256))                             # scnorm layer
            if style_stage <= 4:
                self.inplanes = 256 * block.expansion
                style_layers += [self._make_layer(block, 512, layers[3], stride=2)]

            # Children are named "0", "1", ... by position, so stage j of the
            # style branch ends up at index 2*j (SCNorm entries in between).
            self.style_net = nn.Sequential(*style_layers)

            # Style classifier head
            self.style_avgpool = nn.AdaptiveAvgPool2d(1)
            self.style_dropout = nn.Dropout(self.drop)
            self.style_fc = nn.Linear(512 * block.expansion, num_classes)

    def forward(self, x):
        """Run the content network and, if enabled, the style branch.

        Returns:
            A tuple ``(y, y_style)``: ``y`` is the output of ``fc``;
            ``y_style`` is the output of ``style_fc`` when ``sagnet`` is
            true and ``None`` otherwise.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        for i, layer in enumerate([self.layer1, self.layer2, self.layer3, self.layer4]):
            if self.sagnet and i + 1 == self.style_stage:
                # Fork right before the style stage: the style branch gets the
                # content-randomized features, the content network continues
                # with the style-randomized ones.
                x_style = self.content_randomization(x)
                x = self.style_randomization(x)
            x = layer(x)

            # Passing x through SCNorm of layer i (there is none after layer4)
            if i < len(self.sc_norms):
                x = self.sc_norms[i](x)

        # content output
        feat = self.avgpool(x)
        feat = feat.view(x.size(0), -1)
        feat = self.dropout(feat)
        y = self.fc(feat)

        if self.sagnet:
            # style output
            x_style = self.style_net(x_style)
            feat = self.style_avgpool(x_style)
            feat = feat.view(feat.size(0), -1)
            feat = self.style_dropout(feat)
            y_style = self.style_fc(feat)
        else:
            y_style = None

        return y, y_style

I don’t understand why layer names are changed after an additional layer is inserted so could you show an example, please?

(style_net): Sequential(
(0): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(1): SCNorm()
(2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
)
(style_avgpool): AdaptiveAvgPool2d(output_size=1)
(style_dropout): Dropout(p=0.5, inplace=False)
(style_fc): Linear(in_features=512, out_features=7, bias=True)
)

SCNorm() is the new layer I inserted. It shifts the indices of all subsequent layers by 1. This now causes a problem when applying the pretrained resnet18 weights to the subsequent ResNet layers.

But I achieved it in the following way (is this a correct way?):

    # Load the pretrained checkpoint and drop its classifier head.
    states = model_zoo.load_url(model_url)
    states.pop('fc.weight')
    states.pop('fc.bias')

    # initialize weights of style_net
    if model.sagnet:
        states_style = {}
        for i in range(model.style_stage, 5):
            prefix = 'layer' + str(i) + '.'
            # style_net is a Sequential that holds the copied stages with an
            # SCNorm after every stage except the last one, so the j-th copied
            # stage sits at index 2 * j.  Computing the index directly (rather
            # than substring-replacing '1.0' -> '2.0' on the keys) keeps this
            # correct for every style_stage and any number of blocks per stage.
            seq_index = str(2 * (i - model.style_stage))
            for k, v in states.items():
                if k.startswith(prefix):
                    # 'layer3.0.conv1.weight' -> '0.0.conv1.weight', etc.
                    states_style[seq_index + '.' + k[len(prefix):]] = v

        # strict=False: the SCNorm entries of style_net have no counterpart in
        # the pretrained checkpoint and keep their initial values.
        model.style_net.load_state_dict(states_style, strict=False)

Thanks for the example! Manipulating the keys in the state_dict should work, but maybe an easier option would be to load the pretrained state_dict before manipulating the model.