Modifying a backbone network to accept larger input size

I would like to modify the source here:

to accept a larger input image size of 224x224 instead of 112x112.

I am not sure what the correct place to start is for this, but looking through the code, I was wondering whether simply changing the linear layer here:

will do the trick?

Hi there @James_W
You are right — just changing the linear layer would work for you. I went ahead and added an input-size argument that changes the linear layer according to the network's input size. You can find the changed bits and the declaration below:

class GDC(Module):
    """Global Depthwise Convolution embedding head.

    Applies a 7x7 depthwise "linear" conv over the 512-channel feature map,
    flattens, projects to ``embedding_size`` with a bias-free Linear, and
    batch-normalizes the embedding.

    Args:
        embedding_size: Dimensionality of the output embedding.
        input_size: Side length of the square network input (e.g. 112 or 224).
            Must be a positive multiple of 16 and at least 112, so the
            backbone's spatial arithmetic below is exact.

    Raises:
        ValueError: If ``input_size`` is not a multiple of 16 or is too small
            for the 7x7 depthwise conv to produce a valid output.
    """

    def __init__(self, embedding_size, input_size=112):
        super(GDC, self).__init__()
        self.conv_6_dw = Linear_block(
            512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)
        )
        self.conv_6_flatten = Flatten()
        # The backbone downsamples by 16x (four stride-2 convs), and the
        # 7x7 depthwise conv above (stride 1, no padding) then shrinks each
        # spatial side by 6. Deriving the flattened width from input_size
        # generalizes the original hard-coded pair (112 -> 512, 224 -> 32768)
        # to any valid input size while staying backward-compatible.
        if input_size % 16 != 0:
            raise ValueError(
                f"input_size must be a multiple of 16, got {input_size}"
            )
        feat_side = input_size // 16 - 6
        if feat_side < 1:
            raise ValueError(
                f"input_size {input_size} is too small; minimum is 112"
            )
        self.linear = Linear(512 * feat_side * feat_side, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size)

    def forward(self, x):
        """Map a (N, 512, H, W) feature map to a (N, embedding_size) embedding."""
        x = self.conv_6_dw(x)
        x = self.conv_6_flatten(x)
        x = self.linear(x)
        x = self.bn(x)
        return x

class MobileFaceNet(Module):
    """MobileFaceNet backbone producing a face embedding.

    Four stride-2 stages downsample the input by 16x before the
    ``output_layer`` (GNAP or GDC) reduces the feature map to an embedding.

    Args:
        input_size: Side length of the square input image (e.g. 112, 224);
            forwarded to the GDC head so its linear layer is sized correctly.
        embedding_size: Dimensionality of the output embedding.
        output_name: Embedding head to use, one of "GNAP" or "GDC".
        attention: Attention variant passed to the depthwise/residual blocks.
    """

    def __init__(
        self, input_size, embedding_size=512, output_name="GDC", attention="none"
    ):
        super(MobileFaceNet, self).__init__()
        assert output_name in ["GNAP", "GDC"]

        # Stem: stride-2 conv then a depthwise conv (input_size -> /2).
        self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(
            64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64
        )
        # Stage 2: downsample (/4) + 4 residual blocks at 64 channels.
        self.conv_23 = Depth_Wise(
            64, 64, attention, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128
        )
        self.conv_3 = Residual(
            64, attention, num_block=4, groups=128,
            kernel=(3, 3), stride=(1, 1), padding=(1, 1),
        )
        # Stage 3: downsample (/8) + 6 residual blocks at 128 channels.
        self.conv_34 = Depth_Wise(
            64, 128, attention, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256
        )
        self.conv_4 = Residual(
            128, attention, num_block=6, groups=256,
            kernel=(3, 3), stride=(1, 1), padding=(1, 1),
        )
        # Stage 4: downsample (/16) + 2 residual blocks at 128 channels.
        self.conv_45 = Depth_Wise(
            128, 128, attention, kernel=(3, 3), stride=(2, 2), padding=(1, 1),
            groups=512,
        )
        self.conv_5 = Residual(
            128, attention, num_block=2, groups=256,
            kernel=(3, 3), stride=(1, 1), padding=(1, 1),
        )
        # Pointwise expansion to 512 channels before the embedding head.
        self.conv_6_sep = Conv_block(
            128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)
        )
        if output_name == "GNAP":
            self.output_layer = GNAP(512)
        else:
            self.output_layer = GDC(embedding_size, input_size=input_size)

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-initialize conv/linear weights; unit-initialize batchnorm."""
        for mod in self.modules():
            # Conv2d and Linear share the exact same init recipe, so one
            # branch covers both.
            if isinstance(mod, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(
                    mod.weight, mode="fan_out", nonlinearity="relu"
                )
                if mod.bias is not None:
                    mod.bias.data.zero_()
            elif isinstance(mod, nn.BatchNorm2d):
                mod.weight.data.fill_(1)
                mod.bias.data.zero_()

    def forward(self, x):
        """Run the backbone and return the (N, embedding_size) embedding."""
        trunk = (
            self.conv1,
            self.conv2_dw,
            self.conv_23,
            self.conv_3,
            self.conv_34,
            self.conv_4,
            self.conv_45,
            self.conv_5,
        )
        out = x
        for stage in trunk:
            out = stage(out)
        conv_features = self.conv_6_sep(out)
        return self.output_layer(conv_features)

And below is how to use the changed model:

# 112x112 input: GDC sees a 7x7 feature map -> flatten width 512.
model1 = MobileFaceNet(input_size=112)
# 224x224 input: GDC sees a 14x14 feature map -> flatten width 32768.
model2 = MobileFaceNet(input_size=224)