Changing input size

Hi,

I have a model from GitHub that assumes input images are 128×128:

import torch
import torch.nn as nn


class MISLNet_v2(nn.Module):
    def __init__(self, num_classes, is_constrained=False):
        super().__init__()
        self.is_constrained = is_constrained
        if is_constrained:
            self.conv0 = nn.Conv2d(in_channels=3, out_channels=5, kernel_size=(7, 7), padding=(3, 3), stride=(1, 1))
            self.conv1 = nn.Conv2d(in_channels=5, out_channels=96, kernel_size=(7, 7), padding=(3, 3), stride=(2, 2))
        else:
            self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=(7, 7), padding=(3, 3), stride=(2, 2))

        self.bn1 = nn.BatchNorm2d(num_features=96)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=64, kernel_size=(5, 5), padding=(2, 2))
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(5, 5), padding=(2, 2))
        self.bn3 = nn.BatchNorm2d(num_features=64)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(1, 1))
        self.bn4 = nn.BatchNorm2d(num_features=128)

        # the size 2048 is for inputs of size 3x128x128. FIXME: make it dynamic by passing input dims to __init__
        self.fcn5 = nn.Linear(in_features=2048, out_features=1024)
        self.dropout5 = nn.Dropout(p=0.3)
        self.fcn6 = nn.Linear(in_features=1024, out_features=200)
        self.dropout6 = nn.Dropout(p=0.3)
        self.fcn7 = nn.Linear(in_features=200, out_features=num_classes)

    def forward(self, cnn_inputs):
        x = self.extract_features(cnn_inputs)
        x = self._classify_features(x)
        return x

    def extract_features(self, cnn_inputs):

        if self.is_constrained:
            x = self.conv0(cnn_inputs)
            x = self.conv1(x)
        else:
            x = self.conv1(cnn_inputs)

        # Block 1
        x = self.bn1(x)
        x = nn.ReLU()(x)
        x = nn.MaxPool2d(2, 2)(x)

        # Block 2
        x = self.conv2(x)
        x = self.bn2(x)
        x = nn.ReLU()(x)
        x = nn.MaxPool2d(2, 2)(x)

        # Block 3
        x = self.conv3(x)
        x = self.bn3(x)
        x = nn.ReLU()(x)
        x = nn.MaxPool2d(2, 2)(x)

        # Block 4
        x = self.conv4(x)
        x = self.bn4(x)
        x = nn.ReLU()(x)
        x = nn.MaxPool2d(2, 2)(x)

        # Block 5
        x = torch.flatten(x, start_dim=1)
        x = self.fcn5(x)
        x = nn.Tanh()(x)
        x = self.dropout5(x)

        return x

    def _classify_features(self, features):
        # Block 6
        x = self.fcn6(features)
        x = nn.Tanh()(x)
        x = self.dropout6(x)

        # Block 7
        x = self.fcn7(x)
        # x = nn.Softmax(dim=1)(x)  # The CrossEntropy criterion also computes the SoftMax

        return x

My images have different heights and widths, for example 2560×1920, 960×1280, 1920×2560, etc. How can I feed the model images at their real sizes (when I set batch_size=1)? There is a comment in the code: “make it dynamic by passing input dims to the __init__”. How should I do that?

Can you please also explain is_constrained in __init__? What does it do? I’ve googled, but didn’t find an understandable answer.

You won’t be able to make the inputs “dynamic” by passing a different in_features dimension to the __init__ method, since that new feature size would again fit only one specific input shape. For example, a 2560×1920 input reaches the flatten as a 128×80×60 activation (614,400 values), while a 960×1280 input produces yet another size.
To allow for variable input shapes, use an adaptive pooling layer (e.g. nn.AdaptiveAvgPool2d), which lets you define the spatial output size of the activation before passing it to the first linear layer.
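
Here is a minimal sketch of the idea (my example, not code from your repo): for a 128×128 input, the network’s stride-2 conv plus four 2×2 max pools reduce the feature map to 128 channels at 4×4, i.e. 128 · 4 · 4 = 2048 values, which is exactly fcn5’s in_features. Pooling every input to (4, 4) therefore keeps the existing linear layer valid for any image size:

import torch
import torch.nn as nn

# Force a fixed 4x4 spatial size so 128 * 4 * 4 = 2048 always matches fcn5
pool = nn.AdaptiveAvgPool2d((4, 4))
fcn5 = nn.Linear(in_features=128 * 4 * 4, out_features=1024)

for h, w in [(128, 128), (2560, 1920), (960, 1280)]:
    # stand-in for the activation after Block 4 (the net downsamples ~32x)
    feat = torch.randn(1, 128, h // 32, w // 32)
    x = pool(feat)                     # -> [1, 128, 4, 4]
    x = torch.flatten(x, start_dim=1)  # -> [1, 2048]
    print(x.shape, fcn5(x).shape)

In your model this amounts to adding self.pool = nn.AdaptiveAvgPool2d((4, 4)) in __init__ and calling x = self.pool(x) right before torch.flatten in extract_features; fcn5 can then stay at in_features=2048.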

I don’t know what is_constrained represents in your model.
