Defining a two-branch model

I want to build a model in which the encoder part has two branches (two encoders) that do not share weights, while the decoder part shares weights. I just found out that the following two ways of defining the model produce two different models. Could you help me figure out the reason? Thanks a lot.
The first one:
import numpy as np # linear algebra
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import init
import torch.nn.functional as F
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader

class Semi_siamese_(nn.Module):

    def __init__(self, in_channels=3, out_channels=1, init_features=32):
        super(Semi_siamese_, self).__init__()

        features = init_features
        # First encoder branch
        self.encoder1_1 = Semi_siamese_._block(in_channels, features, name="enc1_1")
        self.pool1_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder1_2 = Semi_siamese_._block(features, features * 2, name="enc1_2")

        # Second encoder branch (separate layers, so no weight sharing)
        self.encoder2_1 = Semi_siamese_._block(in_channels, features, name="enc2_1")
        self.pool2_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2_2 = Semi_siamese_._block(features, features * 2, name="enc2_2")

        # Shared decoder; the intermediate encoder stages that produce the
        # features * 16 channels expected by upconv4 are omitted from this
        # excerpt of the real model
        self.upconv4 = nn.ConvTranspose2d(features * 16, features * 8, kernel_size=2, stride=2)
        # Output projection used by decoder(); restored here (shape inferred
        # from usage) after it was lost when copying the code over
        self.conv = nn.Conv2d(features * 8, out_channels, kernel_size=1)

        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 3, 3, stride=1, padding=1),
            nn.BatchNorm2d(3),
            nn.ReLU(),
            nn.Conv2d(3, 2, 3, stride=1, padding=1),
        )

    def encoder1(self, x):
        enc1 = self.encoder1_1(x)
        enc2 = self.encoder1_2(self.pool1_1(enc1))
        return enc2

    def encoder2(self, x):
        enc1 = self.encoder2_1(x)
        enc2 = self.encoder2_2(self.pool2_1(enc1))
        return enc2

    def decoder(self, bottleneck):
        dec4 = self.upconv4(bottleneck)
        return torch.sigmoid(self.conv(dec4))

    def forward(self, x1, x2):
        x1_enc1 = self.encoder1(x1)
        x2_enc1 = self.encoder2(x2)

        out1 = self.decoder(x1_enc1)
        out2 = self.decoder(x2_enc1)

        dis = out1 - out2
        out = self.conv_layer(dis)
        return out

    @staticmethod
    def _block(in_channels, features, name):
        return nn.Sequential(
            OrderedDict(
                [
                    (
                        name + "conv1",
                        nn.Conv2d(
                            in_channels=in_channels,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm1", nn.BatchNorm2d(num_features=features)),
                    (name + "relu1", nn.ReLU(inplace=True)),
                    (
                        name + "conv2",
                        nn.Conv2d(
                            in_channels=features,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm2", nn.BatchNorm2d(num_features=features)),
                    (name + "relu2", nn.ReLU(inplace=True)),
                ]
            )
        )

The second one:
import numpy as np # linear algebra
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import init
import torch.nn.functional as F
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader

class semi_siamese_(nn.Module):

    def __init__(self, in_channels=3, out_channels=1, init_features=32):
        super(semi_siamese_, self).__init__()

        features = init_features
        # Single set of encoder layers; both encoder1() and encoder2()
        # below call these same modules
        self.encoder1_1 = semi_siamese_._block(in_channels, features, name="enc1_1")
        self.pool1_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder1_2 = semi_siamese_._block(features, features * 2, name="enc1_2")

        # Shared decoder; the intermediate encoder stages that produce the
        # features * 16 channels expected by upconv4 are omitted from this
        # excerpt of the real model
        self.upconv4 = nn.ConvTranspose2d(features * 16, features * 8, kernel_size=2, stride=2)
        # Output projection used by decoder(); restored here (shape inferred
        # from usage) after it was lost when copying the code over
        self.conv = nn.Conv2d(features * 8, out_channels, kernel_size=1)

        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 3, 3, stride=1, padding=1),
            nn.BatchNorm2d(3),
            nn.ReLU(),
            nn.Conv2d(3, 2, 3, stride=1, padding=1),
        )

    def encoder1(self, x):
        enc1 = self.encoder1_1(x)
        enc2 = self.encoder1_2(self.pool1_1(enc1))
        return enc2

    def encoder2(self, x):
        enc1 = self.encoder1_1(x)
        # uses self.pool1_1; the original snippet referenced self.pool2_1,
        # which is not defined in this model
        enc2 = self.encoder1_2(self.pool1_1(enc1))
        return enc2

    def decoder(self, bottleneck):
        dec4 = self.upconv4(bottleneck)
        return torch.sigmoid(self.conv(dec4))

    def forward(self, x1, x2):
        x1_enc1 = self.encoder1(x1)
        x2_enc1 = self.encoder2(x2)

        out1 = self.decoder(x1_enc1)
        out2 = self.decoder(x2_enc1)

        dis = out1 - out2
        out = self.conv_layer(dis)
        return out

    @staticmethod
    def _block(in_channels, features, name):
        return nn.Sequential(
            OrderedDict(
                [
                    (
                        name + "conv1",
                        nn.Conv2d(
                            in_channels=in_channels,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm1", nn.BatchNorm2d(num_features=features)),
                    (name + "relu1", nn.ReLU(inplace=True)),
                    (
                        name + "conv2",
                        nn.Conv2d(
                            in_channels=features,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm2", nn.BatchNorm2d(num_features=features)),
                    (name + "relu2", nn.ReLU(inplace=True)),
                ]
            )
        )

Here is the code I use to load and test the two models:

net1 = Semi_siamese_(in_channels=3, out_channels=3, init_features=32)
net2 = semi_siamese_(in_channels=3, out_channels=3, init_features=32)
print(net1)
print(net2)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_parameters(net1))
print(count_parameters(net2))
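To see which parameters are actually registered, I also print the parameter names (a quick check with named_parameters, nothing specific to this model):

for name, _ in net1.named_parameters():
    print("net1:", name)  # lists both encoder1_* and encoder2_* blocks
for name, _ in net2.named_parameters():
    print("net2:", name)  # lists only the encoder1_* blocks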

net1 actually does what I am looking for. But for net2 (the second way of defining the model), it seems the encoder part (the two encoders) shares the same layers, even though I define two encoder methods. Could you help me figure out why this happens? Really appreciate it.

I'm not sure if this is related to your issue, but the code above calls layers that are not defined in __init__.

Hi Johnson,

Thanks for pointing out this mistake. This code is part of my real model, extracted just for testing, so when I copied it over I missed part of the code. My bad. I have now fixed the issue you mentioned.

My confusion about these two versions is that, in the second one, I define two encoder methods (encoder1 and encoder2), so those two encoders should be two different layers (two branches). But when I print the model, the two encoders appear to be one identical encoder (a shared layer).

The only difference between the first version and the second is that, in the first, I define all the layers of encoder1 and all the layers of encoder2 in __init__, while in the second, I define one set of layers in __init__ and let encoder1 and encoder2 share them. This difference makes the two models come out different.
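For concreteness, here is a minimal, self-contained sketch of the pattern I am asking about (a hypothetical Tiny module, not part of my real model):

import torch.nn as nn

class Tiny(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(4, 4)  # the only place parameters are created

    def branch_a(self, x):
        # methods only describe computation; they register no parameters
        return self.lin(x)

    def branch_b(self, x):
        # calls the same self.lin, so branch_a and branch_b share weights
        return self.lin(x)

Printing Tiny() shows a single Linear layer, however many methods call it, which looks like exactly what happens to my second model.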

Do you have any thoughts? Thank you so much.