I want to build a model whose encoder consists of two branches (two encoders) that do not share weights, while the decoder is shared between them. I just found out that the following two ways of defining the model produce two different models. Could you help me figure out why? Thanks a lot.
The first one:
import numpy as np # linear algebra
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import init
import torch.nn.functional as F
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
class Semi_siamese_(nn.Module):
    """Two-branch network with separate encoders and one shared decoder.

    ``x1`` is encoded by ``encoder1_1``/``encoder1_2`` and ``x2`` by
    ``encoder2_1``/``encoder2_2``. These are distinct module instances, so the
    two encoders do NOT share weights. Both encodings are then passed through
    the same ``upconv4`` and ``conv`` modules, so the decoder weights ARE
    shared. The difference of the two decoded maps is fed to ``conv_layer``,
    whose last convolution has 2 output channels.

    Args:
        in_channels: Number of channels of each input tensor.
        out_channels: Number of channels produced by the shared decoder and
            consumed by the first convolution of ``conv_layer``.
        init_features: Number of feature maps of the first encoder block;
            the second encoder block uses twice as many.
    """

    def __init__(self, in_channels=3, out_channels=1, init_features=32):
        super().__init__()
        features = init_features

        # Encoder for the first input.
        self.encoder1_1 = self._block(in_channels, features, name="enc1_1")
        self.pool1_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder1_2 = self._block(features, features * 2, name="enc1_2")

        # Encoder for the second input: freshly constructed modules, hence an
        # independent set of weights.
        self.encoder2_1 = self._block(in_channels, features, name="enc2_1")
        self.pool2_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2_2 = self._block(features, features * 2, name="enc2_2")

        # Shared decoder. The encoders emit ``features * 2`` channels, so the
        # transposed convolution must accept exactly that many.
        self.upconv4 = nn.ConvTranspose2d(
            features * 2, features, kernel_size=2, stride=2
        )
        # 1x1 projection used by ``decoder``; without it ``self.conv`` would
        # not exist and ``forward`` would raise AttributeError.
        self.conv = nn.Conv2d(features, out_channels, kernel_size=1)

        # Head applied to the difference of the two decoded maps. Its input
        # width follows ``out_channels`` so any value of that argument works.
        self.conv_layer = nn.Sequential(
            nn.Conv2d(out_channels, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 3, 3, stride=1, padding=1),
            nn.BatchNorm2d(3),
            nn.ReLU(),
            nn.Conv2d(3, 2, 3, stride=1, padding=1),
        )

    def encoder1(self, x):
        """Encode ``x`` with the first branch's own modules."""
        enc1 = self.encoder1_1(x)
        enc2 = self.encoder1_2(self.pool1_1(enc1))
        return enc2

    def encoder2(self, x):
        """Encode ``x`` with the second branch's own modules."""
        enc1 = self.encoder2_1(x)
        enc2 = self.encoder2_2(self.pool2_1(enc1))
        return enc2

    def decoder(self, bottleneck):
        """Upsample ``bottleneck`` and squash it with a sigmoid.

        The same modules are used for both branches, which is what makes the
        decoder weight-shared.
        """
        dec4 = self.upconv4(bottleneck)
        return torch.sigmoid(self.conv(dec4))

    def forward(self, x1, x2):
        """Return ``conv_layer`` applied to the difference of both decodings."""
        x1_enc1 = self.encoder1(x1)
        x2_enc1 = self.encoder2(x2)
        out1 = self.decoder(x1_enc1)
        out2 = self.decoder(x2_enc1)
        dis = out1 - out2
        out = self.conv_layer(dis)
        return out

    @staticmethod
    def _block(in_channels, features, name):
        """Build a (conv3x3 -> batch norm -> ReLU) x 2 block.

        Every call creates new layers, so two blocks never share weights
        unless the very same returned module object is reused. ``name`` is
        used as a prefix for the sub-module names.
        """
        return nn.Sequential(
            OrderedDict(
                [
                    (
                        name + "conv1",
                        nn.Conv2d(
                            in_channels=in_channels,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm1", nn.BatchNorm2d(num_features=features)),
                    (name + "relu1", nn.ReLU(inplace=True)),
                    (
                        name + "conv2",
                        nn.Conv2d(
                            in_channels=features,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm2", nn.BatchNorm2d(num_features=features)),
                    (name + "relu2", nn.ReLU(inplace=True)),
                ]
            )
        )
The second one:
import numpy as np # linear algebra
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import init
import torch.nn.functional as F
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
class semi_siamese_(nn.Module):
    """Two-input network whose two "encoders" share one set of weights.

    Only one set of encoder modules (``encoder1_1``/``encoder1_2``) is created
    in ``__init__``. ``encoder1`` and ``encoder2`` are merely two methods that
    both call those same modules, so both inputs are encoded with identical
    weights and the model has no ``encoder2_*`` parameters. Weight sharing is
    determined by which module objects are called, not by how many methods
    call them. The decoder (``upconv4`` + ``conv``) is shared as well.

    Args:
        in_channels: Number of channels of each input tensor.
        out_channels: Number of channels produced by the shared decoder and
            consumed by the first convolution of ``conv_layer``.
        init_features: Number of feature maps of the first encoder block;
            the second encoder block uses twice as many.
    """

    def __init__(self, in_channels=3, out_channels=1, init_features=32):
        super().__init__()
        features = init_features

        # The single set of encoder weights used for BOTH inputs.
        self.encoder1_1 = self._block(in_channels, features, name="enc1_1")
        self.pool1_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder1_2 = self._block(features, features * 2, name="enc1_2")

        # ``encoder2`` refers to ``pool2_1``; it has to exist or the forward
        # pass raises AttributeError. Max pooling has no parameters, so this
        # does not add any weights.
        self.pool2_1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Shared decoder. The encoder emits ``features * 2`` channels, so the
        # transposed convolution must accept exactly that many.
        self.upconv4 = nn.ConvTranspose2d(
            features * 2, features, kernel_size=2, stride=2
        )
        # 1x1 projection used by ``decoder``; without it ``self.conv`` would
        # not exist and ``forward`` would raise AttributeError.
        self.conv = nn.Conv2d(features, out_channels, kernel_size=1)

        # Head applied to the difference of the two decoded maps. Its input
        # width follows ``out_channels`` so any value of that argument works.
        self.conv_layer = nn.Sequential(
            nn.Conv2d(out_channels, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 3, 3, stride=1, padding=1),
            nn.BatchNorm2d(3),
            nn.ReLU(),
            nn.Conv2d(3, 2, 3, stride=1, padding=1),
        )

    def encoder1(self, x):
        """Encode ``x`` with ``encoder1_1`` and ``encoder1_2``."""
        enc1 = self.encoder1_1(x)
        enc2 = self.encoder1_2(self.pool1_1(enc1))
        return enc2

    def encoder2(self, x):
        """Encode ``x`` with the same convolution blocks as ``encoder1``.

        Reusing ``encoder1_1``/``encoder1_2`` here is what ties the weights of
        the two branches together.
        """
        enc1 = self.encoder1_1(x)
        enc2 = self.encoder1_2(self.pool2_1(enc1))
        return enc2

    def decoder(self, bottleneck):
        """Upsample ``bottleneck`` and squash it with a sigmoid."""
        dec4 = self.upconv4(bottleneck)
        return torch.sigmoid(self.conv(dec4))

    def forward(self, x1, x2):
        """Return ``conv_layer`` applied to the difference of both decodings."""
        x1_enc1 = self.encoder1(x1)
        x2_enc1 = self.encoder2(x2)
        out1 = self.decoder(x1_enc1)
        out2 = self.decoder(x2_enc1)
        dis = out1 - out2
        out = self.conv_layer(dis)
        return out

    @staticmethod
    def _block(in_channels, features, name):
        """Build a (conv3x3 -> batch norm -> ReLU) x 2 block.

        Every call creates new layers. ``name`` is used as a prefix for the
        sub-module names.
        """
        return nn.Sequential(
            OrderedDict(
                [
                    (
                        name + "conv1",
                        nn.Conv2d(
                            in_channels=in_channels,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm1", nn.BatchNorm2d(num_features=features)),
                    (name + "relu1", nn.ReLU(inplace=True)),
                    (
                        name + "conv2",
                        nn.Conv2d(
                            in_channels=features,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm2", nn.BatchNorm2d(num_features=features)),
                    (name + "relu2", nn.ReLU(inplace=True)),
                ]
            )
        )
Here is the code I use to instantiate and inspect the two models:
# Instantiate both variants with identical hyper-parameters so that their
# printed structures can be compared side by side.
net1 = Semi_siamese_(
    in_channels=3,
    out_channels=3,
    init_features=32,
)
net2 = semi_siamese_(
    in_channels=3,
    out_channels=3,
    init_features=32,
)
print(net1, net2, sep="\n")
def count_parameters(model):
    """Return the total number of elements over all trainable parameters.

    Only parameters yielded by ``model.parameters()`` whose ``requires_grad``
    flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total
# Report the trainable-parameter count of each model, one per line.
print(count_parameters(net1), count_parameters(net2), sep="\n")
net1 actually does what I am looking for. But for net2 (the second way of defining the model), the encoder part (two encoders) seems to share the same layers, even though I define two encoder instances. Could you figure out why this happens? I'd really appreciate it.