Hi, there. How to define a network with multiple inputs (with or without same channels)? After a series of operations, I want to merge the results into a single output (In my case, I simply add the results and then do the activation). Suppose my two inputs are with shape of 2*100*100
and 2*100*100
. I can define a network using the following ways:
First way:
class MyNet(nn.Module):
def __init__(self, layers, init_features, out_channels):
super(MyNet, self).__init__()
self.nb_feature = init_features
self.features = nn.BatchNorm2d(init_features)
self.features.add_module('relu', nn.ReLU(inplace=True))
for i in range(layers):
self.features.add_module('norm', nn.BatchNorm2d(init_features))
self.features.add_module('relu', nn.ReLU(inplace=True))
self.features.add_module('conv', nn.Conv2d(init_features, init_features,
kernel_size=3, padding=1, stride=1))
self.add_module('last_norm', nn.BatchNorm2d(init_features))
self.last_conv = nn.Conv2d(init_features, out_channels, kernel_size=1, padding=0)
def first_layer(self, in_channels):
return nn.Conv2d(in_channels, self.nb_feature, kernel_size=3,
stride=1, padding=1).cuda()
def forward(self, inputs):
out = 0
for data in inputs:
_, channels, _, _ = data.shape
fl = self.first_layer(channels)(data)
out += self.last_conv(self.features(fl))
return F.sigmoid(out)
second way
class MyNet2(nn.Module):
def __init__(self, layers, init_features, out_channels):
super(MyNet2, self).__init__()
self.nb_feature = init_features
self.features = nn.Sequential(OrderedDict([
('conv0', nn.Conv2d(2, init_features, kernel_size=3, stride=1, padding=1)),
('norm0', nn.BatchNorm2d(init_features)),
('relu0', nn.ReLU(inplace=True))
]))
for i in range(layers):
self.features.add_module('norm', nn.BatchNorm2d(init_features))
self.features.add_module('relu', nn.ReLU(inplace=True))
self.features.add_module('conv', nn.Conv2d(init_features, init_features,
kernel_size=3, padding=1, stride=1))
self.add_module('last_norm', nn.BatchNorm2d(init_features))
self.last_conv = nn.Conv2d(init_features, out_channels, kernel_size=1, padding=0)
def forward(self, inputs):
out = 0
for data in inputs:
out += self.last_conv(self.features(data))
return F.sigmoid(out)
x1 = Variable(torch.rand((10, 2, 100, 100)).float(), requires_grad=True).cuda()
x2 = Variable(torch.rand((10, 2, 100, 100)).float(), requires_grad=True).cuda()
model = MyNet(3, 12, 2).cuda()
result = model([x1, x2])
But the problem is that, in real test, the performances of the above defined networks are very different. Specifically, the second one works better than the first one. But the second one could not handle the inputs with different channels, e.g., one image is with 3 channels and the other one is with 4 channels.
My problems are:
(1): when the inputs have the same channel, why the defined two networks have different performance?
(2): How to properly define a network that can handle multiple inputs with different channels.
Thanks a lot!