Concatenate two encoders' representations

Hi!

I created two encoder models to extract features from two images, and I'd like to concatenate the encoders' outputs into a single input for the decoder module. My problem is the method I should use to concatenate the encoders' outputs. Any help is really appreciated! My code is below.

import torch
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class ConvDenoiser(nn.Module):
    def __init__(self):
        super(ConvDenoiser, self).__init__()
        ## encoder layers ##
        # conv layer (depth from 3 --> 128), 3x3 kernels
        self.conv1 = nn.Conv2d(3, 128, 3, padding=1) 
        self.conv1_bn = nn.BatchNorm2d(128) 
        # conv layer (depth from 128 --> 64), 3x3 kernels
        self.conv2 = nn.Conv2d(128, 64, 3, padding=1)
        self.conv2_bn = nn.BatchNorm2d(64) 
        # conv layer (depth from 64 --> 32), 3x3 kernels
        self.conv3 = nn.Conv2d(64, 32, 3, padding=1)
        # pooling layer to reduce x-y dims by two; kernel and stride of 2
        self.pool = nn.MaxPool2d(2, 2)
        
        ## decoder layers ##
        # transpose layer, a kernel of 2 and a stride of 2 will increase the spatial dims by 2
        self.t_conv1 = nn.ConvTranspose2d(32, 32, 2, stride=2)
        self.t_conv1_bn = nn.BatchNorm2d(32) 
        # two more transpose layers with a kernel of 2
        self.t_conv2 = nn.ConvTranspose2d(32, 64, 2, stride=2)
        self.t_conv2_bn = nn.BatchNorm2d(64) 
        self.t_conv3 = nn.ConvTranspose2d(64, 128, 2, stride=2)
        # one, final, normal conv layer to decrease the depth
        self.conv_out = nn.Conv2d(128, 3, 3, padding=1)
    def forward(self, x, y):
        ## encode ##
        # add hidden layers with relu activation function
        # and maxpooling after
        x = self.conv1(x)
        y = self.conv1(y)
        x = F.relu(self.conv1_bn(x))
        y = F.relu(self.conv1_bn(y))
        x = self.pool(x)
        y = self.pool(y)
        # add second hidden layer
        x = self.conv2(x)
        y = self.conv2(y)
        x = F.relu(self.conv2_bn(x))
        y = F.relu(self.conv2_bn(y))
        x = self.pool(x)
        y = self.pool(y)
        # add third hidden layer
        x = F.relu(self.conv3(x))
        y = F.relu(self.conv3(y))
        x = self.pool(x)  # compressed representation
        y = self.pool(y)  # compressed representation

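        # concatenate the two compressed representations along the channel
        # dimension (dim=1): 32 + 32 = 64 feature maps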
        z = torch.cat((x,y),1)
        
        ## decode ##
        # add transpose conv layers, with relu activation function
        z = self.t_conv1(z)
        z = F.relu(self.t_conv1_bn(z))
        z = self.t_conv2(z)
        z = F.relu(self.t_conv2_bn(z))
        z = F.relu(self.t_conv3(z))
        # final conv layer; output should have a sigmoid applied
        z = torch.sigmoid(self.conv_out(z))
                
        return z

# initialize the NN
model = ConvDenoiser()
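
For reference, here is a minimal sketch of what I think might work, assuming the two feature maps are fused with torch.cat along the channel dimension (dim=1): since that doubles the depth from 32 to 64, the first transpose convolution would have to be declared with 64 input channels. The class name TwoEncoderDenoiser and the 64x64 test inputs are just placeholders, and this version passes both images through the same (shared-weight) encoder. Is adjusting the decoder's first layer like this the right way to handle the concatenation, or is there a better method?

import torch
import torch.nn as nn
import torch.nn.functional as F

class TwoEncoderDenoiser(nn.Module):
    """Sketch: both images go through a shared encoder, and the two
    compressed representations are concatenated along the channel
    dimension before a single decoder."""
    def __init__(self):
        super().__init__()
        ## encoder layers (shared by both inputs) ##
        self.conv1 = nn.Conv2d(3, 128, 3, padding=1)
        self.conv1_bn = nn.BatchNorm2d(128)
        self.conv2 = nn.Conv2d(128, 64, 3, padding=1)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        ## decoder layers ##
        # after torch.cat the feature depth is 32 + 32 = 64,
        # so the first transpose conv must expect 64 input channels
        self.t_conv1 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.t_conv1_bn = nn.BatchNorm2d(32)
        self.t_conv2 = nn.ConvTranspose2d(32, 64, 2, stride=2)
        self.t_conv2_bn = nn.BatchNorm2d(64)
        self.t_conv3 = nn.ConvTranspose2d(64, 128, 2, stride=2)
        self.conv_out = nn.Conv2d(128, 3, 3, padding=1)

    def encode(self, x):
        # one pass through the shared encoder: conv -> bn -> relu -> pool
        x = self.pool(F.relu(self.conv1_bn(self.conv1(x))))
        x = self.pool(F.relu(self.conv2_bn(self.conv2(x))))
        x = self.pool(F.relu(self.conv3(x)))  # compressed representation
        return x

    def forward(self, x, y):
        # encode each image separately
        x = self.encode(x)
        y = self.encode(y)
        # fuse the two representations along the channel dimension (dim=1)
        z = torch.cat((x, y), dim=1)  # shape: (B, 64, H/8, W/8)
        # decode the fused representation
        z = F.relu(self.t_conv1_bn(self.t_conv1(z)))
        z = F.relu(self.t_conv2_bn(self.t_conv2(z)))
        z = F.relu(self.t_conv3(z))
        z = torch.sigmoid(self.conv_out(z))
        return z

# quick shape check on random inputs
model = TwoEncoderDenoiser()
a = torch.randn(4, 3, 64, 64)
b = torch.randn(4, 3, 64, 64)
out = model(a, b)
print(out.shape)  # torch.Size([4, 3, 64, 64])

If the two images should instead be encoded with separate weights, the same pattern would apply; only the conv1/conv2/conv3 layers would be duplicated (e.g. conv1_a / conv1_b), while the concatenation and the decoder stay unchanged.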