I’m relatively new to PyTorch and trying to build an encoder-decoder architecture that encodes a sequence of images of shape (batch, channels, seq_len, height, width) = (2, 3, 25, 120, 160) using 3D convolutions (torch.nn.Conv3d). The output from the encoder is supposed to be (2, 4, 25, 1, 1), but what I’m getting is (2, 4, 19, 1, 1). What might be the reason, and where am I going wrong? Please find my code below:
class Encoder_Decoder(nn.Module):
    """Conv3d encoder / ConvTranspose3d decoder for image sequences.

    Input:           (batch, 3, 25, 120, 160)
    Encoder output:  (batch, 4, 25, 1, 1)
    Decoder output:  (batch, 3, 25, 120, 160), returned axis-swapped as
                     (batch, 25, 3, 120, 160).
    """

    def __init__(self):
        super().__init__()
        # BUG FIX: each kernel_size=3 conv with temporal stride 1 shrinks the
        # depth axis by 2 (25 -> 23 -> 21 -> 19), which is why the encoder
        # produced (2, 4, 19, 1, 1). padding=(1, 0, 0) pads only the temporal
        # axis so depth stays 25, while height/width still collapse:
        # (120,160) -> (59,79) -> (29,39) -> (14,19) -> (5,5) -> (1,1).
        self.encoder = nn.Sequential(
            nn.Conv3d(in_channels=3, out_channels=16, kernel_size=3,
                      stride=(1, 2, 2), padding=(1, 0, 0)),
            nn.ReLU(), nn.BatchNorm3d(16),
            nn.Conv3d(in_channels=16, out_channels=32, kernel_size=3,
                      stride=(1, 2, 2), padding=(1, 0, 0)),
            nn.ReLU(), nn.BatchNorm3d(32),
            nn.Conv3d(in_channels=32, out_channels=16, kernel_size=3,
                      stride=(1, 2, 2), padding=(1, 0, 0)),
            nn.ReLU(), nn.BatchNorm3d(16),
            # 1x1x1 convs only stride spatially; they never touch depth.
            nn.Conv3d(in_channels=16, out_channels=8, kernel_size=1,
                      stride=(1, 3, 4)),
            nn.ReLU(), nn.BatchNorm3d(8),
            nn.Conv3d(in_channels=8, out_channels=4, kernel_size=1,
                      stride=(1, 5, 5)),
            nn.ReLU(), nn.BatchNorm3d(4),
        )
        # Decoder: every transposed conv is followed by an Upsample back to
        # the full target resolution, so the reconstruction matches the input.
        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(in_channels=4, out_channels=8, kernel_size=3),
            nn.ReLU(), nn.Upsample((25, 120, 160)),
            nn.ConvTranspose3d(in_channels=8, out_channels=16, kernel_size=3),
            nn.ReLU(), nn.Upsample((25, 120, 160)),
            nn.ConvTranspose3d(in_channels=16, out_channels=32, kernel_size=3),
            nn.ReLU(), nn.Upsample((25, 120, 160)),
            nn.ConvTranspose3d(in_channels=32, out_channels=16, kernel_size=3),
            nn.ReLU(), nn.Upsample((25, 120, 160)),
            nn.ConvTranspose3d(in_channels=16, out_channels=3, kernel_size=3),
            nn.ReLU(), nn.Upsample((25, 120, 160)),
        )

    def forward(self, x):
        """Encode then decode.

        Returns:
            (latent, reconstruction): latent is (batch, 4, 25, 1, 1);
            reconstruction is (batch, 25, 3, 120, 160).
        """
        # -1 instead of a hard-coded batch of 2 so any batch size works.
        x = x.reshape(-1, 3, 25, 120, 160)
        x1 = self.encoder(x)    # (batch, 4, 25, 1, 1)
        x2 = self.decoder(x1)   # (batch, 3, 25, 120, 160)
        # BUG FIX: use permute, not reshape, to swap the channel and time
        # axes — reshape keeps the flat element order and would scramble
        # the frames instead of reordering the axes.
        return x1, x2.permute(0, 2, 1, 3, 4)