I am getting stuck when setting the input shape of a tensor from a linear layer to a 2D convolutional transpose layer in the decoder network of my variational autoencoder.
After sampling from my encoder network, I have an input tensor of shape (1 x 8)
for this decoder:
class Decoder(nn.Module):
def __init__(self, latent_size, output_size, kernel1=4, stride1=2, kernel2=4,
stride2=2, kernel3=4, stride3=2, kernel4=4, stride4=2,
kernel5=4, stride5=2):
super(Decoder, self).__init__()
"""
Parameters:
----------
latent_size : int
latent dimension size of the autoencoder.
output_size : int
Output dimension for the data. Should equal input_dimension of AE.
kernel* : int, defualt=4
Convolutional filter size for layer *.
stride* : int, default=2
Stride length for convolutional filter at layer *.
"""
self.latent_size = latent_size
self.output_size = output_size
# ....
self.fc = nn.Linear(self.latent_size, 32)
self.cnn_decoder = nn.Sequential(
nn.ConvTranspose2d(1, 32, self.kernel1, self.stride1, padding=2),
nn.MaxPool2d(2),
nn.ELU(),
nn.ConvTranspose2d(32, 64, self.kernel2, self.stride2),
nn.MaxPool2d(2),
nn.ELU(),
nn.ConvTranspose2d(64, 64, self.kernel3, self.stride3),
nn.MaxPool2d(2),
nn.ELU(),
nn.ConvTranspose2d(64, 128, self.kernel4, self.stride4),
nn.MaxPool2d(2),
nn.ELU(),
nn.ConvTranspose2d(128, 128, self.kernel5, self.stride5),
nn.MaxPool2d(2),
)
self.fc2 = nn.Linear(128, self.output_size)
def forward(self, latent_input):
"""
Parameters:
----------
input : float tensor shape=(batch_size, input_size)
Returns:
-------
A float tensor with shape (batch_size, output_size)
"""
out = self.fc(latent_input)
out = out.view(out.size(0), 1, 16, 16)
print('output of linear layer has shape {}'.format(out.shape))
out = self.cnn_decoder(out)
out = out.view(out.size(0), -1)
out = self.fc2(out)
return out
I don’t fully understand how to use view()
to go from the linear layer to the convolutional layer. I thought that since the linear layer output is just a (1 x 32)
vector, I could just reshape it for the convolutional layer input. Here is the error message I am getting:
latent input in decoder: Variable containing:
-0.2977 0.5545 1.1009 0.3358 1.0061 -1.6431 -0.2010 0.1817
[torch.FloatTensor of size 1x8]
Traceback (most recent call last):
File "main.py", line 82, in <module>
main()
File "main.py", line 68, in main
dec = vae(inputs)
File "/Users/youngtodd/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 325, in __call__
result = self.forward(*input, **kwargs)
File "/Users/youngtodd/molecules/molecules/vae.py", line 56, in forward
out = self.decoder(z)
File "/Users/youngtodd/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 325, in __call__
result = self.forward(*input, **kwargs)
File "/Users/youngtodd/molecules/molecules/decoder.py", line 79, in forward
out = out.view(out.size(0), 1, 16, 16)
RuntimeError: invalid argument 2: size '[1 x 1 x 16 x 16]' is invalid for input with 32 elements at /Users/soumith/minicondabuild3/conda-bld/pytorch_1512381214802/work/torch/lib/TH/THStorage.c:41