Hello everyone!
I am developing a fully convolutional autoencoder which processes images of different sizes and outputs a manipulated version with the original size. I am using half padding (setting the padding of the pooling layer to 1), thus the images do not get rounded off. But i get this error: ValueError: invalid output_size “torch.Size([12, 15])” (dim 0 must be between 20 and 24)
I think that something with the Pooling/Unpooling is wrong, becaues this line throws the exception:
x = self.unpool(x, indices1, output_size=size4)
import torch.nn as nn
import pdb
class AE(nn.Module):
def __init__(self):
super(AE, self).__init__()
self.leakyR = nn.LeakyReLU(0.2)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices=True, ceil_mode=True)
self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=1)
self.softmax = nn.Softmax()
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(64)
)
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(128)
)
self.conv3 = nn.Sequential(
nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(256)
)
self.conv4 = nn.Sequential(
nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(512)
)
self.conv5 = nn.Sequential(
nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(1024)
)
self.conv6 = nn.Sequential(
nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(512)
)
self.conv7 = nn.Sequential(
nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(256)
)
self.conv8 = nn.Sequential(
nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(128)
)
self.conv9 = nn.Sequential(
nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=0),
nn.BatchNorm2d(64)
)
self.conv10 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=3, stride=1, padding=0)
def forward(self, x):
# encoder
x = self.conv1(x)
x = self.leakyR(x)
x, indices1 = self.pool(x)
size1 = x.size()
x = self.conv2(x)
x = self.leakyR(x)
x, indices2 = self.pool(x)
size2 = x.size()
x = self.conv3(x)
x = self.leakyR(x)
x, indices3 = self.pool(x)
size3 = x.size()
x = self.conv4(x)
x = self.leakyR(x)
x, indices4 = self.pool(x)
size4 = x.size()
######################
x = self.conv5(x)
x = self.leakyR(x)
x = self.conv6(x)
x = self.leakyR(x)
######################
# decoder
x = self.unpool(x, indices1, output_size=size4)
x = self.conv7(x)
x = self.leakyR(x)
x = self.unpool(x, indices2, output_size=size3)
x = self.conv8(x)
x = self.leakyR(x)
x = self.unpool(x, indices3, output_size=size2)
x = self.conv9(x)
x = self.leakyR(x)
x = self.unpool(x, indices4, output_size=size1)
x = self.conv10(x)
x = self.softmax(x)
return x