Error in autoencoder when MaxPool2d/MaxUnpool2d is used

Hi,

I am implementing an autoencoder on 2D NumPy arrays.
When I use MaxPool2d/MaxUnpool2d layers, I get the following error:

  TypeError: conv2d() received an invalid combination of arguments - got (tuple, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:

  • (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
    didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
  • (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
    didn't match because some of the arguments have invalid types: (!tuple!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)

When I exclude the MaxPool/MaxUnpool layers and match the layer sizes, the architecture runs without errors, but it still doesn't reconstruct well - so if anyone has advice on what a good architecture should look like, please share! (I've put the workaround I'm considering at the end of the post.)
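I think the error happens because nn.MaxPool2d(return_indices=True) returns an (output, indices) tuple, and nn.Sequential passes that tuple straight into the following nn.Conv2d - hence the !tuple! as the first argument in the error. A minimal sketch that should reproduce it (the shapes here are just placeholders):

import torch
import torch.nn as nn

pool = nn.MaxPool2d(kernel_size=3, return_indices=True)
conv = nn.Conv2d(32, 64, kernel_size=5)

out = pool(torch.randn(1, 32, 42, 42))
print(type(out))  # <class 'tuple'>: (pooled, indices)
conv(out)         # raises the TypeError above, since conv2d() expects a Tensor

My full code: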

import torch
import torch.nn as nn

class Reshape(nn.Module):
    def __init__(self, *args):
        super().__init__()
        self.shape = args

    def forward(self, x):
        return x.view(self.shape)

class En_De_coder2(nn.Module):
    def __init__(self):
        super().__init__()        
        self.encoder = nn.Sequential(  # input: 1, 46, 46
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5),   # 32, 42, 42
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, return_indices=True),           # 32, 14, 14
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),  # 64, 10, 10
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5), # 128, 6, 6
            nn.Flatten(),                                               # 128 * 6 * 6 = 4608
            nn.Linear(4608, 20)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(20, 4608),
            Reshape(-1, 128, 6, 6),  # 128 * 6 * 6 = 4608; (5, 5) would not match the Linear output
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=5),  # 64, 10, 10
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=5),   # 32, 14, 14
            nn.MaxUnpool2d(kernel_size=3),                                        # 32, 42, 42
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=1, kernel_size=5),    # 1, 46, 46
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = En_De_coder2().to(device)

epsilon = 1
criterion = nn.MSELoss(reduction='sum') #* epsilon + nn.KLDivLoss(reduction='batchmean') * (1-epsilon)
#criterion = nn.KLDivLoss(reduction="batchmean")
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-3, 
                             weight_decay=1e-5)

num_epochs = 10
outputs = []  # collects (epoch, train_loss, val_loss, targets, recon) per epoch

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device), targets.to(device)
        recon = model(data.float())
        batch_loss = criterion(recon, targets)
        train_loss = train_loss + batch_loss.item()

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    val_loss = 0
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, targets) in enumerate(val_loader):
            data, targets = data.to(device), targets.to(device)
            recon = model(data.float())
            batch_loss = criterion(recon, targets)
            val_loss = val_loss + batch_loss.item()

    print(f'Epoch:{epoch+1}, Train Loss:{train_loss:.4f}')
    print(f'Epoch:{epoch+1}, Validation Loss:{val_loss:.4f}')
    outputs.append((epoch, train_loss, val_loss, targets, recon))
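
For completeness, this is the direction I'm considering for the MaxPool/MaxUnpool problem - an untested sketch that splits the model around the pooling layers and carries the indices through forward() explicitly, since nn.MaxUnpool2d needs the indices that nn.Sequential cannot pass along (the enc1/enc2/dec1/dec2 names are just mine):

class En_De_coder3(nn.Module):
    def __init__(self):
        super().__init__()
        self.enc1 = nn.Sequential(                       # input: 1, 46, 46
            nn.Conv2d(1, 32, kernel_size=5),             # 32, 42, 42
            nn.ReLU(),
        )
        self.pool = nn.MaxPool2d(kernel_size=3, return_indices=True)  # 32, 14, 14
        self.enc2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5),            # 64, 10, 10
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=5),           # 128, 6, 6
            nn.Flatten(),
            nn.Linear(4608, 20),
        )
        self.dec1 = nn.Sequential(
            nn.Linear(20, 4608),
            Reshape(-1, 128, 6, 6),
            nn.ConvTranspose2d(128, 64, kernel_size=5),  # 64, 10, 10
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=5),   # 32, 14, 14
        )
        self.unpool = nn.MaxUnpool2d(kernel_size=3)      # 32, 42, 42
        self.dec2 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(32, 1, kernel_size=5),    # 1, 46, 46
        )

    def forward(self, x):
        x = self.enc1(x)
        x, indices = self.pool(x)    # keep the pooling indices
        x = self.enc2(x)
        x = self.dec1(x)
        x = self.unpool(x, indices)  # reuse the encoder's indices for unpooling
        return self.dec2(x)

Does splitting the model like this look reasonable, or is there a cleaner pattern for threading the indices through?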