Same loss while training my autoencoder

Hi! I am starting to use PyTorch and I am building my first autoencoder (for MNIST).
The code runs without any error, but the loss value does not change from one epoch to the next.

I read on a forum that I should check param.grad:

for name, param in model.named_parameters():
    print(name, param.grad)

It prints None for every parameter, so there is a problem, but I don't know how to solve it…
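As far as I understand, param.grad is only populated after a backward pass, so I assume the check only makes sense after calling train_loss.backward(); roughly like this, using the names from my code below:

train_loss.backward()
for name, param in autoencoder.named_parameters():
    # each entry should be a gradient tensor; in my case it is still None
    print(name, param.grad)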

Here is my code:

import torch.nn as nn
import torch
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
from torchsummary import summary
from sklearn.utils import shuffle

class AutoEncoder(nn.Module):

    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1,32, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(32),

            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 64, 3, stride=2, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 64, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),

            nn.Flatten(1, -1),
            nn.Linear(3136, 2)
        )

        self.decoder = nn.Sequential(
            nn.Linear(2, 3136),
            nn.Unflatten(1, (64, 7, 7)),

            nn.ConvTranspose2d(64, 64, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),

            nn.ConvTranspose2d(64, 64, 4, stride=2, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(64),

            nn.ConvTranspose2d(64, 32, 4, stride=2, padding=1),
            nn.ReLU(True),
            nn.BatchNorm2d(32),

            nn.ConvTranspose2d(32, 1, 3, stride=1, padding=1)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)

        return x


def generate_dataloader(batch_size=32):

    mnist = pd.read_csv("train.csv")
    y = mnist.iloc[:, 0].values
    X = mnist.iloc[:, 1:].values

    X, y = shuffle(X, y)

    X = np.array(X)[:4000, np.newaxis, :]
    y = y[:4000]

    X = np.reshape(X, (4000, 1, 28, 28))

    X = torch.Tensor(X)
    y = torch.Tensor(y)

    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size)

    return loader



autoencoder = AutoEncoder()
summary(autoencoder, input_size=(1, 28, 28))
train_loader = generate_dataloader(32)

# loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.01)

for epoch in range(100):
    loss = 0
    for batch_features, _ in train_loader:
        # reshape mini-batch data to [N, 784] matrix
        # load it to the active device

        # reset the gradients back to zero
        # PyTorch accumulates gradients on subsequent backward passes
        optimizer.zero_grad()

        # compute reconstructions
        batch_features = Variable(batch_features, requires_grad=True)
        outputs = autoencoder(batch_features)


        # compute training reconstruction loss
        # batch_features = batch_features.squeeze(1)
        # outputs = outputs.squeeze(1)

        train_loss = criterion(outputs.data, batch_features)

        # compute accumulated gradients
        train_loss.backward()

        # perform parameter update based on current gradients
        optimizer.step()

        # add the mini-batch training loss to epoch loss
        loss += train_loss.item()

    # compute the epoch training loss
    loss = loss / len(train_loader)

    # display the epoch training loss
    print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, 100, loss))

Thank you for your help!

You are passing the .data attribute to the criterion, which detaches the tensor from the computation graph and can also have other unwanted side effects.
We do not recommend using this attribute, so pass the outputs tensor directly to the criterion.
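
A minimal sketch of the fixed part of the training loop (keeping the rest of your code as posted):

outputs = autoencoder(batch_features)
train_loss = criterion(outputs, batch_features)  # pass outputs directly, not outputs.data
train_loss.backward()                            # gradients now reach the model parameters
optimizer.step()

After this change, param.grad should contain actual gradient tensors instead of None. As a side note, the Variable wrapper (and requires_grad=True on the input) is not needed; since PyTorch 0.4 you can pass plain tensors to the model directly.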