Conv autoencoder on RGB images not working

imran · April 22, 2021, 6:02pm

I have created a conv autoencoder to generate custom images (Generated features can be used for clustering). But I am not able to generate the images, even the result is very bad. I am not able to understand what is this problem. Image size is 240x270 and is resized to 224x224

Autoencoder class is as follow

        self.encoder = nn.Sequential(
                      
            nn.Conv2d(3, 16, 7, stride=3, padding=1),  # b, 16, 10, 10
            nn.ReLU(True),
            nn.Conv2d(16, 32, 7, stride=3, padding=1),  # b, 16, 10, 10
            nn.ReLU(True),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),  # b, 16, 10, 10
            nn.ReLU(True),
            nn.Conv2d(64, 128, 3, stride=2, padding=1)  # b, 16, 10, 10
            # nn.ReLU(True),
            # nn.Conv2d(128, 256, 2, stride=5, padding=1)  # b, 16, 10, 10
            
        )
        self.decoder = nn.Sequential(
            # nn.ConvTranspose2d(256, 128, 2),  # b, 16, 5, 5
            # nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 3,stride=2, padding=1),  # b, 16, 5, 5
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 32, 3,stride=2, padding=0),  # b, 16, 5, 5
            nn.ReLU(True),
            nn.ConvTranspose2d(32, 16, 5, stride=3, padding=0),  # b, 8, 15, 15
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 8, 7, stride=3, padding=0,output_padding=1),  # b, 1, 28, 28
            nn.ReLU(True),
            nn.ConvTranspose2d(8,3, 7, stride=1, padding=0),  # b, 1, 28, 28
            nn.Sigmoid()
            
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

Model Summary is as follow

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 16, 74, 74]           2,368
              ReLU-2           [-1, 16, 74, 74]               0
            Conv2d-3           [-1, 32, 24, 24]          25,120
              ReLU-4           [-1, 32, 24, 24]               0
            Conv2d-5           [-1, 64, 12, 12]          18,496
              ReLU-6           [-1, 64, 12, 12]               0
            Conv2d-7            [-1, 128, 6, 6]          73,856
   ConvTranspose2d-8           [-1, 64, 11, 11]          73,792
              ReLU-9           [-1, 64, 11, 11]               0
  ConvTranspose2d-10           [-1, 32, 23, 23]          18,464
             ReLU-11           [-1, 32, 23, 23]               0
  ConvTranspose2d-12           [-1, 16, 71, 71]          12,816
             ReLU-13           [-1, 16, 71, 71]               0
  ConvTranspose2d-14          [-1, 8, 218, 218]           6,280
             ReLU-15          [-1, 8, 218, 218]               0
  ConvTranspose2d-16          [-1, 3, 224, 224]           1,179
          Sigmoid-17          [-1, 3, 224, 224]               0
================================================================
Total params: 232,371
Trainable params: 232,371
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 11.50
Params size (MB): 0.89
Estimated Total Size (MB): 12.96
----------------------------------------------------------------

Data Loader class is as follow

mean = torch.tensor((0.485, 0.456, 0.406)).reshape(1,3,1,1).cuda().requires_grad_(False) 
std = torch.tensor((0.229, 0.224, 0.225)).reshape(1,3,1,1).cuda().requires_grad_(False) 
 
def normalize(tensorInput, mean=mean, std=std, device=None):
    return tensorInput.sub(mean.to(device)).div(std.to(device))


def denormalize(tensorInput, mean=mean, std=std, device=None):
    return tensorInput.mul(std.to(device)).add(mean.to(device))

class DatasetLoader(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(root)))
        

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, self.imgs[idx])
        img=cv2.imread(img_path)
        img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        img = cv2.resize(img,(32,32)) / 255.0
        
        img = np.transpose(img,[2,0,1]).astype(np.float32)
        return torch.from_numpy(img)

    def __len__(self):
        return len(self.imgs)

Dataloading and train script is as follow


dataset = DatasetLoader('D:\images\Bright_240_270', get_transform(train=True))

torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset, indices[-50:])

data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=batchSize, shuffle=True, num_workers=0)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = autoencoder().to(device)
summary(model, (3, 224, 224))

criterion = nn.MSELoss()
# criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,weight_decay=1e-5)
trainLoss=[]
total_loss = 0
data_iter = 0
for epoch in range(num_epochs):
    data_iter = 0
    for data in data_loader:
        # print(data)
        img =  data.clone().detach().cuda()
        # print("Min Value of input Image = ",torch.min(img))
        # print("Max Value of input Image = ",torch.max(img))        
        img = Variable(img).cuda()
        img= normalize(img)
        # ===================forward=====================
        output = model(img)
        print(data.min(),data.max(),output.min().item(),output.max().item())

        output = denormalize(output)
        # print("Input Image shape = ",img.shape)
        # print("Output Image shape = ",output.shape)
        loss = criterion(output,data.cuda())
        show_and_save_figure(data[0],output[0],'AutoEncoder')
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        data_iter=data_iter+1
        if data_iter % 10 == 0:
           print(f"Data Iteration = {data_iter}")
        total_loss += loss.data
    # ===================log========================
    total_loss /= len(data_loader)
    trainLoss.append(trainLoss.append(loss.data.cpu().detach().numpy()))
    plt.plot(trainLoss)
    plt.xlabel('Epochs')
    # naming the y axis
    plt.ylabel('Loss')
    # giving a title to my graph
    plt.title('Training loss')
    plt.show()
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, total_loss))

The original image and image generated are
trainImage

Where is the problem. Why I am not able to generate it?