I’ve been trying to develop an Autoencoder model for the task of knowledge representation where my input is a sequence of images. Loss of this model (both training and testing) doesn’t decrease. It is instead fluctuating and of course, my image reconstruction is very poor. I’ve tried the following:
- Changing the learning rate between 1 and 0.001
- Increasing and decreasing the batch size
- With and without dropout layers
Encoder-Decoder model
class Encoder_Decoder(nn.Module):
def __init__(self):
super(Encoder_Decoder, self).__init__()
#Encoder
self.encoder = nn.Sequential(nn.Conv3d(in_channels=3, out_channels=16, kernel_size=(3, 3, 3), padding=(0,0,0)),
nn.ReLU(), nn.BatchNorm3d(16),
nn.MaxPool3d(kernel_size=(1,2,2)),
nn.Conv3d(in_channels=16, out_channels=64, kernel_size=(3, 3, 3), padding=(0,1,1)),
nn.ReLU(), nn.BatchNorm3d(64),
nn.MaxPool3d(kernel_size=(2,2,2)),
nn.Conv3d(in_channels=64, out_channels=256, kernel_size=(3, 3, 3), padding=(1,1,1)),
nn.ReLU(), nn.BatchNorm3d(256),
nn.MaxPool3d(kernel_size=(2,2,2)),
nn.Conv3d(in_channels=256, out_channels=64, kernel_size=(3,3,3),padding=(1,1,1)),
nn.ReLU(), nn.BatchNorm3d(64),
nn.MaxPool3d(kernel_size=(2,2,2)),
nn.Conv3d(in_channels=64, out_channels=16, kernel_size=(1,1,1), padding=(0,0,0)),
nn.ReLU(), nn.BatchNorm3d(16),
nn.MaxPool3d(kernel_size=(2,1,1)),
nn.Conv3d(in_channels=16, out_channels=4, kernel_size=(1,1,1), padding=(0,0,0)),
nn.ReLU(), nn.BatchNorm3d(4),
nn.MaxPool3d(kernel_size=(1,1,1)))
#Decoder
self.decoder = nn.Sequential(nn.ConvTranspose3d(in_channels=4, out_channels=16, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=16, out_channels=64, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=64, out_channels=256, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=256, out_channels=64, kernel_size=3),
nn.ReLU(),
nn.Upsample(scale_factor=(2,2,2)),
nn.ConvTranspose3d(in_channels=64, out_channels=16, kernel_size=1),
nn.ReLU(),
nn.Upsample(size=(22,28,28)),
nn.ConvTranspose3d(in_channels=16, out_channels=3, kernel_size=1),
nn.ReLU(),
nn.Upsample(size=(22,28,28)))
def forward(self,x):
# x has the shapw (16,22,3,28,28)
x1 = self.encoder(x)
# x1 has the shape (16,4,1,1,1)
x2 = self.decoder(x1)
# x2 has the shape (16,22,3,28,28)
return x1, x2
Train:
def train(model, trainloader, criterion, optimizer, epoch):
    """Run one training epoch over ``trainloader``.

    Each batch is reconstructed by ``model``; ``criterion`` compares the
    reconstruction against the input and ``optimizer`` takes one step on
    that loss. Progress is logged every 50 batches.
    """
    model.train()
    for batch_idx, inputs in enumerate(trainloader):
        inputs = inputs.float()
        if torch.cuda.is_available():
            inputs = inputs.to("cuda")

        optimizer.zero_grad()
        code, outputs = model(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(inputs), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
test:
def test(model, criterion1, testloader):
model.eval()
test_loss = 0
for batch_idx, inputs in enumerate(testloader):
if torch.cuda.is_available():
inputs = inputs.cuda()
encoded_vectors,outputs = model(inputs)
loss = criterion(outputs,inputs)
test_loss += loss.item() * inputs.shape[0]
test_loss /= len(testloader.dataset)
print('\nTest set: Average loss: {:.4f}\n'.format(test_loss))
if abs(test_loss) <= 0.005:
return True
else:
return False
def main():
    """Build the autoencoder, train it for up to 20 epochs, return it.

    Relies on module-level ``train_loader`` / ``test_loader`` being
    defined elsewhere in the script.
    """
    model = Encoder_Decoder()
    # BUG FIX: the original unconditionally did model.to('cuda') and
    # crashed on CPU-only machines, even though train()/test() already
    # guard on torch.cuda.is_available(). Pick the device dynamically.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    criterion = torch.nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.005, eps=1e-3, amsgrad=False)
    for epoch in range(20):
        train(model, train_loader, criterion, optimizer, epoch)
        # test() returns True once the average loss is <= 0.005; the
        # original discarded that convergence signal -- stop early on it.
        if test(model, criterion, test_loader):
            break
    return model
# BUG FIX: the original read `if name == “main”:` — bare `name` is a
# NameError and the curly “smart quotes” are a SyntaxError. The correct
# script-entry guard uses the dunder module name.
if __name__ == "__main__":
    model = main()