Why are the predictions so off?

I have the following code snippet in the train cell:


# Training cell: trains `network` with Adam + MSE loss for `num_epochs` epochs,
# runs a validation pass after every epoch, and saves the weights whenever the
# validation loss reaches a new minimum.
network = Network()
network.cuda()    

criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)

loss_min = np.inf  # lowest validation loss seen so far
num_epochs = 10

start_time = time.time()
for epoch in range(1,num_epochs+1):
    
    # Per-epoch accumulators of the step losses.
    loss_train = 0
    loss_test = 0
    running_loss = 0
    
    
    network.train()
    print('size of train loader is: ', len(train_loader))

    for step in range(1, len(train_loader)+1):

        
        # NOTE(review): iter(train_loader) creates a brand-new iterator on
        # every step, so next() only ever yields that iterator's first batch;
        # this loop never walks through the rest of the loader.
        batch = next(iter(train_loader))
        images, landmarks = batch['image'], batch['landmarks']
        print(images.shape)
       
        # Insert a channel axis at position 1 and replicate it three times
        # (presumably because the backbone expects 3-channel input -- confirm).
        images = images.unsqueeze_(1)

        images = torch.cat((images,images,images),1)
        images = images.cuda()
    
        # Flatten each sample's landmarks into a single row.
        landmarks = landmarks.view(landmarks.size(0),-1).cuda() 
        norm_image = transforms.Normalize(0.3812, 0.1123) 
        # NOTE(review): the normalized result is only bound to the loop
        # variable `image`; nothing is stored back into `images`, so the batch
        # passed to the network below is not changed by this loop.
        for image in images:
            image = image.float()
            ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
            image = norm_image(image)
        
        ###removing landmarks normalize because of the following error
        ###ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([8, 8])
       
         
        # Divide even-indexed columns by 800 and odd-indexed columns by 600
        # (presumably x / image width and y / image height -- confirm).
        for i in range(8):
            if(i%2==0):
                landmarks[:,i] = landmarks[:,i]/800
            else:
                landmarks[:,i] = landmarks[:,i]/600
                
        print(landmarks.shape)
        print(landmarks)
        
        

        
        ##norm_landmarks = transforms.Normalize(0.4949, 0.2165)
        # `x != x` is true only for NaN, so this zeroes NaN landmark entries.
        landmarks [landmarks != landmarks] = 0
        # Adds a leading axis in place. NOTE(review): the target now has one
        # more dimension than it had after the flatten above; confirm the loss
        # is meant to broadcast it against `predictions`.
        landmarks = landmarks.unsqueeze_(0)
        # NOTE(review): `norm_landmarks` is assigned only in the commented-out
        # line above; unless another cell defines it, this raises NameError.
        landmarks = norm_landmarks(landmarks)
        
        predictions = network(images)
        
        # clear all the gradients before calculating them
        optimizer.zero_grad()
        
        print('predictions are: ', predictions.float())
        print('landmarks are: ', landmarks.float())
        # find the loss for the current step
        loss_train_step = criterion(predictions.float(), landmarks.float())
        
        
        loss_train_step = loss_train_step.to(torch.float32)
        print("loss_train_step before backward: ", loss_train_step)
        
        # calculate the gradients
        loss_train_step.backward()
        
        # update the parameters
        optimizer.step()
        
        # NOTE(review): this prints the same tensor as the "before backward"
        # line; the loss is not recomputed after the optimizer step.
        print("loss_train_step after backward: ", loss_train_step)

        
        loss_train += loss_train_step.item()
        
        print("loss_train: ", loss_train)
        # Mean training loss over the steps completed so far in this epoch.
        running_loss = loss_train/step
        print('step: ', step)
        print('running loss: ', running_loss)
        
        print_overwrite(step, len(train_loader), running_loss, 'train')
        
    network.eval() 
    with torch.no_grad():
        
        for step in range(1,len(test_loader)+1):
            
            # NOTE(review): the validation pass draws from `train_loader`, not
            # `test_loader`, and (as in the training loop) always takes the
            # first batch of a fresh iterator.
            batch = next(iter(train_loader))
            images, landmarks = batch['image'], batch['landmarks']
            images = images.cuda()
            # NOTE(review): unlike the training loop, the landmarks here are
            # neither divided by 800/600 nor cleared of NaNs before the loss.
            landmarks = landmarks.view(landmarks.size(0),-1).cuda()
            ##[8, 600, 800] --> [8,3,600,800]
            images = images.unsqueeze(1)
            images = torch.cat((images, images, images), 1)
            predictions = network(images)

            # find the loss for the current step
            loss_test_step = criterion(predictions, landmarks)

            loss_test += loss_test_step.item()
            running_loss = loss_test/step

            print_overwrite(step, len(test_loader), running_loss, 'Validation')
    
    # Turn the accumulated sums into per-step averages for the epoch summary.
    loss_train /= len(train_loader)
    loss_test /= len(test_loader)
    
    print('\n--------------------------------------------------')
    print('Epoch: {}  Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_test))
    print('--------------------------------------------------')
    
    # Save the weights whenever the validation loss improves.
    if loss_test < loss_min:
        loss_min = loss_test
        torch.save(network.state_dict(), '../moth_landmarks.pth') 
        print("\nMinimum Valid Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
        print('Model Saved\n')
     
print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))

And I have the following code in the evaluation cell:

# Evaluation cell: reloads the saved weights, predicts landmarks for one batch
# and plots predictions (red) against ground truth (green) on each image.
start_time = time.time()

with torch.no_grad():

    best_network = Network()
    best_network.cuda()
    best_network.load_state_dict(torch.load('../moth_landmarks.pth')) 
    best_network.eval()
    
    # NOTE(review): the batch comes from `train_loader`, although the summary
    # printed at the end reports the size of `test_dataset`.
    batch = next(iter(train_loader))
    images, landmarks = batch['image'], batch['landmarks']
        
    images = images.cuda()
    # NOTE(review): the training cell scales targets by dividing columns by
    # 800 / 600; `(x + 0.5) * 224` is not the inverse of that scaling.
    landmarks = (landmarks + 0.5) * 224
    
    ##[8, 600, 800] --> [8,3,600,800]
    images = images.unsqueeze(1)
    images = torch.cat((images, images, images), 1)

    # NOTE(review): same mismatch as above -- the network was trained against
    # targets divided by 800 / 600, so this does not map its output back to
    # pixel coordinates.
    predictions = (best_network(images).cpu() + 0.5) * 224
    # Regroup the 8 outputs per image as 4 (x, y) pairs.
    predictions = predictions.view(-1,4,2)
    
    plt.figure(figsize=(10,40))
    
    # Plots exactly 8 images; assumes the batch holds at least 8 -- confirm.
    for img_num in range(8):
        plt.subplot(8,1,img_num+1)
        # Move channels last, as imshow expects for multi-channel images.
        plt.imshow(images[img_num].cpu().numpy().transpose(1,2,0).squeeze(), cmap='gray')
        plt.scatter(predictions[img_num,:,0], predictions[img_num,:,1], c = 'r')
        plt.scatter(landmarks[img_num,:,0], landmarks[img_num,:,1], c = 'g')

print('Total number of test images: {}'.format(len(test_dataset)))

end_time = time.time()
print("Elapsed Time : {}".format(end_time - start_time))

These are the predictions I am getting (the green ones are the ground truth):

I am not sure what the main reason is that these predictions are so off, or how to fix them.

The line:

    landmarks = (landmarks + 0.5) * 224

is taken from https://thecleverprogrammer.com/2020/07/22/face-landmarks-detection/

This is what I have in the network section:

# Four 2-D landmarks, flattened to (x0, y0, ..., x3, y3).
num_classes = 4 * 2


class Network(nn.Module):
    """ResNet-18 whose final fully connected layer emits `num_classes` values."""

    def __init__(self,num_classes=8):
        super().__init__()
        self.model_name = 'resnet18'
        # Build the backbone first, then replace its classifier head with a
        # linear layer of the requested output width.
        backbone = models.resnet18()
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)
        self.model = backbone

    def forward(self, x):
        """Cast the input to float32 and return the backbone's output."""
        return self.model(x.float())

Additionally, do you know how to apply:

        for i in range(8):
            if(i%2==0):
                landmarks[:,i] = landmarks[:,i]/800
            else:
                landmarks[:,i] = landmarks[:,i]/600

(which I have in the training cell) to the evaluation cell?

Can you try normalizing your input images at test time, before making predictions (since the images are also normalized during training)?

norm_image = transforms.Normalize(0.3812, 0.1123)
image = norm_image(image)

Thanks for your response. Unfortunately, this didn’t work.

Here’s the code in train cell:

# Training cell (second version): trains `network` with Adam + MSE loss for
# `num_epochs` epochs, runs a validation pass after every epoch, and saves the
# weights whenever the validation loss reaches a new minimum.
network = Network()
network.cuda()    

criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)

loss_min = np.inf  # lowest validation loss seen so far
num_epochs = 10

start_time = time.time()
for epoch in range(1,num_epochs+1):
    
    # Per-epoch accumulators of the step losses.
    loss_train = 0
    loss_test = 0
    running_loss = 0
    
    
    network.train()
    print('size of train loader is: ', len(train_loader))

    for step in range(1, len(train_loader)+1):

        
        # NOTE(review): iter(train_loader) creates a brand-new iterator on
        # every step, so next() only ever yields that iterator's first batch;
        # this loop never walks through the rest of the loader.
        batch = next(iter(train_loader))
        images, landmarks = batch['image'], batch['landmarks']
        print(images.shape)
       
        # Insert a channel axis at position 1 and replicate it three times
        # (presumably because the backbone expects 3-channel input -- confirm).
        images = images.unsqueeze_(1)

        images = torch.cat((images,images,images),1)
        images = images.cuda()
    
        # Flatten each sample's landmarks into a single row.
        landmarks = landmarks.view(landmarks.size(0),-1).cuda() 
        norm_image = transforms.Normalize(0.3812, 0.1123) 
        # NOTE(review): the normalized result is only bound to the loop
        # variable `image`; nothing is stored back into `images`, so the batch
        # passed to the network below is not changed by this loop.
        for image in images:
            image = image.float()
            ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
            image = norm_image(image)
        
        ###removing landmarks normalize because of the following error
        ###ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([8, 8])
       
         
        # Divide even-indexed columns by 800 and odd-indexed columns by 600
        # (presumably x / image width and y / image height -- confirm).
        for i in range(8):
            if(i%2==0):
                landmarks[:,i] = landmarks[:,i]/800
            else:
                landmarks[:,i] = landmarks[:,i]/600
                
        print(landmarks.shape)
        print(landmarks)
        
        

        
        ##norm_landmarks = transforms.Normalize(0.4949, 0.2165) ## should I do this?
        # `x != x` is true only for NaN, so this zeroes NaN landmark entries.
        landmarks [landmarks != landmarks] = 0
        # Adds a leading axis in place. NOTE(review): the target now has one
        # more dimension than it had after the flatten above; confirm the loss
        # is meant to broadcast it against `predictions`.
        landmarks = landmarks.unsqueeze_(0)
        ##landmarks = norm_landmarks(landmarks)
        
        predictions = network(images)
        
        # clear all the gradients before calculating them
        optimizer.zero_grad()
        
        print('predictions are: ', predictions.float())
        print('landmarks are: ', landmarks.float())
        # find the loss for the current step
        loss_train_step = criterion(predictions.float(), landmarks.float())
        
        
        loss_train_step = loss_train_step.to(torch.float32)
        print("loss_train_step before backward: ", loss_train_step)
        
        # calculate the gradients
        loss_train_step.backward()
        
        # update the parameters
        optimizer.step()
        
        # NOTE(review): this prints the same tensor as the "before backward"
        # line; the loss is not recomputed after the optimizer step.
        print("loss_train_step after backward: ", loss_train_step)

        
        loss_train += loss_train_step.item()
        
        print("loss_train: ", loss_train)
        # Mean training loss over the steps completed so far in this epoch.
        running_loss = loss_train/step
        print('step: ', step)
        print('running loss: ', running_loss)
        
        print_overwrite(step, len(train_loader), running_loss, 'train')
        
    network.eval() 
    with torch.no_grad():
        
        for step in range(1,len(test_loader)+1):
            
            # NOTE(review): the validation pass draws from `train_loader`, not
            # `test_loader`, and (as in the training loop) always takes the
            # first batch of a fresh iterator.
            batch = next(iter(train_loader))
            images, landmarks = batch['image'], batch['landmarks']
            images = images.cuda()
            # NOTE(review): unlike the training loop, the landmarks here are
            # neither divided by 800/600 nor cleared of NaNs before the loss.
            landmarks = landmarks.view(landmarks.size(0),-1).cuda()
            ##[8, 600, 800] --> [8,3,600,800]
            images = images.unsqueeze(1)
            images = torch.cat((images, images, images), 1)
            predictions = network(images)

            # find the loss for the current step
            loss_test_step = criterion(predictions, landmarks)

            loss_test += loss_test_step.item()
            running_loss = loss_test/step

            print_overwrite(step, len(test_loader), running_loss, 'Validation')
    
    # Turn the accumulated sums into per-step averages for the epoch summary.
    loss_train /= len(train_loader)
    loss_test /= len(test_loader)
    
    print('\n--------------------------------------------------')
    print('Epoch: {}  Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_test))
    print('--------------------------------------------------')
    
    # Save the weights whenever the validation loss improves.
    if loss_test < loss_min:
        loss_min = loss_test
        torch.save(network.state_dict(), '../moth_landmarks.pth') 
        print("\nMinimum Valid Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
        print('Model Saved\n')
     
print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))

Here’s the result of train cell (unexpired pastebin): https://pastebin.com/raw/1Qsrnbr3

Do you know why I get a NaN loss during training?

Also, do you know what I am doing wrong or missing here in the evaluation cell?

# Evaluation cell (second version): reloads the saved weights, tries to
# normalize the images, then plots predictions (red) against ground truth
# (green). This is the version that raises the ValueError quoted below it.
start_time = time.time()

with torch.no_grad():

    best_network = Network()
    best_network.cuda()
    best_network.load_state_dict(torch.load('../moth_landmarks.pth')) 
    best_network.eval()
    
    # NOTE(review): the batch comes from `train_loader`, although the summary
    # printed at the end reports the size of `test_dataset`.
    batch = next(iter(train_loader))
    images, landmarks = batch['image'], batch['landmarks']
    #images = images.unsqueeze_(1)

    # NOTE(review): with the unsqueeze above commented out there is no channel
    # axis yet, so this concatenates along axis 1 of the raw batch; the
    # traceback quoted after this cell shows each slice arriving as a 2-D
    # [1800, 800] tensor.
    images = torch.cat((images,images,images),1)
    images = images.cuda()

    norm_image = transforms.Normalize(0.3812, 0.1123) 
    # Each `image` here is 2-D, which Normalize rejects (it requires a
    # (C, H, W) tensor) -- this is where the quoted ValueError is raised.
    # NOTE(review): even with 3-D slices, the result is only bound to the loop
    # variable and never written back into `images`.
    for image in images:
        image = image.float()
        ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
        image = norm_image(image)
    # NOTE(review): the training cell scales targets by dividing columns by
    # 800 / 600; `(x + 0.5) * 224` is not the inverse of that scaling.
    landmarks = (landmarks + 0.5) * 224
    
    ##[8, 600, 800] --> [8,3,600,800]
    # NOTE(review): `images` was already concatenated three times above, so
    # this adds a channel axis to the enlarged tensor and replicates it again.
    images = images.unsqueeze(1)
    images = torch.cat((images, images, images), 1)

    predictions = (best_network(images).cpu() + 0.5) * 224
    # Regroup the 8 outputs per image as 4 (x, y) pairs.
    predictions = predictions.view(-1,4,2)
    
    plt.figure(figsize=(10,40))
    
    # Plots exactly 8 images; assumes the batch holds at least 8 -- confirm.
    for img_num in range(8):
        plt.subplot(8,1,img_num+1)
        # Move channels last, as imshow expects for multi-channel images.
        plt.imshow(images[img_num].cpu().numpy().transpose(1,2,0).squeeze(), cmap='gray')
        plt.scatter(predictions[img_num,:,0], predictions[img_num,:,1], c = 'r')
        plt.scatter(landmarks[img_num,:,0], landmarks[img_num,:,1], c = 'g')

print('Total number of test images: {}'.format(len(test_dataset)))

end_time = time.time()
print("Elapsed Time : {}".format(end_time - start_time))

The error is:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-59-e4aa0ace8c75> in <module>
     19         image = image.float()
     20         ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
---> 21         image = norm_image(image)
     22     landmarks = (landmarks + 0.5) * 224
     23 

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, tensor)
    210             Tensor: Normalized Tensor image.
    211         """
--> 212         return F.normalize(tensor, self.mean, self.std, self.inplace)
    213 
    214     def __repr__(self):

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/functional.py in normalize(tensor, mean, std, inplace)
    282     if tensor.ndimension() != 3:
    283         raise ValueError('Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = '
--> 284                          '{}.'.format(tensor.size()))
    285 
    286     if not inplace:

ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([1800, 800]).

Have you tried overfitting your model on a tiny sample (like 2 or 3 datapoints) ? That should let you at least know if your model is capable of learning or if there are some underlying issues with the data getting passed.

Also, from what I can tell, your normalized images are getting thrown out. The `image` assigned on line 21 isn’t referenced again, and it would only contain the final image in any case, but I could be missing something in the code. It’s pretty standard to collect the transforms into a single `transforms.Compose` pipeline and pass that to your dataset, so that you don’t have to worry about them in your training loop. That might make things easier to troubleshoot as well.

Training:

# Training cell.
#
# Trains `Network` to regress the flattened landmark coordinates with Adam and
# an MSE loss, runs a validation pass after every epoch, and saves the weights
# whenever the validation loss reaches a new minimum.
#
# Expects these names from other cells: Network, train_loader, test_loader,
# print_overwrite, plus the torch / nn / optim / np / time imports.
#
# Design notes:
#   * Batches are obtained by iterating the loaders. Calling
#     `next(iter(loader))` inside the step loop builds a new iterator every
#     time and therefore only ever returns that iterator's first batch.
#   * Validation reads `test_loader`; its loss is what selects the checkpoint.
#   * Training and validation share `_prepare_batch`, so both feed the network
#     and the loss identically prepared tensors.
#   * Images are passed to the network un-normalized. Normalizing each image in
#     a Python loop and rebinding the loop variable does not modify the batch;
#     if normalization is wanted, put it in the Dataset's transform so that
#     training, validation and inference all see the same inputs.
#   * Per-step tensor dumps are omitted; `print_overwrite` reports progress.

IMAGE_WIDTH = 800    # divisor for x coordinates (even landmark columns)
IMAGE_HEIGHT = 600   # divisor for y coordinates (odd landmark columns)


def _prepare_batch(batch):
    """Convert one loader batch into network input and regression target.

    Args:
        batch: mapping with an 'image' tensor (assumed to be single-channel,
            shaped (batch, height, width) -- confirm against the Dataset) and
            a 'landmarks' tensor of (x, y) pixel coordinates per sample.

    Returns:
        A tuple ``(images, landmarks)`` of float32 CUDA tensors: `images` has
        the single channel replicated three times along a new axis 1;
        `landmarks` is flattened to one row per sample, with x divided by
        IMAGE_WIDTH, y divided by IMAGE_HEIGHT and NaN entries set to 0.
    """
    images = batch['image'].float().unsqueeze(1)
    images = torch.cat((images, images, images), 1).cuda()

    landmarks = batch['landmarks'].float()
    landmarks = landmarks.reshape(landmarks.size(0), -1).cuda()
    # Columns alternate x, y, x, y, ... so tile (width, height) across them.
    scale = torch.tensor(
        [IMAGE_WIDTH, IMAGE_HEIGHT],
        dtype=landmarks.dtype,
        device=landmarks.device,
    ).repeat(landmarks.size(1) // 2)
    landmarks = landmarks / scale
    # Missing landmarks arrive as NaN; zero them so the loss stays finite.
    landmarks[torch.isnan(landmarks)] = 0
    return images, landmarks


network = Network()
network.cuda()

criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)

loss_min = np.inf  # lowest validation loss seen so far
num_epochs = 10

start_time = time.time()
for epoch in range(1, num_epochs + 1):

    loss_train = 0
    loss_test = 0

    network.train()
    for step, batch in enumerate(train_loader, 1):
        images, landmarks = _prepare_batch(batch)

        # clear all the gradients before calculating them
        optimizer.zero_grad()

        predictions = network(images)

        # find the loss for the current step
        loss_train_step = criterion(predictions, landmarks)

        # calculate the gradients
        loss_train_step.backward()

        # update the parameters
        optimizer.step()

        loss_train += loss_train_step.item()
        running_loss = loss_train / step

        print_overwrite(step, len(train_loader), running_loss, 'train')

    network.eval()
    with torch.no_grad():
        for step, batch in enumerate(test_loader, 1):
            images, landmarks = _prepare_batch(batch)
            predictions = network(images)

            # find the loss for the current step
            loss_test_step = criterion(predictions, landmarks)

            loss_test += loss_test_step.item()
            running_loss = loss_test / step

            print_overwrite(step, len(test_loader), running_loss, 'Validation')

    # Turn the accumulated sums into per-step averages for the epoch summary.
    loss_train /= len(train_loader)
    loss_test /= len(test_loader)

    print('\n--------------------------------------------------')
    print('Epoch: {}  Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_test))
    print('--------------------------------------------------')

    # Save the weights whenever the validation loss improves.
    if loss_test < loss_min:
        loss_min = loss_test
        torch.save(network.state_dict(), '../moth_landmarks.pth')
        print("\nMinimum Valid Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
        print('Model Saved\n')

print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))

Evaluation:

# Evaluation cell.
#
# Reloads the saved weights, predicts landmarks for one batch and plots the
# predictions (red) against the ground truth (green) on top of each image.
#
# Expects these names from other cells: Network, test_loader, test_dataset,
# plt, torch, time.
#
# Design notes:
#   * The network is trained on targets whose x columns are divided by 800 and
#     whose y columns are divided by 600, so predictions are mapped back to
#     pixels by multiplying with (800, 600). The ground truth from the loader
#     is used as-is; dividing it and multiplying it back would be a no-op.
#   * The batch is drawn from `test_loader`, matching the test-set summary
#     printed below. Swap in `train_loader` for an overfitting sanity check.
#   * Images are fed un-normalized, the same way the training loop feeds them.
#     If normalization is added, add it in the Dataset's transform so training
#     and evaluation stay in step.
start_time = time.time()

with torch.no_grad():

    best_network = Network()
    best_network.cuda()
    best_network.load_state_dict(torch.load('../moth_landmarks.pth'))
    best_network.eval()

    batch = next(iter(test_loader))
    raw_images, landmarks = batch['image'], batch['landmarks']

    # Ground truth as (batch, points, 2) pixel coordinates; missing (NaN)
    # entries are zeroed, as in training.
    landmarks = landmarks.float().reshape(landmarks.size(0), -1, 2).cpu()
    landmarks[torch.isnan(landmarks)] = 0

    ##[8, 600, 800] --> [8,3,600,800]
    images = raw_images.float().unsqueeze(1)
    images = torch.cat((images, images, images), 1).cuda()

    # Network output is (batch, 8) in normalized units: regroup as 4 (x, y)
    # pairs and scale x by the image width, y by the image height.
    pixel_scale = torch.tensor([800.0, 600.0])
    predictions = best_network(images).cpu().view(-1, 4, 2) * pixel_scale

    # Show at most 8 images, fewer if the batch is smaller.
    num_shown = min(8, images.size(0))
    plt.figure(figsize=(10,40))

    for img_num in range(num_shown):
        plt.subplot(num_shown, 1, img_num + 1)
        # Display the original single-channel image; the 3-channel copy only
        # exists to satisfy the network's input layout.
        plt.imshow(raw_images[img_num].cpu().numpy(), cmap='gray')
        plt.scatter(predictions[img_num, :, 0].numpy(), predictions[img_num, :, 1].numpy(), c = 'r')
        plt.scatter(landmarks[img_num, :, 0].numpy(), landmarks[img_num, :, 1].numpy(), c = 'g')

print('Total number of test images: {}'.format(len(test_dataset)))

end_time = time.time()
print("Elapsed Time : {}".format(end_time - start_time))