ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([8, 8])

I am trying to normalize my targets (landmarks) here in which each image has 4 landmarks with x and y values for each landmark (keypoint). Here the batch size is 8.

network = Network()
network.cuda()    

criterion = nn.MSELoss()
optimizer = optim.Adam(network.parameters(), lr=0.0001)

loss_min = np.inf
num_epochs = 1

start_time = time.time()
for epoch in range(1,num_epochs+1):
    
    loss_train = 0
    loss_test = 0
    running_loss = 0
    
    
    network.train()
    print('size of train loader is: ', len(train_loader))

    for step in range(1,len(train_loader)+1):

        
        batch = next(iter(train_loader))
        images, landmarks = batch['image'], batch['landmarks']
        #RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 600, 800, 3] to have 3 channels, but got 600 channels instead
        #using permute below to fix the above error
        images = images.permute(0,3,1,2)
        
        images = images.cuda()
    
        landmarks = landmarks.view(landmarks.size(0),-1).cuda() 
    
        norm_image = transforms.Normalize([0.3809, 0.3810, 0.3810], [0.1127, 0.1129, 0.1130]) 
        for image in images:
            image = image.float()
            ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
            image = norm_image(image)
        
        
        norm_landmarks = transforms.Normalize(0.4949, 0.2165)
        landmarks = norm_landmarks(landmarks)
        ##landmarks = torchvision.transforms.Normalize(landmarks) #Do I need to normalize the target?
        
        predictions = network(images)
        
        # clear all the gradients before calculating them
        optimizer.zero_grad()
        
        print('predictions are: ', predictions.float())
        print('landmarks are: ', landmarks.float())
        # find the loss for the current step
        loss_train_step = criterion(predictions.float(), landmarks.float())
        
        
        loss_train_step = loss_train_step.to(torch.float32)
        print("loss_train_step before backward: ", loss_train_step)
        
        # calculate the gradients
        loss_train_step.backward()
        
        # update the parameters
        optimizer.step()
        
        print("loss_train_step after backward: ", loss_train_step)

        
        loss_train += loss_train_step.item()
        
        print("loss_train: ", loss_train)
        running_loss = loss_train/step
        print('step: ', step)
        print('running loss: ', running_loss)
        
        print_overwrite(step, len(train_loader), running_loss, 'train')
        
    network.eval() 
    with torch.no_grad():
        
        for step in range(1,len(test_loader)+1):
            
            batch = next(iter(train_loader))
            images, landmarks = batch['image'], batch['landmarks']
            images = images.permute(0,3,1,2)
            images = images.cuda()
            landmarks = landmarks.view(landmarks.size(0),-1).cuda()
        
            predictions = network(images)

            # find the loss for the current step
            loss_test_step = criterion(predictions, landmarks)

            loss_test += loss_test_step.item()
            running_loss = loss_test/step

            print_overwrite(step, len(test_loader), running_loss, 'Validation')
    
    loss_train /= len(train_loader)
    loss_test /= len(test_loader)
    
    print('\n--------------------------------------------------')
    print('Epoch: {}  Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_test))
    print('--------------------------------------------------')
    
    if loss_test < loss_min:
        loss_min = loss_test
        torch.save(network.state_dict(), '../moth_landmarks.pth') 
        print("\nMinimum Valid Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
        print('Model Saved\n')
     
print('Training Complete')
print("Total Elapsed Time : {} s".format(time.time()-start_time))

But I get this error:

size of train loader is:  90

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-3e4770ad7109> in <module>
     40 
     41         norm_landmarks = transforms.Normalize(0.4949, 0.2165)
---> 42         landmarks = norm_landmarks(landmarks)
     43         ##landmarks = torchvision.transforms.Normalize(landmarks) #Do I need to normalize the target?
     44 

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, tensor)
    210             Tensor: Normalized Tensor image.
    211         """
--> 212         return F.normalize(tensor, self.mean, self.std, self.inplace)
    213 
    214     def __repr__(self):

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/functional.py in normalize(tensor, mean, std, inplace)
    282     if tensor.ndimension() != 3:
    283         raise ValueError('Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = '
--> 284                          '{}.'.format(tensor.size()))
    285 
    286     if not inplace:

ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([8, 8]).

How should I normalize my landmarks?

Can you try adding

landmarks.unsqueeze_(0)

before the line:

1 Like

superb! should I have both norm_landmarks as well as torch.div by max value in the raw landmarks?


        norm_landmarks = transforms.Normalize(0.4949, 0.2165)
        landmarks = landmarks.unsqueeze_(0)
        landmarks = norm_landmarks(landmarks)
         
        #743 is max value in raw landmarks values
        landmarks = torch.div(landmarks, 743.)
        
        predictions = network(images)

with the above code, I still end up getting nans after first step.

I answered your question abt normalization in the other thread to maintain continuity.