Value Error (Wrong target size, cross entropy)

Hello,
I am very new to PyTorch and I am now trying to train my first neuronal net with my own data.
Sadly I get an error message I could not figure out a solution for.

It would be great if you could help me.

I am getting the following error:


ValueError Traceback (most recent call last)
in
13 for inputs, labels in training_loader:
14 outputs = model(inputs)
β€”> 15 loss = criterion(outputs, labels)
16
17 optimizer.zero_grad()

~\Anaconda3\envs\opencv4\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
–> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)

~\Anaconda3\envs\opencv4\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
940 def forward(self, input, target):
941 return F.cross_entropy(input, target, weight=self.weight,
–> 942 ignore_index=self.ignore_index, reduction=self.reduction)
943
944

~\Anaconda3\envs\opencv4\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2054 if size_average is not None or reduce is not None:
2055 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2056 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2057
2058

~\Anaconda3\envs\opencv4\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1879 if target.size()[1:] != input.size()[2:]:
1880 raise ValueError(β€˜Expected target size {}, got {}’.format(
-> 1881 out_size, target.size()))
1882 input = input.contiguous().view(n, c, 1, -1)
1883 target = target.contiguous().view(n, 1, -1)

ValueError: Expected target size (100, 4), got torch.Size([100])

This is my code:

import torch
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as F
from torch import nn
from torchvision import datasets, transforms
from torchvision import transforms
import pandas

class CsvDataset(torch.utils.data.Dataset):
    def __init__(self, csv_file, transforms=None):
        
        """
        Args: 
         csv_file (string): Path to csv file
         transforms (callable, optional): Optional tranforms to be applied on a sample
        """
        self.df = pandas.read_csv(csv_file, sep=';')
        self.df = self.df.loc[:, ~self.df.columns.str.contains('^Unnamed')] #Delete last colum

        self.transforms = transforms

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample_tensor = torch.tensor(self.df.iloc[idx:idx+1, 4:].values).float()
        
        lbl_to_idx = {
            'z': 0,
            'g': 1,
            'e': 2,
            'u': 3
        }
        lbl_val = self.df.iloc[idx:idx+1, 1:2].values       
        label = torch.tensor(lbl_to_idx[lbl_val[0,0]]).float()
        
        sample = {'sample': sample_tensor, 'label': label}

        if self.transforms: # Something Wrong??????????
            sample = self.transform(sample)

        return sample_tensor, label

    def __len__(self):
        return self.df.shape[0]


dataset = CsvDataset(csv_file='C:/data.csv')



# __Split Dataset__
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, validation_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

training_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=100, shuffle=True) 
validation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=100, shuffle=False) 


class Classifier(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super().__init__()
        #Define neural net:
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x) 
        return 


model = Classifier(224, 125, 65, 4) #224 Inputs, 125 nodes in H1, 65 nodes in H2, 4 ouput classes
model


criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


epochs = 15
running_loss_history = []
running_corrects_history = []
val_running_loss_history = [] #For Validation with validation dataset
val_running_corrects_history = []

for e in range(epochs):
    running_loss = 0.0
    running_corrects = 0.0
    val_running_loss = 0.0
    val_running_corrects = 0.0
    
    for inputs, labels in training_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        _, preds = torch.max(outputs, 1)
        running_loss += loss.item() 
        running_corrects += torch.sum(preds == labels.data) 

    else:
        #Validation:
        with torch.no_grad(): 
            for val_inputs, val_labels in validation_loader:
                val_outputs = model(val_inputs)
                val_loss = criterion(val_outputs, val_labels)
        
                _, val_preds = torch.max(val_outputs, 1)
                val_running_loss += val_loss.item() 
                val_running_corrects += torch.sum(val_preds == val_labels.data) 
      
        epoch_loss = running_loss/len(training_loader) 
        epoch_acc = running_corrects.float()/ len(training_loader)
        running_loss_history.append(epoch_loss)
        running_corrects_history.append(epoch_acc)
    
        val_epoch_loss = val_running_loss/len(validation_loader)
        val_epoch_acc = val_running_corrects.float()/ len(validation_loader)
        val_running_loss_history.append(val_epoch_loss)
        val_running_corrects_history.append(val_epoch_acc)
        print('epoch :', (e+1))
        print('training loss: {:.4f}, acc {:.4f} '.format(epoch_loss, epoch_acc.item()))
        print('validation loss: {:.4f}, validation acc {:.4f} '.format(val_epoch_loss, val_epoch_acc.item()))

Thank you very much!

Your tensor outputs seems to be of shape [100, something, 4].
I suppose 100 is the batch size, and 4 is the number of classes ?
But what does the other dimension correspond to ?

Thanks for your help.
Right, 100 would be the batch size and I have 4 classes.
A sample tensor consists of 224 values.

This is how a piece of the dataset looks like (first measurement sample, then label):

dataset[5]

(tensor([[0.0681, 0.0647, 0.0697, 0.0680, 0.0632, 0.0590, 0.0717, 0.0640, 0.0558,
          0.0673, 0.0649, 0.0675, 0.0678, 0.0717, 0.0726, 0.0713, 0.0720, 0.0757,
          0.0756, 0.0744, 0.0744, 0.0760, 0.0777, 0.0797, 0.0760, 0.0781, 0.0758,
          0.0759, 0.0768, 0.0758, 0.0760, 0.0744, 0.0757, 0.0744, 0.0754, 0.0774,
          0.0777, 0.0788, 0.0829, 0.0857, 0.0875, 0.0942, 0.1035, 0.1125, 0.1204,
          0.1349, 0.1482, 0.1601, 0.1754, 0.1912, 0.2024, 0.2140, 0.2218, 0.2294,
          0.2344, 0.2405, 0.2447, 0.2486, 0.2535, 0.2542, 0.2544, 0.2568, 0.2554,
          0.2477, 0.2405, 0.2350, 0.2305, 0.2221, 0.2132, 0.2095, 0.2054, 0.2022,
          0.1978, 0.1955, 0.1899, 0.1882, 0.1892, 0.1882, 0.1846, 0.1804, 0.1777,
          0.1741, 0.1703, 0.1661, 0.1627, 0.1583, 0.1578, 0.1576, 0.1568, 0.1529,
          0.1481, 0.1425, 0.1366, 0.1270, 0.1198, 0.1130, 0.1099, 0.1076, 0.1022,
          0.0994, 0.0946, 0.0880, 0.0840, 0.0820, 0.0815, 0.0802, 0.0827, 0.0880,
          0.0988, 0.1171, 0.1491, 0.1890, 0.2348, 0.2812, 0.3261, 0.3630, 0.3943,
          0.4248, 0.4504, 0.4734, 0.4962, 0.5118, 0.5217, 0.5316, 0.5350, 0.5365,
          0.5429, 0.5515, 0.5720, 0.5907, 0.5909, 0.5929, 0.6001, 0.6017, 0.6039,
          0.6022, 0.6034, 0.6033, 0.5968, 0.5984, 0.5936, 0.5948, 0.5960, 0.5980,
          0.6030, 0.6056, 0.6022, 0.5938, 0.5872, 0.5832, 0.5741, 0.5695, 0.5610,
          0.5634, 0.5688, 0.5707, 0.5690, 0.5678, 0.5646, 0.5563, 0.5579, 0.5532,
          0.5549, 0.5573, 0.5536, 0.5591, 0.5611, 0.5601, 0.5557, 0.5563, 0.5591,
          0.5535, 0.5577, 0.5565, 0.5551, 0.5545, 0.5469, 0.5592, 0.5621, 0.5559,
          0.5560, 0.5592, 0.5609, 0.5594, 0.5594, 0.5601, 0.5568, 0.5478, 0.5518,
          0.5447, 0.5483, 0.5453, 0.5404, 0.5367, 0.5438, 0.5450, 0.5398, 0.5360,
          0.5313, 0.5155, 0.5035, 0.4977, 0.4822, 0.4723, 0.4667, 0.4621, 0.4480,
          0.4449, 0.4368, 0.4308, 0.4343, 0.4166, 0.4172, 0.4014, 0.3871, 0.3612,
          0.3562, 0.3375, 0.3182, 0.2991, 0.3049, 0.2872, 0.2858, 0.2696]]),
 tensor(1.))

Okay, it seems that you have a phantom dimension that you need to get rid of.
The tensor inputs has shape [batch, 1, 224], you need to squeeze it to have [batch, 224] instead.

Instead of calling outputs = model(inputs), try :

outputs = model(inputs.squeeze(1))
1 Like