Target 40 is out of bounds for nn.CrossEntropyLoss()

wangmyde · May 2, 2020, 2:27am

I create a custom image data set like:

from torch.utils.data.dataset import Dataset
from PIL import Image
import torchvision
from torchvision import datasets, models, transforms
import numpy as np

class MyCustomDataset(Dataset):
    def __init__(self, df, transforms=None):


        """
        Args:
            df (pandas.DataFrame): path to images and in dataframe
            transforms: pytorch transforms for transforms and tensor conversion
        """
        # Transforms
        self.transforms = transforms
        # Read the dataframe
        self.data_info = df              
        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        # Second column is the labels
        self.label_arr = np.asarray(self.data_info.iloc[:, 1])
        # Calculate len
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        # Open image
        img_as_img = Image.open(single_image_name)

        img_as_tensor= self.transforms(img_as_img)
        # Get label(class) of the image based on the cropped pandas column
        single_image_label = self.label_arr[index]

        return (img_as_tensor, single_image_label)

    def __len__(self):
        return self.data_len

The input of df for MyCustomDataset(df, transforms) is pandas dataframe storing images’ paths and labels look like below:

file_name                          label
0   M:\RealModels\images\001\001001.png 0
1   M:\RealModels\images\001\002001.png 0
2   M:\RealModels\images\001\003001.png 0
3   M:\RealModels\images\001\004001.png 0
4   M:\RealModels\images\001\006001.png 0
... ... ...
3197    M:\RenderedModels\images_rgb\450\116450.png 45
3198    M:\RenderedModels\images_rgb\450\117450.png 45
3199    M:\RenderedModels\images_rgb\450\118450.png 45
3200    M:\RenderedModels\images_rgb\450\119450.png 45
3201    M:\RenderedModels\images_rgb\450\120450.png 45
3202 rows × 2 columns

There are 16 classes in my dataset. The classes lables are like: ['00', '01', '12', '34','35'...,'45']

My whole program is:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import time
import os
import copy
import scipy
from torch.utils.tensorboard import SummaryWriter

from torch.utils.data.dataset import Dataset
import torchvision
from torchvision import datasets, models, transforms


# In[2]:


import sys
sys.path.append(r"M:\program\pytorch\Scripts")
import custom_fun
from custom_fun import custom_dataset
import create_folder
from create_folder import create_tb_folder
import dataset_from_image
from dataset_from_image import MyCustomDataset


# In[3]:


tb_dir = r'../'
path = create_tb_folder(tb_dir)


# In[4]:


mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])


# In[5]:


data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(degrees=15),
        transforms.Resize((224,224)), # 299 for Inception v3
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'val': transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
}


# In[6]:


data_dir = r'M:\dataset\first_att'


# In[7]:


batch_size = 4
lr = 0.003


# In[8]:


data = custom_dataset(data_dir,0.8)


# In[37]:


df = data['train']
df.head()


# In[9]:


image_datasets = {x: MyCustomDataset(data[x], data_transforms[x]) for x in ['train', 'val']}


# In[24]:


dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                             shuffle=True, num_workers=0)
              for x in ['train', 'val']}


# In[25]:


dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
device = torch.device("cpu")


# In[26]:


# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
print(inputs.shape,classes.shape)
print(classes)

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)


# In[27]:


tb1 = SummaryWriter(path[1])
tb2 = SummaryWriter(path[2])


# In[28]:


def train_model(model, criterion, optimizer, scheduler, num_epochs):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase] # images of training data
            epoch_acc = running_corrects.double() / dataset_sizes[phase] # images of val data

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))


            # Tensorboard works here
            if phase == 'train':
                tb1.add_scalar('Loss', epoch_loss, epoch)
                tb1.add_scalar('Accuracy', epoch_acc, epoch)
                for name, weight in model.named_parameters():
                    tb.add_histogram(name, weight, epoch)
                    tb.add_histogram(f'{name}.grad', weight.grad, epoch)

            else:
                tb2.add_scalar('Loss', epoch_loss, epoch)
                tb2.add_scalar('Accuracy', epoch_acc, epoch)




            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


# In[29]:


model = models.resnet18(pretrained=True) 

# The way below is feature extraction. 
#for param in model.parameters():
    #param.requires_grad = False


# In[30]:


num_ftrs = model.fc.in_features


# In[31]:


model.fc = nn.Linear(num_ftrs, 16)


# In[32]:


model = model.to(device)
criterion = nn.CrossEntropyLoss()


# In[33]:


# Observe that all parameters are being optimized
optimizer = optim.Adam(model.parameters(), lr=lr)


# In[34]:


step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)


# In[35]:


tb = SummaryWriter(path[0])
grid = torchvision.utils.make_grid(inputs)
tb.add_image('images', grid)
tb.add_graph(model, inputs)
tb.close()


# In[36]:


model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=20)


# In[ ]:


torch.save(model.state_dict(), 'first_att_02.pth')


# In[ ]:
But I got the error:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-36-edba91d1cb93> in <module>
----> 1 model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=20)

<ipython-input-28-662f652902cf> in train_model(model, criterion, optimizer, scheduler, num_epochs)
     29                     outputs = model(inputs)
     30                     _, preds = torch.max(outputs, 1)
---> 31                     loss = criterion(outputs, labels)
     32 
     33                     # backward + optimize only if in training phase

M:\program\pytorch\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
    530             result = self._slow_forward(*input, **kwargs)
    531         else:
--> 532             result = self.forward(*input, **kwargs)
    533         for hook in self._forward_hooks.values():
    534             hook_result = hook(self, input, result)

M:\program\pytorch\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
    914     def forward(self, input, target):
    915         return F.cross_entropy(input, target, weight=self.weight,
--> 916                                ignore_index=self.ignore_index, reduction=self.reduction)
    917 
    918 

M:\program\pytorch\lib\site-packages\torch\nn\functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2019     if size_average is not None or reduce is not None:
   2020         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2021     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2022 
   2023 

M:\program\pytorch\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   1836                          .format(input.size(0), target.size(0)))
   1837     if dim == 2:
-> 1838         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   1839     elif dim == 4:
   1840         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

IndexError: Target 40 is out of bounds.

Could you let me know where I am wrong? Thanks very much.

ptrblck · May 2, 2020, 6:16am

The 16 classes should be mapped to the range [0, 15], while your target uses sparse values between 0 and 45.
You would have to remap these target values, as they are used to index the model output to calculate the loss.

wangmyde · May 2, 2020, 5:55pm

Hi, thanks very much for your reply. Your way works.
Because my labels original are string like: [00, 01, 11, 15, …, 45], is there any way that I can use my own image loader to convert those string labels to int to use them in crossentropy function withour directly assign the string to 0-16? Thanks?

ptrblck · May 3, 2020, 12:50am

You could create a dict inside your Dataset and map the target strings to the class indices in the __getitem__ method.

wangmyde · May 16, 2020, 5:31pm

Thanks very much for your help.