Target 100 is out of bounds

I'm having an issue training a CNN on an image classification task.

Here’s the dataset used: facial age | Kaggle

Here’s my code

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset
import torchvision
from torchvision import transforms
from PIL import Image
import os

# Custom Dataset
class CustomImageFolder(Dataset):
    """Image dataset laid out as ``root_dir/<class_folder>/<image_file>``.

    Each sub-folder of ``root_dir`` that directly contains at least one file
    is treated as one class. Classes are numbered ``0 .. C-1`` in sorted
    folder-name order, because ``nn.CrossEntropyLoss`` requires every target
    to be a valid index into the model's ``C`` outputs. Returning the folder
    name converted with ``int()`` does not satisfy that: a folder named
    ``"100"`` yields target 100, which is out of bounds for a 99-way head.

    Attributes:
        root_dir: Directory that was scanned.
        transform: Optional callable applied to each loaded PIL image.
        images: File path of every sample.
        labels: Folder name (string) of every sample, parallel to ``images``.
        targets: Contiguous class index of every sample, parallel to
            ``images``; this is what ``__getitem__`` returns.
        classes: Sorted folder names that contributed at least one sample.
        class_to_idx: Mapping from folder name to its class index.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.targets = []
        self.classes = []
        self.class_to_idx = {}

        for folder in sorted(os.listdir(root_dir)):
            folder_path = os.path.join(root_dir, folder)
            if not os.path.isdir(folder_path):
                continue

            # Sort the files so sample order does not depend on the
            # filesystem's listing order.
            candidates = (
                os.path.join(folder_path, name)
                for name in sorted(os.listdir(folder_path))
            )
            files = [path for path in candidates if os.path.isfile(path)]
            if not files:
                # A folder without files (e.g. a stray nested copy of the
                # dataset) must not consume a class index.
                continue

            class_idx = len(self.classes)
            self.classes.append(folder)
            self.class_to_idx[folder] = class_idx

            self.images.extend(files)
            self.labels.extend([folder] * len(files))
            self.targets.extend([class_idx] * len(files))

    def __len__(self):
        """Return the number of samples found under ``root_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given. ``class_index`` is an ``int`` in ``[0, len(classes) - 1]``.
        """
        # The context manager closes the underlying file as soon as the
        # pixel data has been copied by convert().
        with Image.open(self.images[idx]) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, self.targets[idx]

# Path to your dataset
data_path = '/path/to/your/data'

# Define the transform
transform = transforms.Compose([transforms.ToTensor()])

# Create the custom dataset
dataset = CustomImageFolder(root_dir=data_path, transform=transform)
if len(dataset) == 0:
    raise RuntimeError(f'No images found under {data_path!r}')

# Size the classifier head from the data instead of hard-coding 99.
# nn.CrossEntropyLoss raises "IndexError: Target N is out of bounds" whenever
# a target is >= the number of model outputs. The class folders are assumed
# to have numeric names (the dataset converts them with int()), so no target
# can exceed the largest folder number and max + 1 outputs is always enough.
num_classes = max(int(label) for label in dataset.labels) + 1

# Split into train and test sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Model: pretrained ResNet-50 with its final layer replaced by a fresh
# num_classes-way linear head
model = torchvision.models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

# Move model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function and optimizer
# The weight vector must have exactly one entry per model output.
class_weights_tensor = torch.tensor([1.0] * num_classes, dtype=torch.float)  # Replace with actual class weights
loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor.to(device))
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
    # NOTE: this reports the loss of the last batch of the epoch only.
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')

print('Finished Training')

Here’s the traceback:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Input In [20], in <cell line: 4>()
     15 # Forward pass
     16 outputs = model(inputs)
---> 17 loss = loss_fn(outputs, labels)
     19 # Backward pass and optimize
     20 loss.backward()

File ~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1190, in Module._call_impl(self, *input, **kwargs)
   1186 # If we don't have any hooks, we want to skip the rest of the logic in
   1187 # this function, and just call forward.
   1188 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1189         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1190     return forward_call(*input, **kwargs)
   1191 # Do not call functions when jit is used
   1192 full_backward_hooks, non_full_backward_hooks = [], []

File ~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/modules/loss.py:1174, in CrossEntropyLoss.forward(self, input, target)
   1173 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1174     return F.cross_entropy(input, target, weight=self.weight,
   1175                            ignore_index=self.ignore_index, reduction=self.reduction,
   1176                            label_smoothing=self.label_smoothing)

File ~/opt/anaconda3/lib/python3.9/site-packages/torch/nn/functional.py:3026, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
   3024 if size_average is not None or reduce is not None:
   3025     reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3026 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)

IndexError: Target 100 is out of bounds.

(note: the dataset has an extra folder by the same name as the parent folder face_age.)

Could you explain why this extra folder is inside the root or could you remove it?
I haven’t checked your CustomImageFolder in detail, but the default ImageFolder will assign a class label to each folder. Extra folders would thus increase the number of classes in the target and would yield your error.

My bad, I should’ve been clearer. I’m not sure why there was an extra folder, so I got rid of it. Just a heads-up for anyone who tries to run my code: remove that extra folder first.