SyntaxError: index out of range

epochs = 10
for epoch in range(1, epochs + 1):
    print(f'Epoch: {epoch}/{epochs}')

    model.train()
    train_losses = []
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images).to(torch.float64)  # cast to match the float64 labels
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.detach().cpu().numpy())
    train_loss = np.mean(train_losses)  # average training loss for this epoch
    print(train_loss)
Epoch: 1/10
  File "<string>", line unknown
SyntaxError: index out of range

Why am I getting this error? Can you please help?
This worked when I trained on 10,000 images, but when I tried the full dataset (51,000 images) I got the error above.

import os

import torch
from skimage import io  # assumption: io.imread comes from scikit-image here
from torch.utils.data import Dataset, DataLoader


class LoadDataset(Dataset):
    def __init__(self, df, root_dir, transform=None):
        self.annotations = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        # column 0 holds the file name, the remaining columns hold the labels
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        # targets do not need gradients
        y_label = torch.tensor(self.annotations.iloc[index, 1:], dtype=torch.float64)

        if self.transform:
            image = self.transform(image)

        return (image, y_label)


dataset = LoadDataset(df=new_df, root_dir=root_dir, transform=transform_img)

# 80/20 train/validation split
no_of_training_images = round(len(dataset) * 0.8)
remaining_images = len(dataset) - no_of_training_images

train_data, valid_data = torch.utils.data.random_split(dataset, [no_of_training_images, remaining_images])

train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=16, shuffle=True)

The error message is a bit hard to understand, but it is usually raised by PIL, e.g. if the image file is broken.
You could debug it further by checking which image is causing the issue (e.g. print the index in Dataset.__getitem__ and try to load the image directly) and then either fix the image (download it again) or remove it.
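Something like this minimal sketch would narrow it down (assuming `dataset` is the LoadDataset instance created above):

# Iterate over the raw dataset (bypassing the DataLoader) and record every
# index whose sample cannot be loaded
bad_indices = []
for index in range(len(dataset)):
    try:
        image, label = dataset[index]
    except Exception as e:
        print(f'index {index} failed: {e}')
        bad_indices.append(index)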


Update: the problem is solved.
I just created a list of the problematic (NoneType) images and removed them.

import os

import cv2

# cv2.imread returns None instead of raising an error when a file is
# unreadable, so collect every path that fails to load
problematic_images = []
for img in new_df.iloc[:, 0]:
    img_path = os.path.join(root_dir, img)
    img_array = cv2.imread(img_path)
    if img_array is None:
        problematic_images.append(img_path)
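
To actually drop those rows before rebuilding the dataset, a filter along these lines should work (a sketch, assuming `problematic_images` holds the full paths built above and column 0 of `new_df` holds the relative file names):

# Map the full paths back to the relative names stored in the DataFrame
# (hypothetical follow-up step, not shown in the original post)
bad_names = {os.path.relpath(p, root_dir) for p in problematic_images}
new_df = new_df[~new_df.iloc[:, 0].isin(bad_names)].reset_index(drop=True)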