SyntaxError: index out of range

epochs = 10
for epoch in range(1, epochs + 1):
    print(f'Epoch: {epoch}/{epochs}')

    model.train()
    train_losses = []
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images).to(torch.float64)  # cast to match the float64 labels
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.detach().cpu().numpy())
    train_loss = np.mean(train_losses)  # average training loss for this epoch
    print(train_loss)
Epoch: 1/10
  File "<string>", line unknown
SyntaxError: index out of range

Why am I getting this error? Can you please help?
This worked when I trained on 10,000 images, but when I tried the full dataset (51,000 images) I got the error above.

import os

import torch
from skimage import io  # assumption: io.imread comes from scikit-image here
from torch.utils.data import Dataset, DataLoader


class LoadDataset(Dataset):
    def __init__(self, df, root_dir, transform=None):
        self.annotations = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        # column 0 holds the file name, the remaining columns hold the labels
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = io.imread(img_path)
        # targets do not need gradients
        y_label = torch.tensor(self.annotations.iloc[index, 1:], dtype=torch.float64)

        if self.transform:
            image = self.transform(image)

        return (image, y_label)


dataset = LoadDataset(df=new_df, root_dir=root_dir, transform=transform_img)

# 80/20 train/validation split
no_of_training_images = round(len(dataset) * 0.8)
remaining_images = len(dataset) - no_of_training_images

train_data, valid_data = torch.utils.data.random_split(dataset, [no_of_training_images, remaining_images])

train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=16, shuffle=True)

The error message is a bit hard to understand, but it is usually raised by PIL, e.g. if the image file is broken.
You could debug it further by checking which image is causing the issue (e.g. print the index in Dataset.__getitem__ and try to load the image directly) and then either fix the image (download it again) or remove it.
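Something like this minimal sketch would narrow it down (assuming `dataset` is the LoadDataset instance created above):

# Iterate over the raw dataset (bypassing the DataLoader) and record every
# index whose sample cannot be loaded
bad_indices = []
for index in range(len(dataset)):
    try:
        image, label = dataset[index]
    except Exception as e:
        print(f'index {index} failed: {e}')
        bad_indices.append(index)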


Update: the problem is solved.
I just created a list of the problematic (NoneType) images and removed them.

import os

import cv2

# cv2.imread returns None instead of raising an error when a file is
# unreadable, so collect every path that fails to load
problematic_images = []
for img in new_df.iloc[:, 0]:
    img_path = os.path.join(root_dir, img)
    img_array = cv2.imread(img_path)
    if img_array is None:
        problematic_images.append(img_path)
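
To actually drop those rows before rebuilding the dataset, a filter along these lines should work (a sketch, assuming `problematic_images` holds the full paths built above and column 0 of `new_df` holds the relative file names):

# Map the full paths back to the relative names stored in the DataFrame
# (hypothetical follow-up step, not shown in the original post)
bad_names = {os.path.relpath(p, root_dir) for p in problematic_images}
new_df = new_df[~new_df.iloc[:, 0].isin(bad_names)].reset_index(drop=True)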