IndexError: list index out of range only for a loader

I am working on MNIST dataset, using a multi-input network. I have split my dataset in 2 parts (one for the first net and the second for the second net) and each part is divided in train, val and test. When I do the training on train and test, there are no problems, instead when I do the training on validation, I have the error of title
IndexError: list idex out of range
I have no idea why only on val set.
This is the code:

# Define loaders

train_loader_a = DataLoader(train_set_a, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
val_loader_a   = DataLoader(val_set_a,   batch_size=128, num_workers=0, shuffle=False, drop_last=False)
test_loader_a  = DataLoader(test_set_a,  batch_size=128, num_workers=0, shuffle=False, drop_last=False)

train_loader_b = DataLoader(train_set_b, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
val_loader_b   = DataLoader(val_set_b,   batch_size=128, num_workers=0, shuffle=False, drop_last=False)
test_loader_b  = DataLoader(test_set_b,  batch_size=128, num_workers=0, shuffle=False, drop_last=False)
# Define dictionary of loaders
loaders_a = {"train": train_loader_a,
           "val": val_loader_a,
           "test": test_loader_a}

loaders_b = {"train": train_loader_b,
           "val": val_loader_b,
           "test": test_loader_b}

This is training function on train set:

def train(net, loaders, optimizer, criterion, epochs=20, dev=None, save_param=False, model_name="valerio"):
      loaders_a, loaders_b = loaders
    # try:
      net = net.to(dev)
      #print(net)
      #summary(net,[(net.in_channels,net.in_width,net.in_height)]*2)


      criterion.to(dev)


      # Initialize history
      history_loss = {"train": [], "val": [], "test": []}
      history_accuracy_combo = {"train": [], "val": [], "test": []}
      history_accuracy_a = {"train": [], "val": [], "test": []}
      history_accuracy_b = {"train": [], "val": [], "test": []}
      # Store the best val accuracy
      best_val_accuracy = 0

      # Process each epoch
      for epoch in range(epochs):
        # Initialize epoch variables
        sum_loss = {"train": 0, "val": 0, "test": 0}
        sum_accuracy_combo = {"train": 0, "val": 0, "test": 0}
        sum_accuracy_a = {"train": 0, "val": 0, "test": 0}
        sum_accuracy_b = {"train": 0, "val": 0, "test": 0}

        progbar = None
        # Process each split
        net.train()
            #widgets = [
              #' [', pb.Timer(), '] '#,
              #pb.Bar(),
              #' [', pb.ETA(), '] ', pb.Variable('ta','[Train Acc: {formatted_value}]')
            #]

            #progbar = pb.ProgressBar(max_value=len(loaders_a[split]),widgets=widgets,redirect_stdout=True)

        #net.eval()
        #print("Evaluation")
          # Process each batch
        print("Pre-for")
          #j=0
        for j, ((input_a, labels_a), (input_b, labels_b)) in enumerate(zip(loaders_a["train"], loaders_b["train"])):
          print("first print inside for")
          labels_a = labels_a.unsqueeze(1).float()
          labels_b = labels_b.unsqueeze(1).float()

          input_a = input_a.to(dev)
          labels_a = labels_a.to(dev)
          input_b = input_b.to(dev)
          labels_b = labels_b.to(dev)

            # Reset gradients
          optimizer.zero_grad()
            # Compute output
          pred = net(input_a,input_b)

          loss = criterion(pred, [labels_a, labels_b])
            # Update loss
          sum_loss["train"] += loss.item()
            # Check parameter update
          #if split == "train":
              # Compute gradients
          loss.backward()
              # Optimize
          optimizer.step()
          print("second print inside for")

            # Compute accuracy
          pred_labels = (pred[2] >= 0.0).long()  # Binarize predictions to 0 and 1
          pred_labels_a = (pred[0] >= 0.0).long()  # Binarize predictions to 0 and 1
          pred_labels_b = (pred[1] >= 0.0).long()  # Binarize predictions to 0 and 1


          #batch_accuracy_combo = (pred_labels == labels).sum().item() / len(labels)
          batch_accuracy_a = (pred_labels_a == labels_a).sum().item() / len(labels_a)
          batch_accuracy_b = (pred_labels_b == labels_b).sum().item() / len(labels_b)
            # Update accurac
          #sum_accuracy_combo["train"] += batch_accuracy_combo
          sum_accuracy_a["train"] += batch_accuracy_a
          sum_accuracy_b["train"] += batch_accuracy_b

          #if (split=='train'):
              #progbar.update(j, ta=batch_accuracy)
              #progbar.update(j, ta=batch_accuracy_a)
              #progbar.update(j, ta=batch_accuracy_b)
          print("no progbar")
            #j=j+1

        #if (progbar is not None):
          #progbar.finish()
        # Compute epoch loss/accuracy
        epoch_loss = sum_loss["train"] / (len(loaders_a["train"]) + len(loaders_b["train"]))
        #epoch_accuracy_combo = sum_accuracy_combo["train"] / (len(loaders_a["train"]) + len(loaders_b["train"]))
        epoch_accuracy_a = sum_accuracy_a["train"] / len(loaders_a["train"])
        epoch_accuracy_b = sum_accuracy_b["train"] / len(loaders_b["train"])

        # Store params at the best validation accuracy
        if save_param and epoch_accuracy["val"] > best_val_accuracy:
          # torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
          torch.save(net.state_dict(), f"{model_name}_best_val.pth")
          best_val_accuracy = epoch_accuracy["val"]

        # Update history
        history_loss["train"].append(epoch_loss)
        #history_accuracy_combo["train"].append(epoch_accuracy_combo)
        history_accuracy_a["train"].append(epoch_accuracy_a)
        history_accuracy_b["train"].append(epoch_accuracy_b)
        # Print info
        print(f"Epoch {epoch + 1}:",
              f"TrL for combo={epoch_loss:.4f},",)
        print(f"Epoch {epoch + 1}:",
              f"TrA for A={epoch_accuracy_a:.4f},")
        print(f"Epoch {epoch + 1}:",
              f"TrA for B={epoch_accuracy_b:.4f},")

And then this is the same function, but changing all the “train” in “val”

def train(net, loaders, optimizer, criterion, epochs=20, dev=None, save_param=False, model_name="valerio"):
      loaders_a, loaders_b = loaders
    # try:
      net = net.to(dev)
      #print(net)
      #summary(net,[(net.in_channels,net.in_width,net.in_height)]*2)


      criterion.to(dev)


      # Initialize history
      history_loss = {"train": [], "val": [], "test": []}
      history_accuracy_combo = {"train": [], "val": [], "test": []}
      history_accuracy_a = {"train": [], "val": [], "test": []}
      history_accuracy_b = {"train": [], "val": [], "test": []}
      # Store the best val accuracy
      best_val_accuracy = 0

      # Process each epoch
      for epoch in range(epochs):
        # Initialize epoch variables
        sum_loss = {"train": 0, "val": 0, "test": 0}
        sum_accuracy_combo = {"train": 0, "val": 0, "test": 0}
        sum_accuracy_a = {"train": 0, "val": 0, "test": 0}
        sum_accuracy_b = {"train": 0, "val": 0, "test": 0}

        progbar = None
        # Process each split
        net.train()
            #widgets = [
              #' [', pb.Timer(), '] '#,
              #pb.Bar(),
              #' [', pb.ETA(), '] ', pb.Variable('ta','[Train Acc: {formatted_value}]')
            #]

            #progbar = pb.ProgressBar(max_value=len(loaders_a[split]),widgets=widgets,redirect_stdout=True)

        #net.eval()
        #print("Evaluation")
          # Process each batch
        print("Pre-for")
          #j=0
        for j, ((input_a, labels_a), (input_b, labels_b)) in enumerate(zip(loaders_a["val"], loaders_b["val"])):
          print("first print inside for")
          labels_a = labels_a.unsqueeze(1).float()
          labels_b = labels_b.unsqueeze(1).float()

          input_a = input_a.to(dev)
          labels_a = labels_a.to(dev)
          input_b = input_b.to(dev)
          labels_b = labels_b.to(dev)

            # Reset gradients
          optimizer.zero_grad()
            # Compute output
          pred = net(input_a,input_b)

          loss = criterion(pred, [labels_a, labels_b])
            # Update loss
          sum_loss["val"] += loss.item()
            # Check parameter update
          #if split == "train":
              # Compute gradients
          loss.backward()
              # Optimize
          optimizer.step()
          print("second print inside for")

            # Compute accuracy
            #pred_labels = (pred[2] >= 0.0).long()  # Binarize predictions to 0 and 1
          pred_labels_a = (pred[0] >= 0.0).long()  # Binarize predictions to 0 and 1
          pred_labels_b = (pred[1] >= 0.0).long()  # Binarize predictions to 0 and 1


            #batch_accuracy = (pred_labels == labels).sum().item() / len(labels)
          batch_accuracy_a = (pred_labels_a == labels_a).sum().item() / len(labels_a)
          batch_accuracy_b = (pred_labels_b == labels_b).sum().item() / len(labels_b)
            # Update accurac
            #sum_accuracy[split] += batch_accuracy
          sum_accuracy_a["val"] += batch_accuracy_a
          sum_accuracy_b["val"] += batch_accuracy_b

          #if (split=='train'):
              #progbar.update(j, ta=batch_accuracy)
              #progbar.update(j, ta=batch_accuracy_a)
              #progbar.update(j, ta=batch_accuracy_b)
          print("no progbar")
            #j=j+1

        #if (progbar is not None):
          #progbar.finish()
        # Compute epoch loss/accuracy
        epoch_loss = sum_loss["val"] / (len(loaders_a["val"]) + len(loaders_b["val"]))
        #epoch_accuracy = {split: sum_accuracy[split] / len(loaders[split]) for split in ["train", "val", "test"]}
        epoch_accuracy_a = sum_accuracy_a["val"] / len(loaders_a["val"])
        epoch_accuracy_b = sum_accuracy_b["val"] / len(loaders_b["val"])

        # Store params at the best validation accuracy
        if save_param and epoch_accuracy["val"] > best_val_accuracy:
          # torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
          torch.save(net.state_dict(), f"{model_name}_best_val.pth")
          best_val_accuracy = epoch_accuracy["val"]

        # Update history
        history_loss["val"].append(epoch_loss)
        history_accuracy_a["val"].append(epoch_accuracy_a)
        history_accuracy_b["val"].append(epoch_accuracy_b)
        # Print info
        print(f"Epoch {epoch + 1}:",
              f"VaL for combo={epoch_loss:.4f},")
        print(f"Epoch {epoch + 1}:",
              f"VaA for A={epoch_accuracy_a:.4f},")
        print(f"Epoch {epoch + 1}:",
              f"VaA for B={epoch_accuracy_b:.4f},")

And this is how I call the train function:
# Train model on val set train(combo, (loaders_a, loaders_b), optimizer, criterion, epochs=3, dev=dev)

Could you post the complete stack trace, which would show which line of code is raising this issue, please?

This is the complete error:

---------------------------------------------------------------------------

IndexError                                Traceback (most recent call last)

<ipython-input-41-e0be5daf1ede> in <module>()
      1 # Train model on val set
----> 2 train(combo, (loaders_a, loaders_b), optimizer, criterion, epochs=3, dev=dev)

6 frames

<ipython-input-40-457ef59c47c1> in train(net, loaders, optimizer, criterion, epochs, dev, save_param, model_name)
     42         print("Pre-for")
     43           #j=0
---> 44         for j, ((input_a, labels_a), (input_b, labels_b)) in enumerate(zip(loaders_a["val"], loaders_b["val"])):
     45           print("first print inside for")
     46           labels_a = labels_a.unsqueeze(1).float()

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    515             if self._sampler_iter is None:
    516                 self._reset()
--> 517             data = self._next_data()
    518             self._num_yielded += 1
    519             if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
    555     def _next_data(self):
    556         index = self._next_index()  # may raise StopIteration
--> 557         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    558         if self._pin_memory:
    559             data = _utils.pin_memory.pin_memory(data)

/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataset.py in __getitem__(self, idx)
    328 
    329     def __getitem__(self, idx):
--> 330         return self.dataset[self.indices[idx]]
    331 
    332     def __len__(self):

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataset.py in __getitem__(self, idx)
    328 
    329     def __getitem__(self, idx):
--> 330         return self.dataset[self.indices[idx]]
    331 
    332     def __len__(self):

IndexError: list index out of range

This occur only on val set.

I guess you’ve wrapped your Dataset into a Subset (or a similar object, which would use specific indices), which contain invalid indices.
Could you check the length of the original Dataset and make sure the split indices are valid?

Yes, I did. These are all the cells related to the dataset:

def parse_dataset(dataset):

  dataset.targets = dataset.targets % 2

  return dataset
rescale_data = transforms.Lambda(lambda x : x/255)

# Compose transformations
data_transform = transforms.Compose([
  transforms.Resize(32),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  rescale_data,
])

test_transform = transforms.Compose([
  transforms.Resize(32),
  transforms.ToTensor(),
  rescale_data,
])
# Load MNIST dataset with transforms
train_set = torchvision.datasets.MNIST(root=root_dir, train=True, download=True, transform=data_transform)
test_set = torchvision.datasets.MNIST(root=root_dir, train=False, download=True, transform=test_transform)
train_set = parse_dataset(train_set)
test_set = parse_dataset(test_set)

# Dataset len
num_train = len(train_set)
num_test = len(test_set)
print(f"Num. training samples: {num_train}")
print(f"Num. test samples:     {num_test}")
# List of indexes on the training set
train_idx = list(range(num_train))

# List of indexes of the test set
test_idx = list(range(num_test))

# Shuffle the training set

random.shuffle(train_idx)
# Fraction of the original train set that we want to use as validation set
val_frac = 0.1
# Number of samples of the validation set
num_val = int(num_train * val_frac)
num_train = num_train - num_val

# Split training set
val_idx = train_idx[num_train:]
train_idx = train_idx[:num_train]

print(f"{num_train} samples used as train set")
print(f"{num_val}  samples used as val set")

len(train_idx)
val_set = Subset(train_set, val_idx)
train_set = Subset(train_set, train_idx)
# Let's split in loader_a and loader_b
num_train = int(len(train_set)/2)
num_test = int(len(test_set)/2)

# List of indexes on the training set
train_idx = list(range(num_train*2))

# List of indexes of the test set
test_idx = list(range(num_test*2))

# Split training set
train_a_idx = train_idx[num_train:]
train_b_idx = train_idx[:num_train]

# Split test set
test_a_idx = test_idx[num_test:]
test_b_idx = test_idx[:num_test]

train_set_a1 = Subset(train_set, train_a_idx)
train_set_b1 = Subset(train_set, train_b_idx)

test_set_a = Subset(test_set, test_a_idx)
test_set_b = Subset(test_set, test_b_idx)

print(f"{num_train} samples used as train set")
print(f"{num_test}  samples used as test set")
num_train_a1 = len(train_set_a1)
num_train_b1 = len(train_set_b1)
# Fraction of the original train set that we want to use as validation set
val_frac = 0.1
# Number of samples of the validation set
num_val_a = int(num_train_a1 * val_frac)
num_train_a = num_train_a1 - num_val_a

num_val_b = int(num_train_b1 * val_frac)
num_train_b = num_train_b1 - num_val_b

# Split training set
val_idx_a = train_idx[num_train_a:]
train_idx_a = train_idx[:num_train_a]
val_set_a = Subset(train_set_a1, val_idx_a)
train_set_a = Subset(train_set_a1, train_idx_a)

val_idx_b = train_idx[num_train_b:]
train_idx_b = train_idx[:num_train_b]
val_set_b = Subset(train_set_b1, val_idx_b)
train_set_b = Subset(train_set_b1, train_idx_b)

print(f"{num_train_a} samples used as train set a")
print(f"{num_val_a}  samples used as val set a")
print(f"{num_train_b} samples used as train set b")
print(f"{num_val_b}  samples used as val set b")

Were you able to check the len of the dataset raising the error as well as the passed indices and if so, what did it yield?

You are right, I think there is some problem in the split of the dataset. I will try to check and fix. If I will not able I will edit this post!

The IndexError is raised when attempting to retrieve an index from a sequence (e.g. list, tuple), and the index isn’t found in the sequence. The Python documentation defines when this exception is raised:

Raised when a sequence subscript is out of range.

Here’s an Python Split() example that raises the IndexError:

data = "one%two%three%four%five"
numbers = data.split('%')

The list numbers has 5 elements, and the indexing starts with 0, so, the last element will have index 4. If you try to subscript with an index higher than 4, the Python Interpreter will raise an IndexError since there is no element at such index.