ValueError: Sample larger than population or is negative - error with the DataLoader class

Hi,
I am trying to train a pretrained model on images. The images have a piece size of 200 and the shuffle buffer has a size of 1000. During training it raises the following error:
ValueError: Sample larger than population or is negative

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-131-cbb9a89cf661> in <module>()
      5               board_writer,
      6               device,
----> 7               batches_per_epoch=500)

6 frames
<ipython-input-118-01a36e28a5fa> in train_it(no_of_epochs, starting_epoch, model_name, model, loss_criterion, optimizer, batch_size, allTheDataloaders, board_writer, device, batches_per_epoch, is_best, min_validation_loss)
     36         no_of_batches_in_this_epoch = 0
     37         train_correct_in_this_epoch = 0
---> 38         for train_batch_data, train_batch_labels in allTheDataloaders["Training"]:
     39                 train_batch_data, train_batch_labels = train_batch_data.to(device), train_batch_labels.to(device)
     40                 no_of_batches_in_this_epoch+= 1

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    515             if self._sampler_iter is None:
    516                 self._reset()
--> 517             data = self._next_data()
    518             self._num_yielded += 1
    519             if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
    555     def _next_data(self):
    556         index = self._next_index()  # may raise StopIteration
--> 557         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    558         if self._pin_memory:
    559             data = _utils.pin_memory.pin_memory(data)

/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     26             for _ in possibly_batched_index:
     27                 try:
---> 28                     data.append(next(self.dataset_iter))
     29                 except StopIteration:
     30                     break

/content/drive/MyDrive/square_data_set_generation1.py in shuffle_buffer_iterator(actual_iterator, size_of_buffer)
     46         while(size < size_of_buffer):
     47             try:
---> 48                 shuffle.append(next(actual_iterator))
     49                 size +=1
     50             except StopIteration:

/content/drive/MyDrive/square_data_set_generation1.py in puzzle_piece_pair_iterator(self)
    184                 puzzle_piece_crop_list = []
    185                 for d in dist_dict:
--> 186                     puzzle_piece_crop_list.extend(sample(dist_dict[d], no_of_rep[d]))
    187 
    188                 for label_pairs in sample(puzzle_piece_crop_list, len(puzzle_piece_crop_list)):

/usr/lib/python3.7/random.py in sample(self, population, k)
    319         n = len(population)
    320         if not 0 <= k <= n:
--> 321             raise ValueError("Sample larger than population or is negative")
    322         result = [None] * k
    323         setsize = 21        # size of a small set minus size of an empty list

ValueError: Sample larger than population or is negative

The shuffle buffer is located in my data generation class

def shuffle_buffer_iterator(actual_iterator, size_of_buffer):
    """Yield the items of ``actual_iterator`` shuffled chunk by chunk.

    Items are collected into a buffer of up to ``size_of_buffer`` elements.
    Each time the buffer is full it is emitted in random order and emptied;
    whatever is left when the source is exhausted is emitted the same way.
    Items are therefore only shuffled within their own chunk.

    Args:
        actual_iterator: Source of items. Any iterable is accepted, not only
            an iterator.
        size_of_buffer: Maximum number of items shuffled together. Must be
            positive.

    Yields:
        The source items, in randomised order within each chunk.

    Raises:
        ValueError: If ``size_of_buffer`` is not positive. Because this is a
            generator, the error surfaces on the first ``next()`` call.
    """
    # A non-positive size used to make the generator spin forever without
    # consuming or yielding anything; fail loudly instead.
    if size_of_buffer <= 0:
        raise ValueError(
            f"size_of_buffer must be positive, got {size_of_buffer!r}"
        )

    buffer = []
    for item in actual_iterator:
        buffer.append(item)
        if len(buffer) >= size_of_buffer:
            # sample(seq, len(seq)) returns a shuffled copy of the buffer.
            yield from sample(buffer, len(buffer))
            buffer = []

    # Flush the final, partially filled chunk (if any).
    if buffer:
        yield from sample(buffer, len(buffer))

The dataset settings (including the model input size) and the dataloader setup:

def set_the_dataset_input(default=True):
    """Return the dataset settings used to build the dataloaders.

    Args:
        default: When truthy, the built-in settings are returned. Otherwise
            the four values are obtained from ``retireve_the_dataset_input()``
            (defined elsewhere).

    Returns:
        A tuple ``(puzzle_piece_dim, size_of_buffer, model_dim, batch_size)``.
        The built-in settings are ``(200, 1000, 224, 20)``.
    """
    if not default:
        # Unpack into four names so a result of the wrong length fails here.
        piece_dim, buffer_size, net_input_dim, per_batch = retireve_the_dataset_input()
        return piece_dim, buffer_size, net_input_dim, per_batch

    return 200, 1000, 224, 20
def initialse_dataloader(root_dir,val_dir,test_dir,puzzle_piece_dim,size_of_buffer, model_dim,batch_size):
    """Build the training, validation and testing dataloaders.

    Each split is a ``square_data_set_generation1.AdjacencyDataset`` created
    from its directory together with ``puzzle_piece_dim``, ``size_of_buffer``
    and ``model_dim``, wrapped in a ``DataLoader`` with ``batch_size``.
    ``root_dir`` and ``puzzle_piece_dim`` are printed after the training
    dataset has been created.

    Args:
        root_dir: Directory passed to the training dataset.
        val_dir: Directory passed to the validation dataset.
        test_dir: Directory passed to the testing dataset.
        puzzle_piece_dim: Forwarded to every dataset.
        size_of_buffer: Forwarded to every dataset.
        model_dim: Forwarded to every dataset.
        batch_size: Batch size given to every ``DataLoader``.

    Returns:
        A dict with the keys ``'Training'``, ``'Validation'`` and
        ``'Testing'`` mapping to the corresponding dataloaders.
    """
    # Swap in your own dataset class here if needed; an alternative used
    # previously was triangles_pieces_dataset.triangle_pieces_generator,
    # called with the same arguments.
    dataset_class = square_data_set_generation1.AdjacencyDataset
    shared_args = (puzzle_piece_dim, size_of_buffer, model_dim)

    # Training split
    train_split = dataset_class(root_dir, *shared_args)
    print(root_dir)
    print(puzzle_piece_dim)
    train_loader = DataLoader(train_split, batch_size)

    # Validation split
    val_split = dataset_class(val_dir, *shared_args)
    val_loader = DataLoader(val_split, batch_size)

    # Testing split
    test_split = dataset_class(test_dir, *shared_args)
    test_loader = DataLoader(test_split, batch_size)

    return {
        'Training': train_loader,
        'Validation': val_loader,
        'Testing': test_loader,
    }

The training method

import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
def train_it(no_of_epochs, starting_epoch,
             model_name, model, loss_criterion, optimizer,
             batch_size, allTheDataloaders, board_writer, device, batches_per_epoch=100,
             is_best=False, min_validation_loss=math.inf):
    """Train and validate ``model``, checkpoint it, then report test metrics.

    For every epoch the model is trained on ``allTheDataloaders["Training"]``
    and evaluated on ``allTheDataloaders["Validation"]``, each for at most
    ``batches_per_epoch`` batches. Average loss, accuracy and elapsed time are
    written to ``board_writer``. A checkpoint is saved after every epoch via
    ``save_the_models_at_the_best_checkpoint`` (defined elsewhere), and is
    additionally saved as the best one whenever the summed validation loss of
    the epoch does not exceed ``min_validation_loss``. After the last epoch
    the model is run on ``allTheDataloaders["Testing"]`` and a classification
    report plus a confusion-matrix heatmap are produced.

    Args:
        no_of_epochs: Number of epochs to run.
        starting_epoch: Number of the first epoch (used for logging and in the
            checkpoints).
        model_name: Used to build the checkpoint file names.
        model: The network to train.
        loss_criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``.
        batch_size: Kept for backward compatibility. Accuracy is now computed
            from the number of samples actually seen, so this value is no
            longer used.
        allTheDataloaders: Mapping with the keys ``"Training"``,
            ``"Validation"`` and ``"Testing"``; each loader yields
            ``(data, labels)`` batches.
        board_writer: Writer providing ``add_scalar``, ``flush`` and ``close``.
        device: Device the batches are moved to.
        batches_per_epoch: Maximum number of batches per training pass and per
            validation pass.
        is_best: Initial value of the "best checkpoint" flag.
        min_validation_loss: Lowest summed validation loss seen so far.
    """
    last_checkpoint_path = f"./last_checkpoint_for_{model_name}.pt"
    best_model_path = f"./best_model_for_{model_name}.pt"

    # ``batches_per_epoch // 10`` is 0 for fewer than 10 batches, which made
    # the progress check raise ZeroDivisionError; clamp the interval to 1.
    log_every = max(1, batches_per_epoch // 10)

    # Timestamp for the whole run. The per-epoch ``start_time`` below is reset
    # every epoch and must not be used for the final total.
    run_start_time = time()

    for epoch in range(starting_epoch, starting_epoch + no_of_epochs):
        print(f"Epoch : {epoch}")
        start_time = time()

        # ---------------------------- training ----------------------------
        model.train()
        print("Training")
        train_loss_in_this_epoch = 0
        no_of_batches_in_this_epoch = 0
        train_correct_in_this_epoch = 0
        train_samples_in_this_epoch = 0
        for train_batch_data, train_batch_labels in allTheDataloaders["Training"]:
            train_batch_data = train_batch_data.to(device)
            train_batch_labels = train_batch_labels.to(device)
            no_of_batches_in_this_epoch += 1

            optimizer.zero_grad()
            train_batch_outputs = model(train_batch_data)
            train_batch_loss = loss_criterion(train_batch_outputs, train_batch_labels)
            train_loss_in_this_batch = train_batch_loss.item()
            train_loss_in_this_epoch += train_loss_in_this_batch
            train_batch_loss.backward()
            optimizer.step()

            with torch.no_grad():
                # The predicted class is the index of the largest output.
                _, train_predictions = torch.max(train_batch_outputs, dim=1)
                train_correct_in_this_batch = torch.sum(train_predictions == train_batch_labels).item()
            # Count real samples: a batch can be shorter than ``batch_size``.
            train_samples_in_this_batch = train_batch_labels.size(0)
            train_correct_in_this_epoch += train_correct_in_this_batch
            train_samples_in_this_epoch += train_samples_in_this_batch

            if no_of_batches_in_this_epoch % log_every == 0:
                print(f"Training #{no_of_batches_in_this_epoch} Batch Acc : {train_correct_in_this_batch}/{train_samples_in_this_batch}, Batch Loss: {train_loss_in_this_batch}")
            if no_of_batches_in_this_epoch == batches_per_epoch:
                print(f"Epoch : {epoch}, Training Batch: {no_of_batches_in_this_epoch}")
                break

        train_average_loss = train_loss_in_this_epoch / no_of_batches_in_this_epoch
        train_average_accuracy = train_correct_in_this_epoch / train_samples_in_this_epoch
        board_writer.add_scalar(f'Training/Loss/Average', train_average_loss, epoch)
        board_writer.add_scalar(f'Training/Accuracy/Average', train_average_accuracy, epoch)
        board_writer.add_scalar(f'Training/TimeTakenInMinutes', (time() - start_time) / 60, epoch)
        board_writer.flush()
        print(f"Training average accuracy : {train_average_accuracy}")
        print(f"Training average loss : {train_average_loss}")

        # --------------------------- validation ---------------------------
        model.eval()
        print("Validation")
        val_loss_in_this_epoch = 0
        no_of_batches_in_this_epoch = 0
        val_correct_in_this_epoch = 0
        val_samples_in_this_epoch = 0
        with torch.no_grad():
            for val_batch_data, val_batch_labels in allTheDataloaders["Validation"]:
                val_batch_data = val_batch_data.to(device)
                val_batch_labels = val_batch_labels.to(device)
                no_of_batches_in_this_epoch += 1

                val_batch_outputs = model(val_batch_data)
                val_loss_in_this_batch = loss_criterion(val_batch_outputs, val_batch_labels).item()
                val_loss_in_this_epoch += val_loss_in_this_batch

                _, val_predictions = torch.max(val_batch_outputs, dim=1)
                val_correct_in_this_batch = torch.sum(val_predictions == val_batch_labels).item()
                val_samples_in_this_batch = val_batch_labels.size(0)
                val_correct_in_this_epoch += val_correct_in_this_batch
                val_samples_in_this_epoch += val_samples_in_this_batch

                if no_of_batches_in_this_epoch % log_every == 0:
                    print(f"Validation #{no_of_batches_in_this_epoch} Batch Acc : {val_correct_in_this_batch}/{val_samples_in_this_batch}, Batch Loss: {val_loss_in_this_batch}")
                if no_of_batches_in_this_epoch == batches_per_epoch:
                    print(f"Epoch : {epoch}, Validation Batch: {no_of_batches_in_this_epoch}")
                    break

        val_average_loss = val_loss_in_this_epoch / no_of_batches_in_this_epoch
        val_average_accuracy = val_correct_in_this_epoch / val_samples_in_this_epoch
        board_writer.add_scalar(f'Validation/Loss/Average', val_average_loss, epoch)
        board_writer.add_scalar(f'Validation/Accuracy/Average', val_average_accuracy, epoch)
        # Measured from the start of the epoch, so it includes the training pass.
        board_writer.add_scalar(f'Validation/TimeTakenInMinutes', (time() - start_time) / 60, epoch)
        board_writer.flush()
        print(f"Validation average accuracy : {val_average_accuracy}")
        print(f"Validation average loss : {val_average_loss}")

        # -------------------------- checkpointing -------------------------
        # The comparison uses the summed (not averaged) validation loss.
        if min_validation_loss >= val_loss_in_this_epoch:
            is_best = True
            min_validation_loss = min(min_validation_loss, val_loss_in_this_epoch)
            checkpoint = {
                'epoch': epoch + 1,
                'min_validation_loss': min_validation_loss,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            save_the_models_at_the_best_checkpoint(checkpoint, is_best, last_checkpoint_path, best_model_path)
            print(f"In epoch number {epoch}, average validation loss decreased to {val_average_loss}")

        # The state after every epoch is always saved with the flag set to False.
        latest_checkpoint = {
            'epoch': epoch + 1,
            'min_validation_loss': min_validation_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_the_models_at_the_best_checkpoint(latest_checkpoint, False, last_checkpoint_path, best_model_path)

    print(f"Total Training Time Taken: to complete training & Validation ", ((time() - run_start_time) / 60))
    board_writer.close()

    # ------------------------------ testing ------------------------------
    predlist = torch.zeros(0, dtype=torch.long, device='cpu')
    lbllist = torch.zeros(0, dtype=torch.long, device='cpu')
    model.eval()
    with torch.no_grad():
        for test_batch_data, test_batch_labels in allTheDataloaders["Testing"]:
            y_test_pred = model(test_batch_data.to(device))
            _, y_pred_targets = torch.max(y_test_pred, dim=1)

            predlist = torch.cat([predlist, y_pred_targets.view(-1).cpu()])
            lbllist = torch.cat([lbllist, test_batch_labels.view(-1).cpu()])

    true_labels = lbllist.numpy()
    predicted_labels = predlist.numpy()
    print(classification_report(true_labels, predicted_labels))

    # Use the union of true and predicted classes for both axes; taking only
    # the true labels breaks the DataFrame when a predicted class never
    # occurs in the ground truth.
    class_labels = np.unique(np.concatenate([true_labels, predicted_labels]))
    data = confusion_matrix(true_labels, predicted_labels, labels=class_labels)
    data_cm = pd.DataFrame(data, columns=class_labels, index=class_labels)
    data_cm.index.name = 'Actual'
    data_cm.columns.name = 'Predicted'
    plt.figure(figsize=(10, 7))
    sn.set(font_scale=1.4)  # for label size
    sn.heatmap(data_cm, cmap="Blues", annot=True, annot_kws={"size": 16}, fmt=".1f")

It works for images that are 100 by 100, but when I increase the size it throws this error.
I have also tried changing the buffer size, but that doesn't work either.

Please help.