Hi,
I am trying to train a pretrained model on my own images. The puzzle pieces are 200 by 200 pixels and the shuffle buffer has a size of 1000. During training it raises a
ValueError: Sample larger than population or is negative
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-131-cbb9a89cf661> in <module>()
5 board_writer,
6 device,
----> 7 batches_per_epoch=500)
<ipython-input-118-01a36e28a5fa> in train_it(no_of_epochs, starting_epoch, model_name, model, loss_criterion, optimizer, batch_size, allTheDataloaders, board_writer, device, batches_per_epoch, is_best, min_validation_loss)
36 no_of_batches_in_this_epoch = 0
37 train_correct_in_this_epoch = 0
---> 38 for train_batch_data, train_batch_labels in allTheDataloaders["Training"]:
39 train_batch_data, train_batch_labels = train_batch_data.to(device), train_batch_labels.to(device)
40 no_of_batches_in_this_epoch+= 1
/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in __next__(self)
515 if self._sampler_iter is None:
516 self._reset()
--> 517 data = self._next_data()
518 self._num_yielded += 1
519 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
555 def _next_data(self):
556 index = self._next_index() # may raise StopIteration
--> 557 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
558 if self._pin_memory:
559 data = _utils.pin_memory.pin_memory(data)
/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
26 for _ in possibly_batched_index:
27 try:
---> 28 data.append(next(self.dataset_iter))
29 except StopIteration:
30 break
/content/drive/MyDrive/square_data_set_generation1.py in shuffle_buffer_iterator(actual_iterator, size_of_buffer)
46 while(size < size_of_buffer):
47 try:
---> 48 shuffle.append(next(actual_iterator))
49 size +=1
50 except StopIteration:
/content/drive/MyDrive/square_data_set_generation1.py in puzzle_piece_pair_iterator(self)
184 puzzle_piece_crop_list = []
185 for d in dist_dict:
--> 186 puzzle_piece_crop_list.extend(sample(dist_dict[d], no_of_rep[d]))
187
188 for label_pairs in sample(puzzle_piece_crop_list, len(puzzle_piece_crop_list)):
/usr/lib/python3.7/random.py in sample(self, population, k)
319 n = len(population)
320 if not 0 <= k <= n:
--> 321 raise ValueError("Sample larger than population or is negative")
322 result = [None] * k
323 setsize = 21 # size of a small set minus size of an empty list
ValueError: Sample larger than population or is negative
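From the last frame it looks like random.sample raises this whenever it is asked for more items than the population contains. A minimal standalone repro (unrelated to my dataset):

from random import sample

population = list(range(5))
print(sample(population, 3))   # fine: k <= len(population)
print(sample(population, 10))  # ValueError: Sample larger than population or is negative

So I assume sample(dist_dict[d], no_of_rep[d]) is being called with no_of_rep[d] greater than len(dist_dict[d]).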
The shuffle buffer iterator is defined in my data generation class (square_data_set_generation1.py):
def shuffle_buffer_iterator(actual_iterator, size_of_buffer):
    shuffle = []
    while True:
        size = 0
        # fill the buffer from the underlying iterator
        while size < size_of_buffer:
            try:
                shuffle.append(next(actual_iterator))
                size += 1
            except StopIteration:
                # underlying iterator exhausted: flush what is left, shuffled, and stop
                shuffle = sample(shuffle, len(shuffle))
                for s in shuffle:
                    yield s
                return
        # buffer full: yield its contents in random order, then refill
        shuffle = sample(shuffle, len(shuffle))
        for s in shuffle:
            yield s
        shuffle = []
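As a sanity check of the buffer itself (a toy run over a plain range, not my real dataset), it seems to do what I intend: fill, yield shuffled, refill, and flush the remainder on StopIteration:

items = list(shuffle_buffer_iterator(iter(range(10)), 4))
print(items)          # e.g. [2, 0, 3, 1, 7, 4, 6, 5, 9, 8] - shuffled within windows of 4
print(sorted(items))  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - nothing lost or duplicated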
The dataset parameters (piece size, buffer size, model input size, batch size) and the dataloader setup:
def set_the_dataset_input(default=True):
    if default:
        puzzle_piece_dim = 200
        size_of_buffer = 1000
        model_dim = 224
        batch_size = 20
    else:
        puzzle_piece_dim, size_of_buffer, model_dim, batch_size = retireve_the_dataset_input()
    return puzzle_piece_dim, size_of_buffer, model_dim, batch_size
def initialse_dataloader(root_dir, val_dir, test_dir, puzzle_piece_dim, size_of_buffer, model_dim, batch_size):
    ## ADD YOUR OWN DATASETS
    training_dataset = square_data_set_generation1.AdjacencyDataset(root_dir, puzzle_piece_dim, size_of_buffer, model_dim)
    print(root_dir)
    print(puzzle_piece_dim)
    ## Load the data using DataLoader
    train_dataset_dataloader = DataLoader(training_dataset, batch_size)
    ## Validation dataset
    validation_dataset = square_data_set_generation1.AdjacencyDataset(val_dir, puzzle_piece_dim, size_of_buffer, model_dim)
    validation_dataset_dataloader = DataLoader(validation_dataset, batch_size)
    ## Testing dataset
    testing_dataset = square_data_set_generation1.AdjacencyDataset(test_dir, puzzle_piece_dim, size_of_buffer, model_dim)
    test_dataset_dataloader = DataLoader(testing_dataset, batch_size)
    allTheDataloaders = {'Training': train_dataset_dataloader, 'Validation': validation_dataset_dataloader, 'Testing': test_dataset_dataloader}
    return allTheDataloaders
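For reference, this is roughly how I wire it all together (the Drive paths below are placeholders for my actual folders):

puzzle_piece_dim, size_of_buffer, model_dim, batch_size = set_the_dataset_input(default=True)
allTheDataloaders = initialse_dataloader(
    "/content/drive/MyDrive/train",  # placeholder path
    "/content/drive/MyDrive/val",    # placeholder path
    "/content/drive/MyDrive/test",   # placeholder path
    puzzle_piece_dim, size_of_buffer, model_dim, batch_size)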
The training method:

import math
from time import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import torch
from sklearn.metrics import classification_report, confusion_matrix
def train_it(no_of_epochs, starting_epoch,
             model_name, model, loss_criterion, optimizer,
             batch_size, allTheDataloaders, board_writer, device,
             batches_per_epoch=100, is_best=False, min_validation_loss=math.inf):
    last_checkpoint_path = f"./last_checkpoint_for_{model_name}.pt"
    best_model_path = f"./best_model_for_{model_name}.pt"
    for epoch in range(starting_epoch, starting_epoch + no_of_epochs):
        print(f"Epoch : {epoch}")
        start_time = time()

        # -- training --
        model.train()
        print("Training")
        train_loss_in_this_epoch = 0
        no_of_batches_in_this_epoch = 0
        train_correct_in_this_epoch = 0
        for train_batch_data, train_batch_labels in allTheDataloaders["Training"]:
            train_batch_data, train_batch_labels = train_batch_data.to(device), train_batch_labels.to(device)
            no_of_batches_in_this_epoch += 1
            optimizer.zero_grad()
            train_batch_outputs = model(train_batch_data)
            # compute the loss for this batch
            train_batch_loss = loss_criterion(train_batch_outputs, train_batch_labels)
            train_loss_in_this_batch = train_batch_loss.item()
            train_loss_in_this_epoch += train_loss_in_this_batch
            train_batch_loss.backward()
            optimizer.step()
            with torch.no_grad():
                train_score, train_predictions = torch.max(train_batch_outputs, dim=1)
                train_correct_in_this_batch = torch.sum(train_predictions == train_batch_labels.data).item()
                train_correct_in_this_epoch += train_correct_in_this_batch
                train_batch_labels = train_batch_labels.detach().cpu().numpy()
                train_score = train_score.detach().cpu().numpy()
            if (no_of_batches_in_this_epoch % (batches_per_epoch // 10)) == 0:
                print(f"Training #{no_of_batches_in_this_epoch} Batch Acc : {train_correct_in_this_batch}/{batch_size}, Batch Loss: {train_loss_in_this_batch}")
            if no_of_batches_in_this_epoch == batches_per_epoch:
                print(f"Epoch : {epoch}, Training Batch: {no_of_batches_in_this_epoch}")
                break
        board_writer.add_scalar('Training/Loss/Average', train_loss_in_this_epoch / no_of_batches_in_this_epoch, epoch)
        board_writer.add_scalar('Training/Accuracy/Average', train_correct_in_this_epoch / (no_of_batches_in_this_epoch * batch_size), epoch)
        board_writer.add_scalar('Training/TimeTakenInMinutes', (time() - start_time) / 60, epoch)
        board_writer.flush()
        print(f"Training average accuracy : {train_correct_in_this_epoch / (no_of_batches_in_this_epoch * batch_size)}")
        print(f"Training average loss : {train_loss_in_this_epoch / no_of_batches_in_this_epoch}")
        # TODO: create f measure

        # -- validation --
        model.eval()
        print("Validation")
        val_loss_in_this_epoch = 0
        no_of_batches_in_this_epoch = 0
        val_correct_in_this_epoch = 0
        with torch.no_grad():
            for val_batch_data, val_batch_labels in allTheDataloaders["Validation"]:
                val_batch_data, val_batch_labels = val_batch_data.to(device), val_batch_labels.to(device)
                no_of_batches_in_this_epoch += 1
                val_batch_outputs = model(val_batch_data)
                # compute the loss for this batch
                val_batch_loss = loss_criterion(val_batch_outputs, val_batch_labels)
                val_loss_in_this_batch = val_batch_loss.item()
                val_loss_in_this_epoch += val_loss_in_this_batch
                val_score, val_predictions = torch.max(val_batch_outputs, dim=1)
                val_correct_in_this_batch = torch.sum(val_predictions == val_batch_labels.data).item()
                val_correct_in_this_epoch += val_correct_in_this_batch
                if (no_of_batches_in_this_epoch % (batches_per_epoch // 10)) == 0:
                    print(f"Validation #{no_of_batches_in_this_epoch} Batch Acc : {val_correct_in_this_batch}/{batch_size}, Batch Loss: {val_loss_in_this_batch}")
                if no_of_batches_in_this_epoch == batches_per_epoch:
                    print(f"Epoch : {epoch}, Validation Batch: {no_of_batches_in_this_epoch}")
                    break
        board_writer.add_scalar('Validation/Loss/Average', val_loss_in_this_epoch / no_of_batches_in_this_epoch, epoch)
        board_writer.add_scalar('Validation/Accuracy/Average', val_correct_in_this_epoch / (no_of_batches_in_this_epoch * batch_size), epoch)
        board_writer.add_scalar('Validation/TimeTakenInMinutes', (time() - start_time) / 60, epoch)
        board_writer.flush()
        print(f"Validation average accuracy : {val_correct_in_this_epoch / (no_of_batches_in_this_epoch * batch_size)}")
        print(f"Validation average loss : {val_loss_in_this_epoch / no_of_batches_in_this_epoch}")

        # checkpoint whenever the validation loss improves
        if min_validation_loss >= val_loss_in_this_epoch:
            is_best = True
            min_validation_loss = min(min_validation_loss, val_loss_in_this_epoch)
            checkpoint = {
                'epoch': epoch + 1,
                'min_validation_loss': min_validation_loss,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            save_the_models_at_the_best_checkpoint(checkpoint, is_best, last_checkpoint_path, best_model_path)
            print(f"In epoch number {epoch}, average validation loss decreased to {val_loss_in_this_epoch / no_of_batches_in_this_epoch}")

    # save the last checkpoint once training finishes
    last_checkpoint = {
        'epoch': epoch + 1,
        'min_validation_loss': min_validation_loss,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    save_the_models_at_the_best_checkpoint(last_checkpoint, False, last_checkpoint_path, best_model_path)
    print(f"Total time to complete training & validation: {(time() - start_time) / 60} minutes")
    board_writer.close()
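I start training like this, matching the call in the traceback (the epoch counts and model_name here are just example values; model, optimizer, board_writer and device are set up earlier):

train_it(10, 0,
         "pretrained_adjacency", model, loss_criterion, optimizer,
         batch_size, allTheDataloaders,
         board_writer,
         device,
         batches_per_epoch=500)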
The test evaluation and confusion matrix:

predlist = torch.zeros(0, dtype=torch.long, device='cpu')
lbllist = torch.zeros(0, dtype=torch.long, device='cpu')
with torch.no_grad():
    model.eval()
    for test_batch_data, test_batch_labels in allTheDataloaders["Testing"]:
        test_batch_data, test_batch_labels = test_batch_data.to(device), test_batch_labels.to(device)
        y_test_pred = model(test_batch_data)
        y_scores, y_pred_targets = torch.max(y_test_pred, dim=1)
        # collect predictions and labels across batches for the report
        predlist = torch.cat([predlist, y_pred_targets.view(-1).cpu()])
        lbllist = torch.cat([lbllist, test_batch_labels.view(-1).cpu()])
print(classification_report(lbllist.numpy(), predlist.numpy()))
data = confusion_matrix(lbllist.numpy(), predlist.numpy())
data_cm = pd.DataFrame(data, columns=np.unique(lbllist.numpy()), index=np.unique(lbllist.numpy()))
data_cm.index.name = 'Actual'
data_cm.columns.name = 'Predicted'
plt.figure(figsize=(10, 7))
sn.set(font_scale=1.4)  # label size
sn.heatmap(data_cm, cmap="Blues", annot=True, annot_kws={"size": 16}, fmt=".1f")
It works with a piece size of 100 by 100, but when I increase the piece size to 200 it throws this error. My guess is that larger pieces mean fewer crops per image, so the k that puzzle_piece_pair_iterator passes to sample() can exceed the population; the quick arithmetic below illustrates this.
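A rough count of how many non-overlapping pieces fit in an image (pieces_that_fit is a hypothetical helper; I am assuming AdjacencyDataset tiles each image into piece-sized crops):

def pieces_that_fit(width, height, piece_dim):
    # number of non-overlapping piece_dim x piece_dim crops in a width x height image
    return (width // piece_dim) * (height // piece_dim)

print(pieces_that_fit(600, 400, 100))  # 24 candidate pieces
print(pieces_that_fit(600, 400, 200))  # 6 candidate pieces: 4x fewer to sample from

If no_of_rep[d] was tuned for the 100-pixel case, sample(dist_dict[d], no_of_rep[d]) can then ask for more pieces than exist.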
I have also tried changing the buffer size, but that doesn't help either.
Please help.