In my training code I also track the validation MAE so that I can save the model state that achieves the best validation MAE. The best MAE is recorded in the name of the weights file, as defined in the code below. When I load that model after training finishes and evaluate it on the exact same validation set (same transformations, no shuffling), the MAE I get is higher than the best MAE previously recorded for that data and model.
import time

import numpy as np
import pandas as pd
import torch

# compute_mae(gt, pred) is a helper defined elsewhere in my code that returns
# the mean absolute error between two NumPy arrays.

def train(model, num_epochs, train_loader, val_loader, device, criterion, optimizer, scheduler):
    train_loss_history = []
    train_mae_history = []
    val_loss_history = []
    val_mae_history = []
    best_mean_mae = float('inf')  # Initialize with a very high value
    best_model_wts = None
    best_preds = None
    start_time = time.time()
    print('Training for ' + str(num_epochs) + ' epochs...')
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        mae_accumulator = 0.0
        val_preds = []
        val_gts = []
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.view(-1, 1).to(device)  # Reshape labels to (batch_size, 1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            mae_accumulator += torch.sum(torch.abs(labels - outputs)).item()
        mean_mae = mae_accumulator / len(train_loader.dataset)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}, MAE: {mean_mae}")
        train_loss_history.append(running_loss / len(train_loader))
        train_mae_history.append(mean_mae)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_mae_accumulator = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.view(-1, 1).to(device)  # Reshape labels to (batch_size, 1)
                outputs = model(inputs)
                val_preds.append(outputs.cpu().float().numpy())
                val_gts.append(labels.cpu().int().numpy())
                loss = criterion(outputs, labels.float())
                val_loss += loss.item()
                val_mae_accumulator += torch.sum(torch.abs(labels - outputs)).item()
        val_gts = np.concatenate(val_gts)[:, 0]
        val_preds = np.concatenate(val_preds)[:, 0]
        mean_val_mae = compute_mae(val_gts, val_preds)
        print(f"Validation Loss: {val_loss / len(val_loader)}, Validation MAE: {mean_val_mae}")

        # Append validation loss and MAE to the history lists
        val_loss_history.append(val_loss / len(val_loader))
        val_mae_history.append(mean_val_mae)

        # Save the weights whenever the validation MAE improves; the best MAE is
        # written into the checkpoint filename
        if mean_val_mae < best_mean_mae:
            best_mean_mae = mean_val_mae
            best_model_wts = model.state_dict().copy()
            best_preds = val_preds
            torch.save(best_model_wts, model.__class__.__name__ + '-mae' + f'{best_mean_mae:.2f}' + '.pt')
def evaluate(model, dataloader, device):
    gt_ages, pred_ages, images = [], [], []
    confusion_matrix = torch.zeros(10, 10)
    model.eval()
    with torch.no_grad():
        for batch_images, batch_gt_ages in dataloader:
            images.append(batch_images.numpy())
            batch_images = batch_images.to(device)
            batch_pred_ages = model(batch_images).cpu().float().numpy()
            pred_ages.append(batch_pred_ages)
            gt_ages.append(batch_gt_ages.int().numpy())
    images = np.concatenate(images, axis=0)
    gt_ages = np.concatenate(gt_ages)
    pred_ages = np.concatenate(pred_ages)[:, 0]
    return images, pd.Series(gt_ages, name='gt_ages'), pd.Series(pred_ages, name='pred_ages')
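For completeness, this is roughly how I reload the best checkpoint and re-evaluate it. The model class name (AgeRegressor) and the checkpoint path are placeholders standing in for my actual code, and compute_mae is the same helper used above.

# Rough sketch of the reload step; AgeRegressor and checkpoint_path are placeholders
checkpoint_path = 'AgeRegressor-mae<best>.pt'  # file written by train() above
model = AgeRegressor().to(device)
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()  # inference mode, same as inside evaluate()

images, gt_ages, pred_ages = evaluate(model, val_loader, device)
reload_mae = compute_mae(gt_ages.to_numpy(), pred_ages.to_numpy())
print('MAE after reloading:', reload_mae)  # higher than the best MAE saved during training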
Checking the predictions, it is noticeable that they vary, which is what increases the computed MAE. The ground truths are identical, as expected, confirming that it is the same data. Why does this happen? Shouldn't the predictions be consistent? I even tried setting seeds, but nothing changed.
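For reference, this is the kind of seeding I tried (a standard helper covering the Python, NumPy and PyTorch RNGs; the cuDNN flags are there to request deterministic GPU kernels):

import random

import numpy as np
import torch

def set_seed(seed=42):
    # Seed every RNG the training/evaluation pipeline touches
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Request deterministic cuDNN kernels (can slow training down)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)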