Accuracy and F1-score remain static throughout all epochs

I am working on a classification problem using a VGG16 pre-trained on ImageNet. The training loss and validation loss change in each epoch, but the evaluation metrics (accuracy, F1-score, precision, recall) remain exactly static throughout the epochs, as can be seen in the attached image.

How can I make sure that the evaluation metrics change accordingly? I have to choose the checkpoint based on the best F1-score on the test set. Below is the code for validation and metrics calculation:

def validate():
    """Run one full validation pass.

    Returns:
        valid_loss (float): mean loss over all validation batches.
        y_true (np.ndarray): ground-truth class labels for the whole set.
        y_pred (np.ndarray): predicted class labels (argmax over logits).
    """
    # Put the network in eval mode so dropout/batch-norm behave deterministically.
    model.eval()

    valid_losses = []
    y_true, y_pred = [], []

    with torch.no_grad():
        for batch, target in valid_loader:
            # move data to the device
            batch = batch.to(device)
            target = target.to(device)
            # make predictions
            predictions = model(batch)
            # calculate loss
            loss = criterion(predictions, target)
            # track losses and predictions — this step was missing in the
            # original code: y_true/y_pred were never populated, so the
            # metrics computed from them could never change between epochs.
            valid_losses.append(loss.item())
            y_true.extend(target.cpu().numpy().tolist())
            # assumes `predictions` are per-class logits of shape
            # (batch, num_classes) — argmax over dim=1 yields class ids
            y_pred.extend(predictions.argmax(dim=1).cpu().numpy().tolist())

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    valid_losses = np.array(valid_losses)
    # calculate the mean validation loss
    valid_loss = valid_losses.mean()
    return valid_loss, y_true, y_pred
def evaluation_analysis(y_true, y_pred):
    """Compute accuracy, macro/micro averaged scores and the confusion matrix.

    Args:
        y_true (np.ndarray): ground-truth class labels.
        y_pred (np.ndarray): predicted class labels.

    Returns:
        tuple: (accuracy, macro_f1, macro_precision, macro_recall,
                micro_f1, confusion_matrix)
    """
    # calculate validation accuracy from y_true and y_pred
    valid_accuracy = np.mean(y_pred == y_true)
    # macro average: unweighted mean of per-class scores
    macro_f1 = metrics.f1_score(y_true, y_pred, average='macro')
    macro_p = metrics.precision_score(y_true, y_pred, average='macro')
    macro_r = metrics.recall_score(y_true, y_pred, average='macro')
    # micro average: global counts of TP/FP/FN
    micro_f1 = metrics.f1_score(y_true, y_pred, average='micro')
    # confusion matrix
    cm = metrics.confusion_matrix(y_true, y_pred)

    # BUG FIX: the original returned the undefined name `acc_score`
    # (NameError at runtime); the computed variable is `valid_accuracy`.
    return valid_accuracy, macro_f1, macro_p, macro_r, micro_f1, cm