Training Loss and Accuracy both increasing while training DP-SGD

I’m trying to test DP-SGD to privatize model training on an X-ray dataset, but I don’t know why I’m getting the same validation accuracy and F1 score for every epoch — the model performs no better than random. The training loss and validation loss are also increasing with epochs.

def train(model, train_loader, optimizer, epoch, device, privacy_engine, delta, criterion, MAX_PHYSICAL_BATCH_SIZE):
    """Run one DP-SGD training epoch; print mean loss, accuracy, and the privacy budget spent.

    Returns the mean per-batch top-1 training accuracy as a percentage.
    """
    model.train()

    batch_losses = []
    batch_accs = []

    # BatchMemoryManager splits each logical batch into physical chunks of at
    # most MAX_PHYSICAL_BATCH_SIZE samples so per-sample gradients fit in memory.
    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer
    ) as memory_safe_data_loader:

        for step, (inputs, labels) in enumerate(memory_safe_data_loader):
            optimizer.zero_grad()
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            loss = criterion(logits, labels)

            # Metrics are computed on detached copies, so order relative to
            # backward()/step() does not matter.
            predictions = np.argmax(logits.detach().cpu().numpy(), axis=1)
            targets_np = labels.detach().cpu().numpy()

            batch_losses.append(loss.item())
            batch_accs.append(accuracy_score(targets_np, predictions))

            loss.backward()
            optimizer.step()

        # Privacy budget actually consumed so far for the given delta.
        epsilon = privacy_engine.get_epsilon(delta)
        print(
            f"\tTrain Epoch: {epoch} \t"
            f"Loss: {np.mean(batch_losses):.6f} "
            f"Acc@1: {np.mean(batch_accs) * 100:.6f} "
            f"(ε = {epsilon:.2f}, δ = {delta})"
        )

    return np.mean(batch_accs) * 100  # Return train accuracy

def evaluate(model, loader, device, num_classes, criterion):
    """Evaluate `model` on `loader`; print and return mean accuracy, macro-F1,
    AUC, plus the collected labels, class probabilities, and predictions."""
    model.eval()

    batch_losses, batch_accs = [], []
    collected_preds, collected_labels, collected_probs = [], [], []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            batch_losses.append(criterion(logits, labels).item())

            probs = torch.softmax(logits, dim=1).cpu().numpy()
            preds = np.argmax(probs, axis=1)
            labels_np = labels.cpu().numpy()

            batch_accs.append(accuracy_score(labels_np, preds))
            collected_preds.extend(preds)
            collected_labels.extend(labels_np)
            collected_probs.extend(probs)

    top1_avg = np.mean(batch_accs)
    mean_loss = np.mean(batch_losses)
    f1 = f1_score(collected_labels, collected_preds, average="macro")
    # Multi-class AUC uses one-vs-rest on full probability rows; binary AUC
    # needs only the positive-class column.
    if num_classes > 2:
        auc = roc_auc_score(collected_labels, collected_probs, multi_class="ovr")
    else:
        auc = roc_auc_score(collected_labels, [p[1] for p in collected_probs])

    print(
        f"\tTest set:"
        f" Loss: {mean_loss:.6f} "
        f" Acc: {top1_avg * 100:.6f} "
        f" F1: {f1:.6f} "
        f" AUC: {auc:.6f} "
    )

    return top1_avg, f1, auc, collected_labels, collected_probs, collected_preds

def train_loop(NAME, EPOCHS, BATCH_SIZE, MAX_PHYSICAL_BATCH_SIZE, EPSILON, DELTA, CLIPPING, MAX_GRAD_NORM, LR, RESULTS, all_logs):
    """Train one DP-SGD configuration end to end and record its metrics.

    Builds loaders from the module-level datasets, wraps the model/optimizer
    with Opacus' PrivacyEngine targeting (EPSILON, DELTA), trains for EPOCHS,
    evaluates on the test set, and stores results in the RESULTS and all_logs
    dicts (mutated in place, keyed by the configuration string).
    """
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=MAX_PHYSICAL_BATCH_SIZE, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=MAX_PHYSICAL_BATCH_SIZE, shuffle=False)

    model = call_model(NAME)
    # Replace DP-incompatible layers (e.g. BatchNorm -> GroupNorm) so Opacus
    # can compute per-sample gradients.
    model = ModuleValidator.fix(model)
    model = model.to(device)

    # Convert class weights to a tensor for the weighted loss.
    # NOTE(review): assumes binary classification with [neg_weights, pos_weights]
    # defined at module level — confirm against the dataset setup.
    class_weights = torch.tensor([neg_weights, pos_weights], dtype=torch.float32).to(device)
    # Define loss function
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = optim.RMSprop(model.parameters(), lr=LR)
    privacy_engine = PrivacyEngine()

    # Adaptive-clipping hyperparameters are only meaningful for
    # clipping="adaptive"; forwarding them with flat/per_layer clipping can
    # raise an unexpected-keyword error in Opacus' optimizer construction.
    # NOTE(review): verify against the installed Opacus version.
    extra_kwargs = adaptive_clipping_kwargs if CLIPPING == "adaptive" else {}

    if CLIPPING == "per_layer":
        # Per-layer clipping needs one clipping bound per trainable parameter.
        num_layers = len([p for p in model.parameters() if p.requires_grad])
        MAX_GRAD_NORM_LIST = [MAX_GRAD_NORM] * num_layers
        model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            epochs=EPOCHS,
            target_epsilon=EPSILON,
            target_delta=DELTA,
            max_grad_norm=MAX_GRAD_NORM_LIST,
            clipping=CLIPPING,
            **extra_kwargs
        )
    else:
        model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
            module=model,
            optimizer=optimizer,
            data_loader=train_loader,
            epochs=EPOCHS,
            target_epsilon=EPSILON,
            target_delta=DELTA,
            max_grad_norm=MAX_GRAD_NORM,
            clipping=CLIPPING,
            **extra_kwargs
        )

    print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")

    train_acc_per_epoch = []
    val_acc_per_epoch = []

    for epoch in tqdm(range(EPOCHS), desc=f"Epoch", unit="epoch"):
        train_acc = train(model, train_loader, optimizer, epoch + 1, device, privacy_engine, DELTA, criterion, MAX_PHYSICAL_BATCH_SIZE)
        train_acc_per_epoch.append(train_acc)
        val_acc, _, _, _, _, _ = evaluate(model, val_loader, device, 2, criterion)
        val_acc_per_epoch.append(val_acc * 100)

    top1_avg, f1, auc_score, all_labels, all_probs, all_preds = evaluate(model, test_loader, device, 2, criterion)

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, digits=4))

    key = f"{NAME}_E{EPOCHS}_B{BATCH_SIZE}_ε{EPSILON}_C{CLIPPING}_N{MAX_GRAD_NORM}"

    RESULTS[key] = (top1_avg, f1, auc_score)
    all_logs[key] = {
        "train_acc": train_acc_per_epoch,
        # BUG FIX: this key was commented out, but the plotting code reads
        # log['val_acc'] and would raise KeyError.
        "val_acc": val_acc_per_epoch,
        "fpr_tpr": roc_curve(all_labels, [p[1] for p in all_probs]),
        "all_labels": all_labels,
        "all_preds": all_preds
    }

def ablation_study(parameters):
    """Run train_loop over every (epsilon, batch size) combination and plot
    accuracy curves and ROC curves for all configurations.

    `parameters` is a dict with keys NAME, EPOCHS, EPSILON_LIST,
    BATCH_SIZE_LIST, MAX_PHYSICAL_BATCH_SIZE, DELTA, CLIPPING, MAX_GRAD_NORM,
    and LR. Returns (RESULTS, all_logs) accumulated across all runs.
    """
    print(parameters)

    NAME = parameters["NAME"]
    EPOCHS = parameters["EPOCHS"]
    EPSILON_LIST = parameters["EPSILON_LIST"]
    BATCH_SIZE_LIST = parameters["BATCH_SIZE_LIST"]
    MAX_PHYSICAL_BATCH_SIZE = parameters["MAX_PHYSICAL_BATCH_SIZE"]
    DELTA = parameters["DELTA"]
    CLIPPING = parameters["CLIPPING"]
    MAX_GRAD_NORM = parameters["MAX_GRAD_NORM"]
    LR = parameters["LR"]

    RESULTS = {}
    all_logs = {}

    for eps in EPSILON_LIST:
        for batch in BATCH_SIZE_LIST:
            print(f"\nRunning for ε={eps}, Batch Size={batch}")
            train_loop(NAME, EPOCHS, batch, MAX_PHYSICAL_BATCH_SIZE, eps, DELTA, CLIPPING, MAX_GRAD_NORM, LR, RESULTS, all_logs)

    # Plot train and val accuracy curves
    plt.figure(figsize=(14, 6))
    for k, log in all_logs.items():
        plt.plot(log['train_acc'], label=f"{k} - Train")
        # BUG FIX: guard the lookup — train_loop may not store 'val_acc'
        # (the key was commented out there), which previously raised KeyError.
        if 'val_acc' in log:
            plt.plot(log['val_acc'], label=f"{k} - Val", linestyle='--')
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy (%)")
    plt.title("Train/Val Accuracy per Epoch for each (ε, Batch Size)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("DP_Train_Val_Accuracy_Ablation.png", dpi=300)
    plt.show()

    # Plot ROC curves
    plt.figure(figsize=(10, 8))
    for k, log in all_logs.items():
        fpr, tpr, _ = log['fpr_tpr']
        auc_val = RESULTS[k][2]  # AUC is the third element of each result tuple
        plt.plot(fpr, tpr, label=f"{k} (AUC={auc_val:.3f})")
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")  # chance diagonal
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curves for all (ε, Batch Size) Configurations")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("DP_AUC_ROC_Ablation.png", dpi=300)
    plt.show()

    return RESULTS, all_logs

# Experiment configuration for the ablation study.
parameters = {
    "NAME" : 'DenseNet121',
    "EPOCHS" : 5,
    "EPSILON_LIST" : [8.0],          # target privacy budgets to sweep
    "BATCH_SIZE_LIST" : [1024],      # logical batch sizes to sweep
    "MAX_PHYSICAL_BATCH_SIZE" : 16,  # per-step memory cap for BatchMemoryManager
    # NOTE(review): delta should typically be < 1/N for dataset size N — confirm
    # 1e-4 is appropriate for this dataset.
    "DELTA" : 1e-4,
    "CLIPPING" : "flat",             # Opacus clipping mode: flat / per_layer / adaptive
    "MAX_GRAD_NORM" : 1.0,           # per-sample gradient clipping bound C
    "LR" : 1e-5
}

# Hyperparameters for Opacus adaptive clipping; forwarded to
# make_private_with_epsilon by train_loop.
adaptive_clipping_kwargs = {
    "target_unclipped_quantile": 0.5,
    "clipbound_learning_rate": 0.2,
    "max_clipbound": 10.0,
    "min_clipbound": 0.1,
    "unclipped_num_std": 2.0,
}

# Entry point: run the sweep and collect per-configuration metrics.
results, all_logs = ablation_study(parameters)