Hello, my binary text classification training does not improve accuracy& loss. Thanks for any help!
# Define the objective function
def objective(trial):
model = nn.Sequential(
nn.Linear(500, 512),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(512, 256),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(256, 128),
nn.Linear(128, 5),
nn.Softmax(dim=1)
).to(device)
# Hyperparameters to tune
learning_rate = trial.suggest_categorical('learning_rate', [1e-5, 1e-4, 1e-3])
beta_1 = trial.suggest_categorical('beta_1', [0.8, 0.85, 0.9, 0.95])
beta_2 = trial.suggest_categorical('beta_2', [0.995, 0.996, 0.997, 0.998, 0.999])
epsilon = trial.suggest_categorical('epsilon', [1e-8, 1e-7, 1e-6])
batch_size = 16
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(beta_1, beta_2), eps=epsilon)
loss_fn = torch.nn.BCEWithLogitsLoss()
num_epochs = 10
accurasies = []
# Train the model
for epoch in range(num_epochs):
model.train()
start_time = time.time()
for batch_idx, batch in enumerate(train_loader, 1):
optimizer.zero_grad()
input_ids, _, labels = batch
input_ids, labels = input_ids.to(device), labels.to(device)
outputs = model(input_ids.float())
max_indices = torch.argmax(outputs, dim=1)
loss = loss_fn(max_indices.float(), labels.float())
loss.requires_grad = True
loss.backward()
optimizer.step()
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Epoch {epoch + 1} training completed in time: {elapsed_time}")
model.eval()
val_loss = 0.0
total = 0
correct = 0
for batch_idx, batch in enumerate(val_loader, 1):
with torch.no_grad():
input_ids, _, labels = batch
input_ids, labels = input_ids.to(device), labels.to(device)
outputs = model(input_ids.float())
max_indices = torch.argmax(outputs, dim=1)
max_indices_float32 = max_indices
val_loss += loss_fn(max_indices.float(), labels.float())
total += labels.size(0)
correct += (max_indices_float32 == labels).sum().item()
accurasies.append(correct/ total)
print(f"accuracy: {(correct/ total)}")
print(f"accurasies: {str(accurasies)}")
trial.set_user_attr("val_loss", val_loss)
trial.set_user_attr("model", model)
trial.set_user_attr("hyperparameters", {'learning_rate': learning_rate, 'beta_1': beta_1, 'beta_2': beta_2, 'epsilon': epsilon, 'batch_size': batch_size})
val_loss /= len(val_loader)
print(f"Epoch {epoch + 1}, Validation Loss: {val_loss}, Accuracy: {correct/ total}")
print(f"Used hyperparameters: {{'learning_rate': {learning_rate}, 'beta_1': {beta_1}, 'beta_2': {beta_2}, 'epsilon': {epsilon}, 'batch_size': {batch_size}}}")
return val_loss
# Create a study object and optimize the objective function
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)
best_trial = study.best_trial
print(f"Best trial number: {best_trial.number}")
print(f"Best trial validation loss: {best_trial.user_attrs['val_loss']}")
print(f"Best trial hyperparameters: {best_trial.params}")