LSTM binary text classification model not learning

Hello, my LSTM binary classification model in PyTorch is not learning. It always makes the same prediction.

import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class LSTM(nn.Module):
    def __init__(self, num_emb, output_size, num_layers=1, hidden_size=128):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Embed each token; the embedding width must match the LSTM input_size
        # (it was hard-coded to 500 before, which only works if hidden_size=500)
        self.embedding = nn.Embedding(num_emb, hidden_size)

        # Note: inter-layer dropout only takes effect when num_layers > 1
        self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True, dropout=0.5)
        self.fc_out = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):
        input_embs = self.embedding(input_seq)

        # Fresh zero states on every call, sized from the actual batch:
        # hard-coding a batch size of 512 fails on the last, smaller batch
        batch_size = input_seq.size(0)
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=input_seq.device)
        memory = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=input_seq.device)
        output, _ = self.lstm(input_embs, (hidden, memory))

        return self.fc_out(output)
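
Side note: as far as I can tell, nn.LSTM defaults (h0, c0) to zero tensors when no initial state is passed, so the manual state tensors could be dropped entirely. A minimal sketch of that variant (the LSTMDefaultState name is just for illustration):

class LSTMDefaultState(LSTM):
    def forward(self, input_seq):
        input_embs = self.embedding(input_seq)
        # With no (h0, c0) argument, nn.LSTM supplies zero states of the
        # right shape for the current batch automatically
        output, _ = self.lstm(input_embs)
        return self.fc_out(output)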

def objective(trial):
    hidden_size = 500
    output_size = 1
    num_layers = 2
    model = LSTM(num_emb=32000, output_size=output_size,
                 num_layers=num_layers, hidden_size=hidden_size).to(device)

    # Hyperparameters to tune
    learning_rate = trial.suggest_categorical('learning_rate', [1e-5, 1e-4, 1e-3, 1e-2])
    beta_1 = trial.suggest_categorical('beta_1', [0.85, 0.9, 0.95])
    beta_2 = trial.suggest_categorical('beta_2', [0.9, 0.95, 0.995, 0.996, 0.997, 0.998, 0.999])
    epsilon = trial.suggest_categorical('epsilon', [1e-8, 1e-7, 1e-6, 1e-5, 1e-4])
    
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(beta_1, beta_2), eps=epsilon)
    loss_fn = nn.BCEWithLogitsLoss()
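    # BCEWithLogitsLoss applies the sigmoid internally, so the model should
    # return raw logits and the targets must be floats shaped (batch, 1)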
    
    
    

    
    for epoch in range(10):

        model.train()
        training_loss_logger = []
        train_acc = 0  # reset each epoch so epochs don't leak into each other
        steps = 0
        
        for batch_idx, batch in tqdm(enumerate(train_loader, 1), desc="Training", total=len(train_loader)):
            text, _, labels = batch
            text, labels = text.to(device), labels.to(device)

            pred = model(text)

            # Classify from the output at the last time step only
            loss = loss_fn(pred[:, -1, :], labels.float().unsqueeze(1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            training_loss_logger.append(loss.item())

            # With output_size=1, argmax over the class dimension is always 0;
            # threshold the sigmoid of the logit to get a 0/1 prediction instead
            preds = (torch.sigmoid(pred[:, -1, :]) > 0.5).long().squeeze(1)
            train_acc += (preds == labels).sum()
            steps += labels.size(0)  # the last batch can be smaller than 512
            
        train_acc = (train_acc / steps).item()
        print(f"TRAIN LOSS: {np.mean(training_loss_logger)}")
        print(f"TRAIN ACC: {train_acc}")

        model.eval()
        test_loss_logger = []
        test_acc = 0
        steps = 0
        with torch.no_grad():
            for batch_idx, batch in tqdm(enumerate(val_loader, 1), desc="Testing", total=len(val_loader)):
                text, _, labels = batch
                text, labels = text.to(device), labels.to(device)

                pred = model(text)

                loss = loss_fn(pred[:, -1, :], labels.float().unsqueeze(1))
                test_loss_logger.append(loss.item())

                # Same thresholding as in training
                preds = (torch.sigmoid(pred[:, -1, :]) > 0.5).long().squeeze(1)
                test_acc += (preds == labels).sum()
                steps += labels.size(0)

            test_acc = (test_acc / steps).item()
            print(f"VAL LOSS: {np.mean(test_loss_logger)}")
            print(f"VAL ACC: {test_acc}")
            print(f"pred: {preds}")
            print(f"labels: {labels}")

    trial.set_user_attr("val_loss", np.mean(test_loss_logger))
    trial.set_user_attr("accuracy", test_acc)
    trial.set_user_attr("model", model)
    trial.set_user_attr("hyperparameters", {'learning_rate': learning_rate, 'beta_1': beta_1,
                                            'beta_2': beta_2, 'epsilon': epsilon})

    print(f"Used hyperparameters: learning_rate={learning_rate}, beta_1={beta_1}, beta_2={beta_2}, epsilon={epsilon}")

    return np.mean(test_loss_logger)
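
For context, the objective is driven by a minimizing Optuna study, roughly like this (a sketch, not my exact call; n_trials is just a placeholder):

import optuna

study = optuna.create_study(direction="minimize")      # objective returns val loss
study.optimize(objective, n_trials=20)                 # n_trials: placeholder value
print(study.best_trial.user_attrs["hyperparameters"])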


Prediction after an epoch:

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')
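
One thing I noticed while debugging: argmax over a dimension of size 1 can only ever return 0, so with output_size=1 the printout above is all zeros regardless of the logits. A minimal check (made-up values):

import torch

logits = torch.tensor([[-1.2], [0.7], [2.3]])           # (batch, 1) raw scores
print(logits.argmax(1))                                 # tensor([0, 0, 0]) -- always class 0
print((torch.sigmoid(logits) > 0.5).long().squeeze(1))  # tensor([0, 1, 1])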