Binary classification accuracy does not improve during training

I have a binary classification problem that I am solving with logistic regression. The input to the linear layer is one-dimensional.
During training, the loss on the training set decreases, but the model keeps predicting 0 for every data sample. I checked the output: the first element of the 2-dimensional output is far larger than the second one, and this does not change after training. How can I resolve this?

class ERModeler(nn.Module):
    """Turn a pair of token-index sequences into two class logits.

    Each sequence is embedded with the pretrained vectors, every token vector
    is scaled by a learned per-token scalar weight, and the scaled vectors are
    averaged into one vector per sequence. The cosine similarity of the two
    averaged vectors is the single feature fed to a ``Linear(1, 2)`` layer.

    Args:
        vocab_size: Number of rows in the learned per-token weight table.
        embedding_dim: Accepted for interface compatibility; not used here,
            because the embedding width comes from ``weInput``.
        weInput: 2-D float tensor of pretrained embeddings handed to
            ``nn.Embedding.from_pretrained`` (frozen by default).
        eps: Lower bound applied to the product of the two vector norms so
            that an all-zero averaged vector yields a similarity of 0
            instead of NaN.
    """

    def __init__(self, vocab_size, embedding_dim, weInput, eps=1e-8):
        super(ERModeler, self).__init__()
        self.embeddings = nn.Embedding.from_pretrained(weInput)
        # One trainable scalar per vocabulary entry, initialised to 1 so the
        # model starts out as a plain (unweighted) average of word vectors.
        self.weight_embeddings = nn.Embedding(vocab_size, 1, scale_grad_by_freq=True)
        self.weight_embeddings.weight.data.fill_(1)
        self.linear = nn.Linear(1, 2)
        self.eps = eps

    def _weighted_mean(self, indices):
        """Return the weighted average embedding of one index sequence."""
        vectors = self.embeddings(indices)
        # (len, 1) weights broadcast across the embedding dimension.
        weighted = vectors * self.weight_embeddings(indices)
        return weighted.mean(0)

    def forward(self, input1, input2):
        """Compute logits of shape ``(1, 2)`` for one pair of sequences.

        Args:
            input1: 1-D tensor of token indices for the first sequence.
            input2: 1-D tensor of token indices for the second sequence.

        Returns:
            Tensor of shape ``(1, 2)`` holding the two class logits.

        Note:
            An empty index tensor still averages to NaN; only the
            zero-norm case is guarded.
        """
        vec1 = self._weighted_mean(input1)
        vec2 = self._weighted_mean(input2)
        # Floor the denominator: a zero-norm vector would otherwise give
        # 0 / 0 = NaN, which then poisons the loss and every later update.
        denom = (vec1.norm() * vec2.norm()).clamp(min=self.eps)
        # Reshape the scalar similarity to (1, 1): one sample, one feature.
        similarity = (vec1.dot(vec2) / denom).view(-1, 1)
        return self.linear(similarity)

# Build the model, the loss and the optimizer.
model = ERModeler(VOC_SIZE, EMBEDDING_DIM, weInput)
# CrossEntropyLoss applies log-softmax itself, so the model returns raw logits.
criterion = nn.CrossEntropyLoss()
# NOTE(review): with plain SGD and one sample per step this learning rate
# moves the parameters very slowly; consider raising it if predictions do
# not change during training.
learning_rate = 0.0001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

iteration = 0
for epoch in range(10):
    for i in range(len(pairs)):
        # One training sample: a pair of strings and its label.
        s1 = pairs[i][0]
        s2 = pairs[i][1]
        l = labels[i]

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass to get the logits.
        outputs = model(string_to_index(s1, word2index),
                        string_to_index(s2, word2index))

        # Cross-entropy loss on the logits.
        loss = criterion(outputs, l)

        # Backpropagate and update the parameters.
        loss.backward()
        optimizer.step()

        iteration += 1

        if iteration % 1000 == 0:
            # Periodic evaluation. This runs over the same `pairs`/`labels`
            # used for training, so it reports training-set accuracy.
            correct = 0
            total = 0
            loss_sum = 0.0

            # eval()/train() have no effect on Embedding/Linear layers but
            # keep the loop correct if mode-dependent layers are added.
            model.eval()
            # No autograd graph is needed here; without no_grad() every
            # accumulated loss tensor would keep its graph alive.
            with torch.no_grad():
                for j in range(len(pairs)):
                    eval_s1 = pairs[j][0]
                    eval_s2 = pairs[j][1]
                    eval_label = labels[j]

                    eval_outputs = model(string_to_index(eval_s1, word2index),
                                         string_to_index(eval_s2, word2index))

                    # Accumulate a plain Python float, not a tensor.
                    loss_sum += criterion(eval_outputs, eval_label).item()

                    # Predicted class is the index of the larger logit.
                    predicted = eval_outputs.argmax(dim=1)

                    total += 1
                    correct += (predicted == eval_label).item()
            model.train()

            accuracy = 100 * correct / total
            print(loss_sum)
            # `loss` below is the loss of the most recent training step.
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))