LSTM loss is constant

I have a siamese network that compares two inputs and decides whether they are similar enough. However, when I train the model, the loss stays exactly constant throughout the training stage. You can find my training loop, model architecture, and loss function below:

Model

import torch
import torch.nn as nn

# `device` is defined elsewhere, e.g. torch.device("cuda")

class ClassifierSiameseLSTM(nn.Module):
    def __init__(self, num_sensors=2, hidden_units=16):
        super().__init__()
        self.num_sensors = num_sensors  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = 1

        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers,
        )

        self.fc = nn.Sequential(
            nn.Linear(in_features=self.hidden_units, out_features=128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 256),
        )
        # self.fc = nn.Linear(in_features=self.hidden_units, out_features=256)  # Binary Cross Entropy

    def forward_once(self, x):
        batch_size = x.shape[0]
        # Zero-initialize the hidden and cell states for each batch.
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units,
                         dtype=torch.double).to(device).requires_grad_()
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units,
                         dtype=torch.double).to(device).requires_grad_()

        output, (hn, cn) = self.lstm(x, (h0, c0))
        # out = self.linear(hn[0]).flatten()  # First dim of hn is num_layers, which is set to 1 above.
        out = self.fc(output[:, -1, :])  # embedding taken from the last time step
        return out

    def forward(self, x1, x2):
        output1 = self.forward_once(x1)
        output2 = self.forward_once(x2)
        return output1, output2
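
For context, here is a minimal sketch of how I invoke the model; the batch size (4) and sequence length (50) are just placeholder values I made up for illustration:

# Minimal shape check (illustrative values only).
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ClassifierSiameseLSTM(num_sensors=2, hidden_units=16).double().to(device)

# Batch of 4 sequences, 50 time steps each, num_sensors=2 features per step.
x1 = torch.randn(4, 50, 2, dtype=torch.double, device=device)
x2 = torch.randn(4, 50, 2, dtype=torch.double, device=device)
out1, out2 = model(x1, x2)
print(out1.shape, out2.shape)  # torch.Size([4, 256]) torch.Size([4, 256])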

Train Loop

# model, optimizer, loss_function, train_loader, and similarity_threshold
# are defined elsewhere (globals).
def train(epoch):
    print('\nEpoch: %d' % epoch)
    model.train()

    running_loss = 0
    correct = 0
    total = 0

    for X1, X2, y in train_loader:
        X1, X2, y = X1.cuda(), X2.cuda(), y.cuda()

        optimizer.zero_grad()
        output1, output2 = model(X1, X2)

        loss = loss_function(output1, output2, y)
        # print(loss)
        loss.backward()
        optimizer.step()

        # Map cosine similarity in [-1, 1] to a distance in [0, 1].
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        cos_similarity = cos(output1, output2)
        similarity_scores = (cos_similarity + 1) / 2
        distances = 1 - similarity_scores

        predicted = []
        running_loss += loss.item()

        for distance in distances:
            if distance < similarity_threshold:  # genuine
                predicted.append(0)
            else:                                # skilled forgery
                predicted.append(1)

        total += y.size(0)
        correct += (torch.tensor(predicted).to(device) == y).sum().item()
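
To make the thresholding concrete, this is how the similarity-to-distance mapping behaves on hand-picked values (similarity_threshold=0.5 is just an illustrative value here, not necessarily the one I use):

import torch

# Sanity check of the cosine -> distance mapping used above:
# cos = 1  (same direction)     -> distance 0
# cos = 0  (orthogonal)         -> distance 0.5
# cos = -1 (opposite direction) -> distance 1
cos_similarity = torch.tensor([1.0, 0.0, -1.0])
distances = 1 - (cos_similarity + 1) / 2
print(distances)  # tensor([0.0000, 0.5000, 1.0000])

similarity_threshold = 0.5  # illustrative value only
predicted = (distances >= similarity_threshold).long()  # 0 = genuine, 1 = forgery
print(predicted)  # tensor([0, 1, 1])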

Loss Function

# loss_margin is defined elsewhere (a global).
class ContrastiveLoss(torch.nn.Module):
    def __init__(self, margin=loss_margin):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        # Same cosine -> distance mapping as in the train loop.
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        cos_similarity = cos(output1, output2)
        similarity_scores = (cos_similarity + 1) / 2
        distance = 1 - similarity_scores

        # Similar pairs (label 0) are pulled together; dissimilar pairs
        # (label 1) are pushed apart up to the margin.
        loss_contrastive = torch.mean((1 - label) * torch.pow(distance, 2) +
                                      label * torch.pow(torch.clamp(self.margin - distance, min=0.0), 2))

        return loss_contrastive
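
For completeness, a small worked example of the loss on hand-picked embeddings (margin=1.0 is an illustrative value, not necessarily my loss_margin):

import torch

loss_margin = 1.0  # illustrative value only
loss_function = ContrastiveLoss(margin=loss_margin)

# Pair 1 (genuine, label 0): cos = 1 -> distance 0   -> (1-0) * 0^2       = 0
# Pair 2 (forgery, label 1): cos = 0 -> distance 0.5 -> clamp(1-0.5, 0)^2 = 0.25
output1 = torch.tensor([[1.0, 0.0], [1.0, 0.0]])
output2 = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
label = torch.tensor([0.0, 1.0])  # 0 = genuine pair, 1 = forgery pair

print(loss_function(output1, output2, label))  # tensor(0.1250) = mean(0, 0.25)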