Siamese network loss not decreasing

Hello, I am trying to reproduce the Siamese network described in this paper, training it on batches of 28x28 images.
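
For context, my DataLoader yields (image1, image2, label) triples, as used in the training loop further down. A rough sketch of the kind of pair dataset I mean is below; the class name PairDataset, its constructor arguments, and the pairing logic are illustrative only, not my exact code.

import torch
from torch.utils.data import Dataset

class PairDataset(Dataset):
    # Toy stand-in: each item is (image1, image2, same_class_label)
    def __init__(self, images, labels, pairs):
        self.images = images   # float tensor, shape (N, 1, 28, 28)
        self.labels = labels   # long tensor, shape (N,)
        self.pairs = pairs     # list of (i, j) index pairs to compare

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        i, j = self.pairs[idx]
        same = float(self.labels[i].item() == self.labels[j].item())
        return self.images[i], self.images[j], torch.tensor(same)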

The training loss stays roughly constant and does not decrease. Here is the code:

Model

import torch
import torch.nn as nn
import torch.nn.functional as F

class Siamese(nn.Module):
    def __init__(self):
        super(Siamese, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0) 
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=0) 
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0) 
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.conv5 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0)

        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.drop = nn.Dropout(p=0.5)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(128)

        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 5)
        self.fcOut = nn.Linear(5, 10)
        self.sigmoid = nn.Sigmoid()

    def linear_layers(self, x):
        # builds a small MLP whose sizes depend on the length of x and applies it to x
        output = nn.Sequential(
            nn.Linear(int(x.shape[0]), int(x.shape[0] / 2)),
            nn.Dropout(0.5),
            nn.Linear(int(x.shape[0] / 2), int(x.shape[0] / 2)),
            nn.Dropout(0.5),
            nn.Linear(int(x.shape[0] / 2), int(x.shape[0]))
        )
        return output(x)
    
    def convs(self, x):
        x = self.relu(self.bn1(self.conv1(x))) #32,26,26
        x = self.relu(self.conv2(x)) #64,24,24
        x = self.pool(self.drop(x)) #64,12,12

        x = self.relu(self.conv3(x)) #64,10,10
        x = self.pool(self.drop(x)) #64,5,5
        x = self.relu(self.bn2(self.conv4(x))) #128,3,3

        x = self.relu(self.conv5(x)) #128,1,1

        return x

    def forward(self, x1, x2):
        x1 = self.convs(x1) #N,128,1,1
        x2 = self.convs(x2) #N,128,1,1
        similarity = F.cosine_similarity(x1, x2).view(x1.shape[0]) #N
        
        output = self.linear_layers(similarity)
        
        output = self.sigmoid(output)
        return output
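
To double-check the plumbing, I run a quick shape sanity check on random 28x28 inputs; this is a throwaway snippet and the batch size of 4 is arbitrary.

model = Siamese()
x1 = torch.randn(4, 1, 28, 28)  # batch of 4 is arbitrary
x2 = torch.randn(4, 1, 28, 28)
out = model(x1, x2)
print(out.shape)  # torch.Size([4]) -- one score in (0, 1) per pair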

Training and evaluation

def train(model, device, train_loader, optimizer, criterion, epoch, display=True):
    model.train()
    for batch_idx, (image1, image2, labels) in enumerate(train_loader):
        image1, image2, labels = image1.to(device), image2.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(image1, image2)
        labels = labels.float()
        loss = criterion(output, labels.view(output.shape))
        loss.backward()
        optimizer.step()

def test(model, device, test_loader, name="\nVal"):
    model.eval()
    test_loss = 0
    correct = 0
    test_predictions = []
    with torch.no_grad():
        for image1, image2, labels in test_loader:
            image1, image2, labels = image1.to(device), image2.to(device), labels.to(device)
            output = model(image1, image2)
            test_loss += F.binary_cross_entropy_with_logits(output, labels.view(output.shape), reduction='sum').item() # sum up batch loss
            for index, prob in enumerate(output):
              if prob <= 0.5 and labels[index] == 0:
                correct +=1
              if prob > 0.5 and labels[index] == 1:
                correct +=1

    test_loss /= len(test_loader.dataset)
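
The outer driver loop and the per-batch loss printing are not shown above; it is essentially the sketch below. The optimizer, criterion, and loaders shown here are placeholders rather than necessarily what I actually ran.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Siamese().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # placeholder choice
criterion = nn.BCELoss()  # placeholder; the model output already goes through a sigmoid

# train_loader / val_loader assumed to yield (image1, image2, label) batches
for epoch in range(50):
    train(model, device, train_loader, optimizer, criterion, epoch)
    test(model, device, val_loader)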

After training for 50 epochs I get the following results:

Train Epoch: 0 [0/10 (0%)]	Loss: 0.786944
Train Epoch: 5 [0/10 (0%)]	Loss: 0.607093
Train Epoch: 10 [0/10 (0%)]	Loss: 0.745753
Train Epoch: 15 [0/10 (0%)]	Loss: 0.716524
Train Epoch: 20 [0/10 (0%)]	Loss: 0.665673
Train Epoch: 25 [0/10 (0%)]	Loss: 0.724105
Train Epoch: 30 [0/10 (0%)]	Loss: 0.792330
Train Epoch: 35 [0/10 (0%)]	Loss: 0.921617
Train Epoch: 40 [0/10 (0%)]	Loss: 0.731843
Train Epoch: 45 [0/10 (0%)]	Loss: 0.669673

Does anything look wrong with my model?