I have a Siamese network structure that compares 2 inputs to test whether they are similar enough or not. However, when I start training the model, the loss stays exactly constant during the training stage. You can find my training loop, model architecture and loss function below:
Model
class ClassifierSiameseLSTM(nn.Module):
    """Siamese LSTM encoder: both inputs are passed through the SAME
    LSTM + MLP tower, producing one 256-d embedding per input.

    Expected input shape (per branch): (batch, seq_len, num_sensors),
    because the LSTM is built with batch_first=True.
    """

    def __init__(self, num_sensors=2, hidden_units=16):
        super().__init__()
        self.num_sensors = num_sensors  # number of input features per time step
        self.hidden_units = hidden_units
        self.num_layers = 1
        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers,
        )
        # Projection head mapping the last hidden state to the embedding space.
        self.fc = nn.Sequential(
            nn.Linear(in_features=self.hidden_units, out_features=128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 256),
        )

    def forward_once(self, x):
        """Encode a single input batch into a (batch, 256) embedding.

        BUGFIX: the initial states were previously hard-coded to
        dtype=torch.double on a global `device`, which mismatches the
        (default float32) LSTM weights and the input tensor. Build them
        from x's dtype/device instead. They are fresh zeros every call,
        so requires_grad on them is pointless and has been dropped.
        """
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units,
                         dtype=x.dtype, device=x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units,
                         dtype=x.dtype, device=x.device)
        output, (hn, cn) = self.lstm(x, (h0, c0))
        # Last time step of the top layer (equivalent to hn[-1] for a
        # single-layer, unidirectional LSTM with batch_first=True).
        out = self.fc(output[:, -1, :])
        return out

    def forward(self, x1, x2):
        """Return the pair of embeddings (one per branch, shared weights)."""
        output1 = self.forward_once(x1)
        output2 = self.forward_once(x2)
        return output1, output2
Train Loop
def train(epoch):
    """Run one training epoch over `train_loader`.

    Relies on module-level globals: model, optimizer, loss_function,
    train_loader, similarity_threshold. Accumulates running_loss and
    pair-classification accuracy (0 = genuine, 1 = forgery).
    """
    print('\nEpoch : %d' % epoch)
    model.train()
    running_loss = 0
    correct = 0
    total = 0
    # Loop-invariant: the similarity module is stateless, build it once.
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    for X1, X2, y in train_loader:
        X1, X2, y = X1.cuda(), X2.cuda(), y.cuda()
        optimizer.zero_grad()
        output1, output2 = model(X1, X2)
        loss = loss_function(output1, output2, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Accuracy bookkeeping only -- no autograd graph needed.
        with torch.no_grad():
            cos_similarity = cos(output1, output2)
            # Map cosine similarity [-1, 1] -> similarity score [0, 1],
            # then to a distance in [0, 1].
            similarity_scores = (cos_similarity + 1) / 2
            distances = 1 - similarity_scores
            # distance < threshold -> similar enough -> genuine (0);
            # otherwise -> skilled forgery (1). Vectorized instead of a
            # per-element Python loop.
            predicted = (distances >= similarity_threshold).long()
            total += y.size(0)
            correct += (predicted == y).sum().item()
Loss Function
class ContrastiveLoss(torch.nn.Module):
    """Contrastive loss over a cosine-derived distance in [0, 1].

    Similar pairs (label == 0) are pulled together by distance^2;
    dissimilar pairs (label == 1) are pushed apart until the distance
    reaches `margin` via clamp(margin - distance, 0)^2.
    """

    def __init__(self, margin=loss_margin):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        # Cosine similarity lies in [-1, 1]; rescale it to [0, 1] and
        # flip it into a distance (0 = identical direction).
        similarity = nn.CosineSimilarity(dim=1, eps=1e-6)(output1, output2)
        distance = 1 - (similarity + 1) / 2
        pull_term = (1 - label) * distance.pow(2)
        push_term = label * torch.clamp(self.margin - distance, min=0.0).pow(2)
        return torch.mean(pull_term + push_term)