I try to create LSTM Siamese network for text similarity classification.
But the network doesn’t learn correctly. What could it be?
class LSTMEncoder(nn.Module):
def __init__(self, embed_size, batch_size, hidden_size, num_layers, embed_matrix, bidir=True):
super(LSTMEncoder, self).__init__()
self.embed_size = embed_size
self.batch_size = batch_size
self.hidden_size_1 = hidden_size
self.hidden_size_2 = hidden_size
self.num_layers = num_layers
self.bidir = bidir
if self.bidir:
self.direction = 2
else: self.direction = 1
self.dropout = 0.35
self.embedding = embed_matrix
self.lstm_1 = nn.LSTM(input_size=self.embed_size, hidden_size=self.hidden_size_1, dropout=self.dropout,
num_layers=self.num_layers, bidirectional=self.bidir).to(device)
self.lstm_2 = nn.LSTM(input_size=self.hidden_size_1, hidden_size=self.hidden_size_2,dropout=self.dropout,
num_layers=self.num_layers, bidirectional=self.bidir).to(device)
def initHiddenCell(self):
rand_hidden = Variable(torch.randn(self.direction * self.num_layers, self.batch_size, self.hidden_size_1)).to(device)
rand_cell = Variable(torch.randn(self.direction * self.num_layers, self.batch_size, self.hidden_size_1)).to(device)
return rand_hidden, rand_cell
def forward(self, input, hidden, cell):
input = self.embedding(input).view(1, self.batch_size, -1)
output, (hidden, cell) = self.lstm_1(input, (hidden, cell))
return output, hidden, cell
class Siamese_lstm(nn.Module):
def __init__(self, embed_size, batch_size, hidden_size, num_layers, embed_matrix, bidir=True):
super(Siamese_lstm, self).__init__()
self.encoder = LSTMEncoder(embed_size, batch_size, hidden_size, num_layers, embed_matrix, bidir=True).to(device)
self.input_dim = 5 * self.encoder.direction * hidden_size
self.classifier = nn.Sequential(
nn.Linear(self.input_dim, int(self.input_dim/2)).to(device),
nn.Linear( int(self.input_dim/2), 2 ).to(device)
)
# self.classifier = nn.Linear(self.input_dim, 2).to(device)
def forward(self, s1, s2):
# init hidden, cell
h1, c1 = self.encoder.initHiddenCell()
h2, c2 = self.encoder.initHiddenCell()
# input one by one
for i in range(len(s1)):
v1, h1, c1 = self.encoder(s1[i], h1, c1)
for j in range(len(s2)):
v2, h2, c2 = self.encoder(s2[j], h2, c2)
# utilize these two encoded vectors
features = torch.cat((v1,torch.abs(v1 - v2),v2,v1*v2, (v1+v2)/2), 2)
# features = v1-v2
output = self.classifier(features)
output = torch.sigmoid(output)
return output
embedding:
TEXT.build_vocab(trn,min_freq=1,vectors = “fasttext.en.300d”)
embedding_matrix = nn.Embedding.from_pretrained(torch.FloatTensor(TEXT.vocab.vectors))
embedding_matrix.requires_grad = False
hyperparams:
model = Siamese_lstm(embed_size=300, batch_size=64, hidden_size=512, num_layers=4, embed_matrix=embedding_matrix, bidir=True)
Example of output:
tensor([[0.4977, 0.5058],
[0.4980, 0.5057],
[0.4976, 0.5062],
[0.4980, 0.5060],
[0.4981, 0.5058],
[0.4982, 0.5061],
[0.4981, 0.5057],
[0.4979, 0.5061],
[0.4978, 0.5056],
[0.4976, 0.5056],
Loss (CrossEntropyLoss) doesn’t decrease