My training loss keeps oscillating slightly around the same value with MSELoss(). On top of that, my validation loss stays exactly the same down to the 8th decimal place. Did I mess something up? I don't think my model is training.
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from tqdm import tqdm

class LSTMSentenceEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, vectors, word2idx):
        super(LSTMSentenceEncoder, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(vectors, freeze=False,
                                                      padding_idx=word2idx['_PAD'])
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.word2idx = word2idx
        self.input_size = input_size

    def forward(self, x):
        # Fresh random initial hidden/cell states are drawn on every forward pass
        if torch.cuda.is_available():
            h0 = Variable(torch.randn(self.num_layers, x.size(0), self.hidden_size), requires_grad=True).cuda()
            c0 = Variable(torch.randn(self.num_layers, x.size(0), self.hidden_size), requires_grad=True).cuda()
            embs = self.embedding(x)
            embs = embs.view(x.size(0), -1, self.input_size).requires_grad_().cuda()
        else:
            h0 = Variable(torch.randn(self.num_layers, x.size(0), self.hidden_size), requires_grad=True)
            c0 = Variable(torch.randn(self.num_layers, x.size(0), self.hidden_size), requires_grad=True)
            embs = self.embedding(x)
            embs = embs.view(x.size(0), -1, self.input_size)
        out, (hn, cn) = self.lstm(embs, (h0, c0))
        out = out[:, -1, :]  # keep only the last time step of the output sequence
        return out, hn
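For reference, with batch_first=True and a unidirectional LSTM, out[:, -1, :] should equal hn[-1], so either one gives the final hidden state. A quick shape check (the sizes below are made up purely for illustration):

enc = nn.LSTM(input_size=50, hidden_size=64, num_layers=2, batch_first=True)
x = torch.randn(8, 15, 50)                    # (batch, seq_len, input_size)
out, (hn, cn) = enc(x)
print(out.shape)                              # torch.Size([8, 15, 64]) -> (batch, seq, hidden)
print(hn.shape)                               # torch.Size([2, 8, 64])  -> (layers, batch, hidden)
print(torch.allclose(out[:, -1, :], hn[-1]))  # True for a unidirectional LSTM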
class SiameseLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, vectors, word2idx):
        super(SiameseLSTM, self).__init__()
        self.encoder = LSTMSentenceEncoder(input_size, hidden_size, num_layers, vectors, word2idx)

    def forward(self, s1, s2):
        outputs = []
        for i in range(len(s1)):
            # encode both sentences with the same (shared) encoder
            v1, h1 = self.encoder(s1[i])
            v2, h2 = self.encoder(s2[i])
            # select the final hidden representation
            h1 = h1[-1, -1, :]
            h2 = h2[-1, -1, :]
            # similarity = exp(-L1 distance), so it lies in (0, 1]
            out = torch.exp(-torch.norm(h1 - h2, 1))
            outputs.append(out)
        # the stacked scores are already on the encoder's device
        return torch.stack(outputs)
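The score here is exp of the negative L1 distance (the Manhattan-LSTM similarity), so every output lands in (0, 1]; assuming the labels are similarity scores on the same [0, 1] scale, MSELoss makes sense. A quick numeric check with made-up vectors:

h1 = torch.tensor([0.5, -1.0, 2.0])
h2 = torch.tensor([0.0, -1.0, 1.0])
print(torch.exp(-torch.norm(h1 - h2, 1)))  # L1 distance 1.5 -> tensor(0.2231) == exp(-1.5)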
model = SiameseLSTM(input_size, hid_size, num_layers, wordEmbs, word2idx)
if torch.cuda.is_available():
    model = model.cuda()
model.float()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

count = 0
seq_dim = 15
num_epochs = 5
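One thing I know is worth double-checking with MSELoss is that predictions and labels agree in shape and dtype, since broadcasting can otherwise silently compute a loss over the wrong elements; a toy check with made-up sizes:

crit = torch.nn.MSELoss()
pred = torch.rand(32)             # 1-D batch of similarity scores, as the model returns
target = torch.rand(32).float()   # labels should match pred in shape and dtype
print(crit(pred, target))         # a 0-dim loss tensor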
for epoch in tqdm(range(num_epochs)):
    for i, (s1, s2, labels) in enumerate(trainLoader):
        if torch.cuda.is_available():
            s1 = Variable(s1.view(-1, seq_dim, 1).cuda())
            s2 = Variable(s2.view(-1, seq_dim, 1).cuda())
            labels = Variable(labels.cuda())
        else:
            s1 = Variable(s1.view(-1, seq_dim, 1))
            s2 = Variable(s2.view(-1, seq_dim, 1))
            labels = Variable(labels)

        optimizer.zero_grad()
        outputs = model(s1, s2)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        count += 1

        if count % 10 == 0:
            val_losses = []
            correct = 0
            total = 0
            with torch.no_grad():  # no gradients needed during validation
                for s1, s2, labels in testLoader:
                    if torch.cuda.is_available():
                        s1 = Variable(s1.view(-1, seq_dim, 1).cuda())
                        s2 = Variable(s2.view(-1, seq_dim, 1).cuda())
                        labels = Variable(labels.cuda())
                    else:
                        s1 = Variable(s1.view(-1, seq_dim, 1))
                        s2 = Variable(s2.view(-1, seq_dim, 1))
                        labels = Variable(labels)
                    predicted = model(s1, s2)
                    total += labels.size(0)
                    vloss = criterion(predicted, labels)
                    val_losses.append(vloss.item())
            print('Iteration: {}. Train loss: {}. Val MSE: {}'.format(count, loss.item(), np.mean(val_losses)))
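To see whether the weights are actually moving, here is a minimal gradient check I can drop in right after loss.backward() (names match the loop above; this is just a debugging sketch, not part of the model):

# Debugging sketch: print each parameter's gradient norm after loss.backward()
for name, p in model.named_parameters():
    grad_norm = p.grad.norm().item() if p.grad is not None else float('nan')
    print('{:50s} grad norm: {}'.format(name, grad_norm))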