Oscillating training loss and constant validation loss

My training loss keeps oscillating slightly up and down around the same value using MSELoss(). On top of that, my validation loss stays exactly the same down to the 8th decimal place. Did I mess something up? I don't think my model is training.

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from tqdm import tqdm

class LSTMSentenceEncoder(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,vectors,word2idx):
        super(LSTMSentenceEncoder,self).__init__()
        # embedding initialised from pretrained vectors and fine-tuned (freeze=False)
        self.embedding = nn.Embedding.from_pretrained(vectors,freeze=False,padding_idx=word2idx['_PAD'])
        
        self.lstm = nn.LSTM(input_size,hidden_size,num_layers,batch_first=True)
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.word2idx = word2idx
        self.input_size = input_size
        
    def forward(self,x):
        # note: fresh random initial hidden/cell states are drawn on every forward pass
        if torch.cuda.is_available():
            h0 = Variable(torch.randn(self.num_layers,x.size(0),self.hidden_size),requires_grad=True).cuda()
            c0 = Variable(torch.randn(self.num_layers,x.size(0),self.hidden_size),requires_grad=True).cuda()
            
            embs = self.embedding(x)
            embs = embs.view(x.size(0),-1,self.input_size).requires_grad_().cuda()
            
        else:
            h0 = Variable(torch.randn(self.num_layers,x.size(0),self.hidden_size),requires_grad=True)
            c0 = Variable(torch.randn(self.num_layers,x.size(0),self.hidden_size),requires_grad=True)
        
            embs = self.embedding(x)
            embs = embs.view(x.size(0),-1,self.input_size)

        out,(hn,cn) = self.lstm(embs,(h0,c0))
        # keep only the output of the final time step
        out = out[:, -1, :]
        return out,hn

class SiameseLSTM(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,vectors,word2idx):
        super(SiameseLSTM, self).__init__()
        self.encoder = LSTMSentenceEncoder(input_size,hidden_size,num_layers,vectors,word2idx)

    def forward(self, s1, s2):
        outputs = []
        
        # encode each pair one sample at a time
        for i in range(len(s1)):
            v1,h1 = self.encoder(s1[i])
            v2,h2 = self.encoder(s2[i])
            # select the final hidden state of the last layer
            h1 = h1[-1,-1,:]
            h2 = h2[-1,-1,:]

            # Manhattan similarity: exp(-||h1 - h2||_1), which always lies in (0,1]
            out = torch.exp(-torch.norm((h1 - h2), 1))
            outputs.append(out)

        outputs = torch.stack(outputs)
        return outputs.cuda() if torch.cuda.is_available() else outputs

model = SiameseLSTM(input_size,hid_size,num_layers,wordEmbs,word2idx)
if torch.cuda.is_available():
    model = model.cuda()
model.float()

optimizer = torch.optim.Adam(model.parameters(),lr = 0.001)
criterion = torch.nn.MSELoss()

count = 0
seq_dim = 15
num_epochs = 5

for epoch in tqdm(range(num_epochs)):
    for i, (s1,s2, labels) in enumerate(trainLoader):
        if torch.cuda.is_available():
            s1 = Variable(s1.view(-1, seq_dim, 1).cuda())
            s2 = Variable(s2.view(-1, seq_dim, 1).cuda())
            labels = Variable(labels.cuda())
            
        else:
            s1 = Variable(s1.view(-1, seq_dim, 1))
            s2 = Variable(s2.view(-1, seq_dim, 1))
            labels = Variable(labels)
    
        optimizer.zero_grad()
        
        outputs = model(s1,s2)
        
        loss = criterion(outputs, labels)
        
        loss.backward()
        
        optimizer.step()
        
        count += 1
        
        # run a quick validation pass every 10 training iterations
        if count % 10 == 0:
            val_losses = []
            for s1,s2, labels in testLoader:
                if torch.cuda.is_available():
                    s1 = Variable(s1.view(-1, seq_dim, 1).cuda())
                    s2 = Variable(s2.view(-1, seq_dim, 1).cuda())
                    labels = Variable(labels.cuda())

                else:
                    s1 = Variable(s1.view(-1, seq_dim, 1))
                    s2 = Variable(s2.view(-1, seq_dim, 1))
                    labels = Variable(labels)
                
                predicted = model(s1,s2)
                
                vloss = criterion(predicted,labels)
                val_losses.append(vloss.item())
            
            print('Iteration: {}. Train loss: {}. Val MSE: {}'.format(count, loss.item(), np.mean(val_losses)))
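
For reference, this is the kind of sanity check I was thinking of running on a single batch to confirm whether the gradients are non-zero and the weights actually move after optimizer.step(). It is just a sketch reusing the model, optimizer and criterion defined above; check_one_update is my own hypothetical helper, not part of the run that produced the numbers:

# hypothetical helper: run one update on a single batch and report how much each parameter moved
def check_one_update(model,optimizer,criterion,s1,s2,labels):
    before = {n: p.detach().clone() for n,p in model.named_parameters()}
    optimizer.zero_grad()
    loss = criterion(model(s1,s2),labels)
    loss.backward()
    optimizer.step()
    with torch.no_grad():
        for n,p in model.named_parameters():
            grad_norm = p.grad.norm().item() if p.grad is not None else None
            change = (p - before[n]).abs().max().item()
            print('{}: grad norm = {}, max abs weight change = {}'.format(n,grad_norm,change))

It would be called as check_one_update(model,optimizer,criterion,s1,s2,labels) with one batch taken from trainLoader and reshaped the same way as in the loop above.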

This could be due to many reasons; the most likely one is a learning rate that is too high or too low. Could you try tweaking that?
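
For example (just a sketch using the same model and data setup from your post; the learning rates and the 200-iteration cutoff are arbitrary), you could re-create the optimizer with a few different values and compare how the training loss moves over a short run:

# hypothetical sweep over a few learning rates to see which one lets the loss move
for lr in [1e-1,1e-2,1e-3,1e-4,1e-5]:
    model = SiameseLSTM(input_size,hid_size,num_layers,wordEmbs,word2idx)
    if torch.cuda.is_available():
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),lr=lr)
    criterion = torch.nn.MSELoss()

    for i,(s1,s2,labels) in enumerate(trainLoader):
        s1 = s1.view(-1,seq_dim,1)
        s2 = s2.view(-1,seq_dim,1)
        if torch.cuda.is_available():
            s1,s2,labels = s1.cuda(),s2.cuda(),labels.cuda()
        optimizer.zero_grad()
        loss = criterion(model(s1,s2),labels)
        loss.backward()
        optimizer.step()
        if i == 200:   # a short run is enough to compare trends
            break
    print('lr = {}: training loss after ~200 iterations = {}'.format(lr,loss.item()))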

I am having the same problem. Did you solve it?