Validation error increases after every epoch. Is my implementation correct?

se_ti · March 4, 2019, 1:12pm

I am trying to build a Siamese model that compares two time series and learns whether they are similar, using the cross-entropy loss. I have the following model:

class SiameseNetwork(nn.Module):
    """Siamese CNN that scores a pair of inputs with two output logits.

    Both inputs are passed through the same convolutional stack (shared
    weights). The two flattened feature vectors are concatenated and mapped
    to 2 logits by a single linear layer.
    """

    def __init__(self, in_shape):
        """Build the shared CNN and size the classifier with a probe pass.

        Args:
            in_shape: Shape of the input, either ``(C, H, W)`` for a single
                sample or ``(N, C, H, W)`` for a batch. The number of input
                channels is read from the third-to-last entry.

        Raises:
            ValueError: If ``in_shape`` does not have 3 or 4 entries.
        """
        super(SiameseNetwork, self).__init__()

        if len(in_shape) not in (3, 4):
            raise ValueError(
                "in_shape must be (C, H, W) or (N, C, H, W), got {}".format(
                    tuple(in_shape)))

        # Same value as in_shape[0] for a (C, H, W) shape, and the actual
        # channel axis for a (N, C, H, W) shape.
        in_channels = in_shape[-3]

        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=(1, 9), stride=(1, 2), padding=(0, 4)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 2)),

            nn.Conv2d(32, 64, kernel_size=(1, 3), stride=1, padding=(0, 1)),
            nn.ReLU(),

            nn.Conv2d(64, 64, kernel_size=(1, 3), stride=1, padding=(0, 1)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 2)),

            # The (6, 1) kernel spans the height axis, so an input of
            # height 6 is collapsed to height 1 here.
            nn.Conv2d(64, 128, kernel_size=(6, 1), stride=1, padding=(0, 0)),
            nn.ReLU(),
        )

        # Probe the CNN once to learn how many features it emits per sample.
        # No gradients are needed for a shape probe.
        with torch.no_grad():
            probe = torch.rand(in_shape)
            if probe.dim() == 3:
                # Add the batch axis. Without it the CNN output has no batch
                # dimension, num_flat_features() would drop the channel axis
                # instead, and the classifier would be sized too small.
                probe = probe.unsqueeze(0)
            n = self.num_flat_features(self.cnn(probe))

        # The classifier sees the features of both branches side by side.
        self.classifier = nn.Sequential(
            nn.Linear(2 * n, 2),
        )

    def forward_one(self, x):
        """Run one batched input through the shared CNN and flatten it.

        Returns a tensor of shape ``(x.size(0), features)``.
        """
        out_x = self.cnn(x)
        out_x = out_x.view(out_x.size(0), -1)
        return out_x

    def forward(self, x1, x2):
        """Return the two logits for each pair ``(x1[i], x2[i])``."""
        out_x1 = self.forward_one(x1)
        out_x2 = self.forward_one(x2)
        out_conc = torch.cat((out_x1, out_x2), 1)
        out = self.classifier(out_conc)
        return out

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first.

        The result is a plain Python ``int`` so it can be used directly as a
        layer size.
        """
        num_features = 1
        for dim in x.size()[1:]:  # all dimensions except the batch dimension
            num_features *= int(dim)
        return num_features

Then I train it using the following code:

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(avg_loss, accuracy_percent)``.

    When ``optimizer`` is given, the model is put in training mode and its
    parameters are updated after every batch. Otherwise the model is put in
    eval mode and the pass runs without gradient tracking.

    ``avg_loss`` is the mean of the per-batch losses. ``accuracy_percent`` is
    computed over all samples seen, so a smaller last batch is weighted
    correctly.
    """
    training = optimizer is not None
    if training:
        model.train()
    else:
        model.eval()

    loss_sum = 0.0
    num_correct = 0
    num_samples = 0

    with torch.set_grad_enabled(training):
        for x1_batch, x2_batch, y_batch in loader:
            x1_batch = x1_batch.float().to(device)
            x2_batch = x2_batch.float().to(device)
            # CrossEntropyLoss takes class indices: an int64 tensor of shape
            # (batch,). The same conversion is used on CPU and GPU.
            # NOTE(review): assumes one label per sample - confirm against
            # the dataset.
            y_batch = y_batch.long().reshape(-1).to(device)

            if training:
                optimizer.zero_grad()

            output = model(x1_batch, x2_batch)
            loss = criterion(output, y_batch)

            if training:
                loss.backward()
                optimizer.step()

            # .item() keeps the running totals as Python numbers, so no
            # tensors are accumulated across batches.
            loss_sum += loss.item()
            predicted = torch.max(output, 1)[1]
            num_correct += (predicted == y_batch).sum().item()
            num_samples += y_batch.size(0)

    avg_loss = loss_sum / len(loader)
    accuracy = 100.0 * num_correct / max(num_samples, 1)
    return avg_loss, accuracy


use_cuda = GPU and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

siamese_net = SiameseNetwork(input_shape).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(siamese_net.parameters(), lr=0.0025)

for epoch in range(number_epochs):
    if use_cuda:
        torch.cuda.empty_cache()

    # training
    train_loss, train_accuracy = _run_epoch(
        siamese_net, train_loader, criterion, device, optimizer)

    # validating
    valid_loss, valid_accuracy = _run_epoch(
        siamese_net, valid_loader, criterion, device)

    # print results
    train_loss_history.append(train_loss)
    print("===> Epoch {} Complete: Avg. training Loss: {:.4f}, Train Accuracy: {:.4f}".format(epoch, train_loss, train_accuracy))
    valid_loss_history.append(valid_loss)
    print("===> Epoch {} Complete: Avg. valid Loss: {:.4f}, Valid Accuracy: {:.4f}".format(epoch, valid_loss, valid_accuracy))

x1 and x2 are arrays of size 6x128, and y is 0 or 1 depending on whether x1 and x2 are similar. I split the data into training and validation sets, but the validation loss increases after every epoch while the training loss decreases. I checked for overfitting: I tried dropout, and I tried using fewer layers and fewer nodes, but the issue persists. I am new to PyTorch, so I would like to ask whether my implementation is correct. I also tried using random x1 and x2, and the network did not learn anything (it had the same loss after each epoch), which I think is the expected behavior. I would appreciate any suggestions.