Custom Minimum Error Entropy Loss Function Implementation

Hello, I am writing a TDNN to compare the performance of MSE and Minimum Error Entropy (MEE) loss on time-series data; the goal is to predict the next sample from the previous 10 delays. I wrote a custom function that computes the MEE loss, but I am not sure whether I need to implement a custom autograd Function or whether plain autograd will handle it. Also, I removed the negative log from the MEE formulation, so I need to do gradient ascent rather than descent; is there a way to make .backward() do this, or should I just negate the output?
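For example, this is what I mean by negating the output (just a sketch of the idea, not my actual training code):

# Sketch of the "negate the output" option: the optimizer minimizes, so
# minimizing -V(e) is the same as maximizing the information potential V(e).
info_potential = InformationPotential(outputs, targets, 0, variance)
loss = -info_potential
optimizer.zero_grad()
loss.backward()
optimizer.step()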

Right now training isn't working (and each epoch takes forever), and I don't fully understand how writing a custom loss function is supposed to work. Please let me know if you can offer any insight or if you spot problems in my code. I also had to change my training loop: the MEE formulation needs a set of errors to estimate the loss, so I accumulate the outputs and targets over a whole epoch (with online learning I would only get one error at a time). Previously, for the MSE baseline, I trained online, backpropagating after every window (i.e. every 10 elements of the data array) of the time series.
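For reference, the quantity my loss function computes is the quadratic information potential of the prediction errors; the usual MEE criterion minimizes its negative log (Renyi's quadratic entropy):

$$
V(e) = \frac{1}{N^{2}} \sum_{i=1}^{N}\sum_{j=1}^{N} G_{\sigma\sqrt{2}}\!\left(e_i - e_j\right),
\qquad
G_{\sigma}(x) = \frac{1}{\sqrt{2\pi}\,\sigma}\, e^{-x^{2}/(2\sigma^{2})},
\qquad
H_2(e) = -\log V(e),
$$

where $e_i$ is the prediction error for sample $i$. Since I dropped the $-\log$, I want to maximize $V(e)$ directly.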

Model:

import torch
import torch.nn as nn

class TDNN(nn.Module):
    def __init__(self, num_delays, hidden_size, num_outputs):
        super(TDNN, self).__init__()
        self.num_delays = num_delays
        self.hidden_size = hidden_size
        self.num_outputs = num_outputs
        
        self.fc0 = nn.Linear(self.num_delays, self.hidden_size)
        self.fc1 = nn.Linear(self.hidden_size, num_outputs)
        
    def forward(self, x):
        f1 = self.fc0(x)
        out = self.fc1(f1)
        return out
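
For context, this is roughly how I instantiate the model and optimizer (the hidden size, optimizer choice, and learning rate here are placeholders rather than my exact values):

model2 = TDNN(num_delays=10, hidden_size=32, num_outputs=1).cuda()  # 10 delayed samples in, next sample out
optimizer = torch.optim.SGD(model2.parameters(), lr=1e-3)           # placeholder optimizer / learning rate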

MEE Loss Function:

import math

def Gaussian_Kernel(x, mu, sigma):
    # Gaussian kernel (normal pdf) with mean mu and bandwidth sigma
    prob = (1. / (torch.sqrt(2. * math.pi * (sigma ** 2)))) * torch.exp(-((x - mu) ** 2) / (2. * (sigma ** 2)))
    return prob
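
A quick way to sanity-check the kernel (not part of my training code) is to compare it against torch.distributions.Normal:

# Sanity check: the kernel should match the normal pdf.
from torch.distributions import Normal

x = torch.linspace(-3., 3., 7)
mu, sigma = torch.tensor(0.), torch.tensor(1.5)   # arbitrary test values
ref = Normal(mu, sigma).log_prob(x).exp()
print(torch.allclose(Gaussian_Kernel(x, mu, sigma), ref, atol=1e-6))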

train_list = torch.tensor(train_list)
variance = torch.var(train_list) ** 0.5   # note: this is the standard deviation, used as the kernel bandwidth
mean = torch.mean(train_list)

def InformationPotential(output, target, mu, sigma):
    # Quadratic information potential: average of the Gaussian kernel over all
    # pairwise differences of the errors e_i - e_j (kernel bandwidth sigma*sqrt(2))
    error = output - target
    error_diff = []
    for i in range(error.size(0)):
        for j in range(error.size(0)):
            error_diff.append(error[i] - error[j])
    error_diff = torch.cat(error_diff)
    return (1. / (target.size(0) ** 2)) * torch.sum(Gaussian_Kernel(error_diff, mu, sigma * (2 ** 0.5)))
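
I suspect the nested Python loop over all error pairs is a big part of why each epoch takes so long. A vectorized version of the same computation would look roughly like this (a sketch assuming output and target are 1-D or column vectors of matching shape):

def InformationPotential_vec(output, target, mu, sigma):
    # Same quantity as above: broadcasting builds the N x N matrix of pairwise
    # error differences instead of looping in Python.
    error = (output - target).reshape(-1)                   # shape [N]
    error_diff = error.unsqueeze(1) - error.unsqueeze(0)    # shape [N, N]
    return Gaussian_Kernel(error_diff, mu, sigma * (2 ** 0.5)).mean()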

Training Function:

train_losses = []
train_counter = []

def train2(model2):
    model2.train()

    # Accumulate outputs and targets over the whole epoch, since the
    # information potential needs the full set of errors at once.
    outputs = []
    targets = []
    for i, (data, target) in enumerate(train_loader):
        data, target = data.cuda(), target.cuda()

        # Forward pass only; the loss is computed once per epoch after all
        # outputs and targets have been collected
        output = model2(data)
        outputs.append(output)
        targets.append(target)
    
    outputs = torch.cat(outputs)
    targets = torch.cat(targets)
        
    loss = InformationPotential(outputs, targets, 0, variance)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.item()))

Test Function:

tot_losses = []
tot_counter = [i * len(train_loader.dataset) for i in range(EPOCHS + 1)]

def test2(model2, loader):
    with torch.no_grad():
        model2.eval()
        tot_loss = 0.0
        predictions = []
        targets = []
        
        for i, (data, target) in enumerate(loader):
            data, target = data.cuda(), target.cuda()
            output = model2(data)
            targets.append(target.cpu())
            predictions.append(output.cpu())
        
        predictions = torch.cat(predictions)
        targets = torch.cat(targets)
            
        tot_loss += InformationPotential(predictions, targets, 0, variance)

        # InformationPotential is already an average over all error pairs, so
        # this extra division by the dataset size makes the reported value tiny.
        tot_loss /= len(loader.dataset)
        tot_losses.append(tot_loss)
                
        return tot_loss, predictions, targets

Train/Test Loop:

print("Before training validation set performance: \n")
test_loss, _, _ = test2(model2, test_loader)
print("\nTest : Avg. Loss : " + str(test_loss))
print()

print("TRAINING")
for epoch in range(EPOCHS):
    print("---------------------------------------------------------------------------")
    train2(model2)

    test_loss, _, _ = test2(model2, test_loader)
    print("\nTest : Avg. Loss : {:.4f}\n".format(test_loss))
    print()
    print("---------------------------------------------------------------------------")

print("FINISHED TRAINING")

Train/Test Loop Output:

Before training validation set performance: 


Test : Avg. Loss : tensor(0.0005)

TRAINING
---------------------------------------------------------------------------
Train Epoch: 0	Loss: 0.630891

Test : Avg. Loss : tensor(0.0005)

---------------------------------------------------------------------------
---------------------------------------------------------------------------
Train Epoch: 1	Loss: 0.630746

Test : Avg. Loss : tensor(0.0005)

---------------------------------------------------------------------------
---------------------------------------------------------------------------

Thank you in advance for any help and insight!