Why does the loss not change when I apply a custom loss?

Hi. I’m trying to implement a custom loss, but I have a problem: the loss doesn’t change during training.
I think the error is related to autograd, but I’m not sure.
What am I doing wrong?

Below is the code and output.

Code:

import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader

train_size = 10000
validate_size = 500

train_data = pd.read_csv("train.csv", header=None, nrows=train_size)
train_tensorX = torch.Tensor(train_data.values)[:, :-1]
train_tensorY = torch.Tensor(train_data.values)[:, -1]
train_dataset = TensorDataset(train_tensorX, train_tensorY)
train_dataloader = DataLoader(train_dataset, batch_size=1000)

validate_data = pd.read_csv("val.csv", header=None, nrows=validate_size)
validate_tensorX = torch.Tensor(validate_data.values)[:, :-1]
validate_tensorY = torch.Tensor(validate_data.values)[:, -1]
validate_dataset = TensorDataset(validate_tensorX, validate_tensorY)
validate_dataloader = DataLoader(validate_dataset)

class opMetricLoss(nn.Module):
    def __init__(self):
      super(opMetricLoss, self).__init__()

    def forward(self, pred, target):
      self.tensorOne = torch.ones(pred.size(), dtype=torch.float, requires_grad=True)
      predDotTensorOne = ((pred * self.tensorOne) == True).long()
      targetDotTensorOne = ((target * self.tensorOne) == True).long()
      tp = ((predDotTensorOne + targetDotTensorOne) == self.tensorOne*2).sum()
      fp = ((predDotTensorOne*0.6 + targetDotTensorOne*0.4) == self.tensorOne*0.6).sum()
      fn = ((predDotTensorOne*0.6 + targetDotTensorOne*0.4) == self.tensorOne*0.4).sum()

      return (tp-fp)/(tp+fn) 


class Net(nn.Module):
  def __init__(self,input_shape):
    super(Net,self).__init__()
    self.fc1 = nn.Linear(input_shape,1024)
    self.fc2 = nn.Linear(1024,64)
    self.fc3 = nn.Linear(64,1)
  def forward(self,x):
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = (torch.sigmoid(self.fc3(x)) > 0.5).float()
    return x

learning_rate = 0.0000001
epochs = 700
model = Net(input_shape=train_tensorX.shape[1])
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)
loss_fn = opMetricLoss()


metricHistory = []
metricValidateHistory = []
losses = []
accur = []
for i in range(epochs):
  for j,(x_train,y_train) in enumerate(train_dataloader):
    
    #calculate output
    output = model(x_train)

    #calculate loss

    loss = loss_fn(output,y_train.reshape(-1,1))
    loss.requires_grad=True
 
    #accuracy
    predicted = model(torch.tensor(train_tensorX,dtype=torch.float32))
    acc = (predicted.reshape(-1).detach().round() == train_tensorY).float().mean()
	
    #backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  if i%10 == 0 or i<5:
    metricHistory.append(getMetric(output.round(),y_train.reshape(-1,1)))
    
    with torch.no_grad():
      validateOutput = model(validate_tensorX)
      metricValidateHistory.append(getMetric(validateOutput.round(),validate_tensorY.reshape(-1,1)))
        
    losses.append(loss)
    accur.append(acc)
    print("epoch {} loss : {} accuracy : {}".format(i,loss,acc)," tr: ",metricHistory[-1], "val: ", metricValidateHistory[-1])

Output:

epoch 0 loss : -7.7317070960998535 accuracy : 0.6672000288963318  tr:  -7.7317073170731705 val:  -3.9210526315789473
epoch 1 loss : -7.7317070960998535 accuracy : 0.6672000288963318  tr:  -7.7317073170731705 val:  -3.9210526315789473
epoch 2 loss : -7.7317070960998535 accuracy : 0.6672000288963318  tr:  -7.7317073170731705 val:  -3.9210526315789473
epoch 3 loss : -7.7317070960998535 accuracy : 0.6672000288963318  tr:  -7.7317073170731705 val:  -3.9210526315789473
epoch 4 loss : -7.7317070960998535 accuracy : 0.6672000288963318  tr:  -7.7317073170731705 val:  -3.9210526315789473
epoch 10 loss : -7.7317070960998535 accuracy : 0.6672000288963318  tr:  -7.7317073170731705 val:  -3.9210526315789473
epoch 20 loss : -7.7317070960998535 accuracy : 0.6672000288963318  tr:  -7.7317073170731705 val:  -3.9210526315789473

The threshold op is not differentiable:

x = (torch.sigmoid(self.fc3(x)) > 0.5).float()

and will break the computation graph.
Setting the .requires_grad attribute to True afterwards will mask the error but won’t fix it:

loss.requires_grad=True
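
A common workaround is to train on a differentiable surrogate of the metric and keep the hard-thresholded version for evaluation only. Here is a minimal sketch (my own rewrite, the class name is mine, not your exact weighting): it replaces the == comparisons with “soft” counts computed directly from the sigmoid probabilities, so tp, fp, and fn stay connected to the graph:

import torch
from torch import nn

class SoftOpMetricLoss(nn.Module):
    # Differentiable surrogate for (tp - fp) / (tp + fn):
    # uses the raw sigmoid probabilities as soft predictions
    # instead of thresholded 0/1 values.
    def forward(self, pred, target):
        target = target.float()
        tp = (pred * target).sum()          # soft true positives
        fp = (pred * (1.0 - target)).sum()  # soft false positives
        fn = ((1.0 - pred) * target).sum()  # soft false negatives
        eps = 1e-8                          # guard against division by zero
        # if the metric should be maximized, minimize its negative instead
        return (tp - fp) / (tp + fn + eps)

For this to work, Net.forward() must return torch.sigmoid(self.fc3(x)) directly (no > 0.5), and the loss.requires_grad=True line should be removed — the loss will then already carry a grad_fn.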

I would like to solve the problem using a custom metric.
Is the comparison operation inside Net::forward() the only problem?

If I replace

x = (torch.sigmoid(self.fc3(x)) > 0.5).float()

with

x = torch.sigmoid(self.fc3(x))

and then tweak opMetricLoss::forward() a bit, will comparison operations such as

fp = ((predDotTensorOne*0.6 + targetDotTensorOne*0.4) == self.tensorOne*0.6).sum()

predDotTensorOne = ((pred * self.tensorOne) == True).long()

tp = ((predDotTensorOne + targetDotTensorOne) > torch.full((1, pred.size()[0]), 1.5)).sum()

still be a hindrance?
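
A quick check on a toy tensor (a sketch of mine, variable names are made up) suggests they would, since the result of a comparison carries no grad_fn:

import torch

x = torch.randn(4, requires_grad=True)
soft = torch.sigmoid(x)        # differentiable: soft has a grad_fn
hard = (soft > 0.5).float()    # comparison returns a detached tensor

print(soft.grad_fn)            # <SigmoidBackward0 object at ...>
print(hard.grad_fn)            # None -- the graph ends here
print(hard.requires_grad)      # False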