Error when using a custom loss function

Hi all,
I am trying to use a custom loss function during training. Please see the code below.

# Custom loss function
import numpy as np

def pfbeta_fast(labels, predictions, beta=1.3):

    pTP = np.sum(labels * predictions)
    pFP = np.sum((1-labels) * predictions)
    num_positives = np.sum(labels)  #  = pTP+pFN

    pPrecision = pTP/(pTP+pFP)
    pRecall = pTP/num_positives

    beta_squared = beta**2

    if (pPrecision > 0 and pRecall > 0):
        pF1 = (1+beta_squared) * pPrecision * pRecall/(beta_squared*pPrecision + pRecall)
        return pF1
    else:
        return 0

import torch
import torchvision
from torch import nn, optim
from torch.optim import lr_scheduler

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # assumed device setup, not shown in the original post

model_conv = torchvision.models.regnet_y_32gf(weights = 'RegNet_Y_32GF_Weights.IMAGENET1K_SWAG_E2E_V1')
#model_conv = torchvision.models.efficientnet_b7(weights = 'EfficientNet_B7_Weights.IMAGENET1K_V1')

for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)

model_conv = model_conv.to(device)

criterion = pfbeta_fast #set model criterion 

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=1)

When I run the above code I get the error below:

Epoch 0/0
----------
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_23/3648721085.py in <module>
      1 model_conv = train_model(model_conv, criterion, optimizer_conv,
----> 2                          exp_lr_scheduler, num_epochs=1)

/tmp/ipykernel_23/3892717004.py in train_model(model, criterion, optimizer, scheduler, num_epochs)
     32                     outputs = model(inputs)
     33                     _, preds = torch.max(outputs, 1)
---> 34                     loss = criterion(outputs, labels)
     35 
     36                     # backward + optimize only if in training phase

/tmp/ipykernel_23/3685174594.py in pfbeta_fast(labels, predictions, beta)
      1 def pfbeta_fast(labels, predictions, beta=1.3):
      2 
----> 3     pTP = np.sum(labels * predictions)
      4     pFP = np.sum((1-labels) * predictions)
      5     num_positives = np.sum(labels)  #  = pTP+pFN

RuntimeError: The size of tensor a (2) must match the size of tensor b (16) at non-singleton dimension 1

Would anyone be able to help me with this? Also, is there a specific procedure that needs to be followed, such as converting the loss function to PyTorch operations, for it to be usable? Please advise.

Thanks & Best Regards
AMJS

Check the shape of labels and predictions in pTP = np.sum(labels * predictions) and make sure you can multiply these numpy arrays.
Based on your code it seems your model output and the targets are expected to match in their shape, which isn’t the case.
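
For illustration, a minimal sketch of the mismatch, using the shapes from your traceback (a batch of 16 samples and 2 output classes); the tensor names here are just placeholders:

import torch

# hypothetical shapes taken from the traceback: 16 samples, 2 output classes
outputs = torch.randn(16, 2)           # model output: [batch_size, num_classes]
labels = torch.randint(0, 2, (16,))    # targets: [batch_size] class indices

print(outputs.shape, labels.shape)     # torch.Size([16, 2]) torch.Size([16])

# labels * outputs                     # would raise the same RuntimeError:
                                       # size of tensor a (2) vs tensor b (16)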

Hi, thanks for the reply. Please find the code below.
Train function:

import copy
import time

import torch

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(labels, preds)  # The previous code was: criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

Custom loss function converted to use PyTorch tensors:

def pfbeta_fast(labels, predictions, beta=1.3):
    labels = torch.Tensor(labels)
    predictions = torch.Tensor(predictions)
    
    pTP = torch.sum(labels * predictions)
    pFP = torch.sum((1-labels) * predictions)
    num_positives = torch.sum(labels)  #  = pTP+pFN

    pPrecision = pTP/(pTP+pFP)
    pRecall = pTP/num_positives

    beta_squared = beta**2

    if (pPrecision > 0 and pRecall > 0):
        pF1 = (1+beta_squared) * pPrecision * pRecall/(beta_squared*pPrecision + pRecall)
        return pF1
    else:
        return 0

I still get an error, now a different one, shown below:

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:2: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:3: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  This is separate from the ipykernel package so we can avoid doing imports until
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_27/1892088165.py in <module>
----> 1 model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=1)

/tmp/ipykernel_27/3619938709.py in train_model(model, criterion, optimizer, scheduler, num_epochs)
     37                     # backward + optimize only if in training phase
     38                     if phase == 'train':
---> 39                         loss.backward()
     40                         optimizer.step()
     41 

/opt/conda/lib/python3.7/site-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    486             )
    487         torch.autograd.backward(
--> 488             self, gradient, retain_graph, create_graph, inputs=inputs
    489         )
    490 

/opt/conda/lib/python3.7/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    197     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    198         tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 199         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
    200 
    201 def grad(

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Would anyone be able to help me with this, please?
Thanks & Best Regards
AMJS

Rewrapping tensors will detach them from the computation graph:

    labels = torch.Tensor(labels)
    predictions = torch.Tensor(predictions)

and you would need to use the original tensors directly instead.
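
For illustration, a small standalone sketch of the difference (a toy linear model, not your actual training code):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
x = torch.randn(8, 4)

out = model(x)
print(out.grad_fn)               # <AddmmBackward0 ...> - still attached to the graph

rewrapped = torch.Tensor(out)    # creates a brand-new tensor (this also raised the copy-construct warning in your run)
print(rewrapped.grad_fn)         # None - backward() from here can no longer reach the model

out.sum().backward()             # using the original tensor works as expected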

Hi, thanks for the reply. In the above code snippet the last line was originally:
loss = criterion(outputs, labels), as mentioned in my comment. The original code worked well with the built-in loss function nn.CrossEntropyLoss().

Furthermore, when looking at the outputs variable, it has the following form:

tensor([[-0.1908,  0.4115],
        [-1.0019, -0.1685],
        [-1.1265, -0.3025],
        [-0.5925, -0.6610],
        [-0.4076, -0.4897],
        [-0.6450, -0.2863],
        [ 0.1632,  0.4944],
        [-1.0743,  0.1003],
        [ 0.6172,  0.5104],
        [-0.2296, -0.0551],
        [-1.3165,  0.3386],
        [ 0.2705,  0.1200],
        [-1.3767, -0.6496],
        [-0.5603,  1.0609],
        [-0.0109,  0.5767],
        [-1.1081,  0.8886]], grad_fn=<AddmmBackward0>)

From what I understand, the first value of each list element is the raw model output, the second value is the prediction value, and the 16 elements in the list represent the batch size. If so, how is the model able to predict without going through the loss function even once (since it is a pre-trained model)? If I am incorrect, then what are the values of the outputs variable?

To the best of my knowledge I adjusted the loss function and kept the original training code, since I did not want to adjust the train function to accommodate a different loss function.
Please find the code of the edited loss function below:

def pfbeta_torch(predictions, labels, beta=1.3):
    #labels = torch.tensor(labels.clone().detach().requires_grad(True),dtype=torch.float64)
    #predictions = torch.tensor(predictions.clone().detach().requires_grad(True),dtype=torch.float64)
    predictions = torch.tensor([1 if x > 0.5 else 0 for x in predictions[:,1]])
    pTP = torch.sum(labels * predictions)
    pFP = torch.sum((1-labels) * predictions)
    num_positives = torch.sum(labels)  #  = pTP+pFN

    pPrecision = pTP/(pTP+pFP)
    pRecall = pTP/num_positives

    beta_squared = beta**2

    if (pPrecision > 0 and pRecall > 0):
        pF1 = (1+beta_squared) * pPrecision * pRecall/(beta_squared*pPrecision + pRecall)
        return pF1
    else:
        return 0

Now I get a different error; please have a look below:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_27/1892088165.py in <module>
----> 1 model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=1)

/tmp/ipykernel_27/3271713126.py in train_model(model, criterion, optimizer, scheduler, num_epochs)
     37                     # backward + optimize only if in training phase
     38                     if phase == 'train':
---> 39                         loss.backward()
     40                         optimizer.step()
     41 

/opt/conda/lib/python3.7/site-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    486             )
    487         torch.autograd.backward(
--> 488             self, gradient, retain_graph, create_graph, inputs=inputs
    489         )
    490 

/opt/conda/lib/python3.7/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    197     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    198         tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 199         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
    200 
    201 def grad(

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Afterwards I changed the loss function code as below and re-ran it.

def pfbeta_torch(predictions, labels, beta=1.3):
    #labels = torch.tensor(labels.clone().detach().requires_grad(True),dtype=torch.float64)
    #predictions = torch.tensor(predictions.clone().detach().requires_grad(True),dtype=torch.float64)
    predictions = [1 if x > 0.5 else 0 for x in predictions[:,1]] #Get second element and recode as 1 if value > than .5 else 0
    predictions = torch.tensor(predictions, requires_grad=True) #Loss function changed here
    pTP = torch.sum(labels * predictions)
    pFP = torch.sum((1-labels) * predictions)
    num_positives = torch.sum(labels)  #  = pTP+pFN

    pPrecision = pTP/(pTP+pFP)
    pRecall = pTP/num_positives

    beta_squared = beta**2

    if (pPrecision > 0 and pRecall > 0):
        pF1 = (1+beta_squared) * pPrecision * pRecall/(beta_squared*pPrecision + pRecall)
        return pF1
    else:
        return 0

However, I get another error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_27/1892088165.py in <module>
----> 1 model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=1)

/tmp/ipykernel_27/3271713126.py in train_model(model, criterion, optimizer, scheduler, num_epochs)
     33                     _, preds = torch.max(outputs, 1)
     34                     print(outputs,labels)
---> 35                     loss = criterion(outputs,labels) #Changed here to criterion(labels,preds)
     36 
     37                     # backward + optimize only if in training phase

/tmp/ipykernel_27/3035900216.py in pfbeta_torch(predictions, labels, beta)
      3     #predictions = torch.tensor(predictions.clone().detach().requires_grad(True),dtype=torch.float64)
      4     predictions = [1 if x > 0.5 else 0 for x in predictions[:,1]] #Get second element and recode as 1 if value > than .5 else 0
----> 5     predictions = torch.tensor(predictions,requires_grad=True)
      6     pTP = torch.sum(labels * predictions)
      7     pFP = torch.sum((1-labels) * predictions)

RuntimeError: Only Tensors of floating point and complex dtype can require gradients

Would you be able to help me with this matter, please?
Thanks & Best Regards
AMJS

Hi, I am not sure that I understand what you said. Could you please elaborate, taking my additional info into account as well?
Thanks & Best Regards
AMJS

You are still recreating tensors in your updated code, now with the requires_grad=True attribute, which is wrong as it will still detach the tensor from the computation graph.
Besides that, you are trying to recreate a tensor containing integer values, which is also invalid, since only floating point values can require gradients.
This threshold operation:

predictions = [1 if x > 0.5 else 0 for x in predictions[:,1]]

creates integer values and detaches predictions from the computation graph, since the comparison is not differentiable.
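
If you want something you can call .backward() on, one option is to feed the class probabilities into the metric instead of hard 0/1 predictions. The sketch below is only an illustration of that idea, not your exact metric: it assumes column 1 of the model output is the positive class, applies a softmax to the logits, and uses a small epsilon instead of the precision/recall check to avoid division by zero:

import torch
import torch.nn.functional as F

def pfbeta_torch_soft(outputs, labels, beta=1.3, eps=1e-8):
    # outputs: raw logits [batch_size, 2], labels: class indices [batch_size]
    probs = F.softmax(outputs, dim=1)[:, 1]   # probability of the positive class, keeps grad_fn
    labels = labels.float()

    pTP = torch.sum(labels * probs)
    pFP = torch.sum((1 - labels) * probs)
    num_positives = torch.sum(labels)         # = pTP + pFN

    beta_squared = beta ** 2
    pPrecision = pTP / (pTP + pFP + eps)
    pRecall = pTP / (num_positives + eps)

    pF1 = (1 + beta_squared) * pPrecision * pRecall / (beta_squared * pPrecision + pRecall + eps)
    return 1.0 - pF1                          # minimising the loss maximises pF1

# quick check that gradients reach the logits
logits = torch.randn(16, 2, requires_grad=True)
labels = torch.randint(0, 2, (16,))
loss = pfbeta_torch_soft(logits, labels)
loss.backward()
print(logits.grad is not None)                # True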

Hi, thanks for the reply. This is the new loss function, which I have edited:

def pfbeta_torch(predictions, labels, beta=1.3):
    #labels = torch.tensor(labels.clone().detach(), dtype=torch.float64, requires_grad=True)
    predictions = torch.tensor(predictions.clone(), dtype=torch.float64, requires_grad=True)
    pTP = torch.sum(labels * predictions)
    pFP = torch.sum((1 - labels) * predictions)
    num_positives = torch.sum(labels)  #  = pTP+pFN

    pPrecision = pTP / (pTP + pFP)
    pRecall = pTP / num_positives

    beta_squared = beta ** 2
    # x=0
    if (pPrecision > 0 and pRecall > 0):
        pF1 = (1 + beta_squared) * pPrecision * pRecall / (beta_squared * pPrecision + pRecall)
        return pF1
    else:
        return torch.tensor(0, dtype=torch.float64, requires_grad=True)

The inputs for the function from the train loop:

preds:  tensor([0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0])
labels:  tensor([0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1])

Nevertheless, I still get a warning, shown below:

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:3: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  This is separate from the ipykernel package so we can avoid doing imports until

Would you be able to help me with this matter, please?

Thanks & Best Regards
AMJS

To solve the following warning,

/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:3: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).

Modify

predictions = torch.tensor(predictions.clone(), dtype=torch.float64, requires_grad=True)

to

predictions = predictions.clone().detach().requires_grad_(True)

clone: copies the tensor (with its gradient history, if the source has one)
detach: removes the tensor from the computation graph
requires_grad_(True): in-place operation that makes the target tensor require gradients, so a new graph is built from it
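
As a small standalone illustration of the recommended pattern (the src tensor below is just a stand-in for your predictions):

import torch

src = torch.randn(16)                                # stands in for the predictions tensor

warned = torch.tensor(src)                           # emits the copy-construct UserWarning
clean = src.clone().detach().requires_grad_(True)    # recommended pattern, no warning

print(clean.requires_grad, clean.grad_fn)            # True None -> clean is a new leaf tensor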

Hi,
Thanks for the reply. However, it gives me this error:
RuntimeError: only Tensors of floating point dtype can require gradients.

Thanks & Best Regards
AMJS

predictions = predictions.clone().detach().to(torch.float64).requires_grad_(True)

You are again recreating the predictions tensor:

predictions = torch.tensor(predictions.clone(), dtype=torch.float64, requires_grad=True)

which will detach it from the computation graph and raise the warning. Just remove this line of code.

@thecho7’s suggestion will get rid of the error but will explicitly detach the tensor. If you want to use pfbeta_torch as a loss function and eventually want to call .backward() on the output tensor, this is also wrong.
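
A minimal sketch of why the explicit detach defeats the purpose, using a toy linear model rather than your RegNet:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
x = torch.randn(8, 4)

out = model(x)                                     # has a grad_fn, attached to the model
detached = out.clone().detach().requires_grad_(True)

loss = detached.sum()
loss.backward()                                    # runs without an error...
print(model.weight.grad)                           # ...but prints None: no gradient
                                                   # reaches the model parameters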

Hi,
Thanks for the reply. After commenting out the line in question I ran the code again, only to get another error, which is shown below:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_23/2530295950.py in <module>
----> 1 model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=100)

/tmp/ipykernel_23/2815325787.py in train_model(model, criterion, optimizer, scheduler, num_epochs)
     39                     # backward + optimize only if in training phase
     40                     if phase == 'train':
---> 41                         loss.backward()
     42                         optimizer.step()
     43 

/opt/conda/lib/python3.7/site-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    486             )
    487         torch.autograd.backward(
--> 488             self, gradient, retain_graph, create_graph, inputs=inputs
    489         )
    490 

/opt/conda/lib/python3.7/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    197     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    198         tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 199         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
    200 
    201 def grad(

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

Would you be able to help me with this, please?
Thanks & Best Regards
AMJS

I guess you are still using non-differentiable operations, such as the threshold operation, as already mentioned:

If not, check whether you are still re-wrapping a tensor somewhere, or whether any operation creates an integer-type output tensor or otherwise removes the .grad_fn attribute of a tensor.
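
A quick way to check is to print .requires_grad and .grad_fn of the intermediate tensors inside your loss function. A toy sketch of what to look for:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
inputs = torch.randn(8, 4)

outputs = model(inputs)
print(outputs.requires_grad, outputs.grad_fn)     # True, <AddmmBackward0 ...>

hard = (outputs[:, 1] > 0.5).float()              # thresholding: the comparison is not differentiable
print(hard.requires_grad, hard.grad_fn)           # False, None -> backward() would fail

soft = torch.softmax(outputs, dim=1)[:, 1]        # arithmetic on the logits keeps the graph
print(soft.requires_grad, soft.grad_fn)           # True, <SelectBackward0 ...>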

Hi, as mentioned above, I have since changed my loss function. Also, would you be able to explain why the preds tensor does not have autograd (requires_grad=True) attached to it for back-propagation purposes? This example from PyTorch clearly shows that it is needed. I am not sure why it is not generated in my example, which I have borrowed from this PyTorch tutorial.

Please let me know your feedback on this matter.
Thanks & Best Regards
AMJS