What is the formula for cross entropy loss with label smoothing?

Hello, I found that the result of the built-in cross entropy loss with label smoothing is different from my own implementation, and I am not sure whether my implementation has a bug. Here is the script:

import torch

class label_smooth_loss(torch.nn.Module):
    def __init__(self, num_classes, smoothing=0.1):
        super(label_smooth_loss, self).__init__()
        self.negative = smoothing / (num_classes - 1)  # smoothing mass spread over the other K - 1 classes
        self.positive = (1 - smoothing)                 # probability kept on the true class
    
    def forward(self, pred, target):
        pred = pred.log_softmax(dim=1)
        true_dist = torch.zeros_like(pred)
        true_dist.fill_(self.negative)
        true_dist.scatter_(1, target.data.unsqueeze(1), self.positive)
        return torch.sum(-true_dist * pred, dim=1).mean()


x = torch.randn(1, 10)           # logits for a single sample with 10 classes
y = torch.randint(10, size=[1])  # random integer class target

loss1 = label_smooth_loss(num_classes=10, smoothing=0.1)
loss2 = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

print(loss1(x,y), loss2(x,y))



The label smoothing concept was introduced in the paper https://arxiv.org/pdf/1512.00567.pdf (Rethinking the Inception Architecture for Computer Vision), and PyTorch follows the same formulation: the smoothed target distribution is (1 - smoothing) * one_hot + smoothing / num_classes, so the true class receives probability (1 - smoothing) + smoothing / num_classes and every other class receives smoothing / num_classes. Your implementation instead spreads the smoothing mass over only the other num_classes - 1 classes and keeps exactly 1 - smoothing on the true class, which is why the two results differ.
You need to change your function as follows:

class label_smooth_loss(torch.nn.Module):
    def __init__(self, num_classes, smoothing=0.1):
        super(label_smooth_loss, self).__init__()
        eps = smoothing / num_classes
        self.negative = eps                    # smoothing / K assigned to every class
        self.positive = (1 - smoothing) + eps  # the true class keeps (1 - smoothing) plus its share of the smoothing mass
    
    def forward(self, pred, target):
        pred = pred.log_softmax(dim=1)
        true_dist = torch.zeros_like(pred)
        true_dist.fill_(self.negative)
        true_dist.scatter_(1, target.data.unsqueeze(1), self.positive)
        return torch.sum(-true_dist * pred, dim=1).mean()
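
As a quick sanity check (a minimal sketch; the seed, batch size, and variable names are just illustrative), the corrected class should now agree with the built-in loss up to floating point tolerance:

import torch

torch.manual_seed(0)
x = torch.randn(4, 10)            # logits for a batch of 4 samples, 10 classes
y = torch.randint(10, size=(4,))  # integer class targets

custom = label_smooth_loss(num_classes=10, smoothing=0.1)
builtin = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

print(custom(x, y), builtin(x, y))
print(torch.allclose(custom(x, y), builtin(x, y)))  # expected: True

Both losses average over the batch by default (the custom class calls .mean() and CrossEntropyLoss uses reduction='mean'), so the two scalars can be compared directly.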
