Hi,
The full code looks like this:
import torch
import torch.nn as nn

class LabelSmoothSoftmaxCE(nn.Module):
    def __init__(self,
                 lb_pos=0.9,
                 lb_neg=0.005,
                 reduction='mean',
                 lb_ignore=255,
                 *args, **kwargs):
        super(LabelSmoothSoftmaxCE, self).__init__()
        self.lb_pos = lb_pos
        self.lb_neg = lb_neg
        self.reduction = reduction
        self.lb_ignore = lb_ignore
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, logits, label):
        logs = self.log_softmax(logits)
        # temporarily map ignored pixels to class 0 so scatter_ receives valid indices
        ignore = label.data == self.lb_ignore
        label[ignore] = 0
        # build the smoothed (N, C, H, W) target
        lb_one_hot = logits.data.clone().zero_().scatter_(1, label.unsqueeze(1), 1)
        label = self.lb_pos * lb_one_hot + self.lb_neg * (1 - lb_one_hot)
        # zero the target at ignored positions so they contribute no loss
        idx = ignore.nonzero()
        label[idx[:, 0], :, idx[:, 1], idx[:, 2]] = 0

        if self.reduction == 'mean':
            loss = -torch.mean(torch.sum(logs * label, dim=1))
        elif self.reduction == 'none':
            loss = -torch.sum(logs * label, dim=1)
        return loss
if __name__ == '__main__':
    logits = torch.randn(2, 3, 4, 4)
    label = torch.randint(0, 3, (2, 4, 4))
    label[1, 2, 2] = 255
    label[1, 3, 2] = 255

    criteria_softmax = LabelSmoothSoftmaxCE(lb_pos=0.9, lb_neg=0.05, reduction='none')
    loss = criteria_softmax(logits, label)
    print(loss)
As can be seen, the input logits has size (2, 3, 4, 4) and label has size (2, 4, 4); I am trying to keep this consistent with the input requirements of nn.CrossEntropyLoss. How could I cope with this problem, please?
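For reference, this is roughly how nn.CrossEntropyLoss itself accepts those shapes (the variable names below just mirror the demo above, and ignore_index=255 is assumed to play the role of lb_ignore):

import torch
import torch.nn as nn

# nn.CrossEntropyLoss takes logits of shape (N, C, H, W) and integer labels
# of shape (N, H, W); pixels equal to ignore_index are excluded from the loss.
logits = torch.randn(2, 3, 4, 4)
label = torch.randint(0, 3, (2, 4, 4))
label[1, 2, 2] = 255

ce = nn.CrossEntropyLoss(ignore_index=255, reduction='none')
print(ce(logits, label).shape)  # torch.Size([2, 4, 4])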
Edit:
I noticed that this implementation increases memory usage during training. Is there a better way to do it?
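One possible way to reduce that memory cost (only a sketch, assuming the same lb_pos/lb_neg smoothing scheme as above; label_smooth_ce is a hypothetical helper name): the smoothed target equals lb_neg everywhere plus (lb_pos - lb_neg) at the true class, so the per-pixel loss can be computed with a gather and a sum over the class dimension, without ever materializing the (N, C, H, W) one-hot tensor:

import torch
import torch.nn.functional as F

def label_smooth_ce(logits, label, lb_pos=0.9, lb_neg=0.005, lb_ignore=255):
    # sketch of a gather-based variant that skips the (N, C, H, W) one-hot target
    logs = F.log_softmax(logits, dim=1)            # (N, C, H, W)
    valid = (label != lb_ignore)                   # (N, H, W)
    safe_label = label.clone()
    safe_label[~valid] = 0                         # any in-range class; masked out below
    # sum_c target_c * log p_c = lb_neg * sum_c log p_c + (lb_pos - lb_neg) * log p_true
    log_true = logs.gather(1, safe_label.unsqueeze(1)).squeeze(1)
    loss = -(lb_neg * logs.sum(dim=1) + (lb_pos - lb_neg) * log_true)
    return loss * valid.float()                    # 'none'-style output; ignored pixels are zero

This should give the same values as the reduction='none' branch above while only allocating (N, H, W)-sized intermediates besides the log-softmax output.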