I am trying to switch to dice loss for a semantic segmentation and I was having problems with the autograd.

I saw on another post that

“**torch.backends.cudnn.benchmark = True**” could help with this but now I get the following error

Anyone have any ideas how to fix.

```
RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR
You can try to repro this exception using the following code snippet. If that doesn't trigger the error, please include your original repro script when reporting this issue.
import torch
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.allow_tf32 = True
data = torch.randn([5, 64, 64, 64], dtype=torch.float, device='cuda', requires_grad=True)
net = torch.nn.Conv2d(64, 3, kernel_size=[1, 1], padding=[0, 0], stride=[1, 1], dilation=[1, 1], groups=1)
net = net.cuda().float()
out = net(data)
out.backward(torch.randn_like(out))
torch.cuda.synchronize()
ConvolutionParams
data_type = CUDNN_DATA_FLOAT
padding = [0, 0, 0]
stride = [1, 1, 0]
dilation = [1, 1, 0]
groups = 1
deterministic = false
allow_tf32 = true
input: TensorDescriptor 0000028C53188760
type = CUDNN_DATA_FLOAT
nbDims = 4
dimA = 5, 64, 64, 64,
strideA = 262144, 4096, 64, 1,
output: TensorDescriptor 0000028C53188840
type = CUDNN_DATA_FLOAT
nbDims = 4
dimA = 5, 3, 64, 64,
strideA = 12288, 4096, 64, 1,
weight: FilterDescriptor 0000028C5023B060
type = CUDNN_DATA_FLOAT
tensor_format = CUDNN_TENSOR_NCHW
nbDims = 4
dimA = 3, 64, 1, 1,
Pointer addresses:
input: 000000076F5A0000
output: 0000000722600000
weight: 00000007095D4400
```

I found an implementation of the loss function on the internet

```
class BinaryDiceLoss(nn.Module):
"""Dice loss of binary class
Args:
smooth: A float number to smooth loss, and avoid NaN error, default: 1
p: Denominator value: \sum{x^p} + \sum{y^p}, default: 2
predict: A tensor of shape [N, *]
target: A tensor of shape same with predict
reduction: Reduction method to apply, return mean over batch if 'mean',
return sum if 'sum', return a tensor of shape [N,] if 'none'
Returns:
Loss tensor according to arg reduction
Raise:
Exception if unexpected reduction
"""
def __init__(self, smooth=1, p=2, reduction='mean'):
super(BinaryDiceLoss, self).__init__()
self.smooth = smooth
self.p = p
self.reduction = reduction
def forward(self, predict, target):
assert predict.shape[0] == target.shape[0], "predict & target batch size don't match"
predict = predict.contiguous().view(predict.shape[0], -1)
target = target.contiguous().view(target.shape[0], -1)
num = torch.sum(torch.mul(predict, target), dim=1) + self.smooth
den = torch.sum(predict.pow(self.p) + target.pow(self.p), dim=1) + self.smooth
loss = 1 - num / den
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
elif self.reduction == 'none':
return loss
else:
raise Exception('Unexpected reduction {}'.format(self.reduction))
class DiceLoss(nn.Module):
"""Dice loss, need one hot encode input
Args:
weight: An array of shape [num_classes,]
ignore_index: class index to ignore
predict: A tensor of shape [N, C, *]
target: A tensor of same shape with predict
other args pass to BinaryDiceLoss
Return:
same as BinaryDiceLoss
"""
def __init__(self, weight=None, ignore_index=None, **kwargs):
super(DiceLoss, self).__init__()
self.kwargs = kwargs
self.weight = weight
self.ignore_index = ignore_index
def forward(self, predict, target):
target = torch.nn.functional.one_hot(target, num_classes = 3).permute(0,3,1,2).contiguous()
assert predict.shape == target.shape, 'predict & target shape do not match'
dice = BinaryDiceLoss(**self.kwargs)
total_loss = 0
predict = F.softmax(predict, dim=1)
for i in range(target.shape[1]):
if i != self.ignore_index:
dice_loss = dice(predict[:, i], target[:, i])
if self.weight is not None:
assert self.weight.shape[0] == target.shape[1], \
'Expect weight shape [{}], get[{}]'.format(target.shape[1], self.weight.shape[0])
dice_loss *= self.weight[i]
total_loss += dice_loss
return total_loss/target.shape[1]
```