RuntimeError: CUDNN_STATUS_EXECUTION_FAILED at loss

Xiaoyu_Song · January 9, 2019, 8:32am

I get this error during training:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-30-d2b2adb629c5> in <module>
     20         # Define the loss
     21         loss = criterion(outputs, labels.long().cuda())
---> 22         loss.backward()
     23         optimizer.step()
     24 

~/miniconda3/envs/deep_mol/lib/python3.6/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
     91                 products. Defaults to ``False``.
     92         """
---> 93         torch.autograd.backward(self, gradient, retain_graph, create_graph)
     94 
     95     def register_hook(self, hook):

~/miniconda3/envs/deep_mol/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
     87     Variable._execution_engine.run_backward(
     88         tensors, grad_tensors, retain_graph, create_graph,
---> 89         allow_unreachable=True)  # allow_unreachable flag
     90 
     91 

RuntimeError: CUDNN_STATUS_EXECUTION_FAILED

I then switched to running on the CPU and got the following error instead:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-31-6b269cdd76cf> in <module>
     20 
     21         # Define the loss
---> 22         loss = criterion(outputs, labels.long())
     23         loss.backward()
     24         optimizer.step()

~/miniconda3/envs/deep_mol/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    489             result = self._slow_forward(*input, **kwargs)
    490         else:
--> 491             result = self.forward(*input, **kwargs)
    492         for hook in self._forward_hooks.values():
    493             hook_result = hook(self, input, result)

~/miniconda3/envs/deep_mol/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
    191         _assert_no_grad(target)
    192         return F.nll_loss(input, target, self.weight, self.size_average,
--> 193                           self.ignore_index, self.reduce)
    194 
    195 

~/miniconda3/envs/deep_mol/lib/python3.6/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce)
   1332         return torch._C._nn.nll_loss(input, target, weight, size_average, ignore_index, reduce)
   1333     elif dim == 4:
-> 1334         return torch._C._nn.nll_loss2d(input, target, weight, size_average, ignore_index, reduce)
   1335     elif dim == 3 or dim > 4:
   1336         n = input.size(0)

RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed.  at /opt/conda/conda-bld/pytorch_1525909934016/work/aten/src/THNN/generic/SpatialClassNLLCriterion.c:111

Could someone help?