CUDA error in Semantic Segmentation with FCN

RuntimeError Traceback (most recent call last)
in <module>()
     15 labels=labels.long()
     16 lossi = loss(output, labels)
---> 17 lossi.backward()
     18 optimizer.step()
     19 print(f"batch {c}, loss->{lossi.item()}")

1 frames
/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
125 Variable._execution_engine.run_backward(
126 tensors, grad_tensors, retain_graph, create_graph,
--> 127 allow_unreachable=True)  # allow_unreachable flag
128
129

RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED
Exception raised from createCuDNNHandle at /pytorch/aten/src/ATen/cudnn/Handle.cpp:9 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x42 (0x7f5b063391e2 in /usr/local/lib/python3.6/dist-packages/torch/lib/libc10.so)
frame #1: + 0x100ca68 (0x7f5b077aea68 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #2: at::native::getCudnnHandle() + 0x108d (0x7f5b077b034d in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #3: + 0xeda4cc (0x7f5b0767c4cc in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #4: + 0xed59ee (0x7f5b076779ee in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #5: + 0xed75db (0x7f5b076795db in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #6: at::native::cudnn_convolution_backward_input(c10::ArrayRef, at::Tensor const&, at::Tensor const&, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, long, bool, bool) + 0xb2 (0x7f5b07679b32 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #7: + 0xf3cd3b (0x7f5b076ded3b in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #8: + 0xf6cb58 (0x7f5b0770eb58 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #9: at::cudnn_convolution_backward_input(c10::ArrayRef, at::Tensor const&, at::Tensor const&, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, long, bool, bool) + 0x1ad (0x7f5b3e58588d in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #10: at::native::cudnn_convolution_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, long, bool, bool, std::array<bool, 2ul>) + 0x223 (0x7f5b07678203 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #11: + 0xf3ce25 (0x7f5b076dee25 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #12: + 0xf6cbb4 (0x7f5b0770ebb4 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cuda.so)
frame #13: at::cudnn_convolution_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, long, bool, bool, std::array<bool, 2ul>) + 0x1e2 (0x7f5b3e594242 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #14: + 0x2ec9c62 (0x7f5b40257c62 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #15: + 0x2ede224 (0x7f5b4026c224 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #16: at::cudnn_convolution_backward(at::Tensor const&, at::Tensor const&, at::Tensor const&, c10::ArrayRef, c10::ArrayRef, c10::ArrayRef, long, bool, bool, std::array<bool, 2ul>) + 0x1e2 (0x7f5b3e594242 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::generated::CudnnConvolutionBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x258 (0x7f5b400dec38 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #18: + 0x3375bb7 (0x7f5b40703bb7 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #19: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&, std::shared_ptr<torch::autograd::ReadyQueue> const&) + 0x1400 (0x7f5b406ff400 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #20: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&) + 0x451 (0x7f5b406fffa1 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #21: torch::autograd::Engine::thread_init(int, std::shared_ptr<torch::autograd::ReadyQueue> const&, bool) + 0x89 (0x7f5b406f8119 in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_cpu.so)
frame #22: torch::autograd::python::PythonEngine::thread_init(int, std::shared_ptr<torch::autograd::ReadyQueue> const&, bool) + 0x4a (0x7f5b4de9834a in /usr/local/lib/python3.6/dist-packages/torch/lib/libtorch_python.so)
frame #23: + 0xbd6df (0x7f5b6af1a6df in /usr/lib/x86_64-linux-gnu/libstdc++.so.6)
frame #24: + 0x76db (0x7f5b6bffc6db in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #25: clone + 0x3f (0x7f5b6c335a3f in /lib/x86_64-linux-gnu/libc.so.6)

The above is the error. The relevant part of the training loop is:
output = fcn_resnet_50(images)["out"]
labels = labels.long()
lossi = loss(output, labels)
lossi.backward()
optimizer.step()

The number of classes matches the number of label values.
Link to the Colab notebook.
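For context, here is a trimmed-down sketch of the training setup. Everything below (the model construction, loss, optimizer, and the dummy batch) is a placeholder written for this post rather than the exact notebook code, and the random batch does not reproduce the crash; it only shows the structure of the loop:

import torch
import torch.nn as nn
from torchvision.models.segmentation import fcn_resnet50

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = 21  # placeholder; must match the label values used in the masks
fcn_resnet_50 = fcn_resnet50(num_classes=num_classes).to(device)

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(fcn_resnet_50.parameters(), lr=1e-3)

# Dummy batch standing in for what the real DataLoader yields.
images = torch.randn(2, 3, 256, 256, device=device)
labels = torch.randint(0, num_classes, (2, 256, 256), device=device)

for c in range(2):
    optimizer.zero_grad()
    output = fcn_resnet_50(images)["out"]  # logits, shape [N, num_classes, H, W]
    labels = labels.long()                 # CrossEntropyLoss expects LongTensor targets
    lossi = loss(output, labels)           # targets: [N, H, W], values in [0, num_classes)
    lossi.backward()
    optimizer.step()
    print(f"batch {c}, loss->{lossi.item():.4f}")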

Could you post an executable code snippet to reproduce this issue, please?
Also, which PyTorch and CUDA versions are you using?
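In case it helps, the versions can be printed directly from the notebook, for example:

import torch
import torchvision

print(torch.__version__)                # PyTorch version
print(torchvision.__version__)          # torchvision version
print(torch.version.cuda)               # CUDA version PyTorch was built against
print(torch.backends.cudnn.version())   # cuDNN version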

Thank you for the quick reply. It turned out to be my mistake: I was passing the wrong number of classes, which is why the CUDA error was raised.
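For anyone hitting the same error: the targets passed to nn.CrossEntropyLoss must lie in [0, num_classes), where num_classes is the value the model was built with. A quick sanity check along these lines (labels and num_classes below are placeholders, not code from the notebook) catches the mismatch before it surfaces as an opaque cuDNN/CUDA error:

import torch

num_classes = 21  # placeholder; set to the class count the model was created with
labels = torch.randint(0, num_classes, (256, 256))  # stand-in for one real mask

# Out-of-range label values make the loss index out of bounds on the GPU,
# which often shows up later as an unrelated-looking CUDA/cuDNN error.
assert labels.min() >= 0 and labels.max() < num_classes, (
    f"label values span [{labels.min().item()}, {labels.max().item()}], "
    f"which does not fit into {num_classes} classes"
)

Running the script once with the environment variable CUDA_LAUNCH_BLOCKING=1 also helps, since it makes the failing kernel report the error at the actual call site instead of somewhere later, such as inside backward().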