RuntimeError: cuDNN error: CUDNN_STATUS_MAPPING_ERROR

My input shape, without the batch dimension, is 87 x 61 x 73 x 61.

Here, 87 is the number of time points and [61, 73, 61] is the spatial size of each 3D volume.
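
For reference, here is a minimal sketch (my own check, assuming the Conv3d/MaxPool3d settings from the code below) of why the LSTM's input_size works out to 8 * 30 * 36 * 30 per time point:

import torch
import torch.nn as nn

# One time point reshaped to (N, channels, D, H, W), as done in forward()
frame = torch.randn(1, 1, 61, 73, 61)

cnn = nn.Sequential(
    nn.Conv3d(1, 4, kernel_size=5, padding=2, bias=False),  # kernel 5, padding 2 -> size preserved
    nn.Conv3d(4, 8, kernel_size=5, padding=2, bias=False),  # size preserved
    nn.MaxPool3d(kernel_size=2, stride=2),                   # (61, 73, 61) -> (30, 36, 30)
)

out = cnn(frame)
print(out.shape)          # torch.Size([1, 8, 30, 36, 30])
print(8 * 30 * 36 * 30)   # 86400, the LSTM input_size per time step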

I am trying to run the code below on 4 GPUs (one sub-module per GPU).

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as D


class cnn_lstm(torch.nn.Module):

    def __init__(self):
        super(cnn_lstm, self).__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv3d( 1, 4, kernel_size=(5,5,5), padding=(2, 2, 2), bias=False),
            nn.ReLU(inplace=True),
        ).cuda(0)
        
        self.cnn2 = nn.Sequential(
            nn.Conv3d( 4, 8, kernel_size=(5,5,5) , padding=(2, 2, 2), bias=False),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2),
            nn.ReLU(inplace=True),
        ).cuda(1)
        self.rnn = nn.LSTM(input_size=8 * 30 * 36 * 30, hidden_size=500, num_layers=3, batch_first=True).cuda(2)
        self.classifier = nn.Sequential(
            #nn.Dropout(),
            nn.Linear(500, 100),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(100, classes),
        ).cuda(3)

    def forward(self, x):
        # x: (batch, timesteps, D, H, W) = (batch, 87, 61, 73, 61)
        batch_size, timesteps, C, H, W = x.size()
        # Fold time into the batch dimension and add a channel dimension of 1
        c_in = x.contiguous().view(batch_size * timesteps, C, H, W)
        c_in = c_in.view(-1, 1, C, H, W).float()

        c_out = self.cnn1(c_in.cuda(0))
        c_out = self.cnn2(c_out.cuda(1))
        c_out = c_out.view(-1, 8 * 30 * 36 * 30)

        # Restore (batch, timesteps, features) for the LSTM
        r_in = c_out.view(batch_size, timesteps, -1)
        r_out, (h_n, h_c) = self.rnn(r_in.cuda(2))

        # Classify using the last time step's output
        r_out2 = self.classifier(r_out[:, -1, :].cuda(3))
        return F.log_softmax(r_out2, dim=1)


trainloader = D.DataLoader(ds_train, batch_size=2, shuffle=True, num_workers=0)
validloader = D.DataLoader(ds_valid, batch_size=2, shuffle=True, num_workers=0)
testloader = D.DataLoader(ds_test, batch_size=2, shuffle=False, num_workers=0)

model = cnn_lstm()
torch.cuda.empty_cache()
optimizer = optim.SGD(model.parameters(), lr=0.001)

train_losses = []
valid_losses = []
avg_train_losses = []
avg_valid_losses = [] 
	 
for epoch in range(n_epochs):
    running_loss = 0
    model.train()
    for data, label in trainloader:
        data = data.float()
        y_hat = model(data)
        data = data.cpu()
        loss = criterion(y_hat.cuda(3), label.cuda(3)).cuda(3)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

I get this error:

    loss.backward()
  File "/home/.../lib/python3.6/site-packages/torch/tensor.py", line 118, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/.../lib/python3.6/site-packages/torch/autograd/__init__.py", line 93, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: cuDNN error: CUDNN_STATUS_MAPPING_ERROR

Torch Version: 1.2.0
CUDA Version: 10.1.168

GPU Information:
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla P100-PCIE...  On   | 00000000:03:00.0 Off |                    0 |
| N/A   27C    P0    25W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   30C    P0    23W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla P100-PCIE...  On   | 00000000:82:00.0 Off |                    0 |
| N/A   30C    P0    25W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   3  Tesla P100-PCIE...  On   | 00000000:83:00.0 Off |                    0 |
| N/A   27C    P0    25W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

Please help: how can I fix the error CUDNN_STATUS_MAPPING_ERROR?

I see the same error log. Strangely, when I disabled cuDNN the error disappeared and training ran successfully, so I think the bug may lie in cuDNN.
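
In case it helps, disabling cuDNN globally looks like this (a minimal sketch; torch.backends.cudnn.enabled is the standard switch and should be set before building the model):

import torch

# Fall back to PyTorch's native (non-cuDNN) kernels for conv and LSTM ops.
# Usually slower, but it side-steps cuDNN-specific failures like this one.
torch.backends.cudnn.enabled = False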

I tried to reproduce this error using this code:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset


class cnn_lstm(torch.nn.Module):

    def __init__(self):
        super(cnn_lstm, self).__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv3d( 1, 4, kernel_size=(5,5,5), padding=(2, 2, 2), bias=False),
            nn.ReLU(inplace=True),
        ).cuda(0)
        
        self.cnn2 = nn.Sequential(
            nn.Conv3d( 4, 8, kernel_size=(5,5,5) , padding=(2, 2, 2), bias=False),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2),
            nn.ReLU(inplace=True),
        ).cuda(1)
        self.rnn = nn.LSTM(input_size=8 * 30 * 36 * 30, hidden_size=500, num_layers=3, batch_first=True).cuda(2)
        self.classifier = nn.Sequential(
            #nn.Dropout(),
            nn.Linear(500, 100),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(100, classes),
        ).cuda(3)

    def forward(self, x):
        batch_size, timesteps, C, H, W = x.size()
        c_in = x.contiguous().view(batch_size * timesteps, C, H, W)
        c_in = c_in.view(-1, 1, C, H, W).float()
       
        c_out = self.cnn1(c_in.cuda(0))
        c_out = self.cnn2(c_out.cuda(1))
        c_out = c_out.view(-1, 8 * 30 * 36 * 30)
      
        r_in = c_out.view(batch_size, timesteps, -1)
        r_out, (h_n, h_c) = self.rnn(r_in.cuda(2))

        r_out2 = self.classifier(r_out[:, -1, :].cuda(3))
        return F.log_softmax(r_out2, dim=1)


classes = 10
N = 1
ds_train = TensorDataset(
    torch.randn(N, 87, 61, 73, 61),
    torch.randint(0, classes, (N,))
)

trainloader = DataLoader(ds_train, batch_size=2, shuffle=True, num_workers=0)


model = cnn_lstm()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()

train_losses = []
avg_train_losses = []

n_epochs = 1
for epoch in range(n_epochs):
    running_loss = 0
    model.train()
    for data, label in trainloader:
        data = data.float()
        y_hat = model(data)
        data = data.cpu()
        loss = criterion(y_hat.cuda(3), label.cuda(3)).cuda(3)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

but it works fine with PyTorch built from master and cuDNN 7.6.5.32.
Could you update PyTorch to the latest stable release and let me know how to reproduce this issue?
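
For reference, a quick way to report the versions and GPU count in play (standard PyTorch attributes, nothing specific to this thread):

import torch

print(torch.__version__)                # PyTorch version
print(torch.version.cuda)               # CUDA version PyTorch was built against
print(torch.backends.cudnn.version())   # cuDNN version, e.g. 7605 for 7.6.5
print(torch.cuda.device_count())        # number of visible GPUs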

For anyone still hitting this error: I ran into the same problem today. The cause was that I was using multiple GPUs; I "fixed" it by limiting the number of GPUs to 1. So this probably has something to do with data being spread across different GPUs.
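
One way to apply that workaround (a sketch of my setup, not the only option) is to expose a single physical GPU before CUDA is initialized and keep the whole model on it:

import os

# Make only one physical GPU visible; must be set before torch initializes CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch

print(torch.cuda.device_count())  # 1

# With a single visible device, keep the whole model and every batch on it,
# e.g. change each .cuda(0)/.cuda(1)/.cuda(2)/.cuda(3) call in cnn_lstm to .cuda(0)
# so no tensors are spread across GPUs.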