My input shape, excluding the batch dimension, is 87 x 61 x 73 x 61.
Here, 87 is the number of time points and [61, 73, 61] is the 3D volume at each time point.
I am trying to run the code below on 4 GPUs.
class cnn_lstm(torch.nn.Module):
    """CNN + LSTM classifier for sequences of 3D volumes, split over four GPUs.

    Pipeline (one stage per device):
      * ``cnn1`` on ``cuda:0`` -- Conv3d(1 -> 4) + ReLU
      * ``cnn2`` on ``cuda:1`` -- Conv3d(4 -> 8) + MaxPool3d(2) + ReLU
      * ``rnn`` on ``cuda:2`` -- 3-layer LSTM over the flattened CNN features
      * ``classifier`` on ``cuda:3`` -- MLP applied to the last LSTM time step

    The flattened feature size is fixed at ``8 * 30 * 36 * 30``, so the model
    only accepts volumes that the single 2x max-pool reduces to 30 x 36 x 30
    (for example 61 x 73 x 61).

    The number of output classes is read from the module-level name
    ``classes`` when the model is constructed.
    """

    # Channels * pooled depth * pooled height * pooled width coming out of
    # cnn2; shared by the LSTM definition and the reshape in forward().
    _FEATURES = 8 * 30 * 36 * 30

    def __init__(self):
        # Zero-argument super(): the previous super(CNN, self) referenced an
        # undefined name and raised NameError as soon as the model was built.
        super().__init__()
        self.cnn1 = nn.Sequential(
            nn.Conv3d(1, 4, kernel_size=(5, 5, 5), padding=(2, 2, 2), bias=False),
            nn.ReLU(inplace=True),
        ).cuda(0)
        self.cnn2 = nn.Sequential(
            nn.Conv3d(4, 8, kernel_size=(5, 5, 5), padding=(2, 2, 2), bias=False),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2),
            nn.ReLU(inplace=True),
        ).cuda(1)
        self.rnn = nn.LSTM(
            input_size=self._FEATURES,
            hidden_size=500,
            num_layers=3,
            batch_first=True,
        ).cuda(2)
        self.classifier = nn.Sequential(
            nn.Linear(500, 100),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(100, classes),
        ).cuda(3)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of volume sequences.

        Args:
            x: 5-D tensor laid out as (batch, timesteps, depth, height, width).

        Returns:
            Tensor of shape (batch, classes) holding log-softmax scores over
            dim 1, located on ``cuda:3``.
        """
        batch_size, timesteps, depth, height, width = x.size()
        # Fold time into the batch dimension and add a single input channel so
        # every time point is convolved independently.
        c_in = x.contiguous().view(
            batch_size * timesteps, 1, depth, height, width
        ).float()
        c_out = self.cnn1(c_in.cuda(0))
        c_out = self.cnn2(c_out.cuda(1))
        # Unfold time again: one flattened feature vector per time point.
        r_in = c_out.view(batch_size, timesteps, self._FEATURES)
        r_out, _ = self.rnn(r_in.cuda(2))
        # Only the output at the final time step feeds the classifier.
        logits = self.classifier(r_out[:, -1, :].cuda(3))
        return F.log_softmax(logits, dim=1)
# Data loaders: batches of 2 samples, loaded in the main process
# (num_workers=0). Only the test loader keeps a fixed order.
trainloader = D.DataLoader(ds_train, batch_size=2, shuffle=True, num_workers=0)
validloader = D.DataLoader(ds_valid, batch_size=2, shuffle=True, num_workers=0)
testloader = D.DataLoader(ds_test, batch_size=2, shuffle=False, num_workers=0)

model = cnn_lstm()
torch.cuda.empty_cache()
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Loss histories; nothing is appended to them in the lines shown here.
train_losses = []
valid_losses = []
avg_train_losses = []
avg_valid_losses = []

for epoch in range(n_epochs):
    running_loss = 0
    model.train()
    for data, label in trainloader:
        # The model moves its input onto the GPUs stage by stage, so the
        # batch needs no explicit device transfer before or after the call.
        # NOTE(review): assumes the dataset yields host tensors -- confirm.
        data = data.float()
        y_hat = model(data)
        # Put the target on whichever device the model output lives on
        # instead of repeating the model's device index here.
        loss = criterion(y_hat, label.to(y_hat.device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
I get the following error:
loss.backward()
File "/home/.../lib/python3.6/site-packages/torch/tensor.py", line 118, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/home/.../lib/python3.6/site-packages/torch/autograd/__init__.py", line 93, in backward
allow_unreachable=True) # allow_unreachable flag
RuntimeError: cuDNN error: CUDNN_STATUS_MAPPING_ERROR
Torch Version: ‘1.2.0’
Cuda Version: 10.1.168
GPU Information:
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla P100-PCIE...  On   | 00000000:03:00.0 Off |                    0 |
| N/A   27C    P0    25W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   30C    P0    23W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla P100-PCIE...  On   | 00000000:82:00.0 Off |                    0 |
| N/A   30C    P0    25W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   3  Tesla P100-PCIE...  On   | 00000000:83:00.0 Off |                    0 |
| N/A   27C    P0    25W / 250W |      0MiB / 12198MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
Please help: how can I fix the CUDNN_STATUS_MAPPING_ERROR?