Hello everyone,
I have created a simple RNN network which runs on the CPU without any problems. However, when I load the model onto the GPU, I get the following error:
RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED
Here is my Code of the RNN class:
class RNN_Netz(nn.Module):
    """Multi-layer ReLU RNN over a univariate sequence, followed by a linear
    read-out of the hidden state at the final timestep.

    Expected input to ``forward``: a 2-D tensor of shape
    ``(seq_len, batch)`` — one scalar feature per timestep.
    Returns a tensor of shape ``(batch, Output_Num)``.
    """

    def __init__(self, Input_Num, Output_Num, Hidden_Num, Layer_Num):
        super(RNN_Netz, self).__init__()
        self.Input_Num = Input_Num    # features per timestep (1 for scalar series)
        self.Output_Num = Output_Num  # size of the final prediction
        self.Hidden_Num = Hidden_Num  # hidden units per RNN layer
        self.Layer_Num = Layer_Num    # number of stacked RNN layers
        self.rnn = nn.RNN(input_size=Input_Num,
                          hidden_size=Hidden_Num,
                          num_layers=Layer_Num,
                          nonlinearity='relu',
                          bias=True,
                          batch_first=True,  # RNN consumes (batch, seq, feature)
                          dropout=0.1)
        self.linear = nn.Linear(Hidden_Num, Output_Num)

    def forward(self, x):
        """Run the sequence through the RNN and project the last timestep.

        x: (seq_len, batch) tensor. Raises if x is not 2-D.
        """
        h_init = self.init_hidden(x)
        # (seq, batch) -> (batch, seq, 1).  `.contiguous()` is required:
        # `view` on the non-contiguous result of `transpose` raises a
        # RuntimeError, which the original `transpose(...).view(...)` hit.
        input_tensor = x.transpose(0, 1).contiguous().unsqueeze(-1)
        output, hn = self.rnn(input_tensor, h_init)
        # With batch_first=True, output is (batch, seq, hidden); `output[-1]`
        # would pick the last *sample*, not the last *timestep* — use [:, -1].
        last_output = output[:, -1]
        return self.linear(last_output)

    def init_hidden(self, x):
        """Zero initial hidden state shaped (num_layers, batch, hidden).

        Device and dtype are taken from the input tensor so the hidden state
        always lands where the model's data lives — the previous hard-coded
        'cuda:1' broke whenever the model ran on CPU or another GPU.
        """
        return torch.zeros(self.Layer_Num, x.size(1), self.Hidden_Num,
                           device=x.device, dtype=x.dtype)
And here is the relevant code showing how I call the class:
# Select the second GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device('cuda:1' if (torch.cuda.is_available()) else 'cpu')
# Wrap the pre-built feature/target tensors (defined elsewhere) as a dataset.
train_data = torch.utils.data.TensorDataset(train_features, train_targets)
# pin_memory speeds up host->GPU transfers; 2 workers load batches in parallel.
kwargs = {'num_workers': 2, 'pin_memory': True}
# drop_last discards the final partial batch so every batch has exactly 8000 samples.
loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=8000, shuffle=True, drop_last=True,**kwargs)
# Scalar input/output, 50 hidden units, 2 stacked RNN layers.
model = RNN_Netz(Input_Num=1, Output_Num=1, Hidden_Num=50, Layer_Num=2)
# NOTE(review): this .to(device) is where the reported cuDNN error fires.
model = model.to(device)
The error message appears when I load the model onto the GPU --> model = model.to(device).
Here is the Detail Error:
Traceback (most recent call last):
File "Main_RNN.py", line 36, in <module>
run_trainings_process_RNN(1000, 8000)
File "Main_RNN.py", line 34, in run_trainings_process_RNN
RNN_class.train(train_features, train_targets, Epochen, Batch_size, 0.001, 1, 50, test_features, test_targets)
File "/home/simtower2/Babak/Pytorch_LSTM/RNN_Net.py", line 108, in train
model = model.to(device)
File "/home/simtower2/Babak/Env/Pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 381, in to
return self._apply(convert)
File "/home/simtower2/Babak/Env/Pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 187, in _apply
module._apply(fn)
File "/home/simtower2/Babak/Env/Pytorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 117, in _apply
self.flatten_parameters()
File "/home/simtower2/Babak/Env/Pytorch/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 113, in flatten_parameters
self.batch_first, bool(self.bidirectional))
RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED
Previously, I had already trained FeedForward networks and LSTM networks with the torch.nn.LSTMCells on the GPU without any problems.
Therefore, I think it must be a special problem with the torch.nn.RNN. I also changed the code using torch.nn.LSTM, but the error message appears at the same place.
GPU: RTX 2080 TI, Pytorch 1.0
I’m grateful for any help!