I got the following error when running my LSTM model.
Traceback (most recent call last):
  File "run.py", line 61, in <module>
    pred = model(x, hx)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "/mnt/lfs1/users/zeweichu/research/jacob_lstm/models.py", line 25, in forward
    out, (hx, cx) = self.lstm(x, hx)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 206, in __call__
    result = self.forward(*input, **kwargs)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 91, in forward
    output, hidden = func(input, self.all_weights, hx)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 343, in forward
    return func(input, *fargs, **fkwargs)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/autograd/function.py", line 202, in _do_forward
    flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/autograd/function.py", line 224, in forward
    result = self.forward_extended(*nested_tensors)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/nn/_functions/rnn.py", line 285, in forward_extended
    cudnn.rnn.forward(self, input, hx, weight, output, hy)
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py", line 190, in forward
    handle = cudnn.get_handle()
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 262, in get_handle
    handle = CuDNNHandle()
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 81, in __init__
    check_error(lib.cudnnCreate(ctypes.byref(ptr)))
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 249, in check_error
    raise CuDNNError(status)
torch.backends.cudnn.CuDNNError: 4: b'CUDNN_STATUS_INTERNAL_ERROR'
Exception ignored in: <bound method CuDNNHandle.__del__ of <torch.backends.cudnn.CuDNNHandle object at 0x7fee39a04f98>>
Traceback (most recent call last):
  File "/ktg/share/lfs1/users/zeweichu/anaconda3/envs/pytorch36/lib/python3.6/site-packages/torch/backends/cudnn/__init__.py", line 85, in __del__
    check_error(lib.cudnnDestroy(self))
ctypes.ArgumentError: argument 1: <class 'TypeError'>: Don't know how to convert parameter 1
The code is just as simple as
self.lstm = nn.LSTM(layers[0], layers[1], num_layers=2, dropout=0.2, batch_first=True)
out, (hx, cx) = self.lstm(x, hx)
The input data looks like:
x = Variable(torch.from_numpy(X_train[i:min(i+batch_size, num_instances)])).float()
y = Variable(torch.from_numpy(y_train[i:min(i+batch_size, num_instances)])).float()
B, T = x.size()
if use_cuda:
    x = x.cuda()
    y = y.cuda()
hx = model.init_hidden(B)
Any idea what might be the reason? Is it because my CuDNN installation is problematic?