CUDNN_STATUS_BAD_PARAM and LSTM

I am getting a very strange error when using a bidirectional LSTM on a very long sequence:

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.autograd as autograd

def Variable(data, *args, **kwargs):
    if torch.cuda.is_available():  # move the tensor to the GPU when one is available
        return autograd.Variable(data.cuda(), *args, **kwargs)
    else:
        return autograd.Variable(data, *args, **kwargs)

class LSTM_MEM_LEAK(nn.Module):

    def __init__(self):

        super(LSTM_MEM_LEAK, self).__init__()
        self.h_size = 600  # the error appears once this reaches 600
        self.e_size = 900
        self.l1 = nn.Linear(256, self.e_size)
        self.lstm = nn.LSTM(self.e_size, self.h_size, batch_first=True, num_layers=2, bidirectional=True)
        self.l2 = nn.Linear(self.h_size * 2, 300)

    def forward(self, input):

        hidden = (Variable(torch.zeros(2 * 2, 16, self.h_size)),  # 2 layers * 2 directions, batch 16
                  Variable(torch.zeros(2 * 2, 16, self.h_size)))
        l1 = F.relu(self.l1(input.view(-1, 256)))
        lstm_out, h = self.lstm(l1.view(16, -1, self.e_size), hidden)
        l2 = F.relu(self.l2(lstm_out.contiguous().view(-1, self.h_size * 2)))

        return l2

net = LSTM_MEM_LEAK()
net.cuda()
input = Variable(torch.rand(16, 6000, 256))  # batch 16, sequence length 6000
print(input.requires_grad)
out = net(input)

and here is the error:

Traceback (most recent call last):
  File "test.py", line 38, in <module>
    out = net(input)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/modules/module.py", line 224, in __call__
    result = self.forward(*input, **kwargs)
  File "test.py", line 29, in forward
    lstm_out, h = self.lstm(l1.view(16, -1, self.e_size), hidden)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/modules/module.py", line 224, in __call__
    result = self.forward(*input, **kwargs)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/modules/rnn.py", line 162, in forward
    output, hidden = func(input, self.all_weights, hx)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/_functions/rnn.py", line 351, in forward
    return func(input, *fargs, **fkwargs)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/autograd/function.py", line 284, in _do_forward
    flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/autograd/function.py", line 306, in forward
    result = self.forward_extended(*nested_tensors)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/_functions/rnn.py", line 293, in forward_extended
    cudnn.rnn.forward(self, input, hx, weight, output, hy)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/backends/cudnn/rnn.py", line 305, in forward
    ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/backends/cudnn/__init__.py", line 255, in check_error
    raise CuDNNError(status)
torch.backends.cudnn.CuDNNError: 3: b'CUDNN_STATUS_BAD_PARAM'

This error only happens when h_size >= 600 and the LSTM is bidirectional.
I guess there is a bug somewhere.

Any idea what the problem is?
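
For what it's worth, one way to narrow it down (a sketch, assuming the slower native RNN kernels are acceptable for debugging) is to disable the cuDNN backend and rerun the forward pass; if it then succeeds, the arguments themselves are valid and the failure is specific to cuDNN's RNN path:

import torch.backends.cudnn as cudnn

cudnn.enabled = False  # global flag: fall back to the native (non-cuDNN) kernels
out = net(input)       # if this runs, the bi-LSTM arguments themselves are fine
cudnn.enabled = True   # restore the default afterwards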


Did you solve the problem? I am facing the same error; it is weird.

Not really; I just used a smaller model, since my sequence was very long.
Have a look at this thread: https://github.com/pytorch/pytorch/issues/2267
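
To make the workaround concrete, here is a sketch (the class name is mine, and 512 is an arbitrary value; anything below the 600 threshold reported above should do). It is the same model as the repro, with the hidden size exposed as a constructor argument:

class SmallerBiLSTM(nn.Module):
    # Same architecture as the repro above, but the hidden size is a
    # parameter so it can stay below the failing threshold.
    def __init__(self, h_size=512, e_size=900):
        super(SmallerBiLSTM, self).__init__()
        self.h_size = h_size
        self.e_size = e_size
        self.l1 = nn.Linear(256, e_size)
        self.lstm = nn.LSTM(e_size, h_size, batch_first=True, num_layers=2, bidirectional=True)
        self.l2 = nn.Linear(h_size * 2, 300)

    def forward(self, input):
        batch = input.size(0)  # derive the batch size instead of hardcoding 16
        hidden = (Variable(torch.zeros(2 * 2, batch, self.h_size)),
                  Variable(torch.zeros(2 * 2, batch, self.h_size)))
        x = F.relu(self.l1(input.view(-1, 256)))
        out, _ = self.lstm(x.view(batch, -1, self.e_size), hidden)
        return F.relu(self.l2(out.contiguous().view(-1, self.h_size * 2)))

Whether a given hidden size is safe may depend on the cuDNN version; the GitHub issue above tracks the underlying bug.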
