I am getting a very strange error when using a bidirectional LSTM on a very long sequence:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.autograd as autograd
def Variable(data, *args, **kwargs):
    if torch.cuda.is_available():
        return autograd.Variable(data.cuda(), *args, **kwargs)
    else:
        return autograd.Variable(data, *args, **kwargs)
class LSTM_MEM_LEAK(nn.Module):
    def __init__(self):
        super(LSTM_MEM_LEAK, self).__init__()
        self.h_size = 600
        self.e_size = 900
        self.l1 = nn.Linear(256, self.e_size)
        self.lstm = nn.LSTM(self.e_size, self.h_size, batch_first=True,
                            num_layers=2, bidirectional=True)
        self.l2 = nn.Linear(self.h_size * 2, 300)

    def forward(self, input):
        # shape: (num_layers * num_directions, batch, h_size) = (4, 16, 600)
        hidden = (Variable(torch.zeros(2 * 2, 16, self.h_size)),
                  Variable(torch.zeros(2 * 2, 16, self.h_size)))
        l1 = F.relu(self.l1(input.view(-1, 256)))
        lstm_out, h = self.lstm(l1.view(16, -1, self.e_size), hidden)
        l2 = F.relu(self.l2(lstm_out.contiguous().view(-1, self.h_size * 2)))
        return l2
net = LSTM_MEM_LEAK()
net.cuda()
input = Variable(torch.rand(16, 6000, 256))
print(input.requires_grad)
out = net(input)
and here is the error:
Traceback (most recent call last):
  File "test.py", line 38, in <module>
    out = net(input)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/modules/module.py", line 224, in __call__
    result = self.forward(*input, **kwargs)
  File "test.py", line 29, in forward
    lstm_out, h = self.lstm(l1.view(16, -1, self.e_size), hidden)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/modules/module.py", line 224, in __call__
    result = self.forward(*input, **kwargs)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/modules/rnn.py", line 162, in forward
    output, hidden = func(input, self.all_weights, hx)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/_functions/rnn.py", line 351, in forward
    return func(input, *fargs, **fkwargs)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/autograd/function.py", line 284, in _do_forward
    flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/autograd/function.py", line 306, in forward
    result = self.forward_extended(*nested_tensors)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/nn/_functions/rnn.py", line 293, in forward_extended
    cudnn.rnn.forward(self, input, hx, weight, output, hy)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/backends/cudnn/rnn.py", line 305, in forward
    ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
  File "/opt/conda/envs/pytorch-py35/lib/python3.5/site-packages/torch/backends/cudnn/__init__.py", line 255, in check_error
    raise CuDNNError(status)
torch.backends.cudnn.CuDNNError: 3: b'CUDNN_STATUS_BAD_PARAM'
This error only happens when h_size >= 600 and the LSTM is bidirectional; the quick sweep below is how I narrowed that down.
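Here is roughly what that sweep looked like (just a sketch: it drives nn.LSTM directly with the same shapes as the module above, reusing the imports and the Variable helper from my script, and the h_size values are only the ones I spot-checked):

for h_size in (512, 576, 600, 640):
    # same configuration as self.lstm above, only h_size varies
    lstm = nn.LSTM(900, h_size, batch_first=True,
                   num_layers=2, bidirectional=True).cuda()
    hidden = (Variable(torch.zeros(2 * 2, 16, h_size)),
              Variable(torch.zeros(2 * 2, 16, h_size)))
    try:
        lstm(Variable(torch.rand(16, 6000, 900)), hidden)
        print(h_size, 'ok')
    except Exception as e:
        print(h_size, 'failed:', e)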
I suspect there is a bug somewhere. Any idea what the problem is?
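In case it helps narrow things down, the next check I plan to run is forcing the native implementation instead of cuDNN (again just a sketch; torch.backends.cudnn.enabled is the standard switch, and everything else is the same script as above):

import torch.backends.cudnn as cudnn

# if the non-cuDNN path runs cleanly, the problem is likely in how the
# cuDNN RNN descriptors / reserve buffer are set up for this
# configuration, rather than in the model itself
cudnn.enabled = False

net = LSTM_MEM_LEAK()
net.cuda()
out = net(Variable(torch.rand(16, 6000, 256)))
print(out.size())

If that runs, my rough guess would be that the reserve buffer cuDNN allocates for a 6000-step bidirectional LSTM overflows some internal size limit, which would fit the hidden-size threshold I am seeing.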