Hello,
I have been trying to incorporate my own CUDA kernel for a Highway LSTM into a PyTorch layer, mostly following the suggestions here: Compiling an Extension with CUDA files
As suggested in that thread, I read the data out of the tensors and launch the kernel like this:
#include <THC/THC.h>
#include "highway_lstm_kernel.h"
extern THCState *state;
// FFI entry point for the Highway-LSTM forward pass.
//
// Unpacks the raw device float pointers from each THCudaTensor and dispatches
// highway_lstm_ongpu on THC's current CUDA stream / cuBLAS handle.
// All tensors are assumed to be contiguous float CUDA tensors sized by the
// caller (inputSize x hiddenSize x miniBatch x numLayers x seqLength) --
// NOTE(review): this wrapper performs no shape/contiguity checks; confirm
// the Python side guarantees them.
//
// Returns 1 unconditionally (TH-style "success" flag for the cffi bridge).
int highway_lstm_forward_cuda(int inputSize, int hiddenSize, int miniBatch,
        int numLayers, int seqLength,
        THCudaTensor *x,
        THCudaTensor *h_data,
        THCudaTensor *c_data,
        THCudaTensor *tmp_i,
        THCudaTensor *tmp_h,
        THCudaTensor *T,
        THCudaTensor *bias,
        THCudaTensor *dropout,
        THCudaTensor *gates,
        int isTraining) {
    // Use the stream/handle THC considers current so this launch is ordered
    // with the rest of PyTorch's GPU work.
    cudaStream_t stream = THCState_getCurrentStream(state);
    cublasHandle_t handle = THCState_getCurrentBlasHandle(state);

    highway_lstm_ongpu(inputSize, hiddenSize, miniBatch, numLayers, seqLength,
            THCudaTensor_data(state, x),
            THCudaTensor_data(state, h_data),
            THCudaTensor_data(state, c_data),
            THCudaTensor_data(state, tmp_i),
            THCudaTensor_data(state, tmp_h),
            THCudaTensor_data(state, T),
            THCudaTensor_data(state, bias),
            THCudaTensor_data(state, dropout),
            THCudaTensor_data(state, gates),
            isTraining, stream, handle);

    return 1;
}
And then I call this from within Python like so:
# Invoke the compiled cffi extension's forward entry point.
# NOTE(review): the tensor arguments (input, hy, cy, tmp_i, tmp_h, weight,
# bias, dropout, gates) are presumably contiguous CUDA float tensors
# allocated by the caller -- confirm against forward_extended.
highway_lstm_layer.highway_lstm_forward_cuda(
self.input_size, self.hidden_size, self.mini_batch, self.num_layers,
self.seq_length, input, hy, cy, tmp_i, tmp_h, weight, bias, dropout,
gates, 1 if self.train else 0)
However, I get the following error:
Traceback (most recent call last):
File "highway_lstm_layer.py", line 112, in <module>
print lstm(input)
File "/home/nfitz/miniconda2/lib/python2.7/site-packages/torch/nn/modules/module.py", line 224, in __call__
result = self.forward(*input, **kwargs)
File "highway_lstm_layer.py", line 96, in forward
output, hidden = HighwayLSTMFunction(self.input_size, self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, train=self.train)(input, self.weight, self.bias)
File "/home/nfitz/miniconda2/lib/python2.7/site-packages/torch/autograd/function.py", line 284, in _do_forward
flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
File "/home/nfitz/miniconda2/lib/python2.7/site-packages/torch/autograd/function.py", line 306, in forward
result = self.forward_extended(*nested_tensors)
File "highway_lstm_layer.py", line 34, in forward_extended
gates, 1 if self.train else 0)
File "/home/nfitz/miniconda2/lib/python2.7/site-packages/torch/utils/ffi/__init__.py", line 177, in safe_call
result = torch._C._safe_call(*args, **kwargs)
TypeError: 'struct THCudaTensor' is opaque
Any hints on what might be causing this?