Hello all,
I was following the tutorial for extending PyTorch to have C++ function from this tutorial here and I couldn’t manage to get the tutorial to work, so I fear I’m either missing some file or I’ve just followed the tutorial incorrectly (probably the latter)
Following the tutorial I have 2 files, the “lltm_cpp.cpp” and “setup.py”.
The “setup.py” file is shown here,
from setuptools import setup, Extension
from torch.utils import cpp_extension
setup(name='lltm_cpp',
ext_modules=[cpp_extension.CppExtension('lltm_cpp', ['lltm.cpp'])],
cmdclass={'build_ext': cpp_extension.BuildExtension})
and the “lltm_cpp.py” file is shown here,
#include <torch/extension.h>
#include <iostream>
torch::Tensor d_sigmoid(torch::Tensor z) {
auto s = torch::sigmoid(z);
return (1 - s) * s;
}
#include <vector>
std::vector<at::Tensor> lltm_forward(
torch::Tensor input,
torch::Tensor weights,
torch::Tensor bias,
torch::Tensor old_h,
torch::Tensor old_cell) {
auto X = torch::cat({old_h, input}, /*dim=*/1);
auto gate_weights = torch::addmm(bias, X, weights.transpose(0, 1));
auto gates = gate_weights.chunk(3, /*dim=*/1);
auto input_gate = torch::sigmoid(gates[0]);
auto output_gate = torch::sigmoid(gates[1]);
auto candidate_cell = torch::elu(gates[2], /*alpha=*/1.0);
auto new_cell = old_cell + candidate_cell * input_gate;
auto new_h = torch::tanh(new_cell) * output_gate;
return {new_h,
new_cell,
input_gate,
output_gate,
candidate_cell,
X,
gate_weights};
}
// tanh'(z) = 1 - tanh^2(z)
torch::Tensor d_tanh(torch::Tensor z) {
return 1 - z.tanh().pow(2);
}
// elu'(z) = relu'(z) + { alpha * exp(z) if (alpha * (exp(z) - 1)) < 0, else 0}
torch::Tensor d_elu(torch::Tensor z, torch::Scalar alpha = 1.0) {
auto e = z.exp();
auto mask = (alpha * (e - 1)) < 0;
return (z > 0).type_as(z) + mask.type_as(z) * (alpha * e);
}
std::vector<torch::Tensor> lltm_backward(
torch::Tensor grad_h,
torch::Tensor grad_cell,
torch::Tensor new_cell,
torch::Tensor input_gate,
torch::Tensor output_gate,
torch::Tensor candidate_cell,
torch::Tensor X,
torch::Tensor gate_weights,
torch::Tensor weights) {
auto d_output_gate = torch::tanh(new_cell) * grad_h;
auto d_tanh_new_cell = output_gate * grad_h;
auto d_new_cell = d_tanh(new_cell) * d_tanh_new_cell + grad_cell;
auto d_old_cell = d_new_cell;
auto d_candidate_cell = input_gate * d_new_cell;
auto d_input_gate = candidate_cell * d_new_cell;
auto gates = gate_weights.chunk(3, /*dim=*/1);
d_input_gate *= d_sigmoid(gates[0]);
d_output_gate *= d_sigmoid(gates[1]);
d_candidate_cell *= d_elu(gates[2]);
auto d_gates =
torch::cat({d_input_gate, d_output_gate, d_candidate_cell}, /*dim=*/1);
auto d_weights = d_gates.t().mm(X);
auto d_bias = d_gates.sum(/*dim=*/0, /*keepdim=*/true);
auto d_X = d_gates.mm(weights);
const auto state_size = grad_h.size(1);
auto d_old_h = d_X.slice(/*dim=*/1, 0, state_size);
auto d_input = d_X.slice(/*dim=*/1, state_size);
return {d_old_h, d_input, d_weights, d_bias, d_old_cell};
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &lltm_forward, "LLTM forward");
m.def("backward", &lltm_backward, "LLTM backward");
}
When I run python3 setup.py install
it returns the following error,
running install
running bdist_egg
running egg_info
writing lltm_cpp.egg-info/PKG-INFO
writing dependency_links to lltm_cpp.egg-info/dependency_links.txt
writing top-level names to lltm_cpp.egg-info/top_level.txt
reading manifest file 'lltm_cpp.egg-info/SOURCES.txt'
writing manifest file 'lltm_cpp.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_ext
building 'lltm_cpp' extension
Emitting ninja build file ~/custom_build_pytorch/pytorch/lltm-extension/build/temp.linux-x86_64-3.8/build.ninja...
Compiling objects...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
ninja: error: '~/custom_build_pytorch/pytorch/lltm-extension/lltm.cpp', needed by '~/custom_build_pytorch/pytorch/lltm-extension/build/temp.linux-x86_64-3.8/lltm.o', missing and no known rule to make it
Traceback (most recent call last):
File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1726, in _run_ninja_build
subprocess.run(
File "~/anaconda3/lib/python3.8/subprocess.py", line 516, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "setup.py", line 5, in <module>
setup(name='lltm_cpp',
File "~/anaconda3/lib/python3.8/site-packages/setuptools/__init__.py", line 153, in setup
return distutils.core.setup(**attrs)
File "~/anaconda3/lib/python3.8/distutils/core.py", line 148, in setup
dist.run_commands()
File "~/anaconda3/lib/python3.8/distutils/dist.py", line 966, in run_commands
self.run_command(cmd)
File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/install.py", line 67, in run
self.do_egg_install()
File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/install.py", line 109, in do_egg_install
self.run_command('bdist_egg')
File "~/anaconda3/lib/python3.8/distutils/cmd.py", line 313, in run_command
self.distribution.run_command(command)
File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/bdist_egg.py", line 164, in run
cmd = self.call_command('install_lib', warn_dir=0)
File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/bdist_egg.py", line 150, in call_command
self.run_command(cmdname)
File "~/anaconda3/lib/python3.8/distutils/cmd.py", line 313, in run_command
self.distribution.run_command(command)
File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/install_lib.py", line 11, in run
self.build()
File "~/anaconda3/lib/python3.8/distutils/command/install_lib.py", line 107, in build
self.run_command('build_ext')
File "~/anaconda3/lib/python3.8/distutils/cmd.py", line 313, in run_command
self.distribution.run_command(command)
File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
cmd_obj.run()
File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/build_ext.py", line 79, in run
_build_ext.run(self)
File "~/anaconda3/lib/python3.8/site-packages/Cython/Distutils/old_build_ext.py", line 186, in run
_build_ext.build_ext.run(self)
File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 340, in run
self.build_extensions()
File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 741, in build_extensions
build_ext.build_extensions(self)
File "~/anaconda3/lib/python3.8/site-packages/Cython/Distutils/old_build_ext.py", line 195, in build_extensions
_build_ext.build_ext.build_extensions(self)
File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 449, in build_extensions
self._build_extensions_serial()
File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 474, in _build_extensions_serial
self.build_extension(ext)
File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/build_ext.py", line 196, in build_extension
_build_ext.build_extension(self, ext)
File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 528, in build_extension
objects = self.compiler.compile(sources,
File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 562, in unix_wrap_ninja_compile
_write_ninja_file_and_compile_objects(
File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1405, in _write_ninja_file_and_compile_objects
_run_ninja_build(
File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1742, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error compiling objects for extension
I haven’t done much, if any, C++ so I was wondering if 1) have I actually copied the C++ files correctly from the tutorial? 2) Am I correctly using the setup.py
file? or 3) Is there a further issue with my ninja installation that’s causing the tutorial to just fail out right?
Thank you for the help!
EDIT: After having a quick look in to it, it seems that there should be a ninja.build
file in the current working directory, however, no such file is built. When doing the JIT compilation part of the tutorial that works fine but the AOT compilatiion doesn’t.