How to bind C++ extension to PyTorch (subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.)

AlphaBetaGamma96 · March 7, 2022, 5:07pm

Hello all,

I was following the tutorial for extending PyTorch to have C++ function from this tutorial here and I couldn’t manage to get the tutorial to work, so I fear I’m either missing some file or I’ve just followed the tutorial incorrectly (probably the latter)

Following the tutorial I have 2 files, the “lltm_cpp.cpp” and “setup.py”.

The “setup.py” file is shown here,

from setuptools import setup, Extension
from torch.utils import cpp_extension

setup(name='lltm_cpp',
      ext_modules=[cpp_extension.CppExtension('lltm_cpp', ['lltm.cpp'])],
      cmdclass={'build_ext': cpp_extension.BuildExtension})

and the “lltm_cpp.py” file is shown here,

#include <torch/extension.h>
#include <iostream>

torch::Tensor d_sigmoid(torch::Tensor z) {
  auto s = torch::sigmoid(z);
  return (1 - s) * s;
}

#include <vector>

std::vector<at::Tensor> lltm_forward(
    torch::Tensor input,
    torch::Tensor weights,
    torch::Tensor bias,
    torch::Tensor old_h,
    torch::Tensor old_cell) {
  auto X = torch::cat({old_h, input}, /*dim=*/1);

  auto gate_weights = torch::addmm(bias, X, weights.transpose(0, 1));
  auto gates = gate_weights.chunk(3, /*dim=*/1);

  auto input_gate = torch::sigmoid(gates[0]);
  auto output_gate = torch::sigmoid(gates[1]);
  auto candidate_cell = torch::elu(gates[2], /*alpha=*/1.0);

  auto new_cell = old_cell + candidate_cell * input_gate;
  auto new_h = torch::tanh(new_cell) * output_gate;

  return {new_h,
          new_cell,
          input_gate,
          output_gate,
          candidate_cell,
          X,
          gate_weights};
}


// tanh'(z) = 1 - tanh^2(z)
torch::Tensor d_tanh(torch::Tensor z) {
  return 1 - z.tanh().pow(2);
}

// elu'(z) = relu'(z) + { alpha * exp(z) if (alpha * (exp(z) - 1)) < 0, else 0}
torch::Tensor d_elu(torch::Tensor z, torch::Scalar alpha = 1.0) {
  auto e = z.exp();
  auto mask = (alpha * (e - 1)) < 0;
  return (z > 0).type_as(z) + mask.type_as(z) * (alpha * e);
}

std::vector<torch::Tensor> lltm_backward(
    torch::Tensor grad_h,
    torch::Tensor grad_cell,
    torch::Tensor new_cell,
    torch::Tensor input_gate,
    torch::Tensor output_gate,
    torch::Tensor candidate_cell,
    torch::Tensor X,
    torch::Tensor gate_weights,
    torch::Tensor weights) {
  auto d_output_gate = torch::tanh(new_cell) * grad_h;
  auto d_tanh_new_cell = output_gate * grad_h;
  auto d_new_cell = d_tanh(new_cell) * d_tanh_new_cell + grad_cell;

  auto d_old_cell = d_new_cell;
  auto d_candidate_cell = input_gate * d_new_cell;
  auto d_input_gate = candidate_cell * d_new_cell;

  auto gates = gate_weights.chunk(3, /*dim=*/1);
  d_input_gate *= d_sigmoid(gates[0]);
  d_output_gate *= d_sigmoid(gates[1]);
  d_candidate_cell *= d_elu(gates[2]);

  auto d_gates =
      torch::cat({d_input_gate, d_output_gate, d_candidate_cell}, /*dim=*/1);

  auto d_weights = d_gates.t().mm(X);
  auto d_bias = d_gates.sum(/*dim=*/0, /*keepdim=*/true);

  auto d_X = d_gates.mm(weights);
  const auto state_size = grad_h.size(1);
  auto d_old_h = d_X.slice(/*dim=*/1, 0, state_size);
  auto d_input = d_X.slice(/*dim=*/1, state_size);

  return {d_old_h, d_input, d_weights, d_bias, d_old_cell};
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &lltm_forward, "LLTM forward");
  m.def("backward", &lltm_backward, "LLTM backward");
}

When I run python3 setup.py install it returns the following error,

running install
running bdist_egg
running egg_info
writing lltm_cpp.egg-info/PKG-INFO
writing dependency_links to lltm_cpp.egg-info/dependency_links.txt
writing top-level names to lltm_cpp.egg-info/top_level.txt
reading manifest file 'lltm_cpp.egg-info/SOURCES.txt'
writing manifest file 'lltm_cpp.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_ext
building 'lltm_cpp' extension
Emitting ninja build file ~/custom_build_pytorch/pytorch/lltm-extension/build/temp.linux-x86_64-3.8/build.ninja...
Compiling objects...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
ninja: error: '~/custom_build_pytorch/pytorch/lltm-extension/lltm.cpp', needed by '~/custom_build_pytorch/pytorch/lltm-extension/build/temp.linux-x86_64-3.8/lltm.o', missing and no known rule to make it
Traceback (most recent call last):
  File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1726, in _run_ninja_build
    subprocess.run(
  File "~/anaconda3/lib/python3.8/subprocess.py", line 516, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "setup.py", line 5, in <module>
    setup(name='lltm_cpp',
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/__init__.py", line 153, in setup
    return distutils.core.setup(**attrs)
  File "~/anaconda3/lib/python3.8/distutils/core.py", line 148, in setup
    dist.run_commands()
  File "~/anaconda3/lib/python3.8/distutils/dist.py", line 966, in run_commands
    self.run_command(cmd)
  File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
    cmd_obj.run()
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/install.py", line 67, in run
    self.do_egg_install()
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/install.py", line 109, in do_egg_install
    self.run_command('bdist_egg')
  File "~/anaconda3/lib/python3.8/distutils/cmd.py", line 313, in run_command
    self.distribution.run_command(command)
  File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
    cmd_obj.run()
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/bdist_egg.py", line 164, in run
    cmd = self.call_command('install_lib', warn_dir=0)
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/bdist_egg.py", line 150, in call_command
    self.run_command(cmdname)
  File "~/anaconda3/lib/python3.8/distutils/cmd.py", line 313, in run_command
    self.distribution.run_command(command)
  File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
    cmd_obj.run()
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/install_lib.py", line 11, in run
    self.build()
  File "~/anaconda3/lib/python3.8/distutils/command/install_lib.py", line 107, in build
    self.run_command('build_ext')
  File "~/anaconda3/lib/python3.8/distutils/cmd.py", line 313, in run_command
    self.distribution.run_command(command)
  File "~/anaconda3/lib/python3.8/distutils/dist.py", line 985, in run_command
    cmd_obj.run()
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/build_ext.py", line 79, in run
    _build_ext.run(self)
  File "~/anaconda3/lib/python3.8/site-packages/Cython/Distutils/old_build_ext.py", line 186, in run
    _build_ext.build_ext.run(self)
  File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 340, in run
    self.build_extensions()
  File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 741, in build_extensions
    build_ext.build_extensions(self)
  File "~/anaconda3/lib/python3.8/site-packages/Cython/Distutils/old_build_ext.py", line 195, in build_extensions
    _build_ext.build_ext.build_extensions(self)
  File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 449, in build_extensions
    self._build_extensions_serial()
  File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 474, in _build_extensions_serial
    self.build_extension(ext)
  File "~/anaconda3/lib/python3.8/site-packages/setuptools/command/build_ext.py", line 196, in build_extension
    _build_ext.build_extension(self, ext)
  File "~/anaconda3/lib/python3.8/distutils/command/build_ext.py", line 528, in build_extension
    objects = self.compiler.compile(sources,
  File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 562, in unix_wrap_ninja_compile
    _write_ninja_file_and_compile_objects(
  File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1405, in _write_ninja_file_and_compile_objects
    _run_ninja_build(
  File "~/.local/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1742, in _run_ninja_build
    raise RuntimeError(message) from e
RuntimeError: Error compiling objects for extension

I haven’t done much, if any, C++ so I was wondering if 1) have I actually copied the C++ files correctly from the tutorial? 2) Am I correctly using the setup.py file? or 3) Is there a further issue with my ninja installation that’s causing the tutorial to just fail out right?

Thank you for the help!

EDIT: After having a quick look in to it, it seems that there should be a ninja.build file in the current working directory, however, no such file is built. When doing the JIT compilation part of the tutorial that works fine but the AOT compilatiion doesn’t.

AlphaBetaGamma96 · March 7, 2022, 7:27pm

  ext_modules=[cpp_extension.CppExtension('lltm_cpp', ['lltm_cpp.cpp'])],

wrong name is compiling