Cuda error with cudnn convolution backward weight function

Hi all,

C++ is far outside my comfort zone. So far I have been using torch.nn.grad.conv2d_weight, but it seems very slow. I want to know whether using cudnn_convolution_backward_weight is faster. My C++ extension looks like this:

#include <torch/extension.h>

#include <vector>
#include <ATen/NativeFunctions.h>
#include <ATen/Config.h>

/// Computes a convolution weight gradient through ATen's cuDNN binding.
///
/// Thin wrapper intended to be exposed to Python: every argument is forwarded
/// unchanged, and in the same order, to at::cudnn_convolution_backward_weight.
///
/// The integer lists are taken as std::vector<int64_t> instead of
/// c10::ArrayRef<long int>:
///   - `long int` is not the same type as int64_t on every platform, whereas
///     the ATen call expects at::IntArrayRef (an ArrayRef of int64_t);
///   - pybind11 converts Python sequences (tuple, list, torch.Size) to
///     std::vector, while an ArrayRef parameter may be rejected at call time
///     with "incompatible function arguments".
///
/// @param weight_size   Size of the convolution weight; presumably also the
///                      shape of the returned gradient (confirm against ATen).
/// @param grad_output   Gradient with respect to the convolution output.
/// @param input         Input that was fed to the convolution.
/// @param padding       Convolution padding, one entry per spatial dimension.
/// @param stride        Convolution stride, one entry per spatial dimension.
/// @param dilation      Convolution dilation, one entry per spatial dimension.
/// @param groups        Number of convolution groups.
/// @param benchmark     Forwarded to the ATen call as its `benchmark` flag.
/// @param deterministic Forwarded to the ATen call as its `deterministic` flag.
/// @return The tensor returned by at::cudnn_convolution_backward_weight.
///
/// NOTE(review): shapes of `input`, `grad_output` and `weight_size` that are
/// not consistent with padding/stride/dilation are not validated here; such a
/// mismatch has been observed to surface as cuDNN CUDNN_STATUS_BAD_PARAM.
at::Tensor backward_weight(
    const std::vector<int64_t>& weight_size,
    const at::Tensor& grad_output,
    const at::Tensor& input,
    const std::vector<int64_t>& padding,
    const std::vector<int64_t>& stride,
    const std::vector<int64_t>& dilation,
    int64_t groups,
    bool benchmark,
    bool deterministic) {

  // std::vector<int64_t> converts implicitly to the at::IntArrayRef parameters.
  return at::cudnn_convolution_backward_weight(
      weight_size,
      grad_output,
      input,
      padding,
      stride,
      dilation,
      groups,
      benchmark,
      deterministic);
}

// Python bindings. The module name is taken from the TORCH_EXTENSION_NAME
// macro; backward_weight is registered under the Python name "backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def(
      "backward",
      &backward_weight,
      "Conv2d backward cudnn");
}

I JIT-compile it like this:

from torch.utils.cpp_extension import load

# Build conv2d_backward.cpp just-in-time and bind the resulting extension
# module to `conv2d_cudnn`; verbose=True is passed through to the loader.
conv2d_cudnn = load(
    name="conv2d_backward",
    sources=["conv2d_backward.cpp"],
    verbose=True,
)

I can then use it in my python code: conv2d_cudnn.backward.

All my parameters seem correct (identical to those of torch.nn.grad.conv2d_weight, though the order has to be a bit different).

I receive the following error:


RuntimeError: cuDNN error: CUDNN_STATUS_BAD_PARAM (getWorkspaceSize at /opt/conda/conda-bld/pytorch_1549628766161/work/aten/src/ATen/native/cudnn/Conv.cpp:653)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x45 (0x7f1d5fc60cf5 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x1082df9 (0x7f1d63d8bdf9 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libcaffe2_gpu.so)
frame #2: at::native::raw_cudnn_convolution_backward_weight_out(at::Tensor const&, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>, c10::ArrayRef<long>, long, bool, bool) + 0x19b (0x7f1d63d89eeb in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libcaffe2_gpu.so)
frame #3: at::native::cudnn_convolution_backward_weight(char const*, c10::ArrayRef<long>, at::TensorArg const&, at::TensorArg const&, c10::ArrayRef<long>, c10::ArrayRef<long>, c10::ArrayRef<long>, long, bool, bool) + 0x3f7 (0x7f1d63d8a797 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libcaffe2_gpu.so)
frame #4: at::native::cudnn_convolution_backward_weight(c10::ArrayRef<long>, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>, c10::ArrayRef<long>, long, bool, bool) + 0xf7 (0x7f1d63d8aac7 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libcaffe2_gpu.so)
frame #5: at::CUDAFloatType::cudnn_convolution_backward_weight(c10::ArrayRef<long>, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>, c10::ArrayRef<long>, long, bool, bool) const + 0xab (0x7f1d63e62fdb in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libcaffe2_gpu.so)
frame #6: torch::autograd::VariableType::cudnn_convolution_backward_weight(c10::ArrayRef<long>, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>, c10::ArrayRef<long>, long, bool, bool) const + 0x336 (0x7f1d5dacc436 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libtorch.so.1)
frame #7: <unknown function> + 0x34cb3 (0x7f1d42018cb3 in /tmp/torch_extensions/conv2d_backward/conv2d_backward.so)
frame #8: backward_weight(c10::ArrayRef<long>, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>, c10::ArrayRef<long>, c10::ArrayRef<long>, long, bool, bool) + 0x85 (0x7f1d42019015 in /tmp/torch_extensions/conv2d_backward/conv2d_backward.so)
frame #9: <unknown function> + 0x57f10 (0x7f1d4203bf10 in /tmp/torch_extensions/conv2d_backward/conv2d_backward.so)
frame #10: <unknown function> + 0x54a4e (0x7f1d42038a4e in /tmp/torch_extensions/conv2d_backward/conv2d_backward.so)
frame #11: <unknown function> + 0x50259 (0x7f1d42034259 in /tmp/torch_extensions/conv2d_backward/conv2d_backward.so)
frame #12: <unknown function> + 0x5093f (0x7f1d4203493f in /tmp/torch_extensions/conv2d_backward/conv2d_backward.so)
frame #13: <unknown function> + 0x412aa (0x7f1d420252aa in /tmp/torch_extensions/conv2d_backward/conv2d_backward.so)
<omitting python frames>
frame #26: torch::autograd::PyFunctionPostHook::operator()(std::vector<torch::autograd::Variable, std::allocator<torch::autograd::Variable> > const&, std::vector<torch::autograd::Variable, std::allocator<torch::autograd::Variable> > const&) + 0xe4 (0x7f1d81086b84 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #27: torch::autograd::Engine::evaluate_function(torch::autograd::FunctionTask&) + 0x1711 (0x7f1d5d9b40c1 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libtorch.so.1)
frame #28: torch::autograd::Engine::thread_main(torch::autograd::GraphTask*) + 0xc0 (0x7f1d5d9b4e80 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libtorch.so.1)
frame #29: torch::autograd::Engine::thread_init(int) + 0xc7 (0x7f1d5d9b1a47 in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libtorch.so.1)
frame #30: torch::autograd::python::PythonEngine::thread_init(int) + 0x2a (0x7f1d8107633a in /home/hans/anaconda/lib/python3.6/site-packages/torch/lib/libtorch_python.so)
frame #31: <unknown function> + 0xb8678 (0x7f1d81d08678 in /home/hans/anaconda/lib/python3.6/site-packages/torch/../../../libstdc++.so.6)
frame #32: <unknown function> + 0x76ba (0x7f1d91d7e6ba in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #33: clone + 0x6d (0x7f1d91ab441d in /lib/x86_64-linux-gnu/libc.so.6)

Does anybody know where to look to debug?

Thanks!

3 Likes

It turns out that it does work, and this error indicates that the shapes are not correct. It seems that torch.nn.grad.conv2d_weight is a bit more forgiving in handling wrong shapes.

I tried:
grad_output shape: torch.Size([1, 32, 46, 46])
input shape: torch.Size([1, 128, 49, 49])

But it should have been (with padding 0):
input shape: torch.Size([1, 128, 48, 48])

cudnn_convolution_backward_weight is about 3x faster than torch.nn.grad.conv2d_weight in my case :slight_smile:

1 Like

Can you give an example of how to call this function? I get the following error:

TypeError: backward(): incompatible function arguments. The following argument types are supported:
1. (arg0: at::IntArrayRef, arg1: at::Tensor, arg2: at::Tensor, arg3: at::IntArrayRef, arg4: at::IntArrayRef, arg5: at::IntArrayRef, arg6: int, arg7: bool, arg8: bool) -> at::Tensor

Invoked with: torch.Size([512, 512, 3, 3]), tensor([[[[ 0.0000e+00, 1.3958e-06, -4.4237e-05, …, 6.3123e-05,
0.0000e+00, 0.0000e+00],
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, …, 0.0000e+00,
-3.5066e-04, 0.0000e+00],
[ 0.0000e+00, 7.1572e-05, 0.0000e+00, …, 0.0000e+00,
0.0000e+00, 0.0000e+00],
…,
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, …, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[ 0.0000e+00, 0.0000e+00, 0.0000e+00, …, 0.0000e+00,
0.0000e+00, 0.0000e+00],
[ 0.0000e+00, -5.9945e-05, 1.2524e-04, …, 0.0000e+00,
3.2651e-04, 0.0000e+00]],

     [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      ...,
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00]],

      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  1.0104e-04,
        0.0000e+00, -5.8621e-05],
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      [ 0.0000e+00,  1.1409e-04,  0.0000e+00,  ...,  4.9096e-05,
       -3.9835e-05,  0.0000e+00]]]], device='cuda:0'), tensor([[[[ 5.1700e-07, -1.7602e-07,  2.0850e-07,  ...,  4.2585e-07,
       -2.7707e-08, -2.4121e-07],
      [ 6.9768e-07,  7.2259e-07,  2.7369e-07,  ...,  3.4273e-07,
       -3.3702e-07, -8.6962e-07],
      [ 2.3399e-07, -8.0187e-07, -7.4774e-07,  ..., -2.6042e-07,
       -5.1840e-07, -7.7020e-07],
      ...,
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00],
      [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
        0.0000e+00,  0.0000e+00]]]], device='cuda:0'), (1, 1), (1, 1), (1, 1), 

1, True, False

Hi Rahan, it is a bit hard to see what is wrong due to the formatting. I call the function like this:

# Arguments are positional and follow the C++ signature order:
# weight_size, grad_output, input, padding, stride, dilation, groups,
# benchmark, deterministic.
conv2d_cudnn.backward(
    module.weight.shape,
    gradient,
    input_tensor,
    module.padding,
    module.stride,
    module.dilation,
    module.groups,
    True,   # benchmark
    False,  # deterministic
)
4 Likes

I have run into the same problem. Can you tell me how to solve it?

This worked for me, but I haven’t tested it with the latest PyTorch version:

#include <torch/extension.h>
#include <c10/util/ArrayRef.h>

#include <vector>
#include <ATen/NativeFunctions.h>
#include <ATen/Config.h>

/// Computes a convolution weight gradient through ATen's cuDNN binding.
///
/// All arguments are passed straight through, in declaration order, to
/// at::cudnn_convolution_backward_weight. The integer lists are received as
/// std::vector<int64_t> so that Python sequences can be supplied by the caller.
///
/// @param weight_size   Size of the convolution weight.
/// @param grad_output   Gradient with respect to the convolution output.
/// @param input         Input that was fed to the convolution.
/// @param padding       Convolution padding.
/// @param stride        Convolution stride.
/// @param dilation      Convolution dilation.
/// @param groups        Number of convolution groups.
/// @param benchmark     Forwarded as the ATen call's `benchmark` flag.
/// @param deterministic Forwarded as the ATen call's `deterministic` flag.
/// @return The tensor produced by at::cudnn_convolution_backward_weight.
at::Tensor backward_weight(
    std::vector<int64_t> weight_size,
    const at::Tensor& grad_output,
    const at::Tensor& input,
    std::vector<int64_t> padding,
    std::vector<int64_t> stride,
    std::vector<int64_t> dilation,
    int64_t groups,
    bool benchmark,
    bool deterministic) {
  // The vectors convert implicitly to the array-reference parameters of the
  // ATen function.
  at::Tensor grad_weight = at::cudnn_convolution_backward_weight(
      weight_size, grad_output, input,
      padding, stride, dilation,
      groups, benchmark, deterministic);
  return grad_weight;
}

// Python bindings: registers backward_weight under the Python name "backward".
// The string literals use plain ASCII double quotes; typographic quotes
// (“ ”) are not valid C++ string delimiters and do not compile.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("backward", &backward_weight, "Conv2d backward cudnn");
}

I am not sure if this is still a valid solution.

In my code I am getting this error saying cudnn_convolution_backward_weight cannot be located.

cudnn_convolution.cpp:58:16: error: ‘cudnn_convolution_backward_weight’ is not a member of ‘at’; did you mean ‘cudnn_convolution_add_relu’?

Any clue?