C++ Extension from the tutorial not working

I’m following this guide from the docs and getting the following error in PyTorch 0.4.0:

Traceback (most recent call last):
  File "./test_lltm.py", line 24, in <module>
    (new_h.sum() + new_C.sum()).backward()
  File "/usr/local/lib/python3.6/site-packages/torch/tensor.py", line 93, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/usr/local/lib/python3.6/site-packages/torch/autograd/__init__.py", line 89, in backward
    allow_unreachable=True)  # allow_unreachable flag
  File "/usr/local/lib/python3.6/site-packages/torch/autograd/function.py", line 76, in apply
    return self._forward_cls.backward(self, *args)
  File "pytorch-cpp_extension/lltm_module.py", line 20, in backward
    grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_variables)
TypeError: backward(): incompatible function arguments. The following argument types are supported:
    1. (arg0: at::Tensor, arg1: at::Tensor, arg2: at::Tensor, arg3: at::Tensor, arg4: at::Tensor, arg5: at::Tensor, arg6: at::Tensor, arg7: at::Tensor, arg8: at::Tensor) -> List[at::Tensor]

Invoked with: tensor([[ 1.,  1.,  1.,  ...,  1.,  1.,  1.], ...]), tensor([[ 1.,  1.,  1.,  ...,  1.,  1.,  1.], ...]), tensor([[ 0.3192, -1.8704, -0.8799,  ...], ...]), ... (ten tensors in total; full printout omitted)

If I change the forward method in the torch.autograd.Function wrapper to

    @staticmethod
    def forward(ctx, input, weights, bias, old_h, old_cell):
        outputs = lltm.forward(input, weights, bias, old_h, old_cell)
        new_h, new_cell = outputs[:2]
        # variables = outputs[1:] + [weights, old_cell]  # Doesn't work
        variables = outputs[2:] + [weights, old_cell]
        ctx.save_for_backward(*variables)

        return new_h, new_cell

I get

Traceback (most recent call last):
  File "./test_lltm.py", line 24, in <module>
    (new_h.sum() + new_C.sum()).backward()
  File "/usr/local/lib/python3.6/site-packages/torch/tensor.py", line 93, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/usr/local/lib/python3.6/site-packages/torch/autograd/__init__.py", line 89, in backward
    allow_unreachable=True)  # allow_unreachable flag
  File "/usr/local/lib/python3.6/site-packages/torch/autograd/function.py", line 76, in apply
    return self._forward_cls.backward(self, *args)
  File "pytorch-cpp_extension/lltm_module.py", line 21, in backward
    grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_variables)
RuntimeError: The size of tensor a (160) must match the size of tensor b (128) at non-singleton dimension 1

Does anyone know what the solution is?

    @staticmethod
    def forward(ctx, input, weights, bias, old_h, old_cell):
        outputs = lltm.forward(input, weights, bias, old_h, old_cell)
        new_h, new_cell = outputs[:2]
        # variables = outputs[1:] + [weights, old_cell]  # Doesn't work
        variables = outputs[2:] + [weights, old_cell]
        ctx.save_for_backward(*variables)

        return new_h, new_cell

Modify the variables line to this:

variables = outputs[2:] + [weights]

Sorry about this; the fixes to the tutorial are in the autograd Function’s forward and backward methods.

In forward, the change needed is:

12c12
<         variables = outputs[1:] + [weights, old_cell]
---
>         variables = outputs[1:] + [weights]
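Applied to the forward posted above, that gives roughly this (only the variables line changes; dropping old_cell removes the tenth tensor, so the call into the C++ backward lines up with the nine-argument signature shown in the first traceback):

    @staticmethod
    def forward(ctx, input, weights, bias, old_h, old_cell):
        outputs = lltm.forward(input, weights, bias, old_h, old_cell)
        new_h, new_cell = outputs[:2]
        # Save everything the C++ backward needs; old_cell is no longer saved.
        variables = outputs[1:] + [weights]
        ctx.save_for_backward(*variables)

        return new_h, new_cell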

In backward, the change needed is:

21c21
<         d_old_h, d_input, d_weights, d_bias, d_old_cell, d_gates = outputs
---
>         d_old_h, d_input, d_weights, d_bias, d_old_cell = outputs
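With the shorter unpacking, backward should look something like this (a sketch assuming the extension module is imported as lltm, as in forward; the return order has to mirror forward’s arguments, i.e. input, weights, bias, old_h, old_cell):

    @staticmethod
    def backward(ctx, grad_h, grad_cell):
        outputs = lltm.backward(
            grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_variables)
        # The C++ backward returns five gradients, so d_gates is gone.
        d_old_h, d_input, d_weights, d_bias, d_old_cell = outputs
        # One gradient per argument of forward(), in the same order.
        return d_input, d_weights, d_bias, d_old_h, d_old_cell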

I’m fixing the tutorial now.