I’m following this guide from the docs and getting the following error in PyTorch 0.4.0:
Traceback (most recent call last):
File "./test_lltm.py", line 24, in <module>
(new_h.sum() + new_C.sum()).backward()
File "/usr/local/lib/python3.6/site-packages/torch/tensor.py", line 93, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/usr/local/lib/python3.6/site-packages/torch/autograd/__init__.py", line 89, in backward
allow_unreachable=True) # allow_unreachable flag
File "/usr/local/lib/python3.6/site-packages/torch/autograd/function.py", line 76, in apply
return self._forward_cls.backward(self, *args)
File "pytorch-cpp_extension/lltm_module.py", line 20, in backward
grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_variables)
TypeError: backward(): incompatible function arguments. The following argument types are supported:
1. (arg0: at::Tensor, arg1: at::Tensor, arg2: at::Tensor, arg3: at::Tensor, arg4: at::Tensor, arg5: at::Tensor, arg6: at::Tensor, arg7: at::Tensor, arg8: at::Tensor) -> List[at::Tensor]
Invoked with: tensor([[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
...,
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.]]), tensor([[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
...,
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.]]), tensor([[ 0.3192, -1.8704, -0.8799, ..., 0.8626, 0.8778, -1.3170],
[-0.9186, 0.0976, 0.3910, ..., -1.1944, -0.3676, -1.8568],
[ 1.5784, 1.0141, -1.6633, ..., -1.7777, 0.7029, 1.5205],
...,
[ 0.0074, -1.1240, 0.5354, ..., 0.0938, -0.0832, 0.6223],
[ 0.6271, 1.3720, 0.3067, ..., 1.2909, -0.0441, -0.2857],
[ 0.5443, -0.3135, 1.7047, ..., -0.2348, 0.6889, 1.5579]]), tensor([[ 0.3077, 0.5248, 0.4264, ..., 0.3737, 0.7988, 0.2708],
[ 0.2598, 0.6701, 0.2882, ..., 0.6734, 0.7510, 0.4476],
[ 0.6727, 0.6147, 0.5034, ..., 0.7011, 0.5038, 0.8394],
...,
[ 0.5524, 0.8047, 0.3472, ..., 0.4168, 0.3848, 0.7396],
[ 0.4479, 0.3595, 0.4419, ..., 0.4272, 0.2743, 0.5995],
[ 0.6624, 0.3497, 0.5137, ..., 0.3188, 0.4537, 0.3837]]), tensor([[ 0.5219, 0.3497, 0.5952, ..., 0.6442, 0.5780, 0.4918],
[ 0.4940, 0.6128, 0.4159, ..., 0.4481, 0.5114, 0.3499],
[ 0.5985, 0.4134, 0.3614, ..., 0.4335, 0.4966, 0.5901],
...,
[ 0.5867, 0.4867, 0.3882, ..., 0.5812, 0.2363, 0.5231],
[ 0.3881, 0.6624, 0.4019, ..., 0.6080, 0.4533, 0.6273],
[ 0.6356, 0.5038, 0.2778, ..., 0.5350, 0.5904, 0.6836]]), tensor([[-0.6857, -0.3889, 0.2202, ..., 0.1592, -0.5210, 0.2845],
[-0.6698, 0.1386, -0.4092, ..., 0.4946, -0.7050, 0.3303],
[ 0.6763, 0.1155, -0.1945, ..., 1.0118, -0.0272, 0.8258],
...,
[ 0.2057, -0.3746, 0.2036, ..., 0.2296, -0.5906, 0.0231],
[ 1.7566, 0.0207, 0.3850, ..., 0.4190, -0.3259, 0.2389],
[-0.3949, -0.5740, -0.6295, ..., -0.6262, 0.2720, -0.0367]]), tensor([[ 0.3438, -0.2844, -0.1446, ..., 1.5429, 0.7136, -0.6271],
[ 1.1514, 0.0200, 1.6413, ..., -0.4849, 0.7752, -0.6972],
[-0.2518, 1.7913, 1.5784, ..., -0.0136, -1.0266, 0.9354],
...,
[ 1.0337, -1.2237, -1.6819, ..., 0.4432, 0.2858, -1.5759],
[ 0.6127, -1.3635, -2.0234, ..., 1.2623, -0.2420, -1.2271],
[-0.7801, -0.2667, -1.9412, ..., 0.8488, 0.6501, -0.2292]]), tensor([[-0.8108, 0.0993, -0.2964, ..., 0.1592, -0.7361, 0.2845],
[-1.0468, 0.7086, -0.9041, ..., 0.4946, -1.2207, 0.3303],
[ 0.7203, 0.4670, 0.0138, ..., 1.0118, -0.0275, 0.8258],
...,
[ 0.2104, 1.4160, -0.6313, ..., 0.2296, -0.8931, 0.0231],
[-0.2091, -0.5773, -0.2333, ..., 0.4190, -0.3944, 0.2389],
[ 0.6740, -0.6205, 0.0549, ..., -0.9841, 0.2720, -0.0374]]), tensor(1.00000e-02 *
[[-4.0889, 5.3715, 2.9703, ..., 5.7781, -4.5135, -7.6115],
[ 6.1024, -0.9446, 6.2535, ..., -5.8360, 3.9295, 5.9560],
[ 4.8387, -3.3462, -8.8240, ..., 1.9880, 6.7588, 3.8668],
...,
[-6.6015, -6.4044, -2.1561, ..., -1.3643, 2.1749, 7.7383],
[-3.8508, 6.1907, -2.9171, ..., -4.5393, 6.4634, 7.7479],
[ 5.8162, -5.8616, -6.5713, ..., -6.9058, -1.7886, -3.0226]]), tensor([[ 0.5302, -1.6663, -0.9738, ..., 0.8031, 1.2939, -1.3940],
[-0.7445, 0.0047, 0.5090, ..., -1.5274, 0.1618, -2.0047],
[ 1.1235, 0.9431, -1.5653, ..., -2.4871, 0.7166, 0.8272],
...,
[-0.1063, -0.8225, 0.4647, ..., -0.0019, 0.1441, 0.6053],
[-0.1597, 1.3646, 0.1365, ..., 1.1119, 0.0453, -0.4289],
[ 0.8059, -0.1128, 2.0281, ..., -0.0352, 0.5655, 1.5720]])
If I change the forward
method in the torch.autograd.Function
wrapper to
@staticmethod
def forward(ctx, input, weights, bias, old_h, old_cell):
outputs = lltm.forward(input, weights, bias, old_h, old_cell)
new_h, new_cell = outputs[:2]
# variables = outputs[1:] + [weights, old_cell] # Doesn't work
variables = outputs[2:] + [weights, old_cell]
ctx.save_for_backward(*variables)
return new_h, new_cell
I get
Traceback (most recent call last):
File "./test_lltm.py", line 24, in <module>
(new_h.sum() + new_C.sum()).backward()
File "/usr/local/lib/python3.6/site-packages/torch/tensor.py", line 93, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "/usr/local/lib/python3.6/site-packages/torch/autograd/__init__.py", line 89, in backward
allow_unreachable=True) # allow_unreachable flag
File "/usr/local/lib/python3.6/site-packages/torch/autograd/function.py", line 76, in apply
return self._forward_cls.backward(self, *args)
File "pytorch-cpp_extension/lltm_module.py", line 21, in backward
grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_variables)
RuntimeError: The size of tensor a (160) must match the size of tensor b (128) at non-singleton dimension 1
Does anyone know what the solution is?