Hi,
I wrote a C++ function that receives a quantized tensor and outputs an fp32 tensor. Everything compiles without errors and the Python binding also works, but when I try to use the function I get the following error:
RuntimeError: Could not run 'quantized::linear_my' with arguments from the 'QuantizedCPU' backend. 'quantized::linear_my' is only available for these backends: [].
So my question is: how does PyTorch determine which backends my function supports?
And how can I fix this state where my function doesn't support any backend?
Thanks,
Ofir
tom (Thomas V), September 16, 2020, 1:00pm
Are you using TorchBind rather than PyBind11?
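(I ask because functions exposed directly through PyBind11 never go through the dispatcher, so they can't hit this error. Ops registered with TORCH_LIBRARY do go through the dispatcher, which looks up a kernel for the input tensors' dispatch key, QuantizedCPU here, and raises exactly this "only available for these backends: []" error when nothing is registered for that key.)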
I am not sure; I used the same method that is used in the torch library, since I modified code inside the library itself.
I resolved the problem in the end: it was a compilation issue, and cleaning the project fixed it.
jerryzh168 (Jerry Zhang), October 9, 2020, 1:17am
You can take a look at how we implement quantized ops.
First, we need to declare the op schemas:
extern template torch::class_<ConvPackedParamsBase<2>> register_conv_params<2>();
extern template torch::class_<ConvPackedParamsBase<3>> register_conv_params<3>();
torch::class_<EmbeddingPackedParamsBase> register_embedding_params();
TORCH_LIBRARY(quantized, m) {
register_linear_params();
register_conv_params<2>();
register_conv_params<3>();
register_embedding_params();
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add(Tensor qa, Tensor qb, float scale, int zero_point) -> Tensor qc"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add.out(Tensor qa, Tensor qb, Tensor(a!) out) -> Tensor(a!) out"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add.Scalar(Tensor qa, Scalar b) -> Tensor qc"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add.Scalar_out(Tensor qa, Scalar b, Tensor(a!) out) -> Tensor(a!) out"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add_relu(Tensor qa, Tensor qb, float scale, int zero_point) -> Tensor qc"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add_relu.Scalar(Tensor qa, Scalar b) -> Tensor qc"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add_relu.out(Tensor qa, Tensor qb, Tensor(a!) out) -> Tensor(a!) out"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add_relu.Scalar_out(Tensor qa, Scalar b, Tensor(a!) out) -> Tensor(a!) out"));
// deprecated functions, kept for backward compatibility
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add_out(Tensor qa, Tensor qb, Tensor(a!) out) -> Tensor(a!) out"));
m.def(TORCH_SELECTIVE_SCHEMA("quantized::add_relu_out(Tensor qa, Tensor qb, Tensor(a!) out) -> Tensor(a!) out"));
}
and then implement it:
// `torch.jit.trace` will trace Scalar as Tensor
// This can be removed after broadcast is supported and
// all variations of `quantized::add` are merged into `quantized::add`
template <bool ReLUFused = false>
Tensor qadd_scalar_tensor_out(Tensor qa, Tensor b, Tensor out) {
return qadd_scalar_out(qa, b.item(), out);
}
TORCH_LIBRARY_IMPL(quantized, QuantizedCPU, m) {
m.impl(TORCH_SELECTIVE_NAME("quantized::add"), TORCH_FN(qadd</*ReLUFused=*/false>));
m.impl(TORCH_SELECTIVE_NAME("quantized::add.out"), TORCH_FN(qadd_out</*ReLUFused=*/false>));
m.impl(TORCH_SELECTIVE_NAME("quantized::add.Scalar"), TORCH_FN(qadd_scalar</*ReLUFused=*/false>));
m.impl(TORCH_SELECTIVE_NAME("quantized::add.Scalar_out"), TORCH_FN(qadd_scalar_out</*ReLUFused=*/false>));
m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu"), TORCH_FN(qadd</*ReLUFused=*/true>));
m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu.out"), TORCH_FN(qadd_out</*ReLUFused=*/true>));
m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu.Scalar"), TORCH_FN(qadd_scalar</*ReLUFused=*/true>));
m.impl(TORCH_SELECTIVE_NAME("quantized::add_relu.Scalar_out"), TORCH_FN(qadd_scalar_out</*ReLUFused=*/true>));
// deprecated functions, kept for backward compatibility
m.impl(TORCH_SELECTIVE_NAME("quantized::add_out"), TORCH_FN(qadd_out</*ReLUFused=*/false>));