Hi,
I want to quantize a model so that I can run it without the quantization stubs and pass int8 data in directly. I followed some of the tutorials and previous discussions on this forum. Here is the code I am currently using to experiment:
import torch
from torch.ao.quantization import QConfigMapping, get_default_qconfig
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx
from torch.ao.quantization.fx.custom_config import PrepareCustomConfig
from torch.ao.quantization.backend_config import get_native_backend_config

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # QuantStub converts tensors from floating point to quantized
        self.quant = torch.ao.quantization.QuantStub()
        self.conv = torch.nn.Conv2d(1, 1, 1)
        self.relu = torch.nn.ReLU()
        # DeQuantStub converts tensors from quantized to floating point
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, x):
        # manually specify where tensors will be converted from floating
        # point to quantized in the quantized model
        x = self.quant(x)
        x = self.conv(x)
        x = self.relu(x)
        # manually specify where tensors will be converted from quantized
        # to floating point in the quantized model
        x = self.dequant(x)
        return x
# create a model instance
model_fp32 = M()
# the model must be in eval mode for static quantization logic to work
model_fp32.eval()

# prepare_fx expects the example inputs as a tuple of arguments
example_inputs = (torch.rand(1, 1, 1, 1),)

qconfig = get_default_qconfig("x86")
qconfig_mapping = QConfigMapping().set_global(qconfig)

# mark input 0 and output 0 as already quantized, so no quantize/dequantize
# ops are inserted at the model boundary
prepare_custom_config = PrepareCustomConfig()
prepare_custom_config.set_input_quantized_indexes([0])
prepare_custom_config.set_output_quantized_indexes([0])

backend_config = get_native_backend_config()
prepared_model = prepare_fx(
    model_fp32,
    qconfig_mapping,
    example_inputs,
    prepare_custom_config,
    backend_config=backend_config,
)

# calibrate the observers with random data
def calibrate(model):
    model.eval()
    with torch.no_grad():
        for _ in range(10):
            rand_in = torch.rand(1, 1, 1, 1)
            model(rand_in)

calibrate(prepared_model)
quantized_model = convert_fx(prepared_model)

example_inputs_int8 = torch.randint(-122, 123, (1, 1, 1, 1), dtype=torch.int8)
res = prepared_model(example_inputs_int8)
When I try to run this, I get the following error:
RuntimeError: Input type (signed char) and bias type (float) should be the same
It seems like the biases are not converted. What do I need to pass here to fix this? Thanks in advance!
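In case it helps clarify what I'm after: my current guess is that with set_input_quantized_indexes([0]) the converted model expects an actual quantized tensor (one carrying a scale and zero point) rather than a plain torch.int8 tensor. Here is a minimal sketch of what I imagine the call would look like; the scale, zero_point, and dtype below are placeholders I made up, which would presumably have to match what the input observer recorded during calibration:

# Sketch only: wrap the raw int8 data in a quantized tensor before
# calling the converted model. scale/zero_point/dtype are made-up
# placeholders, not the parameters recorded during calibration.
scale, zero_point = 0.1, 128  # placeholder quantization parameters
float_data = example_inputs_int8.float()
q_in = torch.quantize_per_tensor(float_data, scale, zero_point, torch.quint8)
res = quantized_model(q_in)  # the converted model, not the prepared one

Is that the right direction, or is there a way to pass the int8 data directly without constructing a quantized tensor first?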