My original model looks like:
ResNetCifar10(
(conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(layer1): Sequential(
(0): BasicBlockCifar10(
(conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): Sequential()
)
(1): BasicBlockCifar10(
(conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): Sequential()
)
(2): BasicBlockCifar10(
(conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): Sequential()
)
)
(layer2): Sequential(
(0): BasicBlockCifar10(
(conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): LambdaLayer()
)
(1): BasicBlockCifar10(
(conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): Sequential()
)
(2): BasicBlockCifar10(
(conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): Sequential()
)
)
(layer3): Sequential(
(0): BasicBlockCifar10(
(conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): LambdaLayer()
)
(1): BasicBlockCifar10(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): Sequential()
)
(2): BasicBlockCifar10(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(shortcut): Sequential()
)
)
(linear): Linear(in_features=64, out_features=10, bias=True)
)
I then convert it into a QAT-trainable model by wrapping it with quant/dequant stubs, fusing the Conv/BN/ReLU layers, and calling torch.quantization.prepare_qat:
QuantWrapper(
(quant): QuantStub(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(dequant): DeQuantStub(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(module): ResNetCifar10(
(conv1): ConvBnReLU2d(
3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu): Identity()
(layer1): Sequential(
(0): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
(1): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
(2): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
)
(layer2): Sequential(
(0): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): LambdaLayer()
)
(1): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
(2): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
)
(layer3): Sequential(
(0): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): LambdaLayer()
)
(1): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
(2): BasicBlockCifar10(
(conv1): ConvBnReLU2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn1): Identity()
(relu1): Identity()
(conv2): ConvBn2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(bn2): Identity()
(relu2): ReLU(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
)
(linear): Linear(
in_features=64, out_features=10, bias=True
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
(weight_fake_quant): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=None, zero_point=None
(observer): MovingAverageMinMaxObserver(min_val=None, max_val=None)
)
)
)
)
This model trains fine. I then try to convert it to an evaluation (fully quantized) model using
torch.quantization.convert
as shown in the tutorial.
The converted model looks like:
QuantWrapper(
(quant): Quantize(scale=tensor([0.0203]), zero_point=tensor([120]), dtype=torch.quint8)
(dequant): DeQuantize()
(module): ResNetCifar10(
(conv1): QuantizedConvReLU2d(3, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.00903782807290554, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu): Identity()
(layer1): Sequential(
(0): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.009634326212108135, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.018731188029050827, zero_point=130, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=tensor([0.0086]), zero_point=tensor([0])
(observer): MovingAverageMinMaxObserver(min_val=0.0, max_val=2.1906778812408447)
)
)
)
(1): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.010925675742328167, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.01969429850578308, zero_point=141, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=tensor([0.0112]), zero_point=tensor([0])
(observer): MovingAverageMinMaxObserver(min_val=0.0, max_val=2.8511974811553955)
)
)
)
(2): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.007963746786117554, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.016999518498778343, zero_point=136, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=tensor([0.0144]), zero_point=tensor([0])
(observer): MovingAverageMinMaxObserver(min_val=0.0, max_val=3.661449670791626)
)
)
)
)
(layer2): Sequential(
(0): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(16, 32, kernel_size=(3, 3), stride=(2, 2), scale=0.009311596862971783, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.02033022604882717, zero_point=115, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): LambdaLayer()
)
(1): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.006638957187533379, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.018797850236296654, zero_point=136, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=tensor([0.0147]), zero_point=tensor([0])
(observer): MovingAverageMinMaxObserver(min_val=0.0, max_val=3.7611701488494873)
)
)
)
(2): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.007839899510145187, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.026490747928619385, zero_point=85, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=tensor([0.0144]), zero_point=tensor([0])
(observer): MovingAverageMinMaxObserver(min_val=0.0, max_val=3.678955316543579)
)
)
)
)
(layer3): Sequential(
(0): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(32, 64, kernel_size=(3, 3), stride=(2, 2), scale=0.009626154787838459, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.02358696237206459, zero_point=109, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): LambdaLayer()
)
(1): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.0076929363422095776, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.02710540033876896, zero_point=124, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=tensor([0.0241]), zero_point=tensor([0])
(observer): MovingAverageMinMaxObserver(min_val=0.0, max_val=6.156522750854492)
)
)
)
(2): BasicBlockCifar10(
(conv1): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.008539369329810143, zero_point=0, padding=(1, 1))
(bn1): Identity()
(relu1): Identity()
(conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.06625250726938248, zero_point=92, padding=(1, 1))
(bn2): Identity()
(relu2): QuantizedReLU()
(shortcut): Sequential(
(observer): FakeQuantize(
fake_quant_enabled=True, observer_enabled=True, scale=tensor([0.0236]), zero_point=tensor([0])
(observer): MovingAverageMinMaxObserver(min_val=0.0, max_val=6.006941795349121)
)
)
)
)
(linear): QuantizedLinear(in_features=64, out_features=10, scale=0.10857795923948288, zero_point=69)
)
)
It appears that some FakeQuantize nodes remain after converting — specifically inside each identity shortcut.
The shortcut is an empty nn.Sequential()
layer (depending on the network arguments it gets overwritten with a LambdaLayer to handle different strides).
Running the converted model then fails with this error:
RuntimeError: Didn't find kernel to dispatch to for operator 'aten::sub'. Tried to look up kernel for dispatch key 'QuantizedCPUTensorId'. Registered dispatch keys are: [CUDATensorId, SparseCPUTensorId, VariableTensorId, CPUTensorId, SparseCUDATensorId]