Hi,
I’m trying to quantize a trained EfficientNet-Lite0 model, following the architectural changes detailed in this blog post.
I’m using the implementation from this repo, and after quantizing the model I see a significant accuracy drop (5–10% relative to the float model).
The full model after converting to 8-bit is:
EfficientNet(
(conv_stem): ConvReLU6(
(0): QuantizedConv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), scale=0.36273476481437683, zero_point=57, padding=(1, 1))
(1): QuantizedReLU6(inplace=True)
)
(bn1): Identity()
(act1): Identity()
(blocks): Sequential(
(0): Sequential(
(0): DepthwiseSeparableConv(
(skip_add): QFunctional(
scale=1.0, zero_point=0
(activation_post_process): Identity()
)
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.6822086572647095, zero_point=56, padding=(1, 1), groups=32)
(1): QuantizedReLU6(inplace=True)
)
(bn1): Identity()
(act1): Identity()
(conv_pw): QuantizedConv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), scale=0.7673127055168152, zero_point=65)
(bn2): Identity()
(act2): Identity()
)
)
(1): Sequential(
(0): InvertedResidual(
(skip_add): QFunctional(
scale=1.0, zero_point=0
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), scale=0.5392391085624695, zero_point=60)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), scale=0.322853684425354, zero_point=57, padding=(1, 1), groups=96)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), scale=0.7627326250076294, zero_point=63)
(bn3): Identity()
)
(1): InvertedResidual(
(skip_add): QFunctional(
scale=0.8407724499702454, zero_point=62
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), scale=0.3213047683238983, zero_point=63)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), scale=0.267162948846817, zero_point=67, padding=(1, 1), groups=144)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), scale=0.6916980743408203, zero_point=53)
(bn3): Identity()
)
)
(2): Sequential(
(0): InvertedResidual(
(skip_add): QFunctional(
scale=1.0, zero_point=0
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), scale=0.30310994386672974, zero_point=62)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(144, 144, kernel_size=(5, 5), stride=(2, 2), scale=0.20994137227535248, zero_point=61, padding=(2, 2), groups=144)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(144, 40, kernel_size=(1, 1), stride=(1, 1), scale=0.6519036889076233, zero_point=65)
(bn3): Identity()
)
(1): InvertedResidual(
(skip_add): QFunctional(
scale=0.7288376092910767, zero_point=63
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), scale=0.20947812497615814, zero_point=52)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(240, 240, kernel_size=(5, 5), stride=(1, 1), scale=0.24765455722808838, zero_point=83, padding=(2, 2), groups=240)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(240, 40, kernel_size=(1, 1), stride=(1, 1), scale=0.4334663450717926, zero_point=61)
(bn3): Identity()
)
)
(3): Sequential(
(0): InvertedResidual(
(skip_add): QFunctional(
scale=1.0, zero_point=0
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), scale=0.20177333056926727, zero_point=56)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), scale=0.22160769999027252, zero_point=61, padding=(1, 1), groups=240)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), scale=0.5097917914390564, zero_point=64)
(bn3): Identity()
)
(1): InvertedResidual(
(skip_add): QFunctional(
scale=0.514493465423584, zero_point=64
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), scale=0.15477867424488068, zero_point=47)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), scale=0.19667555391788483, zero_point=82, padding=(1, 1), groups=480)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), scale=0.2826884686946869, zero_point=64)
(bn3): Identity()
)
(2): InvertedResidual(
(skip_add): QFunctional(
scale=0.5448680520057678, zero_point=65
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), scale=0.12001236528158188, zero_point=67)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), scale=0.1878129243850708, zero_point=79, padding=(1, 1), groups=480)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), scale=0.23110872507095337, zero_point=61)
(bn3): Identity()
)
)
(4): Sequential(
(0): InvertedResidual(
(skip_add): QFunctional(
scale=1.0, zero_point=0
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), scale=0.20795781910419464, zero_point=51)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(480, 480, kernel_size=(5, 5), stride=(1, 1), scale=0.2575533390045166, zero_point=81, padding=(2, 2), groups=480)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(480, 112, kernel_size=(1, 1), stride=(1, 1), scale=0.5269572138786316, zero_point=63)
(bn3): Identity()
)
(1): InvertedResidual(
(skip_add): QFunctional(
scale=0.5629716515541077, zero_point=65
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), scale=0.16619464755058289, zero_point=58)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), scale=0.2228115200996399, zero_point=69, padding=(2, 2), groups=672)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), scale=0.3241402208805084, zero_point=63)
(bn3): Identity()
)
(2): InvertedResidual(
(skip_add): QFunctional(
scale=0.642544686794281, zero_point=67
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), scale=0.13504581153392792, zero_point=60)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), scale=0.2062821239233017, zero_point=73, padding=(2, 2), groups=672)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), scale=0.25870615243911743, zero_point=63)
(bn3): Identity()
)
)
(5): Sequential(
(0): InvertedResidual(
(skip_add): QFunctional(
scale=1.0, zero_point=0
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), scale=0.16723443567752838, zero_point=66)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(672, 672, kernel_size=(5, 5), stride=(2, 2), scale=0.22132091224193573, zero_point=61, padding=(2, 2), groups=672)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(672, 192, kernel_size=(1, 1), stride=(1, 1), scale=0.4806938171386719, zero_point=63)
(bn3): Identity()
)
(1): InvertedResidual(
(skip_add): QFunctional(
scale=0.49192753434181213, zero_point=64
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), scale=0.1888679713010788, zero_point=51)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), scale=0.2976231873035431, zero_point=83, padding=(2, 2), groups=1152)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), scale=0.34456929564476013, zero_point=60)
(bn3): Identity()
)
(2): InvertedResidual(
(skip_add): QFunctional(
scale=0.5567103624343872, zero_point=62
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), scale=0.19077259302139282, zero_point=47)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), scale=0.38248512148857117, zero_point=91, padding=(2, 2), groups=1152)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), scale=0.2738204598426819, zero_point=65)
(bn3): Identity()
)
(3): InvertedResidual(
(skip_add): QFunctional(
scale=0.6205083727836609, zero_point=62
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), scale=0.15164275467395782, zero_point=59)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), scale=0.29384535551071167, zero_point=80, padding=(2, 2), groups=1152)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), scale=0.24689887464046478, zero_point=63)
(bn3): Identity()
)
)
(6): Sequential(
(0): InvertedResidual(
(skip_add): QFunctional(
scale=1.0, zero_point=0
(activation_post_process): Identity()
)
(conv_pw): ConvReLU6(
(0): QuantizedConv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), scale=0.20717555284500122, zero_point=64)
(1): QuantizedReLU6()
)
(bn1): Identity()
(act1): Identity()
(conv_dw): ConvReLU6(
(0): QuantizedConv2d(1152, 1152, kernel_size=(3, 3), stride=(1, 1), scale=0.3554805517196655, zero_point=68, padding=(1, 1), groups=1152)
(1): QuantizedReLU6()
)
(bn2): Identity()
(act2): Identity()
(conv_pwl): QuantizedConv2d(1152, 320, kernel_size=(1, 1), stride=(1, 1), scale=0.2588821351528168, zero_point=63)
(bn3): Identity()
)
)
)
(conv_head): ConvReLU6(
(0): QuantizedConv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), scale=0.2839420437812805, zero_point=80)
(1): QuantizedReLU6(inplace=True)
)
(bn2): Identity()
(act2): Identity()
(global_pool): SelectAdaptivePool2d (output_size=1, pool_type=avg)
(quant): Quantize(scale=tensor([0.0374]), zero_point=tensor([57]), dtype=torch.quint8)
(dequant): DeQuantize()
(classifier): QuantizedLinear(in_features=1280, out_features=1000, scale=0.14930474758148193, zero_point=34, qscheme=torch.per_channel_affine)
)
Is there anything I’m missing? I can provide the conversion code and any other information if needed.
Thanks in advance!