I'm trying to follow the documentation line by line, but I noticed that the saved model is bigger than the original (non-quantized) one and, much worse, inference is about 10x slower than with the original model.
So I searched the forum, went through the documentation again, and realized that I have several questions:
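For reference, here is (roughly) the flow I am running, condensed from the tutorial. I use torchvision's mobilenet_v2 below as a stand-in for my actual model; everything else follows the documented steps:

import torch
import torchvision
from torch._export import capture_pre_autograd_graph
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

model = torchvision.models.mobilenet_v2().eval()  # stand-in for my model
example_inputs = (torch.rand(1, 3, 224, 224),)

# 1. Export to a pre-autograd ATen graph (this is the deprecated call I ask about below).
exported_model = capture_pre_autograd_graph(model, example_inputs)

# 2. Annotate with the XNNPACK quantizer and calibrate.
quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
prepared_model = prepare_pt2e(exported_model, quantizer)
prepared_model(*example_inputs)  # my real code loops over a calibration set

# 3. Convert to the quantized (Q/DQ) representation and save.
quantized_model = convert_pt2e(prepared_model)
quantized_ep = torch.export.export(quantized_model, example_inputs)
torch.export.save(quantized_ep, "quantized_model.pt2")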
- In the same documentation, two quantized_linear functions are defined to illustrate the quantized model representations. Where am I supposed to call them? (My rough paraphrase of the two is just below.)
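To make sure we are talking about the same thing, this is how I understand the two flavors; this is my paraphrase, not the tutorial's exact code, and the function names and signatures here are mine, for illustration only:

# (a) Q/DQ representation (the default): int8 tensors with explicit
# quantize/dequantize ops, but the linear itself runs in fp32.
def quantized_linear_qdq(x_i8, x_scale, x_zp, w_i8, w_scale, w_zp, bias_f32):
    x_f32 = torch.ops.quantized_decomposed.dequantize_per_tensor(
        x_i8, x_scale, x_zp, -128, 127, torch.int8)
    w_f32 = torch.ops.quantized_decomposed.dequantize_per_tensor(
        w_i8, w_scale, w_zp, -127, 127, torch.int8)
    return torch.nn.functional.linear(x_f32, w_f32, bias_f32)

# (b) reference representation: the integer arithmetic is spelled out.
def quantized_linear_ref(x_i8, x_scale, x_zp, w_i8, w_scale, w_zp,
                         bias_f32, out_scale, out_zp):
    # integer matmul (CPU only), with the zero points subtracted first
    acc_i32 = torch.mm(x_i8.to(torch.int32) - x_zp,
                       (w_i8.to(torch.int32) - w_zp).t())
    out_f32 = acc_i32 * (x_scale * w_scale) + bias_f32
    # requantize to int8 for the next op
    return torch.clamp(torch.round(out_f32 / out_scale) + out_zp,
                       -128, 127).to(torch.int8)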
- Running the code from the documentation, I get the following warning:
W1018 22:32:58.975000 139636602156224 torch/_export/__init__.py:94] +============================+
W1018 22:32:58.976000 139636602156224 torch/_export/__init__.py:95] |     !!!   WARNING   !!!    |
W1018 22:32:58.976000 139636602156224 torch/_export/__init__.py:96] +============================+
W1018 22:32:58.976000 139636602156224 torch/_export/__init__.py:97] capture_pre_autograd_graph() is deprecated and doesn't provide any function guarantee moving forward.
W1018 22:32:58.976000 139636602156224 torch/_export/__init__.py:98] Please switch to use torch.export instead.
But changing that line to use torch.export leads to the following error:
AttributeError: 'ExportedProgram' object has no attribute 'meta'
Exception in thread Thread-1 (_pin_memory_loop):
This error does not appear if I run capture_pre_autograd_graph().
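Concretely, this is the substitution I tried (using the names from the sketch above):

# what I had, which works but prints the deprecation warning:
exported_model = capture_pre_autograd_graph(model, example_inputs)

# what I changed it to, following the warning:
exported_model = torch.export.export(model, example_inputs)

# prepare_pt2e(exported_model, quantizer) then fails with:
#   AttributeError: 'ExportedProgram' object has no attribute 'meta'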
- Printing my quantized model, I can see that it contains int8 tensors. But the saved file is bigger, and inference is much slower than with the original model. What is wrong?
GraphModule()
def forward(self, x):
arg0, arg1, = fx_pytree.tree_flatten_spec(([x, load], {}), self._in_spec)
arg0_1 = arg0
quantize_per_tensor_default = torch.ops.quantized_decomposed.quantize_per_tensor.default(arg0_1, 0.1397862285375595, -36, -128, 127, torch.int8); arg0_1 = None
dequantize_per_tensor_default = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default, 0.1397862285375595, -36, -128, 127, torch.int8); quantize_per_tensor_default = None
arg1_1 = arg1
_frozen_param0 = self._frozen_param0
dequantize_per_tensor_default_1 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param0, 0.0014694788260385394, 0, -127, 127, torch.int8); _frozen_param0 = None
stem_conv_0_weight_bias = self.stem_conv_0_weight_bias
conv2d = torch.ops.aten.conv2d.default(dequantize_per_tensor_default, dequantize_per_tensor_default_1, stem_conv_0_weight_bias, [2, 2], [1, 1]); dequantize_per_tensor_default = dequantize_per_tensor_default_1 = stem_conv_0_weight_bias = None
quantize_per_tensor_default_2 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d, 0.06254792213439941, -9, -128, 127, torch.int8); conv2d = None
dequantize_per_tensor_default_2 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_2, 0.06254792213439941, -9, -128, 127, torch.int8); quantize_per_tensor_default_2 = None
hardtanh = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_2, 0.0, 6.0); dequantize_per_tensor_default_2 = None
quantize_per_tensor_default_3 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh, 0.06254792213439941, -9, -128, 127, torch.int8); hardtanh = None
dequantize_per_tensor_default_3 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_3, 0.06254792213439941, -9, -128, 127, torch.int8); quantize_per_tensor_default_3 = None
_frozen_param1 = self._frozen_param1
dequantize_per_tensor_default_4 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param1, 0.027294060215353966, 0, -127, 127, torch.int8); _frozen_param1 = None
layers_0_layers_0_0_weight_bias = self.layers_0_layers_0_0_weight_bias
conv2d_1 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_3, dequantize_per_tensor_default_4, layers_0_layers_0_0_weight_bias, [1, 1], [1, 1], [1, 1], 32); dequantize_per_tensor_default_3 = dequantize_per_tensor_default_4 = layers_0_layers_0_0_weight_bias = None
quantize_per_tensor_default_5 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_1, 0.07956162095069885, -7, -128, 127, torch.int8); conv2d_1 = None
dequantize_per_tensor_default_5 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_5, 0.07956162095069885, -7, -128, 127, torch.int8); quantize_per_tensor_default_5 = None
hardtanh__1 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_5, 0.0, 6.0); dequantize_per_tensor_default_5 = None
quantize_per_tensor_default_6 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__1, 0.07956162095069885, -7, -128, 127, torch.int8); hardtanh__1 = None
dequantize_per_tensor_default_6 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_6, 0.07956162095069885, -7, -128, 127, torch.int8); quantize_per_tensor_default_6 = None
_frozen_param2 = self._frozen_param2
dequantize_per_tensor_default_7 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param2, 0.008065199479460716, 0, -127, 127, torch.int8); _frozen_param2 = None
layers_0_layers_1_0_weight_bias = self.layers_0_layers_1_0_weight_bias
conv2d_2 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_6, dequantize_per_tensor_default_7, layers_0_layers_1_0_weight_bias); dequantize_per_tensor_default_6 = dequantize_per_tensor_default_7 = layers_0_layers_1_0_weight_bias = None
quantize_per_tensor_default_8 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_2, 0.06042521819472313, -5, -128, 127, torch.int8); conv2d_2 = None
dequantize_per_tensor_default_8 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_8, 0.06042521819472313, -5, -128, 127, torch.int8); quantize_per_tensor_default_8 = None
hardtanh__2 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_8, 0.0, 6.0); dequantize_per_tensor_default_8 = None
quantize_per_tensor_default_9 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__2, 0.06042521819472313, -5, -128, 127, torch.int8); hardtanh__2 = None
dequantize_per_tensor_default_9 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_9, 0.06042521819472313, -5, -128, 127, torch.int8); quantize_per_tensor_default_9 = None
_frozen_param3 = self._frozen_param3
dequantize_per_tensor_default_10 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param3, 0.009179635904729366, 0, -127, 127, torch.int8); _frozen_param3 = None
layers_1_layers_0_0_weight_bias = self.layers_1_layers_0_0_weight_bias
conv2d_3 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_9, dequantize_per_tensor_default_10, layers_1_layers_0_0_weight_bias); dequantize_per_tensor_default_9 = dequantize_per_tensor_default_10 = layers_1_layers_0_0_weight_bias = None
quantize_per_tensor_default_11 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_3, 0.06431649625301361, -2, -128, 127, torch.int8); conv2d_3 = None
dequantize_per_tensor_default_11 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_11, 0.06431649625301361, -2, -128, 127, torch.int8); quantize_per_tensor_default_11 = None
hardtanh__3 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_11, 0.0, 6.0); dequantize_per_tensor_default_11 = None
quantize_per_tensor_default_12 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__3, 0.06431649625301361, -2, -128, 127, torch.int8); hardtanh__3 = None
dequantize_per_tensor_default_12 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_12, 0.06431649625301361, -2, -128, 127, torch.int8); quantize_per_tensor_default_12 = None
_frozen_param4 = self._frozen_param4
dequantize_per_tensor_default_13 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param4, 0.021862812340259552, 0, -127, 127, torch.int8); _frozen_param4 = None
layers_1_layers_1_0_weight_bias = self.layers_1_layers_1_0_weight_bias
conv2d_4 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_12, dequantize_per_tensor_default_13, layers_1_layers_1_0_weight_bias, [2, 2], [1, 1], [1, 1], 96); dequantize_per_tensor_default_12 = dequantize_per_tensor_default_13 = layers_1_layers_1_0_weight_bias = None
quantize_per_tensor_default_14 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_4, 0.07897071540355682, -1, -128, 127, torch.int8); conv2d_4 = None
dequantize_per_tensor_default_14 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_14, 0.07897071540355682, -1, -128, 127, torch.int8); quantize_per_tensor_default_14 = None
hardtanh__4 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_14, 0.0, 6.0); dequantize_per_tensor_default_14 = None
quantize_per_tensor_default_15 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__4, 0.07897071540355682, -1, -128, 127, torch.int8); hardtanh__4 = None
dequantize_per_tensor_default_15 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_15, 0.07897071540355682, -1, -128, 127, torch.int8); quantize_per_tensor_default_15 = None
_frozen_param5 = self._frozen_param5
dequantize_per_tensor_default_16 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param5, 0.002988796913996339, 0, -127, 127, torch.int8); _frozen_param5 = None
layers_1_layers_2_0_weight_bias = self.layers_1_layers_2_0_weight_bias
conv2d_5 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_15, dequantize_per_tensor_default_16, layers_1_layers_2_0_weight_bias); dequantize_per_tensor_default_15 = dequantize_per_tensor_default_16 = layers_1_layers_2_0_weight_bias = None
quantize_per_tensor_default_17 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_5, 0.058720141649246216, -7, -128, 127, torch.int8); conv2d_5 = None
dequantize_per_tensor_default_17 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_17, 0.058720141649246216, -7, -128, 127, torch.int8); quantize_per_tensor_default_17 = None
hardtanh__5 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_17, 0.0, 6.0); dequantize_per_tensor_default_17 = None
quantize_per_tensor_default_18 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__5, 0.058720141649246216, -7, -128, 127, torch.int8); hardtanh__5 = None
dequantize_per_tensor_default_170 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_18, 0.058720141649246216, -7, -128, 127, torch.int8)
dequantize_per_tensor_default_169 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_18, 0.058720141649246216, -7, -128, 127, torch.int8); quantize_per_tensor_default_18 = None
_frozen_param6 = self._frozen_param6
dequantize_per_tensor_default_19 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param6, 0.006517564412206411, 0, -127, 127, torch.int8); _frozen_param6 = None
layers_2_layers_0_0_weight_bias = self.layers_2_layers_0_0_weight_bias
conv2d_6 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_169, dequantize_per_tensor_default_19, layers_2_layers_0_0_weight_bias); dequantize_per_tensor_default_169 = dequantize_per_tensor_default_19 = layers_2_layers_0_0_weight_bias = None
quantize_per_tensor_default_20 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_6, 0.06291072070598602, 3, -128, 127, torch.int8); conv2d_6 = None
dequantize_per_tensor_default_20 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_20, 0.06291072070598602, 3, -128, 127, torch.int8); quantize_per_tensor_default_20 = None
hardtanh__6 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_20, 0.0, 6.0); dequantize_per_tensor_default_20 = None
quantize_per_tensor_default_21 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__6, 0.06291072070598602, 3, -128, 127, torch.int8); hardtanh__6 = None
dequantize_per_tensor_default_21 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_21, 0.06291072070598602, 3, -128, 127, torch.int8); quantize_per_tensor_default_21 = None
_frozen_param7 = self._frozen_param7
dequantize_per_tensor_default_22 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param7, 0.0161379836499691, 0, -127, 127, torch.int8); _frozen_param7 = None
layers_2_layers_1_0_weight_bias = self.layers_2_layers_1_0_weight_bias
conv2d_7 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_21, dequantize_per_tensor_default_22, layers_2_layers_1_0_weight_bias, [1, 1], [1, 1], [1, 1], 144); dequantize_per_tensor_default_21 = dequantize_per_tensor_default_22 = layers_2_layers_1_0_weight_bias = None
quantize_per_tensor_default_23 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_7, 0.06572216004133224, 1, -128, 127, torch.int8); conv2d_7 = None
dequantize_per_tensor_default_23 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_23, 0.06572216004133224, 1, -128, 127, torch.int8); quantize_per_tensor_default_23 = None
hardtanh__7 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_23, 0.0, 6.0); dequantize_per_tensor_default_23 = None
quantize_per_tensor_default_24 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__7, 0.06572216004133224, 1, -128, 127, torch.int8); hardtanh__7 = None
dequantize_per_tensor_default_24 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_24, 0.06572216004133224, 1, -128, 127, torch.int8); quantize_per_tensor_default_24 = None
_frozen_param8 = self._frozen_param8
dequantize_per_tensor_default_25 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param8, 0.002483036834746599, 0, -127, 127, torch.int8); _frozen_param8 = None
layers_2_layers_2_0_weight_bias = self.layers_2_layers_2_0_weight_bias
conv2d_8 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_24, dequantize_per_tensor_default_25, layers_2_layers_2_0_weight_bias); dequantize_per_tensor_default_24 = dequantize_per_tensor_default_25 = layers_2_layers_2_0_weight_bias = None
quantize_per_tensor_default_26 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_8, 0.05523661524057388, -7, -128, 127, torch.int8); conv2d_8 = None
dequantize_per_tensor_default_26 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_26, 0.05523661524057388, -7, -128, 127, torch.int8); quantize_per_tensor_default_26 = None
hardtanh__8 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_26, 0.0, 6.0); dequantize_per_tensor_default_26 = None
quantize_per_tensor_default_27 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__8, 0.05523661524057388, -7, -128, 127, torch.int8); hardtanh__8 = None
dequantize_per_tensor_default_27 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_27, 0.05523661524057388, -7, -128, 127, torch.int8); quantize_per_tensor_default_27 = None
add = torch.ops.aten.add.Tensor(dequantize_per_tensor_default_170, dequantize_per_tensor_default_27); dequantize_per_tensor_default_170 = dequantize_per_tensor_default_27 = None
quantize_per_tensor_default_28 = torch.ops.quantized_decomposed.quantize_per_tensor.default(add, 0.04094669222831726, -128, -128, 127, torch.int8); add = None
dequantize_per_tensor_default_28 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_28, 0.04094669222831726, -128, -128, 127, torch.int8); quantize_per_tensor_default_28 = None
_frozen_param9 = self._frozen_param9
dequantize_per_tensor_default_29 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param9, 0.004612590651959181, 0, -127, 127, torch.int8); _frozen_param9 = None
layers_3_layers_0_0_weight_bias = self.layers_3_layers_0_0_weight_bias
conv2d_9 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_28, dequantize_per_tensor_default_29, layers_3_layers_0_0_weight_bias); dequantize_per_tensor_default_28 = dequantize_per_tensor_default_29 = layers_3_layers_0_0_weight_bias = None
quantize_per_tensor_default_30 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_9, 0.06303300708532333, 0, -128, 127, torch.int8); conv2d_9 = None
dequantize_per_tensor_default_30 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_30, 0.06303300708532333, 0, -128, 127, torch.int8); quantize_per_tensor_default_30 = None
hardtanh__9 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_30, 0.0, 6.0); dequantize_per_tensor_default_30 = None
quantize_per_tensor_default_31 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__9, 0.06303300708532333, 0, -128, 127, torch.int8); hardtanh__9 = None
dequantize_per_tensor_default_31 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_31, 0.06303300708532333, 0, -128, 127, torch.int8); quantize_per_tensor_default_31 = None
_frozen_param10 = self._frozen_param10
dequantize_per_tensor_default_32 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param10, 0.02055157907307148, 0, -127, 127, torch.int8); _frozen_param10 = None
layers_3_layers_1_0_weight_bias = self.layers_3_layers_1_0_weight_bias
conv2d_10 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_31, dequantize_per_tensor_default_32, layers_3_layers_1_0_weight_bias, [2, 2], [1, 1], [1, 1], 144); dequantize_per_tensor_default_31 = dequantize_per_tensor_default_32 = layers_3_layers_1_0_weight_bias = None
quantize_per_tensor_default_33 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_10, 0.06844215840101242, 3, -128, 127, torch.int8); conv2d_10 = None
dequantize_per_tensor_default_33 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_33, 0.06844215840101242, 3, -128, 127, torch.int8); quantize_per_tensor_default_33 = None
hardtanh__10 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_33, 0.0, 6.0); dequantize_per_tensor_default_33 = None
quantize_per_tensor_default_34 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__10, 0.06844215840101242, 3, -128, 127, torch.int8); hardtanh__10 = None
dequantize_per_tensor_default_34 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_34, 0.06844215840101242, 3, -128, 127, torch.int8); quantize_per_tensor_default_34 = None
_frozen_param11 = self._frozen_param11
dequantize_per_tensor_default_35 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param11, 0.0023859788198024035, 0, -127, 127, torch.int8); _frozen_param11 = None
layers_3_layers_2_0_weight_bias = self.layers_3_layers_2_0_weight_bias
conv2d_11 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_34, dequantize_per_tensor_default_35, layers_3_layers_2_0_weight_bias); dequantize_per_tensor_default_34 = dequantize_per_tensor_default_35 = layers_3_layers_2_0_weight_bias = None
quantize_per_tensor_default_36 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_11, 0.05462232977151871, -6, -128, 127, torch.int8); conv2d_11 = None
dequantize_per_tensor_default_36 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_36, 0.05462232977151871, -6, -128, 127, torch.int8); quantize_per_tensor_default_36 = None
hardtanh__11 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_36, 0.0, 6.0); dequantize_per_tensor_default_36 = None
quantize_per_tensor_default_37 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__11, 0.05462232977151871, -6, -128, 127, torch.int8); hardtanh__11 = None
dequantize_per_tensor_default_172 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_37, 0.05462232977151871, -6, -128, 127, torch.int8)
dequantize_per_tensor_default_171 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_37, 0.05462232977151871, -6, -128, 127, torch.int8); quantize_per_tensor_default_37 = None
_frozen_param12 = self._frozen_param12
dequantize_per_tensor_default_38 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param12, 0.005703938193619251, 0, -127, 127, torch.int8); _frozen_param12 = None
layers_4_layers_0_0_weight_bias = self.layers_4_layers_0_0_weight_bias
conv2d_12 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_171, dequantize_per_tensor_default_38, layers_4_layers_0_0_weight_bias); dequantize_per_tensor_default_171 = dequantize_per_tensor_default_38 = layers_4_layers_0_0_weight_bias = None
quantize_per_tensor_default_39 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_12, 0.05919458717107773, 0, -128, 127, torch.int8); conv2d_12 = None
dequantize_per_tensor_default_39 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_39, 0.05919458717107773, 0, -128, 127, torch.int8); quantize_per_tensor_default_39 = None
hardtanh__12 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_39, 0.0, 6.0); dequantize_per_tensor_default_39 = None
quantize_per_tensor_default_40 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__12, 0.05919458717107773, 0, -128, 127, torch.int8); hardtanh__12 = None
dequantize_per_tensor_default_40 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_40, 0.05919458717107773, 0, -128, 127, torch.int8); quantize_per_tensor_default_40 = None
_frozen_param13 = self._frozen_param13
dequantize_per_tensor_default_41 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param13, 0.016469432041049004, 0, -127, 127, torch.int8); _frozen_param13 = None
layers_4_layers_1_0_weight_bias = self.layers_4_layers_1_0_weight_bias
conv2d_13 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_40, dequantize_per_tensor_default_41, layers_4_layers_1_0_weight_bias, [1, 1], [1, 1], [1, 1], 192); dequantize_per_tensor_default_40 = dequantize_per_tensor_default_41 = layers_4_layers_1_0_weight_bias = None
quantize_per_tensor_default_42 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_13, 0.05848604068160057, -1, -128, 127, torch.int8); conv2d_13 = None
dequantize_per_tensor_default_42 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_42, 0.05848604068160057, -1, -128, 127, torch.int8); quantize_per_tensor_default_42 = None
hardtanh__13 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_42, 0.0, 6.0); dequantize_per_tensor_default_42 = None
quantize_per_tensor_default_43 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__13, 0.05848604068160057, -1, -128, 127, torch.int8); hardtanh__13 = None
dequantize_per_tensor_default_43 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_43, 0.05848604068160057, -1, -128, 127, torch.int8); quantize_per_tensor_default_43 = None
_frozen_param14 = self._frozen_param14
dequantize_per_tensor_default_44 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param14, 0.002221668604761362, 0, -127, 127, torch.int8); _frozen_param14 = None
layers_4_layers_2_0_weight_bias = self.layers_4_layers_2_0_weight_bias
conv2d_14 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_43, dequantize_per_tensor_default_44, layers_4_layers_2_0_weight_bias); dequantize_per_tensor_default_43 = dequantize_per_tensor_default_44 = layers_4_layers_2_0_weight_bias = None
quantize_per_tensor_default_45 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_14, 0.048098888248205185, -8, -128, 127, torch.int8); conv2d_14 = None
dequantize_per_tensor_default_45 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_45, 0.048098888248205185, -8, -128, 127, torch.int8); quantize_per_tensor_default_45 = None
hardtanh__14 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_45, 0.0, 6.0); dequantize_per_tensor_default_45 = None
quantize_per_tensor_default_46 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__14, 0.048098888248205185, -8, -128, 127, torch.int8); hardtanh__14 = None
dequantize_per_tensor_default_46 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_46, 0.048098888248205185, -8, -128, 127, torch.int8); quantize_per_tensor_default_46 = None
add_1 = torch.ops.aten.add.Tensor(dequantize_per_tensor_default_172, dequantize_per_tensor_default_46); dequantize_per_tensor_default_172 = dequantize_per_tensor_default_46 = None
quantize_per_tensor_default_47 = torch.ops.quantized_decomposed.quantize_per_tensor.default(add_1, 0.041856665164232254, -128, -128, 127, torch.int8); add_1 = None
dequantize_per_tensor_default_174 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_47, 0.041856665164232254, -128, -128, 127, torch.int8)
dequantize_per_tensor_default_173 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_47, 0.041856665164232254, -128, -128, 127, torch.int8); quantize_per_tensor_default_47 = None
_frozen_param15 = self._frozen_param15
dequantize_per_tensor_default_48 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param15, 0.0036569861695170403, 0, -127, 127, torch.int8); _frozen_param15 = None
layers_5_layers_0_0_weight_bias = self.layers_5_layers_0_0_weight_bias
conv2d_15 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_173, dequantize_per_tensor_default_48, layers_5_layers_0_0_weight_bias); dequantize_per_tensor_default_173 = dequantize_per_tensor_default_48 = layers_5_layers_0_0_weight_bias = None
quantize_per_tensor_default_49 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_15, 0.05684017017483711, 2, -128, 127, torch.int8); conv2d_15 = None
dequantize_per_tensor_default_49 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_49, 0.05684017017483711, 2, -128, 127, torch.int8); quantize_per_tensor_default_49 = None
hardtanh__15 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_49, 0.0, 6.0); dequantize_per_tensor_default_49 = None
quantize_per_tensor_default_50 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__15, 0.05684017017483711, 2, -128, 127, torch.int8); hardtanh__15 = None
dequantize_per_tensor_default_50 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_50, 0.05684017017483711, 2, -128, 127, torch.int8); quantize_per_tensor_default_50 = None
_frozen_param16 = self._frozen_param16
dequantize_per_tensor_default_51 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param16, 0.014670155942440033, 0, -127, 127, torch.int8); _frozen_param16 = None
layers_5_layers_1_0_weight_bias = self.layers_5_layers_1_0_weight_bias
conv2d_16 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_50, dequantize_per_tensor_default_51, layers_5_layers_1_0_weight_bias, [1, 1], [1, 1], [1, 1], 192); dequantize_per_tensor_default_50 = dequantize_per_tensor_default_51 = layers_5_layers_1_0_weight_bias = None
quantize_per_tensor_default_52 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_16, 0.06387784332036972, -3, -128, 127, torch.int8); conv2d_16 = None
dequantize_per_tensor_default_52 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_52, 0.06387784332036972, -3, -128, 127, torch.int8); quantize_per_tensor_default_52 = None
hardtanh__16 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_52, 0.0, 6.0); dequantize_per_tensor_default_52 = None
quantize_per_tensor_default_53 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__16, 0.06387784332036972, -3, -128, 127, torch.int8); hardtanh__16 = None
dequantize_per_tensor_default_53 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_53, 0.06387784332036972, -3, -128, 127, torch.int8); quantize_per_tensor_default_53 = None
_frozen_param17 = self._frozen_param17
dequantize_per_tensor_default_54 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(_frozen_param17, 0.00197998876683414, 0, -127, 127, torch.int8); _frozen_param17 = None
layers_5_layers_2_0_weight_bias = self.layers_5_layers_2_0_weight_bias
conv2d_17 = torch.ops.aten.conv2d.default(dequantize_per_tensor_default_53, dequantize_per_tensor_default_54, layers_5_layers_2_0_weight_bias); dequantize_per_tensor_default_53 = dequantize_per_tensor_default_54 = layers_5_layers_2_0_weight_bias = None
quantize_per_tensor_default_55 = torch.ops.quantized_decomposed.quantize_per_tensor.default(conv2d_17, 0.04915322735905647, -6, -128, 127, torch.int8); conv2d_17 = None
dequantize_per_tensor_default_55 = torch.ops.quantized_decomposed.dequantize_per_tensor.default(quantize_per_tensor_default_55, 0.04915322735905647, -6, -128, 127, torch.int8); quantize_per_tensor_default_55 = None
hardtanh__17 = torch.ops.aten.hardtanh.default(dequantize_per_tensor_default_55, 0.0, 6.0); dequantize_per_tensor_default_55 = None
quantize_per_tensor_default_56 = torch.ops.quantized_decomposed.quantize_per_tensor.default(hardtanh__17, 0.04915322735905647, -6, -128, 127, torch.int8); hardtanh__17 = None
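(The printout continues in the same dequantize -> conv2d -> quantize pattern for the remaining layers.) For completeness, this is roughly how I compare file size and latency, using the names from the first sketch above; paths and iteration counts are arbitrary:

import os
import time

def file_size_mb(path):
    return os.path.getsize(path) / 1e6

def avg_latency_s(m, inputs, iters=50):
    with torch.no_grad():
        m(*inputs)  # warm-up
        start = time.perf_counter()
        for _ in range(iters):
            m(*inputs)
    return (time.perf_counter() - start) / iters

torch.save(model.state_dict(), "original_model.pt")
torch.export.save(torch.export.export(quantized_model, example_inputs), "quantized_model.pt2")

print("original:  %6.1f MB  %.4f s/iter"
      % (file_size_mb("original_model.pt"), avg_latency_s(model, example_inputs)))
print("quantized: %6.1f MB  %.4f s/iter"
      % (file_size_mb("quantized_model.pt2"), avg_latency_s(quantized_model, example_inputs)))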