Happy new year!
I quantized a UNet, except for its last layer, as follows (I need full precision at the last layer):
import copy
import torch
import torch.nn as nn

class QuantizedUNet(nn.Module):
    def __init__(self, model_fp32):
        super(QuantizedUNet, self).__init__()
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
        self.until_last = copy.deepcopy(model_fp32)
        # Remove the last layer from the fp32 copy and keep it in a separate variable
        del self.until_last.conv2[2]
        self.last_conv = model_fp32.conv2[2]

    def forward(self, x):
        # Manually specify where tensors are converted from floating
        # point to quantized in the quantized model
        x = self.quant(x)
        x = self.until_last(x)
        x = self.dequant(x)
        x = self.last_conv(x)
        return x
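For context, the quantization itself followed the standard eager-mode static recipe. A rough sketch of what I ran (fuse_list and calibration_loader are simplified placeholders here, and last_conv is skipped by clearing its qconfig):

quantized_model = QuantizedUNet(model_fp32)
quantized_model.eval()

# Fuse the BatchNorm+ReLU pairs inside until_last before preparing
# (fuse_list is a placeholder for the actual module-name lists)
torch.quantization.fuse_modules(quantized_model.until_last, fuse_list, inplace=True)

# Default server-side (fbgemm) qconfig; keep last_conv in full precision
quantized_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
quantized_model.last_conv.qconfig = None

torch.quantization.prepare(quantized_model, inplace=True)

# Calibrate with representative inputs (calibration_loader is a placeholder)
with torch.no_grad():
    for batch in calibration_loader:
        quantized_model(batch)

torch.quantization.convert(quantized_model, inplace=True)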
After static quantization and calibration, this is the model I got:
QuantizedUNet(
  (quant): QuantStub()
  (dequant): DeQuantStub()
  (until_last): Unet(
    (down_sample_layers): ModuleList(
      (0): Sequential(
        (0): QuantizedConv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), scale=0.09462987631559372, zero_point=64, padding=(1, 1))
        (1): QuantizedBNReLU2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), scale=0.6255205273628235, zero_point=83, padding=(1, 1))
        (4): QuantizedBNReLU2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
      (1): Sequential(
        (0): QuantizedConv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), scale=1.403043270111084, zero_point=87, padding=(1, 1))
        (1): QuantizedBNReLU2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=2.315826654434204, zero_point=60, padding=(1, 1))
        (4): QuantizedBNReLU2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
      (2): Sequential(
        (0): QuantizedConv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), scale=5.481112957000732, zero_point=56, padding=(1, 1))
        (1): QuantizedBNReLU2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=12.060239791870117, zero_point=77, padding=(1, 1))
        (4): QuantizedBNReLU2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
      (3): Sequential(
        (0): QuantizedConv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), scale=16.808162689208984, zero_point=69, padding=(1, 1))
        (1): QuantizedBNReLU2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=27.680782318115234, zero_point=80, padding=(1, 1))
        (4): QuantizedBNReLU2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
    )
    (conv): Sequential(
      (0): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=39.90061950683594, zero_point=66, padding=(1, 1))
      (1): QuantizedBNReLU2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Identity()
      (3): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=102.32366180419922, zero_point=65, padding=(1, 1))
      (4): QuantizedBNReLU2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Identity()
    )
    (up_sample_layers): ModuleList(
      (0): Sequential(
        (0): QuantizedConv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), scale=1064.0137939453125, zero_point=71, padding=(1, 1))
        (1): QuantizedBNReLU2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=1038.538330078125, zero_point=73, padding=(1, 1))
        (4): QuantizedBNReLU2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
      (1): Sequential(
        (0): QuantizedConv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), scale=3193.4365234375, zero_point=99, padding=(1, 1))
        (1): QuantizedBNReLU2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), scale=1721.619873046875, zero_point=87, padding=(1, 1))
        (4): QuantizedBNReLU2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
      (2): Sequential(
        (0): QuantizedConv2d(32, 8, kernel_size=(3, 3), stride=(1, 1), scale=2268.27001953125, zero_point=71, padding=(1, 1))
        (1): QuantizedBNReLU2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), scale=856.855712890625, zero_point=71, padding=(1, 1))
        (4): QuantizedBNReLU2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
      (3): Sequential(
        (0): QuantizedConv2d(16, 8, kernel_size=(3, 3), stride=(1, 1), scale=493.1239318847656, zero_point=105, padding=(1, 1))
        (1): QuantizedBNReLU2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Identity()
        (3): QuantizedConv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), scale=84.60382080078125, zero_point=26, padding=(1, 1))
        (4): QuantizedBNReLU2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Identity()
      )
    )
    (conv2): Sequential(
      (0): QuantizedConv2d(8, 4, kernel_size=(1, 1), stride=(1, 1), scale=15.952274322509766, zero_point=86)
      (1): QuantizedConv2d(4, 1, kernel_size=(1, 1), stride=(1, 1), scale=9.816205978393555, zero_point=58)
    )
  )
  (last_conv): Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1))
)
It looks like every layer except the last one was quantized properly, as I expected. But when I run inference with this model as follows, it raises a RuntimeError inside the quantized modules:
quantized_model.eval()
quantized_output = quantized_model(norm_input[0:1])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-127-34dec8af2a31> in <module>
1 quantized_model.eval()
----> 2 quantized_output = quantized_model(norm_input[0:1])
~/.conda/envs/airs_project/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-118-db9442ff2b9c> in forward(self, x)
14 # point to quantized in the quantized model
15 x = self.quant(x)
---> 16 x = self.until_last(x)
17 x = self.dequant(x)
18 x = self.last_conv(x)
~/.conda/envs/airs_project/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
/mnt/hdd2/jinwoo/airs_project/sample_forward/sample_forward/utils/unet.py in forward(self, input)
38 # Apply down-sampling layers
39 for layer in self.down_sample_layers:
---> 40 output = layer(output)
41 stack.append(output)
42 output = F.max_pool2d(output, kernel_size=2)
~/.conda/envs/airs_project/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/.conda/envs/airs_project/lib/python3.9/site-packages/torch/nn/modules/container.py in forward(self, input)
115 def forward(self, input):
116 for module in self:
--> 117 input = module(input)
118 return input
119
~/.conda/envs/airs_project/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/.conda/envs/airs_project/lib/python3.9/site-packages/torch/nn/quantized/modules/conv.py in forward(self, input)
329 if len(input.shape) != 4:
330 raise ValueError("Input shape must be `(N, C, H, W)`!")
--> 331 return ops.quantized.conv2d(
332 input, self._packed_params, self.scale, self.zero_point)
333
RuntimeError: Could not run 'quantized::conv2d.new' with arguments from the 'CPU' backend. 'quantized::conv2d.new' is only available for these backends: [QuantizedCPU, BackendSelect, Named, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, Tracer, Autocast, Batched, VmapMode].
QuantizedCPU: registered at /pytorch/aten/src/ATen/native/quantized/cpu/qconv.cpp:858 [kernel]
BackendSelect: fallthrough registered at /pytorch/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Named: registered at /pytorch/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
AutogradOther: fallthrough registered at /pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:35 [backend fallback]
AutogradCPU: fallthrough registered at /pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:39 [backend fallback]
AutogradCUDA: fallthrough registered at /pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:43 [backend fallback]
AutogradXLA: fallthrough registered at /pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:47 [backend fallback]
Tracer: fallthrough registered at /pytorch/torch/csrc/jit/frontend/tracer.cpp:967 [backend fallback]
Autocast: fallthrough registered at /pytorch/aten/src/ATen/autocast_mode.cpp:254 [backend fallback]
Batched: registered at /pytorch/aten/src/ATen/BatchingRegistrations.cpp:511 [backend fallback]
VmapMode: fallthrough registered at /pytorch/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
I suspected that the non-quantized last layer might be the one raising this error, so I ran inference through the quantized submodule only:

# Run only the quantized part, excluding the unquantized last layer
quantized_model.until_last.eval()
quantized_model.until_last(norm_input[0:1])
But it raised the same error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-129-2ede05a56b57> in <module>
1 quantized_model.until_last.eval()
----> 2 quantized_model.until_last(norm_input[0:1])
(... the remaining frames are identical to the traceback above ...)

RuntimeError: Could not run 'quantized::conv2d.new' with arguments from the 'CPU' backend. 'quantized::conv2d.new' is only available for these backends: [QuantizedCPU, BackendSelect, Named, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, Tracer, Autocast, Batched, VmapMode].
From what I can tell, all the layers in this submodule were also quantized properly.
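Though, thinking about it more, calling until_last directly with a plain float tensor may hit this error no matter what, since quantized convolutions expect an already-quantized input. To make that check meaningful I would probably have to quantize the input by hand first, something like this (scale and zero_point are chosen arbitrarily here):

# Build a quantized input tensor by hand, just to check whether the
# input dtype is what the quantized convs are complaining about
x_q = torch.quantize_per_tensor(norm_input[0:1], scale=0.1, zero_point=64,
                                dtype=torch.quint8)
quantized_model.until_last(x_q)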
I have searched Google and the PyTorch docs thoroughly, but I am not sure what to debug next.
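One detail that does stand out in the printed model: quant and dequant are still shown as QuantStub() and DeQuantStub(), whereas after convert() I would have expected them to be swapped for Quantize(...) and DeQuantize(). If the stubs were never converted, self.quant(x) is a no-op and the first quantized conv receives a plain float tensor, which would match this error. A quick sanity check I plan to run:

# After convert(), the stubs should have been replaced by real
# (de)quantization modules; if these still print as the stub classes,
# the quantized convs are being fed ordinary float tensors
print(type(quantized_model.quant))    # expect torch.nn.quantized.Quantize
print(type(quantized_model.dequant))  # expect torch.nn.quantized.DeQuantize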
Any suggestions would be really welcome.
Thanks.