import torch
import torch.nn as nn
import torch.quantization
import numpy as np
class SiLUDataCollector(nn.Module):
def __init__(self):
super().__init__()
self.layer_id = id(self)
# Add scale and zero_point for dequantization
self.scale = None
self.zero_point = None
def forward(self, x):
# Capture quantization parameters
if self.scale is None:
self.scale = x.q_scale()
self.zero_point = x.q_zero_point()
# Save quantized input data
np.save(f'quantized_input_{self.layer_id}.npy', x.int_repr().cpu().numpy())
# For reference, save dequantized values
dequantized = (x.int_repr().float() - self.zero_point) * self.scale
np.save(f'dequantized_input_{self.layer_id}.npy', dequantized.cpu().numpy())
# Calculate sigmoid on dequantized values
sigmoid_values = torch.sigmoid(dequantized)
with open(f'sigmoid_values_{self.layer_id}.txt', 'w') as f:
for sigmoid_val in sigmoid_values.flatten():
f.write(f"{sigmoid_val.item()}\n")
return x * torch.sigmoid(dequantized) # Original SiLU computation
def replace_silu_with_collector(module):
for name, child in module.named_children():
if isinstance(child, nn.SiLU):
setattr(module, name, SiLUDataCollector())
else:
replace_silu_with_collector(child)
# Load and prepare model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
replace_silu_with_collector(model)
# Quantization setup
model.eval()
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
model_prepared = torch.quantization.prepare(model)
# Run inference
dummy_input = torch.rand(1, 3, 640, 640)
model_prepared(dummy_input)
# Convert and save
model_quantized = torch.quantization.convert(model_prepared)
torch.save(model_quantized.state_dict(), 'quantized_model.pth')
❯ python3 quntized_model.py
Using cache found in /home/yashas/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2025-1-22 Python-3.12.8 torch-2.5.1+cu124 CPU
Fusing layers...
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape...
/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py:867: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
with amp.autocast(autocast):
Traceback (most recent call last):
File "/home/yashas/yolov5/quntized_model.py", line 54, in <module>
model_prepared(dummy_input)
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py", line 868, in forward
return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py", line 688, in forward
y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/yolo.py", line 270, in forward
return self._forward_once(x, profile, visualize) # single-scale inference, train
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/yolo.py", line 169, in _forward_once
x = m(x) # run
^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py", line 91, in forward_fuse
return self.act(self.conv(x))
^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yashas/yolov5/quntized_model.py", line 18, in forward
self.scale = x.q_scale()
^^^^^^^^^^^
NotImplementedError: Could not run 'aten::q_scale' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::q_scale' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].