The code aims to collect data about SiLU (Sigmoid Linear Unit) activation layers in a quantized YOLOv5 model. Specifically, it: (1) creates a custom SiLUDataCollector to replace SiLU layers; (2) captures quantization parameters (scale and zero point); (3) saves the quantized inputs …

Yashas_Hittalmakki · January 26, 2025, 4:44am

import torch
import torch.nn as nn
import torch.quantization
import numpy as np

class SiLUDataCollector(nn.Module):
    """Drop-in SiLU replacement that records its quantized inputs to disk.

    When the input is a quantized tensor, the module:

    * remembers the scale and zero point of the first quantized input it sees,
    * saves the raw integer representation and the dequantized values as
      ``.npy`` files,
    * writes the sigmoid of the dequantized values to a text file, one value
      per line,
    * returns SiLU of the input, re-quantized with the recorded parameters.

    When the input is an ordinary (non-quantized) tensor, e.g. while a
    prepared model is being calibrated, nothing is recorded and a plain
    floating-point SiLU is returned. Without this guard ``q_scale()`` raises
    ``NotImplementedError`` on float tensors.

    Output files are named after ``layer_id`` and are overwritten on every
    quantized forward call.
    """

    def __init__(self):
        super().__init__()
        # id() of the instance is used only to build unique output file names.
        self.layer_id = id(self)
        # Quantization parameters, filled in on the first quantized input.
        self.scale = None
        self.zero_point = None

    def forward(self, x):
        """Apply SiLU to ``x``, dumping debug data when ``x`` is quantized.

        Args:
            x: Input tensor, either float or per-tensor quantized
                (``q_scale()`` / ``q_zero_point()`` must be available for
                quantized inputs).

        Returns:
            ``x * sigmoid(x)``. The result is a float tensor for float input
            and a quantized tensor (same dtype as ``x``) for quantized input.
        """
        if not x.is_quantized:
            # Float path (calibration / un-converted model): there are no
            # quantization parameters to capture yet, so just behave like SiLU.
            return x * torch.sigmoid(x)

        # Capture quantization parameters once, from the first quantized input.
        if self.scale is None:
            self.scale = x.q_scale()
            self.zero_point = x.q_zero_point()

        # Save quantized input data.
        int_values = x.int_repr()
        np.save(f'quantized_input_{self.layer_id}.npy', int_values.cpu().numpy())

        # For reference, save dequantized values.
        dequantized = (int_values.float() - self.zero_point) * self.scale
        np.save(f'dequantized_input_{self.layer_id}.npy', dequantized.cpu().numpy())

        # Calculate sigmoid on dequantized values and dump one value per line.
        sigmoid_values = torch.sigmoid(dequantized)
        with open(f'sigmoid_values_{self.layer_id}.txt', 'w') as f:
            f.writelines(f"{value}\n" for value in sigmoid_values.flatten().tolist())

        # A quantized tensor cannot be multiplied by a float tensor directly,
        # so compute SiLU in float and re-quantize the result.
        # NOTE(review): the input's scale/zero point are reused for the output;
        # confirm this range is adequate for the downstream layers.
        silu = dequantized * sigmoid_values
        return torch.quantize_per_tensor(silu, self.scale, self.zero_point, x.dtype)

def replace_silu_with_collector(module):
    """Swap every ``nn.SiLU`` found under ``module`` for a ``SiLUDataCollector``.

    The module tree is walked recursively and modified in place; each SiLU
    child is replaced by a fresh collector instance. Nothing is returned.
    """
    for child_name, submodule in module.named_children():
        if not isinstance(submodule, nn.SiLU):
            # Not a SiLU itself: keep searching inside it.
            replace_silu_with_collector(submodule)
            continue
        setattr(module, child_name, SiLUDataCollector())

# Load the pretrained YOLOv5s model from torch.hub and swap its SiLU
# activations for SiLUDataCollector instances (the swap happens in place).
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
replace_silu_with_collector(model)

# Eager-mode static quantization setup: put the model in eval mode, attach the
# default 'fbgemm' qconfig, and let prepare() build the calibration model.
model.eval()
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
model_prepared = torch.quantization.prepare(model)

# Calibration pass on a single random 1x3x640x640 tensor.
# NOTE(review): in the posted traceback this is the call that fails -- the
# collector's forward reaches x.q_scale() while its input is still on the
# plain 'CPU' backend, i.e. not a quantized tensor at this stage.
dummy_input = torch.rand(1, 3, 640, 640)
model_prepared(dummy_input)

# Convert the calibrated model and save its state dict.
# NOTE(review): this script adds no QuantStub/DeQuantStub; presumably the
# converted model needs them for activations to actually arrive quantized --
# confirm against the eager-mode quantization documentation.
model_quantized = torch.quantization.convert(model_prepared)
torch.save(model_quantized.state_dict(), 'quantized_model.pth')

❯ python3 quntized_model.py 
Using cache found in /home/yashas/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2025-1-22 Python-3.12.8 torch-2.5.1+cu124 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py:867: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
  with amp.autocast(autocast):
Traceback (most recent call last):
  File "/home/yashas/yolov5/quntized_model.py", line 54, in <module>
    model_prepared(dummy_input)
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py", line 868, in forward
    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py", line 688, in forward
    y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
                                                                                          ^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/yolo.py", line 270, in forward
    return self._forward_once(x, profile, visualize)  # single-scale inference, train
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/yolo.py", line 169, in _forward_once
    x = m(x)  # run
        ^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/.cache/torch/hub/ultralytics_yolov5_master/models/common.py", line 91, in forward_fuse
    return self.act(self.conv(x))
           ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/yashas/yolov5/quntized_model.py", line 18, in forward
    self.scale = x.q_scale()
                 ^^^^^^^^^^^
NotImplementedError: Could not run 'aten::q_scale' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::q_scale' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

jerryzh168 · April 6, 2025, 12:24am

It looks like you are using our older quantization flow. Please take a look at our new PT2E flow: Quantization — PyTorch main documentation

Also see torchao, which focuses on LLM GPU quantization: GitHub - pytorch/ao: PyTorch native quantization and sparsity for training and inference