I want to use the quantization on my custom LLM model, in my my quantization.py
import torch
from torch.utils.data import DataLoader, TensorDataset
import ammo.torch.quantization as atq
import numpy as np
config = atq.FP8_DEFAULT_CFG
model = torch.load("model.pt")
calibration_Data = torch.from_numpy(np.load("data.npy"))
calib_dataloader= DataLoader(calibration_Data, batch_size=10, shuffle=False)
def calibrate_loop():
for data_c in calib_dataloader:
model(input_1=data_c)
with torch.no_grad():
atq.quantize(model, config, forward_loop=calibrate_loop)
I got the following output/error, any ideas how to fix that?
Replaced 54 modules to quantized modules
Traceback (most recent call last):
File "quantization.py", line 22, in
atq.quantize(model, config, forward_loop=calibrate_loop)
File "/lib/python3.10/dist-packages/ammo/torch/quantization/model_quant.py", line 112, in quantize
calibrate(model, config["algorithm"], forward_loop=forward_loop)
File "ammo/torch/quantization/model_calib.py", line 63, in ammo.torch.quantization.model_calib.calibrate
File "/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "ammo/torch/quantization/model_calib.py", line 73, in ammo.torch.quantization.model_calib.max_calibrate
File "quantization.py", line 19, in calibrate_loop
model(input_1=data_c)
File "/lib/python3.10/dist-packages/torch/fx/graph_module.py", line 736, in call_wrapped
return self._wrapped_call(self, *args, **kwargs)
File "/lib/python3.10/dist-packages/torch/fx/graph_module.py", line 315, in **call**
raise e
File "/lib/python3.10/dist-packages/torch/fx/graph_module.py", line 302, in **call**
return super(self.cls, obj).**call**(*args, **kwargs)
File "/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1510, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1519, in _call_impl
return forward_call(*args, **kwargs)
File "<eval_with_key>.1 from <eval_with_key>.0:10 in forward", line 1625, in forward
File "/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1510, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1519, in _call_impl
return forward_call(*args, **kwargs)
File "/lib/python3.10/dist-packages/onnx2torch/node_converters/reduce.py", line 158, in forward
return torch.sum(input_tensor, dim=self._axes, keepdim=self._keepdims)
TypeError: sum() received an invalid combination of arguments - got (Tensor, keepdim=int, dim=list), but expected one of:
* (Tensor input, *, torch.dtype dtype)
didn't match because some of the keywords were incorrect: keepdim, dim
* (Tensor input, tuple of ints dim, bool keepdim, *, torch.dtype dtype, Tensor out)
* (Tensor input, tuple of names dim, bool keepdim, *, torch.dtype dtype, Tensor out)
System Info
- Nvidia GPUs H100
- python3.10
- ammo version 0.7.3
- torch 2.2.0a0+81ea7a4