RuntimeError: quantized::conv2d_prepack() is missing value for argument 'stride'

I'm trying to use torch.ao.quantization to quantize a model, but it raises an error. If I remove the quantization lines, the script runs fine.

Traceback (most recent call last):
  File "/dev/shm/test/compress.py", line 99, in <module>
    x_hat = compress_decompress(model, x)
  File "/dev/shm/test/compress.py", line 73, in compress_decompress
    compressed = model(x_padded)
  File "/usr/lib/python3.13/site-packages/torch/fx/graph_module.py", line 830, in call_wrapped
    return self._wrapped_call(self, *args, **kwargs)
           ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.13/site-packages/torch/fx/graph_module.py", line 406, in __call__
    raise e
  File "/usr/lib/python3.13/site-packages/torch/fx/graph_module.py", line 393, in __call__
    return super(self.cls, obj).__call__(*args, **kwargs)  # type: ignore[misc]
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.13/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
    return forward_call(*args, **kwargs)
  File "<eval_with_key>.7", line 34, in forward
    conv2d_prepack = torch.ops.quantized.conv2d_prepack(quantize_per_channel, sub);  quantize_per_channel = sub = None
  File "/usr/lib/python3.13/site-packages/torch/_ops.py", line 1158, in __call__
    return self._op(*args, **(kwargs or {}))
           ~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: quantized::conv2d_prepack() is missing value for argument 'stride'. Declaration: quantized::conv2d_prepack(Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> __torch__.torch.classes.quantized.Conv2dPackedParamsBase
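
Note that the failing call in <eval_with_key>.7 passes only two positional arguments (weight and bias), while the declaration wants six. For reference, here is a minimal standalone sketch of a call that satisfies the declaration; the weight here is a hypothetical example, not taken from the model above:

import torch

# Hypothetical per-tensor-quantized 3x3 conv weight, 8 out / 3 in channels.
w = torch.quantize_per_tensor(
    torch.randn(8, 3, 3, 3), scale=0.1, zero_point=0, dtype=torch.qint8
)
packed = torch.ops.quantized.conv2d_prepack(
    w,        # Tensor weight
    None,     # Tensor? bias
    [1, 1],   # int[] stride   <- the argument missing in the lowered graph
    [0, 0],   # int[] padding
    [1, 1],   # int[] dilation
    1,        # int groups
)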

Another way to make it work is to comment out _lower_static_weighted_ref_functional.
If I do that, the printed time is 0.6259967585404714.
If I instead remove the quantization lines, the printed time is 1.8434200982252757.
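
If you want to reproduce that workaround without editing site-packages, a rough diagnostic sketch (assuming the pass can simply be skipped for this experiment; this is not a real fix) is to monkey-patch it to a no-op before convert_fx runs:

import torch.ao.quantization.fx._lower_to_native_backend as _ltnb

# Diagnostic only: replace the functional lowering pass with a no-op so
# convert_fx never reaches the failing conv2d_prepack rewrite.
_ltnb._lower_static_weighted_ref_functional = lambda *args, **kwargs: None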

import os
from time import time

import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
from compressai.zoo import cheng2020_anchor
from PIL import Image
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx

qconfig_mapping = get_default_qconfig_mapping()
device = "cpu"
def pad(x, p):
    h, w = x.size(2), x.size(3)
    new_h = (h + p - 1) // p * p
    new_w = (w + p - 1) // p * p
    padding_left = (new_w - w) // 2
    padding_right = new_w - w - padding_left
    padding_top = (new_h - h) // 2
    padding_bottom = new_h - h - padding_top
    x_padded = F.pad(
        x,
        (padding_left, padding_right, padding_top, padding_bottom),
        mode="constant",
        value=0,
    )
    return x_padded, (padding_left, padding_right, padding_top, padding_bottom)
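# Example: pad(torch.zeros(1, 3, 100, 150), 64) pads to (1, 3, 128, 192) and
# also returns the (left, right, top, bottom) amounts applied.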

def pad_to_multiple_of_64(image_tensor):
    _, _, h, w = image_tensor.shape
    pad_h = (64 - h % 64) % 64
    pad_w = (64 - w % 64) % 64
    if pad_h == 0 and pad_w == 0:
        return image_tensor, (h, w)

    padded = F.pad(image_tensor, (0, pad_w, 0, pad_h), mode="reflect")
    return padded, (h, w)


def unpad(image_tensor, original_size):
    h, w = original_size
    return image_tensor[:, :, :h, :w]


def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    img = Image.open(image_path).convert("RGB")
    x = transform(img).unsqueeze(0)  # [1, 3, H, W]
    return x


def compress_decompress(model, x):
    total = 0.0
    img_list = []
    path = "/home/wzy/Pictures/Kodak-Lossless-True-Color-Image-Suite/PhotoCD_PCD0992"
    for file in os.listdir(path):
        if file.endswith((".jpg", ".jpeg", ".png")):
            img_list.append(file)
    count = 0
    with torch.no_grad():
        for img_name in img_list:
            img_path = os.path.join(path, img_name)
            img = transforms.ToTensor()(Image.open(img_path).convert('RGB')).to(device)
            x = img.unsqueeze(0)
            x_padded, padding = pad(x, 64)
            count += 1
            t = time()
            compressed = model(x_padded)
            t = time() - t
            total += t
            # decompressed_padded = model.decompress(
            #     compressed["strings"], compressed["shape"]
            # )["x_hat"]
    total /= count
    print(f"time: {total}")

_model = cheng2020_anchor(quality=3, pretrained=True)

image_path = "/home/wzy/Pictures/Kodak-Lossless-True-Color-Image-Suite/PhotoCD_PCD0992/23.png"  # replace with your image path
x = preprocess_image(image_path).to(next(_model.parameters()).device)

input_batch, original_size = pad_to_multiple_of_64(x)

model = _model
model.forward = model.compress

# if these lines are removed, the script works
model = prepare_fx(model, qconfig_mapping, example_inputs=input_batch)
model(input_batch)
model = convert_fx(model)

model.eval().to("cpu")

x_hat = compress_decompress(model, x)

Is this a bug, or did I do something wrong?
TIA!

OS: linux 6.15.2
python: 3.13.3
pytorch: 2.7.0

So this is taken from the original args: pytorch/torch/ao/quantization/fx/_lower_to_native_backend.py at ab6cb34480a14f3cf2446519189ee5f0c5e7278d · pytorch/pytorch · GitHub

If the original arg list is empty, there is nothing to get. Can you check the F.conv2d call in your model to make sure it has the full list of args, like: pytorch/torch/nn/modules/conv.py at ab6cb34480a14f3cf2446519189ee5f0c5e7278d · pytorch/pytorch · GitHub
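
In other words (a sketch on my part, with conv_layer standing in for whichever hypothetical module in your model calls the functional API), the lowering pass pulls stride/padding/dilation/groups out of the traced F.conv2d node's arguments, so a call that stops after bias leaves nothing for conv2d_prepack to read:

import torch.nn.functional as F

# Short call — no stride/padding/dilation/groups for the lowering pass to copy:
# out = F.conv2d(x, conv_layer.weight, conv_layer.bias)

# Full call — every argument quantized::conv2d_prepack needs is present:
out = F.conv2d(
    x,
    conv_layer.weight,
    conv_layer.bias,
    stride=conv_layer.stride,
    padding=conv_layer.padding,
    dilation=conv_layer.dilation,
    groups=conv_layer.groups,
)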