I tried to quantize my model, but after quantization the model seems to take more memory and more time than before. What should I do?

I am trying to learn model quantization to reduce the load on my hardware, but when I ran the quantized model I found that both memory usage and inference time were worse than before. What should I do?

Blockquote
#!/usr/bin/env python

# coding=utf8

import numpy as np
import cv2
from blazeface import BlazeFace,BlazeBlock
import torch
import time

import torch.ao.quantization.quantize_fx as quantize_fx

# Helper: resize while keeping aspect ratio (letterbox padding)

def resize_img_keep_ratio(img, target_size):
    """Resize ``img`` to ``target_size`` while preserving aspect ratio.

    The image is scaled by the smaller of the height/width ratios so it fits
    inside the target box, then padded with black borders (letterboxing) so
    the output has exactly the requested size.

    Args:
        img: H x W x C image array (e.g. from ``cv2.imread`` / ``cap.read``).
        target_size: (height, width) of the output image.

    Returns:
        The resized, zero-padded image of shape ``(target_size[0], target_size[1], C)``.
    """
    old_size = img.shape[0:2]  # (height, width) of the input
    # Use the smaller ratio so the scaled image fits entirely in the target box.
    ratio = min(float(target_size[i]) / old_size[i] for i in range(len(old_size)))
    new_size = tuple(int(dim * ratio) for dim in old_size)
    # cv2.resize takes (width, height), the reverse of numpy's (h, w) order.
    img = cv2.resize(img, (new_size[1], new_size[0]))
    pad_w = target_size[1] - new_size[1]  # total horizontal padding needed
    pad_h = target_size[0] - new_size[0]  # total vertical padding needed
    top, bottom = pad_h // 2, pad_h - (pad_h // 2)
    left, right = pad_w // 2, pad_w - (pad_w // 2)
    # Pad with solid black so the network sees a constant border.
    return cv2.copyMakeBorder(img, top, bottom, left, right,
                              cv2.BORDER_CONSTANT, None, (0, 0, 0))

def plot_detections(img, detections):
    """Draw one bounding box per detection onto ``img`` in place.

    Args:
        img: H x W x C image to draw on (modified in place).
        detections: tensor or array whose columns 0..3 are normalized
            (ymin, xmin, ymax, xmax) in [0, 1]. A 1-D input is treated as
            a single detection.
    """
    if isinstance(detections, torch.Tensor):
        detections = detections.cpu().numpy()

    # Promote a single detection vector to a 1-row matrix.
    if detections.ndim == 1:
        detections = np.expand_dims(detections, axis=0)

    for i in range(detections.shape[0]):
        # Scale normalized coordinates to pixel coordinates.
        ymin = int(detections[i, 0] * img.shape[0])
        xmin = int(detections[i, 1] * img.shape[1])
        ymax = int(detections[i, 2] * img.shape[0])
        xmax = int(detections[i, 3] * img.shape[1])

        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (170, 234, 242), 5,
                      lineType=cv2.LINE_AA)
        # NOTE: keypoint drawing (6 landmarks at columns 4..15) was present
        # here as commented-out matplotlib code and is intentionally omitted.

# ---------------------------------------------------------------------------
# Model setup
# ---------------------------------------------------------------------------
# NOTE(review): quantized inference only runs on the CPU backends
# (fbgemm/x86, qnnpack), so force CPU even when CUDA is available.
gpu = torch.device("cpu")

front_net = BlazeFace().to(gpu)
front_net.load_weights("blazeface.pth")
front_net.load_anchors("anchors.npy")

back_net = BlazeFace(back_model=True).to(gpu)
back_net.load_weights("blazefaceback.pth")
back_net.load_anchors("anchorsback.npy")

# ---------------------------------------------------------------------------
# Post-training static quantization (eager mode)
# ---------------------------------------------------------------------------
# NOTE(review): the original torch.quantization.quantize_dynamic(...) call was
# removed. Dynamic quantization only supports Linear/RNN-style layers, so on a
# Conv-heavy model like BlazeFace it quantizes nothing and just adds wrapper
# overhead -- a likely cause of the "slower and bigger after quantization"
# symptom. Its result was also immediately overwritten below (dead code).
back_net.eval()  # model must be in eval mode before prepare()/convert()

# Choose the qconfig matching the CPU you run on: 'x86' (fbgemm) for desktop
# CPUs, 'qnnpack' for ARM. A mismatched backend can make inference SLOWER.
back_net.qconfig = torch.quantization.get_default_qconfig('x86')
# back_net.qconfig = torch.quantization.get_default_qconfig('qnnpack')  # ARM

# TODO(review): for a real speed-up, fuse Conv+BN+ReLU before prepare(), e.g.
# torch.quantization.fuse_modules(back_net, [['conv', 'bn', 'relu'], ...])
# using the actual submodule names inside BlazeBlock.

# Insert observers, calibrate, then convert to the quantized model.
bn_prepare = torch.quantization.prepare(back_net)
# NOTE(review): a single random tensor is a very weak calibration set; feed
# several hundred representative frames here for usable accuracy.
bn_prepare(torch.randn(1, 3, 256, 256).to(gpu))
model_int8 = torch.quantization.convert(bn_prepare.to(gpu))

# Optionally change the detection thresholds.
front_net.min_score_thresh = 0.75
front_net.min_suppression_threshold = 0.3

# ---------------------------------------------------------------------------
# Video capture loop
# ---------------------------------------------------------------------------
url = "rtsp://192.168.20.253/399380.sdp"
url = 0  # override: use the local camera (device 0) instead of the RTSP feed
cap = cv2.VideoCapture(url)
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

cv2.namedWindow("frame")  # create the window once, not on every iteration
time123 = time.time()
while True:
    if not cap.isOpened():
        print('can not open camera')
        break
    print(time.time() - time123)  # wall time of the previous iteration
    time123 = time.time()

    ret, frame = cap.read()
    if not ret:  # frame grab failed: skip to the next iteration
        continue

    frame = resize_img_keep_ratio(frame, (256, 256))

    # HWC uint8 frame -> NCHW float32 tensor expected by the network.
    dd = torch.from_numpy(frame).unsqueeze(0).permute(0, 3, 1, 2).to(torch.float32).to(gpu)
    time1 = time.time()
    detect = model_int8(dd)
    # print(time.time() - time1)
    # front_detections = back_net.predict_on_image(frame)
    # plot_detections(frame, front_detections)

    cv2.imshow('frame', frame)

    # Press 'q' to quit; the 0xFF mask strips modifier-key bits from waitKey.
    mykey = cv2.waitKey(1)
    if mykey & 0xFF == ord('q'):
        break

# Release resources.
cap.release()
cv2.destroyAllWindows()

Blockquote