# I tried to quantize my model, but the quantized model uses more memory and time. What should I do?

I am trying to learn model quantization to reduce the load on my hardware, but when I ran the quantized model, both memory usage and inference time were worse than before. What should I do?

```python
#!/usr/bin/env python

# coding=utf8

import numpy as np
import cv2
from blazeface import BlazeFace,BlazeBlock
import torch
import time

import torch.ao.quantization.quantize_fx as quantize_fx

# Helper: resize an image to a target size while keeping its aspect ratio

def resize_img_keep_ratio(img, target_size):
    """Resize `img` to fit inside `target_size` while keeping aspect ratio.

    The image is scaled by the smaller of the two height/width ratios, then
    letterboxed with black borders so the output is exactly `target_size`.

    Args:
        img: source image as an H x W (x C) numpy array.
        target_size: desired output size as (height, width).

    Returns:
        A new image of exactly `target_size`, padded with black.
    """
    old_size = img.shape[0:2]  # original (height, width)
    # Scale factor that fits the image inside the target in both dimensions.
    ratio = min(float(target_size[i]) / old_size[i] for i in range(len(old_size)))
    new_size = tuple(int(dim * ratio) for dim in old_size)
    # cv2.resize takes (width, height), the reverse of numpy's (h, w).
    img = cv2.resize(img, (new_size[1], new_size[0]))
    pad_w = target_size[1] - new_size[1]  # total horizontal padding
    pad_h = target_size[0] - new_size[0]  # total vertical padding
    # BUGFIX: top/bottom/left/right were undefined in the original, which
    # raised NameError. Split the padding evenly so the image stays centered.
    top, bottom = pad_h // 2, pad_h - pad_h // 2
    left, right = pad_w // 2, pad_w - pad_w // 2
    return cv2.copyMakeBorder(img, top, bottom, left, right,
                              cv2.BORDER_CONSTANT, None, (0, 0, 0))

def plot_detections(img, detections):
    """Draw one bounding box per detection row onto `img` in place.

    Each detection row starts with normalized [ymin, xmin, ymax, xmax]
    coordinates; they are scaled by the image size before drawing.
    """
    if isinstance(detections, torch.Tensor):
        detections = detections.cpu().numpy()

    # Promote a single detection vector to a 1-row matrix.
    if detections.ndim == 1:
        detections = np.expand_dims(detections, axis=0)

    # print("Found %d faces" % detections.shape[0])

    height, width = img.shape[0], img.shape[1]
    for det in detections:
        ymin = int(det[0] * height)
        xmin = int(det[1] * width)
        ymax = int(det[2] * height)
        xmax = int(det[3] * width)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (170, 234, 242), 5,
                      lineType=cv2.LINE_AA)

# BUGFIX: the original used typographic quotes (“ ”), which is a SyntaxError.
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# PyTorch quantized kernels run on CPU only, so force CPU regardless of CUDA.
gpu = torch.device("cpu")

# front_net = BlazeFace().to(gpu)

back_net = BlazeFace(back_model=True).to(gpu)

# NOTE(review): dynamic quantization only replaces torch.nn.Linear (and
# LSTM/RNN-family) modules. Conv2d, ReLU and MaxPool2d in qconfig_spec are
# silently ignored — BlazeFace is almost entirely convolutions, which is why
# the "quantized" model shows no memory/speed benefit (the dequant/quant
# overhead can even make it slower). For a conv net, use static post-training
# quantization or FX graph-mode quantization (quantize_fx is already
# imported) with a calibration pass instead.
# BUGFIX: the weight dtype for dynamic quantization must be torch.qint8
# (torch.quint8 is not a supported dynamic-quantization dtype).
model_int8 = torch.quantization.quantize_dynamic(
    back_net,                        # the original model
    qconfig_spec={torch.nn.Linear},  # only Linear can be dynamically quantized
    dtype=torch.qint8,               # target dtype for quantized weights
)

# url = "rtsp://192.168.20.253/399380.sdp"

url = 0  # device index 0 = default local camera
cap = cv2.VideoCapture(url)
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

last_tick = time.time()
while True:
    if not cap.isOpened():
        print('can not open camera')
        break
    # Rough per-iteration wall-clock time (inverse FPS).
    print(time.time() - last_tick)
    last_tick = time.time()

    # BUGFIX: the original never called cap.read(), so `ret` and `frame`
    # were undefined (NameError). Grab a frame, skip the iteration on failure.
    ret, frame = cap.read()
    if not ret:
        continue

    frame = resize_img_keep_ratio(frame, (256, 256))

    # HWC uint8 frame -> NCHW float32 tensor on the target device.
    dd = torch.from_numpy(frame).unsqueeze(0).permute(0, 3, 1, 2).to(torch.float32).to(gpu)
    time1 = time.time()
    detect = model_int8(dd)
    # print(time.time() - time1)
    # front_detections = back_net.predict_on_image(frame)
    # plot_detections(frame, front_detections)

    cv2.namedWindow("frame")
    cv2.imshow('frame', frame)

    # Mask to the low byte so modifier/function keys don't break the 'q' test.
    mykey = cv2.waitKey(1)
    if mykey & 0xFF == ord('q'):
        break

# Release resources.
cap.release()
cv2.destroyAllWindows()

```