I’m trying to load my model like you said, but this time I get an error when I try to use the model to predict. I load the model with this code:
class QuantDeQuantNet(torch.nn.Module):
    """Wrap a float model between quantization entry/exit stubs.

    The stubs mark where tensors enter and leave the quantized region:
    ``QuantStub`` becomes a real quantize op and ``DeQuantStub`` a
    dequantize op only after ``torch.quantization.prepare``/``convert``;
    before that they are identity pass-throughs.
    """

    def __init__(self, model):
        super().__init__()
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()
        self.model = model

    def forward(self, x):
        # quantize -> wrapped model -> dequantize, as one expression
        return self.dequant(self.model(self.quant(x)))
class Solver:
    """Build a statically quantized DeepLabV3-ResNet50 segmentation model
    and load trained float weights for CPU inference.

    Order matters: the float checkpoint must be loaded into the float
    model BEFORE prepare/convert — a converted model stores packed
    quantized parameters under different state-dict keys, so loading a
    float state dict into it cannot work.
    """

    def __init__(self, weights_filename):
        # Set device before building the model so helpers may rely on it.
        self.device = 'cpu'
        self.model = self.init_model(weights_filename)

    def createDeepLabv3(self, outputchannels=2, state_dict=None):
        """Create DeepLabV3-ResNet50 with a custom head and statically quantize it.

        Args:
            outputchannels: number of segmentation classes for both the
                main and the auxiliary classifier heads.
            state_dict: optional float-model state dict; if given, it is
                loaded into the float model before quantization.

        Returns:
            The converted (statically quantized) model.
        """
        model = models.segmentation.deeplabv3_resnet50(pretrained=True, progress=True, aux_loss=True)
        model.classifier = DeepLabHead(2048, outputchannels)
        # Use outputchannels here too (was hard-coded to 2, inconsistent
        # with the main head).
        model.aux_classifier[-1] = nn.Conv2d(256, outputchannels, kernel_size=(1, 1), stride=(1, 1))

        model_fp32 = QuantDeQuantNet(model).to('cpu')
        if state_dict is not None:
            # Load float weights into the float model, not the quantized one.
            model_fp32.model.load_state_dict(state_dict)
        # Static quantization requires eval mode (freezes batch-norm stats).
        model_fp32.eval()

        backend = "fbgemm"
        model_fp32.qconfig = torch.quantization.get_default_qconfig(backend)
        torch.backends.quantized.engine = backend

        # NOTE(review): the stock torchvision ResNet uses `out += identity`,
        # which has no QuantizedCPU kernel — that is the reported
        # `aten::add.out` NotImplementedError. Eager-mode static quantization
        # needs a quantization-ready backbone (e.g. torchvision.models.
        # quantization.*, which routes adds through nn.quantized.
        # FloatFunctional) — confirm before relying on this path.
        model_prepared = torch.quantization.prepare(model_fp32, inplace=False)
        # Calibrate with at least one representative batch so the observers
        # record activation ranges; replace the zeros with real data for
        # meaningful scales/zero-points.
        with torch.no_grad():
            model_prepared(torch.zeros(1, 3, 224, 224))
        model_static_quantized = torch.quantization.convert(model_prepared, inplace=False)
        return model_static_quantized

    def init_model(self, weights_filename):
        """Load the float checkpoint, then build the quantized model from it.

        Args:
            weights_filename: path to a float-model state-dict checkpoint.

        Returns:
            The quantized model with the checkpoint weights baked in.
        """
        if torch.cuda.is_available():
            checkpoint = torch.load(weights_filename)
        else:
            checkpoint = torch.load(weights_filename, map_location=torch.device('cpu'))
        # Quantize AFTER loading: the checkpoint holds float parameters.
        model = self.createDeepLabv3(state_dict=checkpoint)
        print(model)
        return model
And I get this huge error:
NotImplementedError: Could not run ‘aten::add.out’ with arguments from the ‘QuantizedCPU’ backend. This could be because the operator doesn’t exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit Internal Login for possible resolutions. ‘aten::add.out’ is only available for these backends: [Dense, FPGA, Conjugate, VmapMode, FuncTorchGradWrapper, Functionalize, MPS, IPU, UNKNOWN_TENSOR_TYPE_ID, QuantizedXPU, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, SparseCPU, SparseCUDA, SparseHIP, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, SparseXPU, UNKNOWN_TENSOR_TYPE_ID, SparseVE, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, NestedTensorCUDA, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID, UNKNOWN_TENSOR_TYPE_ID].
CPU: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterCPU.cpp:37386 [kernel]
CUDA: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterCUDA.cpp:51977 [kernel]
Meta: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterMeta.cpp:31637 [kernel]
MkldnnCPU: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterMkldnnCPU.cpp:690 [kernel]
SparseCPU: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterSparseCPU.cpp:1858 [kernel]
SparseCUDA: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterSparseCUDA.cpp:2018 [kernel]
SparseCsrCPU: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterSparseCsrCPU.cpp:1507 [kernel]
SparseCsrCUDA: registered at C:\cb\pytorch_1000000000000\work\build\aten\src\ATen\RegisterSparseCsrCUDA.cpp:1657 [kernel]
BackendSelect: fallthrough registered at C:\cb\pytorch_1000000000000\work\aten\src\ATen\core\BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at C:\cb\pytorch_1000000000000\work\aten\src\ATen\core\PythonFallbackKernel.cpp:133 [backend fallback]
Named: fallthrough registered at C:\cb\pytorch_1000000000000\work\aten\src\ATen\core\NamedRegistrations.cpp:11 [kernel]
Conjugate: registered at C:\cb\pytorch_1000000000000\work\aten\src\ATen\ConjugateFallback.cpp:18 [backend fallback]
Negative: registered at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at C:\cb\pytorch_1000000000000\work\aten\src\ATen\ZeroTensorFallback.cpp:86 [backend fallback]
I tried running the model on both CPU and CUDA; neither helps.
I also noticed that when quantizing my other model, these two blocks
(quant_m): Quantize(scale=tensor([0.0157]), zero_point=tensor([64]), dtype=torch.quint8)
(dequant_m): DeQuantize()
are placed at the end of the model:
QuantizedSiameseNetwork(
(backbone): Backbone(
(conv1): QuantizedConv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), scale=0.03301658481359482, zero_point=68, padding=(1, 1))
(pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): QuantizedConv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.04873496666550636, zero_point=46, padding=(1, 1))
(conv3): QuantizedConv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.06440312415361404, zero_point=85, padding=(1, 1))
(conv4): QuantizedConv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.04743369296193123, zero_point=93, padding=(1, 1))
(fc1): QuantizedLinear(in_features=3136, out_features=1024, scale=0.015503729693591595, zero_point=72, qscheme=torch.per_channel_affine)
(fc2): QuantizedLinear(in_features=1024, out_features=2, scale=0.024672511965036392, zero_point=80, qscheme=torch.per_channel_affine)
)
(quant_m): Quantize(scale=tensor([0.0157]), zero_point=tensor([64]), dtype=torch.quint8)
(dequant_m): DeQuantize()
)
But in my current model, the one I’m trying to quantize, these two blocks are placed at the beginning of the model:
QuantDeQuantNet(
(quant): Quantize(scale=tensor([0.0079]), zero_point=tensor([0]), dtype=torch.quint8)
(dequant): DeQuantize()
(model): DeepLabV3(
(backbone): IntermediateLayerGetter(