I’ve been having an issue with `torch.quantization.convert` after performing quantization-aware training (QAT):

I modified the model (face detector) to do QAT by adding the lines

    net.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
    torch.quantization.prepare_qat(net, inplace=True)

in train.py, and the QuantStub/DeQuantStub in the forward() of Mb_Tiny_RFB() (vision/nn/mb_tiny_rfb.py), following this tutorial, and then saved the model via torch.save(self.state_dict(), path).

# Excerpt of train.py as pasted in the post. Several lines are missing from the
# paste (see the unclosed calls flagged below), so this fragment is not runnable
# as shown.

# Use the first CUDA device only when one is available AND args.use_cuda is set;
# otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() and args.use_cuda else "cpu")

if __name__ == '__main__':
    timer = Timer()
    create_net = create_Mb_Tiny_RFB_fd

    train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, args.overlap_threshold)

    test_transform = TestTransform(config.image_size, config.image_mean_test, config.image_std)

    # NOTE(review): the visible loop never appends `dataset` to `datasets`, so
    # ConcatDataset below would receive an empty list, and `num_classes` is only
    # bound for the 'voc' dataset type — presumably the append and the other
    # branches were dropped from the paste.
    datasets = []
    for dataset_path in args.datasets:
        if args.dataset_type == 'voc':
            dataset = VOCDataset(dataset_path, transform=train_transform,
                                 target_transform=target_transform, img_size = config.image_size)
            label_file = os.path.join(args.checkpoint_folder, "voc-model-labels.txt")
            store_labels(label_file, dataset.class_names)
            num_classes = len(dataset.class_names)
    train_dataset = ConcatDataset(datasets)

    train_loader = DataLoader(train_dataset, args.batch_size,
                              shuffle=True, pin_memory=True)
    val_dataset = VOCDataset(args.validation_dataset, transform=test_transform,
                                 target_transform=target_transform, is_test=True)
    val_loader = DataLoader(val_dataset, args.batch_size,
    # NOTE(review): the DataLoader call above is left unclosed; its remaining
    # arguments are missing from the paste.

    net = create_net(num_classes)

    # NOTE(review): min_loss is not read anywhere in this excerpt — confirm how
    # (and whether) the negative starting value is used.
    min_loss = -10000.0
    last_epoch = -1

    # Per-group learning rates fall back to args.lr when not given explicitly.
    base_net_lr = args.base_net_lr if args.base_net_lr is not None else args.lr
    extra_layers_lr = args.extra_layers_lr if args.extra_layers_lr is not None else args.lr
    params = [
            {'params': net.base_net.parameters(), 'lr': base_net_lr},
            {'params': itertools.chain(
            ), 'lr': extra_layers_lr},
            {'params': itertools.chain(
    # NOTE(review): the parameter-group list is cut off here; the arguments of
    # both itertools.chain(...) calls and the closing brackets are missing.

    if args.resume:
        logging.info(f"Resume from the model {args.resume}")

    criterion = MultiboxLoss(config.priors, neg_pos_ratio=3,
                             center_variance=0.1, size_variance=0.2, device=DEVICE)
    optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum,
    # NOTE(review): the SGD call above is left unclosed in the paste.
    # Attach the default QAT qconfig for the 'fbgemm' backend, then let
    # prepare_qat() modify `net` in place for quantization-aware training.
    # NOTE(review): prepare_qat() is meant to run on a model in training mode,
    # and the tutorial fuses modules first — neither step is visible here.
    net.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
    torch.quantization.prepare_qat(net, inplace=True)


    # QAT training loop. After the first few epochs the observers and the
    # batch-norm statistics are frozen (same schedule and calls as the PyTorch
    # static-quantization tutorial's QAT recipe), and on validation epochs the
    # fake-quantized model is converted to a real int8 model.
    for epoch in range(last_epoch + 1, args.num_epochs):
        train(train_loader, net, criterion, optimizer,
              device=DEVICE, debug_steps=args.debug_steps, epoch=epoch)
        if epoch > 3:
            # Freeze quantizer parameters
            net.apply(torch.quantization.disable_observer)
        if epoch > 2:
            # Freeze batch norm mean and variance estimates towards the end of training to better match inference numerics.
            net.apply(torch.nn.intrinsic.qat.freeze_bn_stats)

        if epoch % args.validation_epochs == 0 or epoch == args.num_epochs - 1:
            logging.info("lr rate :{}".format(optimizer.param_groups[0]['lr']))
            val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, DEVICE)

            # convert() is applied to an eval-mode model on the CPU. With
            # inplace=False it first copy.deepcopy()s the model, so every
            # attribute stored on `net` must be deep-copyable; the
            # "cannot pickle 'module' object" error reported for this line
            # comes from that copy, not from the quantization itself.
            quant_model = torch.quantization.convert(net.eval().cpu(), inplace=False) # <-- error happens here
            # .cpu() moved `net` itself, so put it back on the training device.
            # NOTE(review): assumes train() switches the model back to training
            # mode at the start of the next epoch — confirm.
            net.to(DEVICE)

            model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{val_loss}.pth")

When I tried to call quantization.convert() before saving, I got the error:

Traceback (most recent call last):
  File "train.py", line 432, in <module>
    quant_model = torch.quantization.convert(net.module.eval().cpu(), inplace=False)
  File "/home/user/anaconda3/envs/FaceDetector/lib/python3.8/site-packages/torch/quantization/quantize.py", line 299, in convert
    module = copy.deepcopy(module)
  File "/home/user/anaconda3/envs/FaceDetector/lib/python3.8/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/home/user/anaconda3/envs/FaceDetector/lib/python3.8/copy.py", line 270, in _reconstruct
    state = deepcopy(state, memo)
  File "/home/user/anaconda3/envs/FaceDetector/lib/python3.8/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/home/user/anaconda3/envs/FaceDetector/lib/python3.8/copy.py", line 230, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/home/user/anaconda3/envs/FaceDetector/lib/python3.8/copy.py", line 161, in deepcopy
    rv = reductor(4)
TypeError: cannot pickle 'module' object

So instead I tried to load the QAT’d parameters into a simpler, flattened version of the model and then tried converting again, but got the same error:

import torchvision
from torch import nn
from vision.utils import box_utils
from vision.ssd.config.fd_config import define_img_size
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd
import torch.nn.functional as F
import cv2
import numpy as np

# NOTE(review): `torch` itself is not among the imports shown in this snippet
# even though torch.quantization is used below — add `import torch`.
class_names = ['background', 'face']
net_1 = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device='cpu')
# The checkpoint was saved with state_dict() from a prepare_qat()'d model, so
# the fresh model is prepared the same way before loading; otherwise the
# fake-quantize/observer entries in the checkpoint should have no matching keys.
# NOTE(review): prepare_qat() is meant for a model in training mode; whether
# is_test=True leaves net_1 in eval mode is not visible here — confirm.
net_1.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
torch.quantization.prepare_qat(net_1, inplace=True)

# load definition: self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))
net_1.load(model_path) # load the previously QAT'd model (without quantisation conversion)

class SimpleNet(nn.Module):
    """Flattened SSD-style detector assembled from the parts of an existing net.

    The backbone is split into four consecutive chunks. After each of the
    first three chunks, and after ``extras`` at the end, one classification
    header and one regression header are applied to the current feature map.
    The per-stage outputs are concatenated, the confidences go through a
    softmax, and the locations are decoded into corner-form boxes.

    Args:
        base_net: Sliceable container of backbone layers (sliced at 8, 11, 13).
        regression_headers: Indexable collection of four box-regression heads.
        classification_headers: Indexable collection of four class heads.
        extras: Module applied after the last backbone chunk.
        priors: Prior boxes passed to ``box_utils.convert_locations_to_boxes``.
        config: Accepted for call compatibility but intentionally not stored
            (see the note in ``__init__``).
    """

    def __init__(self, base_net, regression_headers, classification_headers, extras, priors, config):
        super(SimpleNet, self).__init__()
        self.backbone0 = base_net[:8]
        self.backbone1 = base_net[8:11]
        self.backbone2 = base_net[11:13]
        self.last_chunk = base_net[13:]
        self.regression_headers0 = regression_headers[0]
        self.regression_headers1 = regression_headers[1]
        self.regression_headers2 = regression_headers[2]
        self.regression_headers3 = regression_headers[3]
        self.classification_headers0 = classification_headers[0]
        self.classification_headers1 = classification_headers[1]
        self.classification_headers2 = classification_headers[2]
        self.classification_headers3 = classification_headers[3]
        self.extras = extras
        self.num_classes = 2
        self.priors = priors
        # `config` is deliberately NOT kept as an attribute.
        # torch.quantization.convert(..., inplace=False) deep-copies the model,
        # and copy.deepcopy() fails with "cannot pickle 'module' object" when
        # the instance holds a reference to a Python module. `config` is
        # presumably such a module (vision.ssd.config.fd_config) — confirm.
        # forward() only needs the two variance constants, which are inlined.
        self.last_op = nn.Softmax(dim=-1)

    def _head_outputs(self, x, classification_header, regression_header):
        """Apply one pair of heads to ``x`` and flatten both outputs per box."""
        confidence = classification_header(x)
        confidence = confidence.permute(0, 2, 3, 1).contiguous()
        confidence = confidence.view(confidence.size(0), -1, self.num_classes)
        location = regression_header(x)
        location = location.permute(0, 2, 3, 1).contiguous()
        location = location.view(location.size(0), -1, 4)
        return confidence, location

    def forward(self, x):
        """Return ``(confidences, boxes)`` for the input batch ``x``."""
        confidences = []
        locations = []

        x = self.backbone0(x)
        confidence, location = self._head_outputs(
            x, self.classification_headers0, self.regression_headers0)
        confidences.append(confidence)
        locations.append(location)

        x = self.backbone1(x)
        confidence, location = self._head_outputs(
            x, self.classification_headers1, self.regression_headers1)
        confidences.append(confidence)
        locations.append(location)

        x = self.backbone2(x)
        confidence, location = self._head_outputs(
            x, self.classification_headers2, self.regression_headers2)
        confidences.append(confidence)
        locations.append(location)

        # Call the module rather than .forward() so registered hooks still run.
        x = self.last_chunk(x)

        x = self.extras(x)
        confidence, location = self._head_outputs(
            x, self.classification_headers3, self.regression_headers3)
        confidences.append(confidence)
        locations.append(location)

        confidences = torch.cat(confidences, 1)
        confidences = self.last_op(confidences)
        locations = torch.cat(locations, 1)

        # Center/size variances are hard-coded to 0.1 / 0.2, as in the
        # original snippet.
        boxes = box_utils.convert_locations_to_boxes(
            locations, self.priors, torch.tensor([0.1]), torch.tensor([0.2])
        )
        boxes = box_utils.center_form_to_corner_form(boxes)
        return confidences, boxes

# NOTE(review): the constructor arguments are missing from the paste; the call
# below is left unclosed.
model = SimpleNet(

# With inplace=False, convert() begins by copy.deepcopy()-ing the model (see the
# quantize.py frame in the traceback above), so the failure here is a deepcopy
# failure of something stored on the model, not a quantization failure.
model = torch.quantization.convert(model, inplace=False) # error here
It appeared that something could not be pickled in the model - so I tried using dill:


But I don't really understand the output or how to solve this problem. Am I doing the QAT -> quantisation conversion correctly? Is it correct to set the qconfig and call prepare_qat again before loading a QAT'd model? Any help would be greatly appreciated.

It looks like it’s failing at copy.deepcopy(module). Just to confirm, does copy.deepcopy work on your model instance before you do QAT?

Hello, yes, I realised that deepcopy did not work on my original model either, and found the issue: I had some unpicklable objects saved in the __init__ of my model.

Thank you!

Could you share what in your original model was causing deepcopy to fail? I am having the same problem and need some clues to fix it. Thanks for your help.