Loading Quantized Modules with ProcessPoolExecutor Issue

I applied the default torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8) to my models (TailorNet) and then saved them with torch.save(model.state_dict(), ...) as usual.

Loading the model again with multiprocessing via ProcessPoolExecutor produces the error below. Somehow using ThreadPoolExecutor works fine.
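
A minimal sketch of the quantize-and-save step (DummyNet here is just a stand-in for the actual TailorNet model):

import torch
import torch.nn as nn

# stand-in for the real TailorNet model
class DummyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 8)

    def forward(self, x):
        return self.fc(x)

model = DummyNet()
# dynamic quantization replaces the nn.Linear weights with int8 equivalents
model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
torch.save(model.state_dict(), "model_q.pt")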

Error message:

Traceback (most recent call last):
  File "/usr/lib/python3.8/concurrent/futures/process.py", line 368, in _queue_management_worker
    result_item = result_reader.recv()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 251, in recv
    return _ForkingPickler.loads(buf.getbuffer())
  File "/home/osboxes/.local/lib/python3.8/site-packages/torch/multiprocessing/reductions.py", line 88, in rebuild_tensor
    t = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
  File "/home/osboxes/.local/lib/python3.8/site-packages/torch/_utils.py", line 133, in _rebuild_tensor
    t = torch.tensor([], dtype=storage.dtype, device=storage.device)

RuntimeError: Could not run 'aten::empty.memory_format' with arguments from the 'QuantizedCPU' backend.
This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::empty.memory_format' is only available for these backends: [CPU, CUDA, MkldnnCPU, SparseCPU, SparseCUDA, BackendSelect, Named, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradNestedTensor, UNKNOWN_TENSOR_TYPE_ID, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, Tracer, Autocast, Batched, VmapMode].

How I collect the models:

for t in concurrent.futures.as_completed(futures):
    runner, num = t.result()
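
For context, the submission side looks roughly like this (continuing the sketch above; load_quantized is a placeholder name for my worker function, which rebuilds the quantized module, loads the saved state_dict, and returns it to the parent):

import concurrent.futures
import torch

def load_quantized(num):
    # runs in a worker process: rebuild the quantized module and load the saved weights
    runner = torch.quantization.quantize_dynamic(DummyNet(), {torch.nn.Linear}, dtype=torch.qint8)
    runner.load_state_dict(torch.load("model_q.pt"))
    # returning the quantized module means its tensors get pickled back to the parent
    return runner, num

with concurrent.futures.ProcessPoolExecutor() as executor:
    futures = [executor.submit(load_quantized, n) for n in range(4)]
    for t in concurrent.futures.as_completed(futures):
        runner, num = t.result()  # the RuntimeError above surfaces when this result is unpickled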

Hi @mfikryrizal, to clarify: you are able to load the same model successfully with ThreadPoolExecutor but not with ProcessPoolExecutor? Do you have a repro (either your full model or a small test case) you can share?

I also run into the same error when I try to use a quantized DistilBERT model.

A sample test case is shown below. Running it under pytest, the first test passes but the second one fails with the error that follows the code:

import torch
import torch.multiprocessing as mp
from transformers import BertTokenizer, DistilBertConfig
from transformers.models.distilbert import DistilBertPreTrainedModel, DistilBertModel


def test_quantized_distil_bert_1():
    # calling the quantized model in-process: this passes
    model = DistilBertClassifier(distil_bert_config)
    model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
    tokenids, mask = _construct_model_inputs("hello world")
    result = model(tokenids, attention_mask=mask)
    assert result


def test_quantized_distil_bert_2():
    model = DistilBertClassifier(distil_bert_config)
    model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
    tokenids, mask = _construct_model_inputs("hello world")
    # calling the quantized model from a pool worker pickles it across the process
    # boundary, which is where the QuantizedCPU error is raised
    with mp.Pool(1) as pool:
        process = pool.apply_async(model, (tokenids, mask))
        result = process.get(10)
    assert result


def _construct_model_inputs(sentence: str):
    tokenids = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0)
    mask = torch.ones_like(tokenids, dtype=torch.int64)
    return tokenids, mask


class DistilBertClassifier(DistilBertPreTrainedModel):
    def __init__(self, config: DistilBertConfig):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.distilbert = DistilBertModel(config)
        self.pre_classifier = torch.nn.Linear(config.dim, config.dim)
        self.classifier = torch.nn.Linear(config.dim, config.num_labels)
        self.dropout = torch.nn.Dropout(config.seq_classif_dropout)

        torch.manual_seed(345)
        torch.cuda.manual_seed(345)
        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, labels=None):
        distilbert_output = self.distilbert(
            input_ids=input_ids, attention_mask=attention_mask, head_mask=head_mask, inputs_embeds=inputs_embeds
        )
        hidden_state = distilbert_output[0]
        pooled_output = hidden_state[:, 0]
        pooled_output = self.pre_classifier(pooled_output)
        pooled_output = torch.nn.ReLU()(pooled_output)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        outputs = (logits,) + distilbert_output[1:]
        if labels is not None:
            if self.num_labels == 1:
                loss_fct = torch.nn.MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = torch.nn.CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)

tokenizer = BertTokenizer.from_pretrained("tokenizer_path/")
distil_bert_config = DistilBertConfig()

Error stack trace:

Process SpawnPoolWorker-1:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 313, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/queues.py", line 358, in get
    return _ForkingPickler.loads(res)
  File "/Users/dennis/.virtualenvs/fano_ms_intent/lib/python3.8/site-packages/torch/multiprocessing/reductions.py", line 88, in rebuild_tensor
    t = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
  File "/Users/dennis/.virtualenvs/fano_ms_intent/lib/python3.8/site-packages/torch/_utils.py", line 133, in _rebuild_tensor
    t = torch.tensor([], dtype=storage.dtype, device=storage.device)
RuntimeError: Could not run 'aten::empty.memory_format' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::empty.memory_format' is only available for these backends: [CPU, MkldnnCPU, SparseCPU, BackendSelect, Named, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradNestedTensor, UNKNOWN_TENSOR_TYPE_ID, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, Tracer, Autocast, Batched, VmapMode].