I also run into the same error when I try to use a quantized DistilBERT model.
A sample test case is shown below. When run under pytest, the first test case passes but the second one fails with an error:
import torch
import torch.multiprocessing as mp
from transformers import BertTokenizer, DistilBertConfig
from transformers.models.distilbert import DistilBertPreTrainedModel, DistilBertModel
def test_quantized_distil_bert_1():
    """Call a dynamically quantized classifier directly in the current process.

    The model's ``torch.nn.Linear`` layers are quantized to ``qint8`` and the
    model is then invoked in-process on a single encoded sentence.
    """
    quantized = torch.quantization.quantize_dynamic(
        DistilBertClassifier(distil_bert_config),
        {torch.nn.Linear},
        dtype=torch.qint8,
    )
    input_ids, attention_mask = _construct_model_inputs("hello world")
    outputs = quantized(input_ids, attention_mask=attention_mask)
    assert outputs
def test_quantized_distil_bert_2():
    """Call a dynamically quantized classifier through a one-worker process pool.

    Same setup as the in-process test, except that the model and its inputs
    are handed to ``mp.Pool.apply_async`` so the forward pass runs in a worker
    process. The result is awaited for at most 10 seconds.
    """
    quantized = torch.quantization.quantize_dynamic(
        DistilBertClassifier(distil_bert_config),
        {torch.nn.Linear},
        dtype=torch.qint8,
    )
    input_ids, attention_mask = _construct_model_inputs("hello world")
    with mp.Pool(1) as pool:
        # Both tensors are passed positionally, so the mask lands in the
        # model's second positional parameter (attention_mask).
        pending = pool.apply_async(quantized, args=(input_ids, attention_mask))
        outputs = pending.get(timeout=10)
    assert outputs
def _construct_model_inputs(sentence: str):
    """Encode *sentence* with the module-level tokenizer and build model inputs.

    Returns a ``(tokenids, mask)`` pair: the encoded ids with a leading
    dimension of size 1 added, and an all-ones ``int64`` tensor of the same
    shape.
    """
    encoded = tokenizer.encode(sentence)
    # unsqueeze(0) adds a leading dimension of size 1 in front of the id sequence.
    batched_ids = torch.tensor(encoded).unsqueeze(0)
    all_ones = torch.ones_like(batched_ids, dtype=torch.int64)
    return batched_ids, all_ones
class DistilBertClassifier(DistilBertPreTrainedModel):
    """DistilBERT encoder followed by a small classification head.

    The head is ``Linear(dim, dim) -> ReLU -> Dropout -> Linear(dim, num_labels)``
    applied to index 0 along the second axis of the encoder's first output.
    """

    def __init__(self, config: DistilBertConfig):
        """Build the encoder and head from *config*, then initialise weights.

        The torch (and CUDA) RNG is seeded with 345 immediately before
        ``init_weights()`` is called.
        """
        super().__init__(config)
        hidden_dim = config.dim
        self.num_labels = config.num_labels

        # Submodules are registered in the same order as before.
        self.distilbert = DistilBertModel(config)
        self.pre_classifier = torch.nn.Linear(hidden_dim, hidden_dim)
        self.classifier = torch.nn.Linear(hidden_dim, self.num_labels)
        self.dropout = torch.nn.Dropout(config.seq_classif_dropout)

        # Seed right before weight initialisation.
        for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
            seed_fn(345)
        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, head_mask=None, inputs_embeds=None, labels=None):
        """Run the encoder and classification head.

        Returns a tuple starting with the logits, followed by any further
        elements of the encoder output. When *labels* is given, the loss is
        prepended: ``MSELoss`` if ``num_labels == 1``, otherwise
        ``CrossEntropyLoss``.
        """
        encoder_outputs = self.distilbert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )
        # Index 0 along the second axis of the first encoder output
        # (presumably the first token's hidden state - confirm).
        first_position = encoder_outputs[0][:, 0]
        features = self.pre_classifier(first_position)
        features = torch.nn.functional.relu(features)
        features = self.dropout(features)
        logits = self.classifier(features)

        outputs = (logits,) + encoder_outputs[1:]
        if labels is None:
            return outputs  # logits, (hidden_states), (attentions)

        flat_labels = labels.view(-1)
        if self.num_labels == 1:
            # A single label is treated as regression.
            loss = torch.nn.MSELoss()(logits.view(-1), flat_labels)
        else:
            loss = torch.nn.CrossEntropyLoss()(logits.view(-1, self.num_labels), flat_labels)
        return (loss,) + outputs  # loss, logits, (hidden_states), (attentions)
# Module-level fixtures shared by the tests and helper above. They are looked
# up at call time, so defining them after the functions is fine.
# NOTE(review): "tokenizer_path/" looks like a placeholder for a local
# tokenizer directory - confirm before running.
tokenizer = BertTokenizer.from_pretrained("tokenizer_path/")
# Default DistilBERT configuration (no arguments overridden).
distil_bert_config = DistilBertConfig()
Error stack trace:
Process SpawnPoolWorker-1:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 313, in _bootstrap
self.run()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/queues.py", line 358, in get
return _ForkingPickler.loads(res)
File "/Users/dennis/.virtualenvs/fano_ms_intent/lib/python3.8/site-packages/torch/multiprocessing/reductions.py", line 88, in rebuild_tensor
t = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
File "/Users/dennis/.virtualenvs/fano_ms_intent/lib/python3.8/site-packages/torch/_utils.py", line 133, in _rebuild_tensor
t = torch.tensor([], dtype=storage.dtype, device=storage.device)
RuntimeError: Could not run 'aten::empty.memory_format' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::empty.memory_format' is only available for these backends: [CPU, MkldnnCPU, SparseCPU, BackendSelect, Named, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradNestedTensor, UNKNOWN_TENSOR_TYPE_ID, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, Tracer, Autocast, Batched, VmapMode].