When I do static quantization on BERT with PyTorch 1.6, an error occurs:
Could not run 'quantized::layer_norm' with arguments from the 'CPU' backend. 'quantized::layer_norm' is only available for these backends: [QuantizedCPU]
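If I read the error right, after convert() a quantized::layer_norm kernel receives a plain float (CPU) tensor instead of a quantized one. A minimal sketch of how that mismatch can arise (hypothetical toy module, not my actual model):

import torch
import torch.nn as nn

class ToyModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.ln = nn.LayerNorm(4)

    def forward(self, x):
        # No QuantStub before the LayerNorm, so x stays a float CPU tensor.
        return self.ln(x)

m = ToyModule().eval()
m.qconfig = torch.quantization.default_qconfig
torch.quantization.prepare(m, inplace=True)
m(torch.randn(2, 4))                         # calibration pass
torch.quantization.convert(m, inplace=True)  # nn.LayerNorm -> nn.quantized.LayerNorm
m(torch.randn(2, 4))                         # raises the same QuantizedCPU backend error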
My model code is below:
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from transformers import BertModel, BertPreTrainedModel
# LabelSmoothingCrossEntropy and FocalLoss are project-specific loss modules (not shown).

class BertSoftmaxForNerQuantized(BertPreTrainedModel):
    def __init__(self, config):
        super(BertSoftmaxForNerQuantized, self).__init__(config)
        # Stubs for eager-mode static quantization.
        self.quant = torch.quantization.QuantStub()
        self.num_labels = config.num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.loss_type = config.loss_type
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                position_ids=None, head_mask=None, labels=None):
        # outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        input_ids = self.quant(input_ids)
        attention_mask = self.quant(attention_mask)
        labels = self.quant(labels)
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs[0]
        # sequence_output = self.quant(sequence_output)
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)
        logits = self.dequant(logits)
        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here
        if labels is not None:
            assert self.loss_type in ['lsr', 'focal', 'ce']
            if self.loss_type == 'lsr':
                loss_fct = LabelSmoothingCrossEntropy(ignore_index=0)
            elif self.loss_type == 'focal':
                loss_fct = FocalLoss(ignore_index=0)
            else:
                loss_fct = CrossEntropyLoss(ignore_index=0)
            # Only keep active parts of the loss
            if attention_mask is not None:
                active_loss = attention_mask.contiguous().view(-1) == 1
                active_logits = logits.contiguous().view(-1, self.num_labels)[active_loss]
                active_labels = labels.contiguous().view(-1)[active_loss]
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.contiguous().view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs
        return outputs  # (loss), scores, (hidden_states), (attentions)
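For reference, the eager-mode pattern from the PyTorch static quantization tutorial wraps a float activation path in the stubs, roughly like this (a sketch with hypothetical names; note it quantizes a float tensor, whereas my forward passes the integer input_ids, attention_mask, and labels through QuantStub):

import torch
import torch.nn as nn

class FloatHead(nn.Module):
    # Hypothetical classifier head: only the float path goes through the stubs.
    def __init__(self, hidden_size, num_labels):
        super().__init__()
        self.quant = torch.quantization.QuantStub()      # float32 -> quint8
        self.classifier = nn.Linear(hidden_size, num_labels)
        self.dequant = torch.quantization.DeQuantStub()  # quint8 -> float32

    def forward(self, sequence_output):
        x = self.quant(sequence_output)  # sequence_output is a float tensor
        x = self.classifier(x)
        return self.dequant(x)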
The calibration and evaluation code is below:
quantized_model.eval()
quantized_model.qconfig = torch.quantization.default_qconfig
print('quantized_model.qconfig', quantized_model.qconfig)
torch.quantization.prepare(quantized_model, inplace=True)

# Calibrate on the training set.
train_dataset = load_and_cache_examples(args, args.task_name, tokenizer, data_type='train')
args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
train_sampler = SequentialSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size,
                              collate_fn=collate_fn)
with torch.no_grad():
    for step, batch in enumerate(tqdm(train_dataloader, desc='post-training static quantization')):
        inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
        outputs = quantized_model(**inputs)
        loss = outputs[0]

# Convert to the quantized model, then evaluate on the dev set.
torch.quantization.convert(quantized_model, inplace=True)
print_model_size(quantized_model)
eval_output_dir = args.output_dir
if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
    os.makedirs(eval_output_dir)
eval_dataset = load_and_cache_examples(args, args.task_name, tokenizer, data_type='dev')
args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size,
                             collate_fn=collate_fn)
for step, batch in enumerate(eval_dataloader):
    quantized_model.eval()
    with torch.no_grad():
        inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
        if args.model_type != "distilbert":
            outputs = quantized_model(**inputs)
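One way to check which submodules convert() actually swapped is to list the quantized module classes (a small debugging sketch, assuming quantized_model from above):

for name, module in quantized_model.named_modules():
    # Modules swapped by torch.quantization.convert live under torch.nn.quantized.
    if module.__class__.__module__.startswith('torch.nn.quantized'):
        print(name, '->', type(module).__name__)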
Is there something wrong in my code, or is it just that my PyTorch version is too new?