I downloaded the mBART-50 model from Hugging Face into the local folder "model" and quantized it with PyTorch's dynamic quantization (torch.quantization.quantize_dynamic) using the script below, which reduced model.bin from 2.4 GB to 1.6 GB.
import torch
from transformers import MBartConfig, MBartForConditionalGeneration

# Load the locally downloaded mBART-50 model
config = MBartConfig.from_pretrained("model")
model = MBartForConditionalGeneration.from_pretrained("model", config=config)
model.eval()

# Dynamically quantize all nn.Linear layers to int8
quantized_model = torch.quantization.quantize_dynamic(
    model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8
)
print(quantized_model)

torch.save(quantized_model.state_dict(), "model/newww_new_quant.bin")
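For reference, the size difference can be checked like this (a minimal sketch; it assumes the original weights sit at model/pytorch_model.bin, the default Hugging Face filename):

import os

# Compare on-disk sizes of the original and quantized weights;
# "model/pytorch_model.bin" is an assumption, adjust if your filename differs
for path in ["model/pytorch_model.bin", "model/newww_new_quant.bin"]:
    print(path, round(os.path.getsize(path) / 1024**3, 2), "GB")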
When I try to translate a word with the quantized model, it returns an empty string; without quantization the translation works. This is the translation code:
from transformers import MBart50TokenizerFast, MBartForConditionalGeneration
import logging

logging.basicConfig(level=logging.INFO)

class Translator:
    def __init__(self, model_path) -> None:
        logging.info('Loading tokenizer.')
        self.tokenizer = MBart50TokenizerFast.from_pretrained(model_path)
        logging.info('Tokenizer loaded.')
        logging.info('Loading model.')
        self.model = MBartForConditionalGeneration.from_pretrained(model_path)
        logging.info('Model loaded.')

    def translate(self, sentences):
        # Tokenize the input batch
        encoded_ka = self.tokenizer(sentences, return_tensors="pt", padding=True)
        logging.debug('encoded sentences: %s', encoded_ka)
        print(encoded_ka)
        # Generate translation token ids
        generated_tokens = self.model.generate(**encoded_ka)
        logging.debug('generated tokens: %s', generated_tokens)
        print(generated_tokens)
        # Decode back to text, dropping special tokens
        decoded_ka = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        print(decoded_ka)
        return decoded_ka
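The class is invoked roughly like this (a sketch of the calling code, which produces the got sent / transl : lines in the output below):

translator = Translator("model")
sentences = ['პირადი გადარიცხვა']  # Georgian for "personal transfer"
print('got sent', sentences)
result = translator.translate(sentences)
print('transl :', result)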
Before quantization the translation is fine. Output:
got sent ['პირადი გადარიცხვა']
{'input_ids': tensor([[250004, 185472, 8411, 139656, 114572, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}
tensor([[ 2, 250004, 17915, 73509, 2]])
['Personal Transfer']
transl : ['Personal Transfer']
After quantization the output is:
got sent ['პირადი გადარიცხვა']
{'input_ids': tensor([[250004, 185472, 8411, 139656, 114572, 2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}
tensor([[     2, 250004, 250004, 250004, 250004, 250004, 250004, 250004,
         ...,
        250004, 250004, 250004, 250004, 250004, 250004, 250004,      2]])
(abridged: token 250004 is repeated 198 times between the two end-of-sequence tokens)
['']
transl : ['']
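To make the failure mode easier to see: the quantized model emits the same id over and over. The repeated id can be decoded with the same tokenizer, e.g.:

from transformers import MBart50TokenizerFast

tokenizer = MBart50TokenizerFast.from_pretrained("model")
# Decode the repeated id; it matches the first id of the encoder input,
# i.e. the language-code token the mBART-50 tokenizer prepends
print(tokenizer.convert_ids_to_tokens([250004]))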
Does anyone have an idea what could cause this?