Hello, everyone!
I get two warnings when trying to quantize part of my model.
Do you have any idea where they come from?
Warnings:
Warning 1: Happens when I run torch.quantization.prepare(model.l_lnrs, inplace=True)
UserWarning: Please use quant_min and quant_max to specify the range for observers. reduce_range will be deprecated in a future release of PyTorch.
  warnings.warn(
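From the message I understand that the observers should be given explicit quant_min/quant_max instead of relying on reduce_range. A minimal sketch of what I think that would look like (the observer choices and ranges here are my assumption, 0..127 mirroring what reduce_range=True does for quint8 activations):

import torch

# Sketch (assumption): a qconfig whose observers get explicit
# quant_min/quant_max, as the warning suggests, instead of reduce_range.
explicit_qconfig = torch.quantization.QConfig(
    activation=torch.quantization.HistogramObserver.with_args(
        dtype=torch.quint8, quant_min=0, quant_max=127,
    ),
    weight=torch.quantization.PerChannelMinMaxObserver.with_args(
        dtype=torch.qint8,
        qscheme=torch.per_channel_symmetric,
        quant_min=-128, quant_max=127,
    ),
)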
Warning 2: Happens when I run torch.quantization.convert(model.l_lnrs, inplace=True)
/home/thytu/Prog/Blackfoot/herding-cats-poc1-2l/venv/lib/python3.9/site-packages/torch/ao/quantization/observer.py:886: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
src_bin_begin // dst_bin_width, 0, self.dst_nbins - 1
/home/thytu/Prog/Blackfoot/herding-cats-poc1-2l/venv/lib/python3.9/site-packages/torch/ao/quantization/observer.py:891: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
src_bin_end // dst_bin_width, 0, self.dst_nbins - 1
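The second warning is raised inside PyTorch's own observer.py rather than in my code, so I assume I can't do much about it myself. If I read it correctly, it only flags the difference between truncating and flooring division, e.g.:

import torch

a = torch.tensor([-3])
b = torch.tensor([2])

# What the deprecated tensor // tensor currently does (rounds toward 0):
print(torch.div(a, b, rounding_mode='trunc'))  # tensor([-1])

# Actual floor division, which a future release will switch to:
print(torch.div(a, b, rounding_mode='floor'))  # tensor([-2])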
Model class:
import torch
import torch.nn as nn


class LSTM(nn.Module):
    def __init__(self, input_size, output_size, **kwargs):
        super(LSTM, self).__init__()

        self.lstm_num_layers = kwargs.get("lstm_num_layers", 1)
        self.lstm_hidden_size = kwargs.get("lstm_hidden_size", 1)
        self.hidden_size = kwargs.get("hidden_size", 1)

        self.l_lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=self.lstm_hidden_size,
            num_layers=self.lstm_num_layers,
            batch_first=True,
        )

        self.l_lnrs = nn.Sequential(
            nn.ReLU(),
            nn.Linear(self.lstm_hidden_size, self.hidden_size),
            nn.ReLU(),
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.ReLU(),
            nn.Linear(self.hidden_size, output_size),
        )

    def forward(self, x):
        # Zero-initialised hidden and cell states for the LSTM.
        h_0 = torch.zeros(self.lstm_num_layers, x.size(0), self.lstm_hidden_size)
        c_0 = torch.zeros(self.lstm_num_layers, x.size(0), self.lstm_hidden_size)

        _, (hn, _) = self.l_lstm(x, (h_0, c_0))
        # Take the last layer's hidden state as the sequence summary.
        hn = hn[-1].view(-1, self.lstm_hidden_size)

        return self.l_lnrs(hn)
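For reference, this is how I instantiate and call it (the sizes below are placeholders, not my real config):

# Placeholder sizes, just to illustrate the expected shapes.
model = LSTM(input_size=8, output_size=2, lstm_hidden_size=16, hidden_size=32)

x = torch.randn(4, 10, 8)  # (batch, seq_len, input_size) since batch_first=True
out = model(x)             # shape: (4, 2)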
Quantization function:
def quantize_model(model: torch.nn.Module, sample: torch.Tensor) -> torch.nn.Module:
    SERVER_INFERENCE_CONFIG = 'fbgemm'

    # Wrap the linear stack in quant/dequant stubs so only this part is quantized.
    model.l_lnrs = torch.nn.Sequential(
        torch.quantization.QuantStub(),
        *model.l_lnrs,
        torch.quantization.DeQuantStub(),
    )

    model.eval()
    model.qconfig = torch.quantization.get_default_qconfig(SERVER_INFERENCE_CONFIG)

    # Collect [Linear, ReLU] name pairs to fuse (names are relative to l_lnrs).
    pair_of_modules_to_fuze = []
    for name, layer in model.named_modules():
        if isinstance(layer, torch.nn.Linear):
            pair_of_modules_to_fuze.append([name.split('.')[-1]])
        elif isinstance(layer, torch.nn.ReLU) and len(pair_of_modules_to_fuze) > 0:
            pair_of_modules_to_fuze[-1].append(name.split('.')[-1])

    # Keep only complete Linear+ReLU pairs.
    pair_of_modules_to_fuze = list(filter(lambda x: len(x) == 2, pair_of_modules_to_fuze))

    for i, _ in enumerate(model.l_lnrs):
        model.l_lnrs[i].qconfig = torch.quantization.get_default_qconfig(SERVER_INFERENCE_CONFIG)

    torch.quantization.fuse_modules(model.l_lnrs, pair_of_modules_to_fuze, inplace=True)
    torch.quantization.prepare(model.l_lnrs, inplace=True)

    # Calibrate the observers on the sample data.
    for data in sample:
        model(data)

    torch.quantization.convert(model.l_lnrs, inplace=True)

    return model
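And this is roughly how I call it (the calibration shapes below are placeholders for my real data):

# Placeholder calibration data: iterating over the first dim yields
# (batch, seq_len, input_size) batches for the observers to see.
calibration_sample = torch.randn(16, 4, 10, 8)

model = LSTM(input_size=8, output_size=2, lstm_hidden_size=16, hidden_size=32)
model = quantize_model(model, calibration_sample)

print(model.l_lnrs)  # the Linear layers should now show up as quantized modules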