Hi there,
I am currently trying to quantizate a model.My question is how can I quantizate just the decoder of my model?.
This is my code:
class DeeplabV3X(torch.nn.Module):
def __init__(self, num_classes: int,
pretrained: bool = True, atrous_pooling_type='aspp'):
super().__init__()
self.backbone = build_swin_transformer()
with torch.no_grad():
out = self.backbone(torch.zeros(1, 3, 200, 200))
out_backbone_channels = [x.size(1) for x in out.values()]
self.quant = torch.quantization.QuantStub()
self.dequant = torch.quantization.DeQuantStub()
self.atrous_pooling_type = atrous_pooling_type
if self.atrous_pooling_type == 'aspp':
self.atrous_pooling_layer = ASPP(in_channels=out_backbone_channels[-1], out_channels=256, output_stride=16)
elif self.atrous_pooling_type == 'ksac':
self.atrous_pooling_layer = KSAC(in_channels=out_backbone_channels[-1], out_channels=256)
self.dec_cn_1 = ConvModule(in_channels=out_backbone_channels[-2], out_channels=256)
self.dec_cn_1_1 = ConvModule(in_channels=512, out_channels=256, kernel_size=1, padding=0)
self.dec_cn_2 = ConvModule(in_channels=out_backbone_channels[-3], out_channels=256)
self.dec_cn_2_1 = ConvModule(in_channels=512, out_channels=256, kernel_size=1, padding=0)
self.dec_cn_3 = ConvModule(in_channels=out_backbone_channels[-4], out_channels=256)
self.dec_cn_3_1 = ConvModule(in_channels=512, out_channels=256, kernel_size=1, padding=0)
self.head = nn.Sequential(
ConvModule(in_channels=256, out_channels=256),
ConvModule(in_channels=256, out_channels=64),
torch.nn.Conv2d(in_channels=64, out_channels=num_classes, kernel_size=3, padding=1))
self.dropout = torch.nn.Dropout2d(p=0.5)
def forward(self, x_input):
input_size = x_input.size()[2::]
f1, f2, f3, f4 = list(self.backbone(x_input).values())
# This is where decoder start and quantization should happen here
# I quantizate each of the four feature maps obtained from the backbone, as you can't quantizate a list
f4 = self.quant(f4)
f3 = self.quant(f3)
f2 = self.quant(f2)
f1 = self.quant(f1)
f4 = self.atrous_pooling_layer(f4)
f3 = self.dec_cn_1(f3)
f2 = self.dec_cn_2(f2)
f1 = self.dec_cn_3(f1)
x3 = torch.cat((f3, interpolate(f4, f3.size()[2::])), dim=1)
x3 = self.dec_cn_1_1(x3)
x2 = torch.cat((f2, interpolate(x3, f2.size()[2::])), dim=1)
x2 = self.dec_cn_2_1(x2)
x1 = torch.cat((f1, interpolate(x2, f1.size()[2::])), dim=1)
x1 = self.dec_cn_3_1(x1)
out = self.head(x1)
out = interpolate(out, input_size, mode='bilinear')
out=self.dequant(out)
return out
model = DeeplabV3X(num_classes=12, atrous_pooling_type = 'ksac')
model.eval()
model.to('cpu)
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.quantization.prepare(model, inplace=True)
def evaluate(model, data_loader):
with torch.no_grad():
for inp, target in tqdm(data_loader, total=len(data_loader.dataset),
desc='Evaluating model for observers in post training static quantization...'):
model(inp)
evaluate(model, data_loader)
torch.quantization.convert(model, inplace=True)
When I print, the model is quantizated in every layer but it should only have the decoder quantizated.
I have read through the documentation but there is no explicit mention about how to get a partial quantization in the model besides my guess that this is done by writing self.quant and self.dequant in specific places. THere is apecific post by @KURI. Is there another way besides adding and deleting layers from the unquantizated model?
Thanks in advance.