Hi there,
I am working on quantizing a semantic segmentation model using the FX graph mode quantization API provided by PyTorch.
The model follows a DeepLabv3+ architecture, with a Swin Transformer backbone, an ASPP module, and some upconvolutions.
I have followed the steps in the tutorial by @jerryzh168.
I am able to run the following lines of code:
from torch.quantization import get_default_qconfig
from torch.quantization.quantize_fx import prepare_fx

model.eval()  # post-training static quantization expects an eval-mode model
qconfig = get_default_qconfig("fbgemm")
qconfig_dict = {"": qconfig}
prepare_custom_config_dict = {
    "non_traceable_module_class": [PatchEmbed, BasicLayer]
}
prepared_model = prepare_fx(model, qconfig_dict, prepare_custom_config_dict)
print(prepared_model.graph)
print(prepared_model.code)
I am excluding the PatchEmbed and BasicLayer module classes from tracing via non_traceable_module_class in prepare_fx.
Here is their code:
class PatchEmbed(nn.Module):
    def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
        super().__init__()
        patch_size = to_2tuple(patch_size)
        self.patch_size = patch_size
        self.in_chans = in_chans
        self.embed_dim = embed_dim
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
        if norm_layer is not None:
            self.norm = norm_layer(embed_dim)
        else:
            self.norm = None

    def forward(self, x):
        """Forward function."""
        # pad the input so H and W are multiples of the patch size
        _, _, H, W = x.size()
        if W % self.patch_size[1] != 0:
            x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1]))
        if H % self.patch_size[0] != 0:
            x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0]))

        x = self.proj(x)  # B C Wh Ww
        if self.norm is not None:
            Wh, Ww = x.size(2), x.size(3)
            x = x.flatten(2).transpose(1, 2)
            x = self.norm(x)
            x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww)
        return x
class BasicLayer(nn.Module):
    """A basic Swin Transformer layer for one stage."""

    def __init__(self,
                 dim,
                 depth,
                 num_heads,
                 window_size=7,
                 mlp_ratio=4.,
                 qkv_bias=True,
                 qk_scale=None,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 norm_layer=nn.LayerNorm,
                 downsample=None,
                 use_checkpoint=False):
        super().__init__()
        self.window_size = window_size
        self.shift_size = window_size // 2
        self.depth = depth
        self.use_checkpoint = use_checkpoint

        # build blocks
        self.blocks = nn.ModuleList([
            SwinTransformerBlock(
                dim=dim,
                num_heads=num_heads,
                window_size=window_size,
                shift_size=0 if (i % 2 == 0) else window_size // 2,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop,
                attn_drop=attn_drop,
                drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
                norm_layer=norm_layer)
            for i in range(depth)])

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(dim=dim, norm_layer=norm_layer)
        else:
            self.downsample = None

    def forward(self, x, H, W):
        """Forward function.

        Args:
            x: Input feature, tensor size (B, H*W, C).
            H, W: Spatial resolution of the input feature.
        """
        # calculate attention mask for SW-MSA
        Hp = int(np.ceil(H / self.window_size)) * self.window_size
        Wp = int(np.ceil(W / self.window_size)) * self.window_size
        img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device)  # 1 Hp Wp 1
        h_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        w_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        cnt = 0
        for h in h_slices:
            for w in w_slices:
                img_mask[:, h, w, :] = cnt
                cnt += 1

        mask_windows = window_partition(img_mask, self.window_size)  # nW, window_size, window_size, 1
        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))

        for blk in self.blocks:
            blk.H, blk.W = H, W
            if self.use_checkpoint:
                x = checkpoint.checkpoint(blk, x, attn_mask)
            else:
                x = blk(x, attn_mask)
        if self.downsample is not None:
            x_down = self.downsample(x, H, W)
            Wh, Ww = (H + 1) // 2, (W + 1) // 2
            return x, H, W, x_down, Wh, Ww
        else:
            return x, H, W, x, H, W
PatchEmbed gave me problems because of its if statements: during symbolic tracing the tensor sizes are fx Proxy objects, and control flow that depends on a Proxy cannot be evaluated.
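A minimal sketch that reproduces this failure mode (PadIfNeeded is a toy module I made up to isolate the shape-dependent branch; it is not part of my model):

import torch.nn as nn
import torch.nn.functional as F
from torch.fx import symbolic_trace

class PadIfNeeded(nn.Module):
    def forward(self, x):
        W = x.size(3)   # during symbolic tracing this is a Proxy, not an int
        if W % 4 != 0:  # bool(Proxy) raises a TraceError
            x = F.pad(x, (0, 4 - W % 4))
        return x

try:
    symbolic_trace(PadIfNeeded())
except Exception as e:
    # torch.fx.proxy.TraceError: symbolically traced variables cannot be
    # used as inputs to control flow
    print(type(e).__name__, e)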
BasicLayer was failing when executing numpy operations on fx Proxy objects in these lines:
Hp = int(np.ceil(H / self.window_size)) * self.window_size
Wp = int(np.ceil(W / self.window_size)) * self.window_size
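If I wanted to make BasicLayer traceable instead of excluding it, my understanding is that the numpy call could be avoided; a sketch (untested on this model; round_up and ceil_to_multiple are helper names I made up):

import numpy as np
import torch.fx

# Pure integer replacement for int(np.ceil(v / multiple)) * multiple; when v
# is a Proxy, FX simply records add/floordiv/mul nodes instead of crashing.
def round_up(v, multiple):
    return ((v + multiple - 1) // multiple) * multiple

# Alternative: keep numpy but register the helper as an FX leaf, so tracing
# records a single call_function node and never passes a Proxy into numpy.
def ceil_to_multiple(v, multiple):
    return int(np.ceil(v / multiple)) * multiple

torch.fx.wrap("ceil_to_multiple")  # must be called at module top level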
My problem comes when I try to calibrate the model:
def calibrate(model, data_loader):
    with torch.no_grad():
        # one tqdm step per batch, hence total=len(data_loader)
        for inp, target, _, _ in tqdm(data_loader, total=len(data_loader),
                                      desc='Calibrating model for post training static quantization...'):
            model(inp)

calibrate(prepared_model, data_loader)
That’s when I get the following error:
Traceback (most recent call last):
File "/home/victor/proyectos/roof_segmentation/increase_speed_model_production/roof_condition_semseg/utils/quantization.py", line 37, in calibrate
model(inp)
File "/home/victor/proyectos/roof_segmentation/increase_speed_model_production/roof_condition_semseg/venv/lib/python3.8/site-packages/torch/fx/graph_module.py", line 513, in wrapped_call
raise e.with_traceback(None)
AttributeError: 'int' object has no attribute 'numel'
The weird part is that my input is a torch.Tensor, not an int. So I guess the graph generated by prepare_fx has a problem somewhere; my suspicion is that BasicLayer returns plain Python ints (H, W, Wh, Ww) alongside tensors, and an observer inserted on one of those int outputs would fail exactly like this when it calls .numel().
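To check this, a sketch for dumping the prepared graph and spotting which observers receive non-tensor arguments (nothing here is model-specific):

# Each node prints as (op, name, target, args); observer modules typically
# show up as call_module nodes whose target contains "activation_post_process".
for node in prepared_model.graph.nodes:
    print(node.op, node.name, node.target, node.args)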
I haven’t found any information regarding problems when calibrating the model.
Any ideas on how to solve the problem?
Thanks.
Note: I have already tried post-training dynamic quantization in eager mode and it works fine. However, it only lets me quantize the nn.Linear layers and activation functions of my model.
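For reference, this is roughly the eager-mode call that works (a sketch of what I ran, not the full script):

import torch
import torch.nn as nn

# Post-training dynamic quantization: only supported module types such as
# nn.Linear get quantized, which is why I am trying FX graph mode instead.
quantized_model = torch.quantization.quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8
)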