In eager-mode quantization, does each arithmetic operation require its own torch.nn.quantized.FloatFunctional (FF) instance inserted at its computation site, or can a single FF instance be reused to perform different operations? For example, in the code below, do I need to define ff1 and ff2 as two separate FFs?
class Block35(nn.Module):
    """Inception-ResNet-A (Block35) residual block, written to be
    quantization-friendly in eager mode.

    Three parallel convolutional branches are concatenated, projected back
    to 256 channels by a 1x1 conv, scaled by ``scale``, and added to the
    input as a residual connection, followed by ReLU.

    NOTE(review): each ``FloatFunctional`` carries its own activation
    observer, so distinct arithmetic ops should use distinct instances
    (ff1 for the scaling, ff2 for the residual add); reusing a single FF
    for both would merge their calibration statistics — confirm against
    the PyTorch eager-quantization docs for your version.
    """

    def __init__(self, scale=1.0):
        super().__init__()
        self.scale = scale  # residual scaling factor (Python float)
        self.branch0 = BasicConv2d(256, 32, kernel_size=1, stride=1)
        self.branch1 = nn.Sequential(
            BasicConv2d(256, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
        )
        self.branch2 = nn.Sequential(
            BasicConv2d(256, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
        )
        self.conv2d = nn.Conv2d(96, 256, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)
        # One FloatFunctional per arithmetic op so each gets its own
        # observer during calibration.
        self.ff1 = nn.quantized.FloatFunctional()
        self.ff2 = nn.quantized.FloatFunctional()

    def forward(self, x):
        """Run the block; ``x`` is expected to have 256 channels so the
        residual add is shape-compatible (assumed from the layer widths —
        TODO confirm against callers)."""
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        out = torch.cat((x0, x1, x2), 1)  # 32 + 32 + 32 = 96 channels
        out = self.conv2d(out)
        # out = out * self.scale + x
        # FIX: self.scale is a Python scalar.  FloatFunctional.mul works
        # on it in float mode, but after convert() the quantized mul
        # requires two quantized tensors and would fail.  mul_scalar is
        # the tensor-times-scalar op and is numerically identical here.
        out = self.ff2.add(self.ff1.mul_scalar(out, self.scale), x)
        out = self.relu(out)
        return out