I am learning about feature fusion and want to add a feature-fusion step to a CenterNet network whose backbone is ResNet-50.
Is it possible to fuse the three downsampled feature maps x3 (64x64x512), x4 (32x32x1024), and x5 (16x16x2048) into a single output of size 16x16x2048?
Is the procedure to convert both x3 and x4 to the size of x5 and then sum them?
The code is as follows:
class ResNet(nn.Module):
    """ResNet backbone assembled from a caller-supplied residual block type.

    Args:
        block: Residual block class. It is called as
            ``block(inplanes, planes, stride, downsample)`` and must expose an
            integer ``expansion`` attribute (output channels = planes * expansion).
        layers: Sequence of four ints giving the number of blocks in each of
            the four residual stages.
        num_classes: Number of outputs of the final fully connected layer.

    The shape comments below assume a 512x512 RGB input and a block with
    ``expansion == 4`` (e.g. a ResNet-50 style bottleneck).
    """

    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        # Channel count feeding the next residual stage; _make_layer advances it.
        self.inplanes = 64

        # 512,512,3 -> 256,256,64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Improvement point: replace the single 7x7 stem conv with 3x3 convs.
        # NOTE(review): these layers are constructed but forward() still runs
        # conv1 only -- wire them into forward() or remove them.
        self.conv2 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # TODO: a further "conv5" was left unspecified (nn.Conv2d() with no
        # arguments raises TypeError); add it with explicit channels if needed.

        # 256x256x64 -> 128x128x64
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)  # change
        # 128x128x64 -> 128x128x256
        self.layer1 = self._make_layer(block, 64, layers[0])
        # 128x128x256 -> 64x64x512
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        # 64x64x512 -> 32x32x1024
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # 32x32x1024 -> 16x16x2048
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AvgPool2d(7)
        # Classifier head used by forward(); layer4 emits 512 * expansion channels.
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He-style initialisation for convolutions (std = sqrt(2 / fan_out));
        # batch norm starts as the identity transform.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` consecutive ``block`` modules.

        Only the first block receives ``stride`` and the projection shortcut;
        the remaining blocks keep resolution and channel count. Updates
        ``self.inplanes`` to the stage's output channel count.
        """
        downsample = None
        # A 1x1 projection is needed on the shortcut whenever the first block
        # changes the spatial size or the number of channels.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four residual stages and the classifier head.

        NOTE(review): ``AvgPool2d(7)`` followed by ``fc`` only lines up when
        layer4 produces a 7x7 map (e.g. a 224x224 input). With the 512x512
        input assumed by the shape comments, layer4 is 16x16 and the flattened
        vector will not match ``fc`` -- for detection use, take xl3/xl4/xl5
        as feature maps instead of the classifier output.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)      # 128x128x64 -> 128x128x256
        xl3 = self.layer2(x)    # 128x128x256 -> 64x64x512
        xl4 = self.layer3(xl3)  # 64x64x512 -> 32x32x1024
        xl5 = self.layer4(xl4)  # 32x32x1024 -> 16x16x2048

        x = self.avgpool(xl5)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x