How to add feature fusion to the backbone network

I am learning about feature fusion and want to add a feature-fusion stage to a CenterNet network whose backbone is ResNet-50.
Is it possible to fuse the three downsampled feature maps x3 (64x64x512), x4 (32x32x1024), and x5 (16x16x2048) into a single output of size 16x16x2048?
Is the correct procedure to convert both x3 and x4 to the size of x5 and then sum them?
The code is as follows:
class ResNet(nn.Module):
    """ResNet backbone followed by a global-pool + linear classification head.

    The spatial-size comments below follow the original notes, which assume a
    512x512x3 input and ``block.expansion == 4`` (Bottleneck blocks, as used
    by ResNet-50).

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and be callable as
            ``block(inplanes, planes, stride, downsample)``.
        layers: Number of residual blocks in each of the four stages.
        num_classes: Output size of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        # Running channel count; _make_layer reads and updates it per stage.
        self.inplanes = 64

        # 512,512,3 -> 256,256,64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # Improvement point: replace the 7x7 stem conv with a stack of 3x3 convs.
        # NOTE: these layers are defined but not yet used by forward().
        # The original placeholder ``self.conv5 = nn.Conv2d()`` was dropped
        # because nn.Conv2d cannot be constructed without its required
        # channel / kernel-size arguments.
        self.conv2 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)

        # 256x256x64 -> 128x128x64
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # 128x128x64 -> 128x128x256
        self.layer1 = self._make_layer(block, 64, layers[0])
        # 128x128x256 -> 64x64x512
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        # 64x64x512 -> 32x32x1024
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # 32x32x1024 -> 16x16x2048
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Adaptive pooling always yields a 1x1 map, so the classifier input
        # size does not depend on the image resolution. A fixed AvgPool2d(7)
        # only produces 1x1 when the last feature map is exactly 7x7.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # forward() calls self.fc, so the classifier must be defined here.
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He-style initialisation for convolutions; identity-like BatchNorm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2.0 / n))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage made of ``blocks`` stacked ``block`` modules.

        Args:
            block: Residual block class (see the class docstring).
            planes: Base channel width of the stage; the stage outputs
                ``planes * block.expansion`` channels.
            blocks: Number of residual blocks in the stage.
            stride: Stride of the first block (2 halves the spatial size).

        Returns:
            An ``nn.Sequential`` containing the stage's blocks.
        """
        out_channels = planes * block.expansion

        # The shortcut needs a 1x1 projection whenever the first block changes
        # the spatial size or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_channels
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the backbone and classifier; return ``(batch, num_classes)`` logits."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)      # 128x128x64 -> 128x128x256
        xl3 = self.layer2(x)    # 128x128x256 -> 64x64x512
        xl4 = self.layer3(xl3)  # 64x64x512 -> 32x32x1024
        xl5 = self.layer4(xl4)  # 32x32x1024 -> 16x16x2048

        x = self.avgpool(xl5)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x