Converting yolov7 .pt to .weights

Hello!
I’ve already asked this question on yolov7 repository

but I’m hoping to get more answers here :stuck_out_tongue:

In short, I’m trying to convert a yolo model trained on pytorch to the original darknet format.
Is anyone already attempted the conversion?
My script is below

import numpy as np
import torch

from models.yolo import Detect, Model


def load_cfg_pt(
    cfg_path: str,
    pt_path: str,
    fuse: bool = False,
) -> tuple[dict, Model]:
    with open(cfg_path, "r") as f:
        cfg_lines = f.readlines()

    # skip batch norm layers
    if fuse:
        layers = [
            line.strip()
            for line in cfg_lines
            if line.strip() in ("[convolutional]", "[yolo]")
        ]
    else:
        layers = []
        for i, line in enumerate(cfg_lines):
            line = line.strip()
            if line == "[yolo]":
                layers.append(line)
            elif line == "[convolutional]":
                if cfg_lines[i + 1].strip() == "batch_normalize=1":
                    layers.append("[bn]")
                else:
                    layers.append(line)

    ckpt = torch.load(pt_path, map_location=torch.device('cpu'))
    model = ckpt.get('model').float()
    if fuse:
        model.fuse()
    model.eval()
    return layers, model


def numpize(t: torch.Tensor) -> np.ndarray:
    return t.data.cpu().detach().numpy()


if __name__ == "__main__":
    cfg_path = "tinyv7.cfg"
    pt_path = "yolov7-tiny.pt"
    fuse = False

    if not fuse:
        print("Keep batch_norm layers!")

    arch, model = load_cfg_pt(cfg_path, pt_path, fuse)

    conv_b, conv_w = [], []
    bn_b, bn_w, bn_m, bn_v = [], [], [], []
    yolo_b, yolo_w = [], []
    for i, (_, module) in enumerate(model.named_modules()):
        # skip Model and Sequential
        if i < 2:
            continue
        # parse layer-by-layer
        isconv = isinstance(module, torch.nn.modules.conv.Conv2d)
        isyolo = isinstance(module, Detect)
        isbn = isinstance(module, torch.nn.modules.batchnorm.BatchNorm2d)
        if not isconv and not isyolo and not isbn:
            continue

        if isyolo:
            for j, (_, yolo_layer) in enumerate(module.named_modules()):
                # skip Detect and ModuleList
                if j < 2:
                    continue
                # parse layer-by-layer
                yolo_b.append(numpize(yolo_layer.bias))
                yolo_w.append(numpize(yolo_layer.weight))
        elif isconv:
            if isinstance(module.bias, torch.Tensor):
                conv_b.append(numpize(module.bias))
            conv_w.append(numpize(module.weight))
        else:  # isbn
            bn_b.append(numpize(module.bias))
            bn_w.append(numpize(module.weight))
            bn_m.append(numpize(module.running_mean))
            bn_v.append(numpize(module.running_var))

    # to fifo with pop()
    conv_w.reverse()
    conv_b.reverse()
    bn_w.reverse()
    bn_b.reverse()
    bn_m.reverse()
    bn_v.reverse()
    yolo_w.reverse()
    yolo_b.reverse()

    with open(f'darknet{"_nobn"*fuse}.weights', "wb") as f:
        # header
        np.array([0, 2, 5, 0, 0], dtype=np.int32).tofile(f)

        # https://github.com/AlexeyAB/darknet/issues/2587#issuecomment-500895636
        for v in arch:
            print(v)
            if v == "[yolo]":
                yolo_b.pop().tofile(f)
                yolo_w.pop().tofile(f)
            elif v == "[bn]":
                bn_b.pop().tofile(f)
                bn_w.pop().tofile(f)
                bn_m.pop().tofile(f)
                bn_v.pop().tofile(f)
                conv_w.pop().tofile(f)
            else:
                conv_b.pop().tofile(f)
                conv_w.pop().tofile(f)

this doesn’t give the right weights as the final binary size is larger than what it should be.

any idea?