JIT trace model with ModuleList error

I have a model with this structure:

class FuckNet3(nn.Module):

    def __init__(self):
        super(FuckNet3, self).__init__()

        self.welcome_layer = BasicConv(3, 768, 3, 1, 1)
        self.welcome_layer2 = BasicConv(3, 128, 3, 1, 1)

        self.planes = 256
        self.smooth = True
        self.num_scales = 6
        self.num_levels = 8

        self._construct_modules()

    def _construct_modules(self):
        # construct the TUMs (8 levels)
        for i in range(8):
            if i == 0:
                setattr(self,
                        'unet{}'.format(i + 1),
                        TUMV2(first_level=True,
                              input_planes=self.planes // 2,
                              is_smooth=self.smooth,
                              scales=self.num_scales,
                              side_channel=512))  # side channel isn't fixed.
            else:
                setattr(self,
                        'unet{}'.format(i + 1),
                        TUMV2(first_level=False,
                              input_planes=self.planes // 2,
                              is_smooth=self.smooth,
                              scales=self.num_scales,
                              side_channel=self.planes))
        # this self.leach is what causes my trace error
        self.leach = nn.ModuleList([BasicConv(
            256 + 512,
            128,
            kernel_size=(1, 1), stride=(1, 1))] * 8)

    def forward(self, x):
        base_feature = self.welcome_layer(x)

        tum_outs = [getattr(self, 'unet{}'.format(1))(self.leach[0](base_feature), 'none')]

        for i in range(1, self.num_levels, 1):
            tum_outs.append(
                getattr(self, 'unet{}'.format(i + 1))(
                    self.leach[i](base_feature), tum_outs[i - 1][-1]
                )
            )
        return tuple(tum_outs[0])

I get this error:

  File "/home/jintain/.local/lib/python3.6/site-packages/torch/jit/__init__.py", line 1469, in __init__
    check_unique(param)
  File "/home/jintain/.local/lib/python3.6/site-packages/torch/jit/__init__.py", line 1461, in check_unique
    raise ValueError("TracedModules don't support parameter sharing between modules")
ValueError: TracedModules don't support parameter sharing between modules

Does anybody know how I can solve this problem?

I do not understand why there is weight sharing, since my modules are defined in a list and each module is only called once!

If anybody can solve my problem I will send them a bitcoin!

When I change it to:

# self.leach = [BasicConv(
#     deep_out + shallow_out,
#     self.planes // 2,
#     kernel_size=(1, 1), stride=(1, 1))] * self.num_levels

it can be traced (simply replacing the ModuleList with a plain Python list), but the results are completely wrong!

So I am confused: why does replacing the ModuleList with a plain list make it traceable, but change the behaviour?

I want to keep the same behaviour and still be able to trace it. What should I do?

You are copying the BasicConv layer by using

[BasicConv(...)] * 8

which will result in 8 references to the same layer (same parameters, same id).
If you would like to initialize 8 different layers, use:

self.leach = nn.ModuleList([BasicConv(
    256 + 512,
    128,
    kernel_size=(1, 1), stride=(1, 1)) for _ in range(8)])
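To make the difference concrete, here is a minimal sketch (using a plain nn.Conv2d as a stand-in, since BasicConv isn't shown here) comparing the two constructions:

import torch.nn as nn

# 8 references to one and the same layer vs. 8 independent layers
shared = nn.ModuleList([nn.Conv2d(3, 8, 1)] * 8)
independent = nn.ModuleList([nn.Conv2d(3, 8, 1) for _ in range(8)])

print(shared[0] is shared[7])               # True  -> shared parameters, breaks tracing
print(independent[0] is independent[7])     # False -> each layer has its own parameters

# parameters() deduplicates shared tensors, so the counts differ
print(len(list(shared.parameters())))       # 2  (one weight, one bias)
print(len(list(independent.parameters())))  # 16 (8 weights + 8 biases)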

Well, thank you ptrblck… how do you always point out the right answer so easily? Problem solved! This is the right way to do it. Thanks!

I followed your way of defining the model architecture, but I still get the trace error when visualizing it…

    # view network architecture
    writer = SummaryWriter('arch', flush_secs=120)
    with writer:
        writer.add_graph(model, torch.rand(32, 3, 250, 250))  # model graph, with input

By the way, my model is a multi-task model.

The error:

Traceback (most recent call last):
  File "/home/user1/test.py", line 28, in <module>
    writer.add_graph(model, torch.rand(32, 3, 250, 250))  # model graph, with input
  File "/home/user1/miniconda3/lib/python3.7/site-packages/tensorboardX/writer.py", line 804, in add_graph
    self._get_file_writer().add_graph(graph(model, input_to_model, verbose, profile_with_cuda, **kwargs))
  File "/home/user1/miniconda3/lib/python3.7/site-packages/tensorboardX/pytorch_graph.py", line 324, in graph
    trace = torch.jit.trace(model, args)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 686, in trace
    traced = _module_class(func, **executor_options)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1046, in init_then_register
    original_init(self, *args, **kwargs)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1046, in init_then_register
    original_init(self, *args, **kwargs)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1482, in __init__
    self._modules[name] = TracedModule(submodule, id_set, optimize=optimize)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1046, in init_then_register
    original_init(self, *args, **kwargs)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1482, in __init__
    self._modules[name] = TracedModule(submodule, id_set, optimize=optimize)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1046, in init_then_register
    original_init(self, *args, **kwargs)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1482, in __init__
    self._modules[name] = TracedModule(submodule, id_set, optimize=optimize)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1046, in init_then_register
    original_init(self, *args, **kwargs)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1469, in __init__
    check_unique(param)
  File "/home/user1/miniconda3/lib/python3.7/site-packages/torch/jit/__init__.py", line 1461, in check_unique
    raise ValueError("TracedModules don't support parameter sharing between modules")
ValueError: TracedModules don't support parameter sharing between modules

My model architecture:


class Backbone(nn.Module):
    def __init__(self, num_layers, drop_ratio, mode='ir'):
        super(Backbone, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64), 
                                      PReLU(64))
        self.output_layer = Sequential(BatchNorm2d(512), 
                                       Dropout(drop_ratio),
                                       Flatten(),
                                       Linear(512 * 7 * 7, 512),
                                       BatchNorm1d(512))
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(
                    unit_module(bottleneck.in_channel,
                                bottleneck.depth,
                                bottleneck.stride))
        self.body = Sequential(*modules)

        # for MTL
        self.tower = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
        )

        self.towers = nn.ModuleList([self.tower for _ in range(40)])

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self,x):
        x = self.input_layer(x)
        x = self.body(x)
        h_shared = self.output_layer(x)
        out = [tower(h_shared) for tower in self.towers]
        return out
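
This looks like the same sharing pattern as above: the list comprehension [self.tower for _ in range(40)] fills the ModuleList with 40 references to the single self.tower module, so all towers share parameters, which is exactly what the tracer rejects. A minimal sketch of how the towers could be built independently instead (make_tower is a hypothetical helper, not part of the original code):

import torch.nn as nn

def make_tower():
    # one independent classification head with its own weights
    return nn.Sequential(
        nn.Dropout(),
        nn.Linear(512, 32),
        nn.ReLU(),
        nn.Linear(32, 2),
    )

# 40 separate towers, no parameter sharing between them
towers = nn.ModuleList([make_tower() for _ in range(40)])
print(towers[0] is towers[1])  # False -> safe to trace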