Is this a bug? Iterating over model.modules() yields every module in the model, including all nested submodules

Hello everyone.
I made a simple network and tried to access its modules (just printing them for now).
This is the network I’m talking about:

import torch.nn as nn

def convlayer(input_dim, output_dim, kernel_size=3, stride=1, padding=1, batchnorm=False):
    layers = []
    conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride, padding)
    layers.append(conv)
    if batchnorm: 
        layers.append(nn.BatchNorm2d(output_dim))
    layers.append(nn.ReLU())

    return nn.Sequential(*layers)

class sequential_net3_2(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
                                    convlayer(3, 6, 3, stride=2), 
                                    convlayer(6, 12, 3, stride=2, batchnorm=True),
                                    convlayer(12, 12, 3, stride=2, batchnorm=True)
                                    )
        self.classifer = nn.Linear(12, 2)

    def forward(self, x):
        output = self.features(x)
        output = output.view(x.size(0), -1)
        output = self.classifer(output)
        return output

model = sequential_net3_2()  # avoid shadowing the class name with the instance
for i, m in enumerate(model.modules()):
    print(f'{i}, {m}')

I expected to see the same modules I see when printing the model, which is:

sequential_net3_2(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(6, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): Sequential(
      (0): Conv2d(12, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (classifer): Linear(in_features=12, out_features=2, bias=True)
)

But instead I got:

0, sequential_net3_2(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(6, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): Sequential(
      (0): Conv2d(12, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (classifer): Linear(in_features=12, out_features=2, bias=True)
)
1, Sequential(
  (0): Sequential(
    (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(6, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(12, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
)
2, Sequential(
  (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): ReLU()
)
3, Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
4, ReLU()
5, Sequential(
  (0): Conv2d(6, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)
6, Conv2d(6, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
7, BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
8, ReLU()
9, Sequential(
  (0): Conv2d(12, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)
10, Conv2d(12, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
11, BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
12, ReLU()
13, Linear(in_features=12, out_features=2, bias=True)

I was only expecting:

0, sequential_net3_2(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 6, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(6, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (2): Sequential(
      (0): Conv2d(12, 12, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (classifer): Linear(in_features=12, out_features=2, bias=True)
)

and not every module at every level of nesting!
Is this a bug or expected behavior? If it is expected behavior, what is the use of such seemingly redundant information?

Thank you all very much

The answer is no, this is not a bug. modules() is designed to do exactly this: it recursively yields the module itself and every submodule, at every level of nesting.
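That recursive listing is useful whenever you need to touch every layer no matter how deeply it is nested. A minimal sketch, reusing the model instance from the question, that applies a custom weight initialization to every Conv2d and BatchNorm2d:

for m in model.modules():
    if isinstance(m, nn.Conv2d):
        # reaches the convs buried two Sequentials deep
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)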
named_children() and children() are what you (and I) want here; as the names suggest, they yield only the immediate child modules.
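A quick check on the same model instance:

for name, child in model.named_children():
    print(name, '->', type(child).__name__)
# features -> Sequential
# classifer -> Linear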
For altering networks, you should nearly always use children() or named_children(), so that you swap out whole top-level blocks rather than the individual layers nested inside them.
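A common pattern of that kind (again just a sketch on the model above) is chopping off the classifier head to get a feature extractor:

# keep every direct child except the last one (the classifier)
backbone = nn.Sequential(*list(model.children())[:-1])
print(backbone)  # only the features Sequential remains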