How to initialize weights in nn.Sequential container?

How do I use nn.init.xavier_normal_() (formerly nn.init.xavier_normal()) to initialize the weights inside an nn.Sequential container like the one below? Thanks for your help!

class CNN(nn.Module):
    """Two-stage convolutional classifier for single-channel 60x60 images.

    Two convolutional blocks (Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d ->
    Dropout2d) each halve the spatial size; the result is flattened and passed
    through two fully connected blocks that produce 7 log-probabilities per
    sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(    # input shape (1, 60, 60)
            nn.Conv2d(
                in_channels=1,    # input channels
                out_channels=32,  # output channels -> 32
                kernel_size=5,    # filter size
                stride=1,         # step size
                padding=2,        # padding = (kernel size - 1) / 2
            ),                    # output (32, 60, 60)
            nn.BatchNorm2d(32),
            nn.ReLU(),
            # With no stride given, the pool steps by kernel_size,
            # so the output is (32, 30, 30).
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout2d(0.7),
        )
        self.conv2 = nn.Sequential(     # input (32, 30, 30)
            nn.Conv2d(
                in_channels=32,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # output (64, 15, 15)
            nn.Dropout2d(0.7),
        )
        # Before the dense layers, the feature maps are flattened to
        # (batch size, 64*15*15) in forward().
        self.fc1 = nn.Sequential(
            # The input size must be changed when the picture size changes.
            nn.Linear(64 * 15 * 15, 400),
            nn.BatchNorm1d(400),
            nn.ReLU(),
            nn.Dropout(0.7),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(400, 7),
            # NOTE(review): this dropout acts directly on the 7 class scores
            # during training -- confirm that is intended.
            nn.Dropout(0.5),
            # Explicit dim: normalise over the class axis of the (batch, 7)
            # input instead of relying on the deprecated implicit dim.
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: input batch; the layer sizes above expect (batch, 1, 60, 60).

        Returns:
            Tensor of shape (batch, 7) holding log-probabilities.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = self.fc1(x)
        output = self.fc2(x)
        return output

You can loop over the container’s modules.

See here for an example (follow the usage of weights_init function): https://github.com/pytorch/examples/blob/master/dcgan/main.py#L95

5 Likes

Thanks. Actually, I found that this snippet in the official documentation directly answers my question:

def init_weights(m):
    """Fill the weight of an ``nn.Linear`` module with ones.

    Meant to be passed to ``nn.Module.apply``. Every module received is
    printed; for a Linear module the weight is also printed after filling.

    Args:
        m: the module to inspect and, if it is a Linear layer, initialise.
    """
    print(m)
    # Exact-type match on purpose: subclasses of nn.Linear are left untouched.
    if type(m) is nn.Linear:
        # nn.init.constant_ fills the parameter in place without recording the
        # operation in autograd, so no ``.data`` access is needed.
        nn.init.constant_(m.weight, 1.0)
        print(m.weight)

# Toy network: two stacked 2x2 linear layers.
net = nn.Sequential(
    nn.Linear(2, 2),
    nn.Linear(2, 2),
)
# apply() calls init_weights on each submodule and then on net itself.
net.apply(init_weights)

7 Likes

What does the parameter m here signify?

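`m` is the module currently being visited: `net.apply(init_weights)` calls `init_weights` once for each layer in `net`, one by one, and finally for `net` itself.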