The output of SyncBatchNorm

I was reading the code of SyncBatchNorm in /usr/local/lib/python3.6/site-packages/torch/nn/modules/batchnorm.py:

@classmethod
def convert_sync_batchnorm(cls, module, process_group=None):
    module_output = module
    if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
        module_output = torch.nn.SyncBatchNorm(module.num_features,
                                               module.eps, module.momentum,
                                               module.affine,
                                               module.track_running_stats,
                                               process_group)
        if module.affine:
            module_output.weight.data = module.weight.data.clone().detach()
            module_output.bias.data = module.bias.data.clone().detach()
            # keep requires_grad unchanged
            module_output.weight.requires_grad = module.weight.requires_grad
            module_output.bias.requires_grad = module.bias.requires_grad
        module_output.running_mean = module.running_mean
        module_output.running_var = module.running_var
        module_output.num_batches_tracked = module.num_batches_tracked
    for name, child in module.named_children():
        module_output.add_module(name, cls.convert_sync_batchnorm(child, process_group))
    del module
    return module_output
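
For context, this is roughly how the conversion is typically used in a DDP training script (a minimal sketch; the model, backend, and LOCAL_RANK handling are my assumptions, not taken from the snippet above):

import os

import torch
import torch.distributed as dist
import torch.nn as nn

# Assumes the script is launched by a tool that sets the usual env vars
# (RANK, WORLD_SIZE, LOCAL_RANK), e.g. torchrun or torch.distributed.launch.
dist.init_process_group(backend="nccl")
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)

# Create the process group first: convert_sync_batchnorm stores it on
# every SyncBatchNorm module it creates.
process_group = dist.new_group(ranks=list(range(dist.get_world_size())))

# An illustrative model containing ordinary BatchNorm layers.
model = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1),
    nn.BatchNorm2d(16),
    nn.ReLU(),
).cuda()

# Recursively replaces every _BatchNorm child with a SyncBatchNorm.
model = nn.SyncBatchNorm.convert_sync_batchnorm(model, process_group)
model = nn.parallel.DistributedDataParallel(model, device_ids=[local_rank])
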
  1. Why do the weight and bias of the BN layer need module_output.weight.data = module.weight.data.clone().detach(), rather than module_output.weight.data = module.weight.data the way running_mean and running_var are assigned? (See the sketch after this list.)

  2. Why must convert_sync_batchnorm be executed after torch.distributed.new_group?

  3. Which parameters does SyncBatchNorm synchronize? Are weight, bias, running_mean, and running_var all synchronized?
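
For reference while thinking about question 1, here is a small sketch of the difference between the two assignment styles (the module instances are illustrative; no distributed setup is needed just to construct them):

import torch
import torch.nn as nn

bn = nn.BatchNorm2d(4)
sbn = nn.SyncBatchNorm(4)

# weight and bias are nn.Parameter objects; clone().detach() copies the
# values into a fresh tensor that shares no storage (and carries no
# autograd history) with the original parameter.
sbn.weight.data = bn.weight.data.clone().detach()
assert sbn.weight.data.data_ptr() != bn.weight.data.data_ptr()

# running_mean and running_var are plain buffers; the converter simply
# rebinds the attribute, so both modules reference the same tensor object.
sbn.running_mean = bn.running_mean
assert sbn.running_mean is bn.running_mean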