Hello.
I am trying to translate a model from PyTorch to TensorFlow. I successfully converted the network architecture and weights from PyTorch to TF, but I found an inconsistency in the batch normalization layers: the outputs differ significantly between two models with exactly the same weights. After some investigation I found something I can't explain.
In PyTorch, when I compute the output of the batch norm layer directly from its equation, the result differs substantially from the output of a forward pass through the same layer.
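For reference, this is the inference-mode equation I am plugging the stored values into (with the nn.BatchNorm2d default $\epsilon = 10^{-5}$):

$$y = \gamma \cdot \frac{x - \hat{\mu}}{\sqrt{\hat{\sigma}^2 + \epsilon}} + \beta,$$

where $\hat{\mu}$ and $\hat{\sigma}^2$ are the layer's running_mean and running_var.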
Can anyone explain this to me?
(Python 3.6, PyTorch 0.4.0)
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.module_list = nn.ModuleList()
        module = nn.Sequential()
        conv = nn.Conv2d(3, 32, 3, 1, 1, bias=False)
        module.add_module('conv_0', conv)
        bn = nn.BatchNorm2d(32)
        module.add_module('batch_norm_0', bn)
        # Fill the affine parameters and running statistics with random
        # values so both frameworks start from identical weights.
        bn.weight.data.copy_(torch.from_numpy(np.random.rand(32)))
        bn.bias.data.copy_(torch.from_numpy(np.random.rand(32)))
        bn.running_mean.copy_(torch.from_numpy(np.random.rand(32)))
        bn.running_var.copy_(torch.from_numpy(np.random.rand(32)))
        self.module_list.append(module)

    def forward(self, input):
        conv = self.module_list[0][0](input)
        bn = self.module_list[0][1](conv)
        return conv, bn


if __name__ == '__main__':
    model = Model()
    bn = model.module_list[0][1]
    # Reshape to (1, C, 1, 1) so the per-channel values broadcast over NCHW.
    gamma = bn.weight.data.numpy().reshape(1, 32, 1, 1)
    beta = bn.bias.data.numpy().reshape(1, 32, 1, 1)
    mean = bn.running_mean.numpy().reshape(1, 32, 1, 1)
    var = bn.running_var.numpy().reshape(1, 32, 1, 1)

    x = Variable(torch.from_numpy(np.random.rand(1, 3, 64, 64)).float())
    conv_out, bn_out = model(x)
    conv_out = conv_out.data.numpy()

    # Batch norm computed by hand from the stored statistics.
    manual_bn = ((conv_out - mean) / np.sqrt(var + 1e-05)) * gamma + beta

    # Mean absolute difference -- I expect this to be negligible, but it is ~0.5.
    print(np.sum(np.abs(bn_out.data.numpy() - manual_bn)) / np.prod(manual_bn.shape))
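One check that might narrow this down, assuming the mismatch is a train/eval issue: a freshly constructed module is in training mode, where BatchNorm2d normalizes with the current batch's statistics and updates the running buffers in place, rather than using the stored ones. A minimal sketch of that check, run after the script above:

# Switch to eval mode, where BatchNorm2d uses the stored running
# statistics -- the same quantities the manual equation uses.
model.eval()
conv_out, bn_out = model(x)
conv_out = conv_out.data.numpy()

# Re-read the running statistics: the earlier training-mode forward
# pass has updated them in place.
mean = bn.running_mean.numpy().reshape(1, 32, 1, 1)
var = bn.running_var.numpy().reshape(1, 32, 1, 1)

manual_bn = ((conv_out - mean) / np.sqrt(var + 1e-05)) * gamma + beta
# If the train/eval assumption is right, this difference should drop
# to around float32 precision instead of ~0.5.
print(np.sum(np.abs(bn_out.data.numpy() - manual_bn)) / np.prod(manual_bn.shape))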