Is it a PyTorch bug with eps set in BatchNorm?

In my network, I first set eps in BN like this:

class BNPReLU(nn.Module):
    def __init__(self, nIn):
        super().__init__()
        self.bn = nn.BatchNorm2d(nIn, eps=1e-3)
        self.acti = nn.PReLU(nIn)

    def forward(self, input):
        output = self.bn(input)
        output = self.acti(output)

        return output

Then, I add another BN like this:

class Network(nn.Module):
    def __init__(self, nIn, inp, init_channels, kernel_size, stride, relu=True):
        super().__init__()
        self.bn_relu = BNPReLU(nIn)
        self.primary_conv = nn.Sequential(
            nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size // 2, bias=False),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

But when I run my code with model.train(), the eps is 1e-3 in all places. So I debugged the network and found that eps is set correctly at network initialization, but once execution goes into forward(), every eps is 1e-3. I don't know why.
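This is roughly how I checked it (a sketch of the check, assuming model is the constructed network):

# Sketch: print the eps of every BatchNorm2d in the model
for name, m in model.named_modules():
    if isinstance(m, nn.BatchNorm2d):
        print(name, m.eps)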

Hi,

Are you sure you’re not setting it somewhere?
This prints the right thing for me:

import torch
from torch import nn

a = nn.BatchNorm2d(10, eps=1e-3)
b = nn.BatchNorm2d(10)  # default eps is 1e-5

print(a.eps)
print(b.eps)

inp = torch.rand(2, 10, 3, 3)

a(inp)
b(inp)

# eps is unchanged after the forward pass
print(a.eps)
print(b.eps)
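Note that eps is a plain Python attribute on the module (not a Parameter or buffer), so forward() itself never touches it; only an explicit assignment changes it. A small sketch to confirm:

import torch
from torch import nn

bn = nn.BatchNorm2d(10, eps=1e-4)
print(bn.eps)    # 0.0001
bn(torch.rand(2, 10, 3, 3))
print(bn.eps)    # still 0.0001 -- forward() never modifies eps
bn.eps = 1e-3    # only an explicit assignment like this changes it
print(bn.eps)    # 0.001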

You can’t test it like this. As I said, eps is right at initialization, but in the forward function it is set to 1e-3 everywhere.
My code is like this:

class BNPReLU(nn.Module):
    def __init__(self, nIn):
        super().__init__()
        self.bn = nn.BatchNorm2d(nIn, eps=1e-3)
        self.acti = nn.PReLU(nIn)

    def forward(self, input):
        output = self.bn(input)
        output = self.acti(output)

        return output


class Main_net(nn.Module):
    def __init__(self, classes=2):
        super().__init__()
        self.bn_prelu_1 = BNPReLU(32)
        self.decode1 = nn.Sequential(nn.Conv2d(67, 16, kernel_size=1, stride=1, bias=False),
                                     nn.BatchNorm2d(16, eps=1e-5),
                                     nn.ReLU())
        print('self in initialization:', self)

    def forward(self, input):
        print('self in forward:', self)
        output0 = self.bn_prelu_1(input)
        output = self.decode1(output0)
        return output

In main.py, I call it like this:

    model = Main_net(classes=2)
    model.train()
    output = model(input)

The result (because I am a new user and can only upload one picture, I merged the results into one image):

By the way, I also tried other eps values, such as 1e-4, in both BNPReLU and nn.BatchNorm2d. But in the forward pass, eps is still 1e-3 everywhere. Very weird.
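One way to check the value at the exact moment forward runs (a sketch, assuming model is the Main_net instance) is a forward pre-hook on every BatchNorm2d:

def report_eps(module, inputs):
    # Runs just before each BatchNorm2d forward; prints the eps it will use
    print(type(module).__name__, 'eps =', module.eps)

for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.register_forward_pre_hook(report_eps)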

That gives the right result as well:

import torch
from torch import nn, optim

class BNPReLU(nn.Module):
    def __init__(self, nIn):
        super().__init__()
        self.bn = nn.BatchNorm2d(nIn, eps=1e-3)
        self.acti = nn.PReLU(nIn)

    def forward(self, input):
        output = self.bn(input)
        output = self.acti(output)

        return output


class Main_net(nn.Module):
    def __init__(self, classes=2):
        super().__init__()
        self.bn_prelu_1 = BNPReLU(32)
        self.decode1 = nn.Sequential(nn.Conv2d(32, 16, kernel_size=1, stride=1, bias=False),
                                     nn.BatchNorm2d(16, eps=1e-5),
                                     nn.ReLU())
        print('self in initialization:', self)

    def forward(self, input):
        print('self in forward:', self)
        output0 = self.bn_prelu_1(input)
        output = self.decode1(output0)
        return output

mod = Main_net()
inp = torch.rand(1, 32, 20, 20)
mod(inp)

Gives me:

self in initialization: Main_net(
  (bn_prelu_1): BNPReLU(
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (acti): PReLU(num_parameters=32)
  )
  (decode1): Sequential(
    (0): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
)
self in forward: Main_net(
  (bn_prelu_1): BNPReLU(
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (acti): PReLU(num_parameters=32)
  )
  (decode1): Sequential(
    (0): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
)

Do you get something different?

Sorry, I initialize the BNs with eps=1e-3 in the main code and forgot about it. My fault.
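For anyone landing here with the same symptom: the overwrite was roughly of this shape (a reconstruction from the description above, not the exact code), a blanket init loop that runs after the model is built.

# Hypothetical init helper of the kind that caused this: it runs after
# construction and clobbers the per-layer eps set in __init__
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.eps = 1e-3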
