My question is: why is the output of a batch-norm training step with momentum=1 not identical to the output of a subsequent eval step on the same input?

```
import numpy as np
import torch
import torchvision
from torch.autograd import Variable
class BNNet(torch.nn.Module):
    """Minimal module wrapping a single ``BatchNorm2d`` layer.

    With ``momentum=1.0`` the running statistics are replaced wholesale
    by the statistics of the most recent training batch (no exponential
    averaging with previous values).
    """

    def __init__(self):
        super(BNNet, self).__init__()
        # 2 channels; momentum=1.0 -> running stats == last batch's stats.
        self.bn = torch.nn.BatchNorm2d(2, momentum=1.0)

    def forward(self, x):
        """Apply batch normalization to ``x`` of shape (N, 2, H, W)."""
        return self.bn(x)
def example():
    """Show that a train-mode pass and a subsequent eval-mode pass on the
    same input differ slightly, even with ``momentum=1``.

    Cause (per the BatchNorm docs): in train mode the layer normalizes
    with the *biased* batch variance (divide by N), but updates
    ``running_var`` with the *unbiased* estimate (divide by N-1), which
    eval mode then uses.  The gap shrinks as N — the per-channel sample
    count, here 1*10*10 — grows.
    """
    model = BNNet()
    model.train()
    # Variable is deprecated; a plain tensor behaves identically here.
    inp = 10 * torch.randn(1, 2, 10, 10)
    # Output after the train-mode forward pass (uses batch statistics).
    output1 = model(inp)
    model.eval()
    # Output after the eval-mode forward pass (uses running statistics).
    output2 = model(inp)
    # The two outputs differ; the magnitude of the difference decreases
    # as the window size (10x10 in this example) increases.
    print("max abs diff = %.4f" % (np.max(np.abs(output1.detach().numpy()
                                                 - output2.detach().numpy()))))
```