How come these two pieces of code do not give approximately equivalent outputs? Is there something about calculating BCE loss I’m not understanding?

```
import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable as V
import torch.nn as nn
# Small constant added inside each log so that log(0) is never evaluated.
# NOTE: the literal 10e-8 equals 1e-7 (not 1e-8).
epsilon = 10e-8


def BCE_loss(x, y):
    """Return the summed binary cross-entropy between ``x`` and ``y``.

    Computes ``sum(-y * log(x + epsilon) - (1 - y) * log(1 - x + epsilon))``
    over every element, i.e. a "sum" reduction (no averaging).

    Args:
        x: Predicted probabilities; array-like with at least one dimension.
        y: Targets with the same shape as ``x`` after flattening the
            trailing dimensions.

    Returns:
        The total loss as a NumPy scalar.
    """
    # Accept nested lists as well as ndarrays; ndarrays pass through as-is.
    x = np.asarray(x)
    y = np.asarray(y)
    # Flatten everything after the leading (batch) axis.
    x = np.reshape(x, (x.shape[0], -1))
    y = np.reshape(y, (y.shape[0], -1))
    loss = np.sum(-y * np.log(x + epsilon) - (1 - y) * np.log(1 - x + epsilon))
    return loss
# Reference inputs: x holds the predicted values, y the (soft) targets.
x = np.array([[.5, .6, .7], [.9, .8, .9]])
y = np.array([[.9, .8, .7], [.4, .5, .6]])
print(BCE_loss(x, y))

# The same data as torch tensors. unsqueeze(1) inserts a singleton axis at
# position 1; it does not change the element values being compared.
# Wrapping in autograd.Variable is unnecessary on PyTorch >= 0.4, where
# tensors and Variables are the same type.
xt = torch.from_numpy(x).unsqueeze(1)
yt = torch.from_numpy(y).unsqueeze(1)

# reduction="sum" replaces the deprecated size_average=False and matches the
# un-averaged np.sum used by BCE_loss.
loss_func = nn.BCELoss(reduction="sum")
print(loss_func(xt, yt))
```