As an exercise I'm rewriting a simple numpy example in PyTorch, and so far I've had trouble matching the results. As a PyTorch newbie, it's quite possible I've made some silly mistake. The loss always seems to converge to 0.25 (in my example) and I have no idea why.
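One thing I did notice while poking at it: 0.25 happens to be exactly the MSE you get by predicting 0.5 for every sample on these targets, so maybe the model is collapsing to a constant. A quick sanity check outside the training code (my own addition, not part of the example):

```
import numpy as np

# the XOR targets used in the code below
y = np.array([[0.], [1.], [1.], [0.]])

# a model that always outputs 0.5 has an MSE of exactly 0.25
print(np.mean((y - 0.5) ** 2))  # -> 0.25
```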

Ah, and yes, I'm still on 0.3.1, so perhaps I should also consider an upgrade.

Thanks

```
import torch
import numpy as np
from torch.autograd import Variable

N, D_in, H, D_out = 4, 3, 4, 1

x_np = np.array([[0, 0, 1],
                 [0, 1, 1],
                 [1, 0, 1],
                 [1, 1, 1]])
x = Variable(torch.Tensor(x_np), requires_grad=True)

y = np.array([[0],
              [1],
              [1],
              [0]])
y = Variable(torch.Tensor(y))

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.Linear(H, D_out),
)

# per-element loss (no reduction), hence the explicit grad passed to backward()
loss_fn = torch.nn.MSELoss(reduce=False)
learning_rate = 1e-4
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print loss
    optimizer.zero_grad()
    loss.backward(torch.ones(4).view(-1, 1))
    optimizer.step()

predicted = model(Variable(torch.from_numpy(x_np).float())).data.numpy()
print '\n', predicted
```
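While comparing the two versions, one difference stands out to me: the numpy network below applies a sigmoid after each layer, while my Sequential stacks two Linear layers with nothing in between, so it can only ever represent a single affine map. A quick check of that claim (variable names are mine, not from the example):

```
import torch
from torch.autograd import Variable

lin1 = torch.nn.Linear(3, 4)
lin2 = torch.nn.Linear(4, 1)

# two stacked Linear layers compose into one affine map:
# W = W2 @ W1 and b = W2 @ b1 + b2
W = lin2.weight.data.mm(lin1.weight.data)
b = lin2.weight.data.mv(lin1.bias.data) + lin2.bias.data

x = Variable(torch.rand(4, 3))
stacked = lin2(lin1(x)).data
single = x.data.mm(W.t()) + b
print((stacked - single).abs().max())  # ~0, up to float error
```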

The numpy version:

```
import numpy as np

def nonlin(x, deriv=False):
    '''sigmoid (derivative is expressed in terms of the output)'''
    if deriv:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])
y = np.array([[0],
              [1],
              [1],
              [0]])

np.random.seed(1)

# randomly initialize our weights with mean 0
syn0 = 2 * np.random.random((3, 4)) - 1
syn1 = 2 * np.random.random((4, 1)) - 1

for j in xrange(500):
    # forward pass through both layers
    l0 = X
    l1 = nonlin(np.dot(l0, syn0))
    l2 = nonlin(np.dot(l1, syn1))

    # backward pass
    l2_error = y - l2
    print l2_error
    l2_delta = l2_error * nonlin(l2, deriv=True)
    l1_error = l2_delta.dot(syn1.T)
    l1_delta = l1_error * nonlin(l1, deriv=True)

    # weight updates
    syn1 += l1.T.dot(l2_delta)
    syn0 += l0.T.dot(l1_delta)

print '\n', l2
```
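For reference, this is how I would now try to mirror the numpy network in PyTorch: no bias terms (syn0/syn1 are bare weight matrices), a sigmoid after every layer, and the update syn += l.T.dot(delta), which corresponds to plain SGD with lr=1 on the loss 0.5 * sum((y - pred)**2). I haven't verified it against identical initial weights, so treat it as a sketch:

```
import torch
from torch.autograd import Variable

x = Variable(torch.Tensor([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]]))
y = Variable(torch.Tensor([[0], [1], [1], [0]]))

# mirror the numpy net: no biases, sigmoid after each Linear
model = torch.nn.Sequential(
    torch.nn.Linear(3, 4, bias=False),
    torch.nn.Sigmoid(),
    torch.nn.Linear(4, 1, bias=False),
    torch.nn.Sigmoid(),
)
optimizer = torch.optim.SGD(model.parameters(), lr=1.0)

for t in range(500):
    y_pred = model(x)
    # 0.5 * sum of squared errors, so the gradient matches the numpy deltas
    loss = 0.5 * torch.sum((y_pred - y) ** 2)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(model(x).data)
```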