Hi guys,
PyTorch does not seem to take into account the sign with which I compute my error. I would expect the gradient update to be a subtraction when the error is computed as hypothesis - target, and an addition when the error is computed as target - hypothesis. In NumPy we have to change the sign of the update (w += or w -=) accordingly, which is pretty intuitive. But with PyTorch we always have to use w -=, no matter how the error is computed. Is PyTorch designed to behave this way?
Below is my code with target - hypothesis.
import torch as th
from torch.autograd import Variable
# Training hyper-parameters.
epochs = 501
lr = 1

# XOR truth table: four 2-bit inputs and, for each, a two-column target row.
XOR_X = [[0, 0], [0, 1], [1, 0], [1, 1]]
XOR_Y = [[0, 1], [1, 0], [1, 0], [0, 1]]

# Tensor type used for every tensor below: the CUDA float type when a GPU is
# available, the CPU float type otherwise. Only the selected branch is
# evaluated.
dtype = th.cuda.FloatTensor if th.cuda.is_available() else th.FloatTensor

# Inputs and targets are constants, so no gradient is tracked for them.
x_ = Variable(th.FloatTensor(XOR_X).type(dtype), requires_grad=False)
y_ = Variable(th.FloatTensor(XOR_Y).type(dtype), requires_grad=False)

# Trainable parameters of a 2 -> 5 -> 2 network: weights start from a normal
# distribution (th.randn), biases start at zero.
w1 = Variable(th.randn(2, 5).type(dtype), requires_grad=True)
w2 = Variable(th.randn(5, 2).type(dtype), requires_grad=True)
b1 = Variable(th.zeros(5).type(dtype), requires_grad=True)
b2 = Variable(th.zeros(2).type(dtype), requires_grad=True)
def forward(x):
    """Run the two-layer sigmoid network on a batch of inputs.

    Reads the module-level parameters ``w1``, ``b1``, ``w2`` and ``b2``.

    Args:
        x: 2-D tensor with one sample per row; its column count must match
            the row count of ``w1`` for the matrix product to be defined.

    Returns:
        The output-layer activations after the final sigmoid, one row per
        input sample.
    """
    a2 = x.mm(w1)
    # PyTorch had no NumPy-like broadcasting when this script was written,
    # so each bias is explicitly expanded to the shape of the matrix it is
    # added to.
    a2 = a2.add(b1.expand_as(a2))
    h2 = a2.sigmoid()
    a3 = h2.mm(w2)
    a3 = a3.add(b2.expand_as(a3))
    hyp = a3.sigmoid()
    return hyp
# Plain gradient descent on the sum of squared errors.
for epoch in range(epochs):
    hyp = forward(x_)
    # The error is squared, so (y_ - hyp)**2 and (hyp - y_)**2 give the same
    # scalar cost: the sign convention of the error does not matter here.
    cost = (y_ - hyp).pow(2).sum()
    if epoch % 500 == 0:
        # .item() extracts the Python number from the scalar cost; indexing
        # a 0-dim tensor with [0] raises on current PyTorch releases.
        print(cost.item())
    cost.backward()
    # Why always "-=": backward() stores d(cost)/d(param) in param.grad, and
    # the chain rule has already folded in any sign coming from how the
    # error was written. Minimising the cost therefore always means stepping
    # against that gradient, i.e. subtracting it.
    for param in (w1, w2, b1, b2):
        param.data -= lr * param.grad.data
        # backward() accumulates into .grad, so every parameter's gradient
        # (biases included) has to be cleared before the next epoch.
        param.grad.data.zero_()
# Print, for each XOR input, the index of the larger of the two outputs.
for x in XOR_X:
    # Cast to ``dtype`` like the training inputs so that the sample is of the
    # same tensor type as the weights (matters when ``dtype`` is the CUDA
    # type).
    hyp = forward(Variable(th.FloatTensor([x]).type(dtype)))
    # max(1) returns (values, indices) along dimension 1; [1] selects the
    # indices.
    print(x, hyp.max(1)[1].data)