# Loss won't converge for OR-gate perceptron in plain Python

Out of curiosity, I am coding a perceptron for the OR gate from scratch in plain Python (borrowing only PyTorch's sigmoid), but the loss won't converge.

``````python
import numpy as np
import torch


class Perceptron:
    def __init__(self):
        self.learning_rate = 0.01
        self.sigmoid = torch.nn.Sigmoid()

        # initializing weights
        self.w1, self.w2, self.bias = 0.01, 0.03, 0.05

    def predict(self, inputs):
        x1, x2 = inputs
        logits = (x1 * self.w1) + (x2 * self.w2) + self.bias
        predictions = self.sigmoid(torch.tensor(logits))
        return predictions

    def fit(self, training_inputs, targets, epochs=10000):
        for epoch in range(epochs):
            loss = 0

            for training_input, target in zip(training_inputs, targets):
                x1, x2 = training_input
                logits = (x1 * self.w1) + (x2 * self.w2) + self.bias
                prediction = self.sigmoid(torch.tensor(logits)).numpy()

                # sum of squared residuals; alternatively you can use mean squared error
                loss += self.calculate_loss(target, prediction)

                # chain rule: d(loss)/d(w) = d(loss)/d(prediction) * d(prediction)/d(logits) * d(logits)/d(w)
                d_loss_and_d_prediction = -2 * (target - prediction)
                d_sigmoid_and_d_logits = logits * (1 - logits)
                d_loss_and_d_w1 = d_loss_and_d_prediction * d_sigmoid_and_d_logits * x1
                d_loss_and_d_w2 = d_loss_and_d_prediction * d_sigmoid_and_d_logits * x2

                # calculate step size
                step_size_w1 = d_loss_and_d_w1 * self.learning_rate
                step_size_w2 = d_loss_and_d_w2 * self.learning_rate

                # update weights
                self.w1 -= step_size_w1
                self.w2 -= step_size_w2

            print("loss: ", loss)

    def calculate_loss(self, target, prediction):
        return (target - prediction)**2


model = Perceptron()
training_inputs = [[1., 1.], [1., 0.], [0., 1.], [0., 0.]]
targets = [1., 1., 1., 0.]
model.fit(training_inputs, targets)
``````

However, when I try the same with PyTorch, it works:

``````python
import torch
import torch.nn.functional as F
from torch import optim


class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.FC1 = torch.nn.Linear(2, 1)

    def forward(self, training_inputs):
        return F.sigmoid(self.FC1(training_inputs))


model = Model()
model = model.train()

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.0)
training_inputs = torch.tensor([[1, 1], [1, 0], [0, 1], [0, 0]], dtype=torch.float32)
targets = torch.tensor([[1], [1], [1], [0]], dtype=torch.float32)


def calculate_loss(targets, predictions):
    # sum of squared residuals over all four samples
    loss = 0
    for target, prediction in zip(targets, predictions):
        loss += (target - prediction)**2
    return loss


epochs = 10000
for epoch in range(epochs):
    optimizer.zero_grad()  # reset gradients accumulated by the previous step
    predictions = model(training_inputs)
    loss = calculate_loss(targets, predictions)
    loss.backward()
    optimizer.step()
    print(loss)
``````

---

I would recommend using the same initial values in your PyTorch model:

``````python
with torch.no_grad():
    model.FC1.weight.copy_(torch.tensor([[0.01, 0.03]]))
    model.FC1.bias.copy_(torch.tensor([0.05]))
``````

Afterwards you could compare the loss values as well as the gradients to debug the issue further; with identical initial weights, the first value that differs between the two implementations points at the bug.
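For example, after a single forward/backward pass you could print the gradients PyTorch computed and set them against your hand-derived `d_loss_and_d_w1`/`d_loss_and_d_w2` (a minimal sketch reusing the names from your training script):

``````python
# One forward/backward pass on the same four samples.
optimizer.zero_grad()
predictions = model(training_inputs)
loss = calculate_loss(targets, predictions)
loss.backward()

# Gradients PyTorch computed for the linear layer:
# weight.grad has shape [1, 2] (w1, w2), bias.grad has shape [1].
print("loss:       ", loss.item())
print("weight grad:", model.FC1.weight.grad)
print("bias grad:  ", model.FC1.bias.grad)
``````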
Also, in your numpy model you are not updating the `bias`, which might already be causing the divergence.
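For reference, here is a minimal sketch of one per-sample update with the bias included (not your exact code; note that the derivative of the sigmoid is expressed through its output, `prediction * (1 - prediction)`, rather than through the raw `logits`):

``````python
import math

def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))

# One sample of the OR problem and the same initial parameters.
x1, x2, target = 1.0, 0.0, 1.0
w1, w2, bias, learning_rate = 0.01, 0.03, 0.05, 0.01

logits = x1 * w1 + x2 * w2 + bias
prediction = sigmoid(logits)

# chain rule for the squared error (target - prediction)**2
d_loss_d_prediction = -2 * (target - prediction)
d_prediction_d_logits = prediction * (1 - prediction)  # sigmoid'(logits)
d_loss_d_logits = d_loss_d_prediction * d_prediction_d_logits

# d(logits)/d(w1) = x1, d(logits)/d(w2) = x2, d(logits)/d(bias) = 1
w1 -= learning_rate * d_loss_d_logits * x1
w2 -= learning_rate * d_loss_d_logits * x2
bias -= learning_rate * d_loss_d_logits * 1.0
``````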