Hello,
I’m trying to understand the calculations and manually calculate this simple network.
All calculations are good except for the weight.
What am I doing wrong?
import torch
import torch.nn as nn
import torch.optim as optim
weight_0 = 0.25
bias_0 = 0.68
l_rate = 0.01
input_data = torch.Tensor([[2.2], [4.0]])
target_data = torch.Tensor([[4.1], [5.1]])
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(1, 1)
self.fc1.weight.data = torch.tensor([[weight_0]])
self.fc1.bias.data = torch.tensor([bias_0])
def forward(self, x):
x = self.fc1(x)
return x
net = Net()
loss_f = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr = l_rate)
#net.train() #one epoch
net_out = net(input_data)
loss = loss_f(net_out, target_data)
optimizer.zero_grad()
loss.backward()
optimizer.step()
net.eval()
print("loss: ", loss.data) # 9.9666
print("weight:", net.fc1.weight.data, net.fc1.weight.grad.data) # 0.4499, -19.9940
print("bias: ", net.fc1.bias.data, net.fc1.bias.grad.data) # 0.7429, -6.2900
print("calculations:")
in_mean = (input_data[0] + input_data[1]) / 2
out_1 = (input_data[0] * weight_0 + bias_0)
out_2 = (input_data[1] * weight_0 + bias_0)
loss_1 = (out_1 - target_data[0]) ** 2
loss_2 = (out_2 - target_data[1]) ** 2
loss_out = (loss_1 + loss_2) / 2
loss_d_1 = (out_1 - target_data[0]) * 2
loss_d_2 = (out_2 - target_data[1]) * 2
loss_d_out = (loss_d_1 + loss_d_2) / 2
weight = weight_0 - loss_d_out * l_rate * in_mean
bias = bias_0 - loss_d_out * l_rate
print("loss:", loss_out) # 9.9666
print("loss_d:", loss_d_out) # -6.2900
print("weight:", weight) # 0.4450 (should be 0.4499)
print("bias:", bias) # 0.7429
print(net.fc1.weight.data, "-", weight, "=", net.fc1.weight.data-weight.data) # 0.4499 - 0.4450 = 0.0050
Thanks!