I am new to pytorch, not familar with the gradient graph. I just created 2-layers NN, however the weights did not update during training process. Please help me out.
import torch
import torch.nn as nn
torch.set_default_dtype(torch.float64)
class Nets(nn.Module):
def __init__(self):
super().__init__()
self.hidden1 = nn.Linear(26, 15)
self.hidden2 = nn.Linear(15, 15)
self.out = nn.Linear(15, 1)
def forward(self, training_sets):
trained_results = torch.tensor([], requires_grad=True)
for ts in training_sets:
x = self.hidden1(ts)
x = torch.tanh(x)
# flow to 2nd hidden layer
x = self.hidden2(x)
x = torch.tanh(x)
# flow to output layer
x = self.out(x)
e = torch.sum(x)
trained_results = torch.cat((trained_results, torch.tensor([e])))
return trained_results
import torch.nn as nn
import numpy as np
from math import sqrt
net = Nets()
# Mean Square Error
loss_func = nn.MSELoss(reduction='mean')
learning_rate = 0.01
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
epochs = 1001
error = []
for i in range(epochs):
loss = loss_func(net(inputs), target_results)
if i % 100 == 0:
print(i, loss, net(inputs))
error.append(loss)
optimizer.zero_grad()
loss.backward(retain_graph=True)
optimizer.step()
results outputs:
0 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
100 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
200 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
300 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
400 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
500 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
600 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
700 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
800 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
900 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)
1000 tensor(5629.6088, grad_fn=) tensor([-17.9612, -17.9524, -17.9473], grad_fn=)