Hello!
Please help me figure it out. I sketched a simple network and try different configurations with 60 * 5 inputs. All the models I get are not learning, or, if I’m very lucky, stop learning after a few steps. I compared the network parameters at different steps - they are the same. The total weight is also unchanged.
class NeuralNetwork(torch.nn.Module):
def crutch(self):
after = []
for param in self.net.parameters():
after.append(param.clone().cpu())
for i in range(len(self.before)):
print(torch.equal(self.before[i], after[i]))
print(" ")
self.before = after
def save(self):
torch.save(self.net, "data/" + self.name)
def init_weights(self, m):
if type(m) == torch.nn.Linear:
m.weight.data.uniform_(-0.5, 0.5)
m.bias.data.fill_(0.0)
def overallWeight(self):
result = 0.0
def passLayer(m):
if type(m) == torch.nn.Linear:
nonlocal result
result += torch.sum(m.weight.data).item()
self.net.apply(passLayer)
return result
def load(self):
buffernet = None
try:
buffernet = torch.load("data/" + self.name)
self.net = buffernet
return True
except:
print("cannot load the model. will use the new one")
return False
def __init__(self, dimensions, name):
super(NeuralNetwork, self).__init__()
self.name = name
for part in dimensions:
self.name += "_"
self.name += str(part)
self.setTrainingDataCollection(False)
self.dimensions = dimensions
if self.load() == False:
layers = OrderedDict([])
for i in range(len(dimensions)):
if i == 0:
continue
layers.update({str(i): torch.nn.Linear(dimensions[i-1], dimensions[i], True).cuda()})
layers.update({str(i)+'_func': torch.nn.Sigmoid()})
self.net = torch.nn.Sequential(layers)
self.net.apply(self.init_weights)
self.optimizer = torch.optim.SGD(self.net.parameters(), lr=0.9, momentum=0.9)
self.optimizerCriterion = torch.nn.MSELoss(reduction='none').cuda()
self.before = []
for param in self.net.parameters():
self.before.append(param.clone().cpu())
def forward(self, X):
return self.net(X)
def backward(self, y, o):
self.optimizer.zero_grad()
loss = self.optimizerCriterion(o, y)
loss.backward()
self.optimizer.step()
def train(self, X, y):
o = self.forward(X.cuda())
self.backward(y.cuda(), o)
return o
crutch with 1000 iterations difference outputs True for each layer.
overallWeight does not change.
Checked loss - it contains the correct deviation.
I would be grateful for any hint on what I am missing.