I am trying some weird evolutionary neuron connections on a simple XOR problem.
And when I am back-propagating the losses, the loss.backward()
throws a IndexError:
IndexError Traceback (most recent call last)
<ipython-input-2-39958196b2d6> in <module>
86 for loss, idx, network in fittest:
87 print(loss)
---> 88 loss['model'].backward() # Bug in PyTorch/Numpy so network don't train now.
89 network['optim'].step()
90
IndexError: too many indices for tensor of dimension 0
When I check the loss.shape
it returns torch.Size([])
=(
Any idea why is the tensor for the loss in a weird shape? How should I resolve the issue?
Full code:
import copy
import random
import itertools
# Plotting candies
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
# Number stuff.
import numpy as np
import pandas as pd
import torch
from torch import tensor
from torch.nn import Sequential, Linear, Sigmoid, L1Loss
from torch.optim import Adam
X = tensor([[0.,0.], [0.,1.], [1.,0.], [1.,1.]])
Y = tensor([[0,],[1.],[1.],[0.]])
# Use tensor.shape to get in-/output dimension size.
num_data, input_dim = X.shape
num_data, output_dim = Y.shape
init_population_size = 80
mutation_rate = 0.01
generations = 30
hidden_dim = 10
dimensions = hidden_dim + hidden_dim + output_dim * num_data
def get_random_pairs(numbers):
# Generate all possible non-repeating pairs
pairs = list(itertools.combinations(numbers, 2))
# Randomly shuffle these pairs
random.shuffle(pairs)
return pairs
# Define the model.
def create_model(lr):
# Each network will be initialized as such.
model = Sequential(Linear(input_dim, hidden_dim),
Sigmoid(),
Linear(hidden_dim, output_dim),
Sigmoid())
return {'model': model,
'optim': Adam(model.parameters(), lr=lr)
}
# Initialize the optimizer.
lr = 0.03
# Define the optimization criterion
criterion = L1Loss()
# We create 30 networks.
population = 30
population_of_networks = [create_model(lr) for i in range(population)]
survival_rate = 0.2
##mutation_rate = 0.1
for _g in range(generations):
for net in population_of_networks:
net['optim'].zero_grad()
# Get through every network.
loss_of_networks = []
for i, network in enumerate(population_of_networks):
pred = network['model'](X) # Fowards.
loss = criterion(pred, Y) # Loss.
loss_of_networks.append((loss, i, population_of_networks[i]))
# Only the fittest (i.e. lowest loss) survive
# at pre-defined survival rate.
num_survivors = int(survival_rate*population)
fittest = sorted(loss_of_networks)[:num_survivors]
# Populate the rest of the generation with offsprings of the fittest.
for mating_pairs in get_random_pairs(fittest):
if len(fittest) == population:
break
else:
(female_loss, female_id, female), (male_loss, male_id, male) = mating_pairs
# Access female's model and optimizers
female_model = population_of_networks[female_id]['model']
female_optim = population_of_networks[female_id]['optim']
# Access male's model and optimizers
male_model = population_of_networks[male_id]['model']
male_optim = population_of_networks[male_id]['optim']
# Make an "empty" baby by copying first.
baby_model = copy.deepcopy(female_model)
baby_optim = copy.deepcopy(female_optim)
for layer_name in zip(female_model.state_dict()):
## Crossover process.
f = female_model.state_dict()[layer_name[0]]
m = male_model.state_dict()[layer_name[0]]
# Randomly generate a crossover point.
crossover_pt = int(random.uniform(0, f.shape[0]))
# Make baby with crossover tensors.
tmp = copy.deepcopy(m[:crossover_pt])
m[:crossover_pt], f[:crossover_pt] = f[:crossover_pt], tmp
baby_layer = random.choice([m, f])
## No mutation process.
# Overwrite the state_dict layers of the baby
baby_model.state_dict()[layer_name[0]] = baby_layer
# Compute baby's loss.
_pred = baby_model(X)
_loss = criterion(_pred, Y)
fittest.append((_loss,
"_".join([str(female_id), str(male_id)]),
{'model': baby_model, 'optim': baby_optim}))
# Back propagate for all models.
for loss, idx, network in fittest:
print(loss)
loss['model'].backward() # Bug in PyTorch/Numpy so network don't train now.
network['optim'].step()