This is not a problem-solving or debugging question; as a beginner, I am seeking advice from experienced ML and AI coders. I have written this very simple genetic algorithm. I have no good machine to test it fully, but fitness does seem to increase with the number of iterations, so it learns.
I wonder what experienced coders would think of it and whether there are suggestions for improvement. I am also searching for a good entry-level textbook on GAs. I found one that I can read, but it is so old that I am afraid many of its approaches are out of date.
Here is my code:
import random

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# NOTE: device is defined but everything below currently runs on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_data = datasets.MNIST(
    root='data',
    train=True,
    transform=ToTensor(),
    download=True,
)
test_data = datasets.MNIST(
    root='data',
    train=False,
    transform=ToTensor(),
)

batch_size = 32
loaders = {
    'train': DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=1),
    'test': DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=1),
}
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        self.MaxPool2d = nn.MaxPool2d(2, 2)
        self.BatchNorm2d = nn.BatchNorm2d(16)
        self.Flatten = nn.Flatten()
        # 256 hidden units; this should be an independent hyperparameter
        # rather than batch_size * 8 as before, since layer sizes should not
        # depend on the batch size (the value, 256, is unchanged).
        self.Linear = nn.Linear(16 * 14 * 14, 256)
        self.ReLU = nn.ReLU()
        self.out = nn.Sequential(nn.Linear(256, 10), nn.Softmax(dim=1))
        self.weights_initialization()

    def forward(self, x):
        x = self.conv1(x)
        x = self.MaxPool2d(x)
        x = self.BatchNorm2d(x)
        x = self.Flatten(x)
        x = self.Linear(x)
        x = self.ReLU(x)  # already flat here; the second Flatten was a no-op
        output = self.out(x)
        return output

    def weights_initialization(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)
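# Chromosome layout, derived from the shapes of the three evolved tensors:
#   conv1.weight        16*1*5*5 = 400      -> genes [0, 400)
#   BatchNorm2d.weight  16                  -> genes [400, 416)
#   Linear.weight       256*3136 = 802816   -> genes [416, 803232)
# Total chromosome length: 803232 genes.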
cnn = CNN()

# forward() already returns the output of self.out, so the model output can
# be used directly; the forward hook in the original version was redundant.
images, labels = next(iter(loaders['train']))
def single_run(images, labels, chromosome):
    # Load the chromosome into the evolved layers, then score the network on
    # one batch. Fitness = number of correctly classified images.
    weights_conv1 = torch.reshape(torch.from_numpy(chromosome[0:400]), (16, 1, 5, 5))
    weights_BatchNorm = torch.from_numpy(chromosome[400:416])
    weights_Linear = torch.reshape(torch.from_numpy(chromosome[416:803232]), (256, 3136))
    with torch.no_grad():
        # copy_ casts the float64 genes into the float32 parameters in place.
        # The original compared state_dict keys against 'BatchNorm.weight',
        # which never matched 'BatchNorm2d.weight', so that layer was never set.
        cnn.conv1.weight.copy_(weights_conv1)
        cnn.BatchNorm2d.weight.copy_(weights_BatchNorm)
        cnn.Linear.weight.copy_(weights_Linear)
        output = cnn(images)
        fitness = (output.argmax(dim=1) == labels).sum().item()
    return fitness, chromosome

chromosome_init = np.random.rand(803232)
fitness1, chromosome1 = single_run(images, labels, chromosome_init)
print(fitness1)
print('______')
print(chromosome1, len(chromosome1))
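# Sanity check (mirrors the weight re-extraction the original training loop
# did): flatten the evolved tensors back out of the network; the result
# should match the chromosome that was just loaded, up to float32 precision.
conv1_back = cnn.conv1.weight.detach().numpy().flatten()      # 400 genes
bn_back = cnn.BatchNorm2d.weight.detach().numpy().flatten()   # 16 genes
linear_back = cnn.Linear.weight.detach().numpy().flatten()    # 802816 genes
roundtrip = np.concatenate([conv1_back, bn_back, linear_back])
assert np.allclose(roundtrip, chromosome1, atol=1e-6)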
size = 10

def populate(size):
    # Initial population: one fresh random chromosome per individual. (The
    # original reused the same chromosome_init for every member, so the whole
    # population was identical.)
    population_c, population_f = [], []
    for i in range(size):
        curr_chromosome = np.random.rand(803232)
        curr_fitness, curr_chromosome = single_run(images, labels, curr_chromosome)
        population_c.append(curr_chromosome)
        population_f.append(curr_fitness)
    return np.vstack(population_c), np.array(population_f)

population_c1, population_f1 = populate(size)
import math

def selection(population_c, population_f):
    # Fitness-proportional (roulette-wheel) selection of two parents; note
    # the same parent can be drawn twice.
    indices = np.arange(len(population_f))
    # weight by the fitness passed in, not the global population_f1 as before
    random_ind = random.choices(indices, weights=population_f, k=2)
    elite_c = np.take(population_c, random_ind, 0)
    elite_f = np.take(population_f, random_ind, 0)
    return elite_c, elite_f

elite_c1, elite_f1 = selection(population_c1, population_f1)
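# Alternative selection I experimented with (left out of the loop for now):
# keep the top 20% of the population by fitness instead of sampling
# proportionally to fitness.
def elite_selection(population_c, population_f, frac=0.2):
    elite = int(math.ceil(len(population_f) * frac))
    elite_ind = np.argpartition(population_f, -elite)[-elite:]
    return np.take(population_c, elite_ind, 0), np.take(population_f, elite_ind, 0)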
def crossover(parent_1, parent_2):
    # Single-point crossover: the child takes a random-length gene prefix from
    # parent_2 and the rest from parent_1. Work on a copy so the parent is
    # not mutated in place.
    index = random.randint(0, len(parent_1))
    child = parent_1.copy()
    child[0:index] = parent_2[0:index]
    return child

child = crossover(elite_c1[0], elite_c1[1])
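# Mutation is not wired into the loop yet; a minimal version might look like
# this (mutation_rate and scale are arbitrary, untuned values): perturb a
# random subset of genes with Gaussian noise.
def mutate(chromosome, mutation_rate=0.01, scale=0.1):
    mask = np.random.rand(len(chromosome)) < mutation_rate
    mutated = chromosome.copy()
    mutated[mask] += np.random.normal(0.0, scale, size=mask.sum())
    return mutated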
def new_populate(size, parent_1, parent_2):
    # Next generation: every member is a crossover child of the two selected
    # parents, each with a fresh random cut point. (The original ignored the
    # child and re-evaluated the stale chromosome_init instead.)
    population_c, population_f = [], []
    for i in range(size):
        curr_fitness, curr_chromosome = single_run(images, labels, crossover(parent_1, parent_2))
        population_c.append(curr_chromosome)
        population_f.append(curr_fitness)
    return np.vstack(population_c), np.array(population_f)

fitness2, chromosome2 = single_run(images, labels, child)
print(fitness2)
over_fitness = []  # moved outside the loop so the history is not reset every generation
elite_c, elite_f = elite_c1, elite_f1
for generation in range(25):
    population_c2, population_f2 = new_populate(size, elite_c[0], elite_c[1])
    elite_c, elite_f = selection(population_c2, population_f2)
    # breed from the current generation's elites; the original reused the
    # stale elite_c1 from generation 0 every time
    child = crossover(elite_c[0], elite_c[1])
    fitness2, chromosome2 = single_run(images, labels, child)
    over_fitness.append(fitness2)
    print(fitness2)
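# Plot the fitness history to see whether it trends upward across generations
# (fitness is the number of correct predictions out of a 32-image batch).
import matplotlib.pyplot as plt

plt.plot(over_fitness)
plt.xlabel('generation')
plt.ylabel('fitness (correct out of 32)')
plt.show()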
P.S.: I am not using backprop on purpose. I need a gradient-free algorithm.