This is not a problem-solving or debugging question. As a beginner, I am seeking advice from experienced ML and AI coders. I have written this very simple genetic algorithm. I have no good machine to test it fully, but fitness does seem to increase with the number of iterations, so it learns.

I wonder what experienced coders would think of it, and whether there are suggestions for improvement. I am also searching for a good entry-level textbook on GAs. I found one that I can read, but it is so old that I am afraid many of its approaches are out of date.

Here is my code:

```python
import torch
from pylab import *
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn as nn

import numpy as np
import random

# Pick the GPU when available.
# NOTE(review): `device` is never used below — the model and tensors stay on
# the CPU; move them with .to(device) if GPU execution is intended.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# MNIST train/test splits; ToTensor() yields (1, 28, 28) floats in [0, 1].
# download=True added so the first run does not fail when ./data is empty.
train_data = datasets.MNIST(
    root='data',
    train=True,
    transform=ToTensor(),
    download=True,
)
test_data = datasets.MNIST(
    root='data',
    train=False,
    transform=ToTensor(),
    download=True,
)

batch_size = 32

# BUG FIX: the head of this dict was lost in the paste (only the keyword
# arguments survived); reconstructed the standard DataLoader pair that the
# rest of the script (`loaders['train']`) expects.
loaders = {
    'train': torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1),
    'test': torch.utils.data.DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1),
}

class CNN(nn.Module):
    """Small MNIST classifier: conv -> maxpool -> batchnorm -> FC -> FC.

    forward() takes a (batch, 1, 28, 28) tensor and returns a (batch, 10)
    tensor of class probabilities (Softmax over dim=1).
    """

    def __init__(self):
        super().__init__()
        # BUG FIX: the original Conv2d call was unterminated and had no
        # padding. padding=2 keeps the 28x28 spatial size, so after the 2x2
        # max-pool the feature map is 16 x 14 x 14 — matching the Linear
        # layer's 16 * 14 * 14 input below.
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=5,
            stride=1,
            padding=2,
        )
        self.MaxPool2d = nn.MaxPool2d(2, 2)
        self.BatchNorm2d = nn.BatchNorm2d(16)
        self.Flatten = nn.Flatten()

        # 256 hidden units. (The original wrote batch_size * 8 == 256, but a
        # layer's width should never depend on the data-loader batch size.)
        self.Linear = nn.Linear(16 * 14 * 14, 256)
        self.ReLU = nn.ReLU()
        self.out = nn.Sequential(nn.Linear(256, 10), nn.Softmax(dim=1))

        self.weights_initialization()

    def forward(self, x):
        x = self.conv1(x)
        x = self.MaxPool2d(x)
        x = self.BatchNorm2d(x)
        x = self.Flatten(x)
        x = self.Linear(x)
        x = self.ReLU(x)
        # (removed a second Flatten call: the tensor is already 2-D here)
        return self.out(x)

    def weights_initialization(self):
        """Xavier-normal weights and zero biases for every Linear layer."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)

# Single global model instance; every GA evaluation overwrites its weights.
cnn = CNN()

# Layer outputs captured by forward hooks, keyed by a caller-chosen name.
activation = {}


def getActivation(name):
    """Build a forward hook that stores the hooked module's detached output
    in the module-level ``activation`` dict under ``name``."""
    def _capture(module, inputs, result):
        activation[name] = result.detach()
    return _capture

# Record the final (softmax) layer's output on every forward pass.
h = cnn.out.register_forward_hook(getActivation('out'))

num_epochs = 1

# GA state: a chromosome is a flat gene vector of network weights; fitness
# counts correct predictions accumulated by single_run.
chromosome = []
fitness = 0

def single_run(images, labels, num_epochs, chromosome, fitness):
    """Evaluate one chromosome: decode it into the network's weights, run one
    pass over ``loaders['train']``, and count correct top-1 predictions.

    ``images``, ``labels`` and ``num_epochs`` are unused but kept for
    interface compatibility with existing callers.

    Returns (fitness, chromosome): the incremented fitness count and the
    chromosome re-read (flattened) from the network.
    """
    # Decode the flat gene vector: 400 conv1 + 16 batchnorm + 802,816 linear.
    weights_conv1 = torch.reshape(torch.from_numpy(chromosome[0:400]), (16, 1, 5, 5))
    weights_batchnorm = torch.from_numpy(chromosome[400:416])
    weights_linear = torch.reshape(torch.from_numpy(chromosome[416:803232]), (256, 3136))

    # BUG FIX: the original compared keys against 'BatchNorm.weight' and
    # 'Linear.weight', which never match the real state_dict keys
    # ('BatchNorm2d.weight', 'Linear.weight' lives under that exact name but
    # the loop form hid the mismatch) — so parts of the chromosome were
    # silently never loaded. Index the state_dict directly instead.
    state = cnn.state_dict()
    state['conv1.weight'].data.copy_(weights_conv1)
    state['BatchNorm2d.weight'].data.copy_(weights_batchnorm)
    state['Linear.weight'].data.copy_(weights_linear)

    # Evaluation only: no gradients are needed (gradient-free GA), and eval()
    # keeps BatchNorm from updating its running statistics mid-evaluation.
    cnn.eval()
    with torch.no_grad():
        for batch_images, batch_labels in loaders['train']:
            cnn(batch_images)
            out_ten = activation['out']
            # Count correct top-1 predictions across the whole batch.
            fitness += (out_ten.argmax(dim=1) == batch_labels).sum().item()

    # Re-encode the network's current weights as a chromosome.
    conv1_weights = cnn.conv1.weight.detach().numpy().flatten()          # (16,1,5,5)
    batchnorm_weights = cnn.BatchNorm2d.weight.detach().numpy().flatten()
    linear_weights = cnn.Linear.weight.detach().numpy().flatten()        # (256,3136)
    chromosome = np.concatenate([conv1_weights, batchnorm_weights, linear_weights])

    return fitness, chromosome

# Random initial chromosome: 400 conv + 16 batchnorm + 802,816 linear genes.
chromosome_init = np.random.rand(803232)

# BUG FIX: `images` and `labels` were undefined names here (NameError);
# single_run ignores them, so pass None placeholders.
fitness1, chromosome1 = single_run(None, None, num_epochs, chromosome_init, fitness)

print(fitness1)
print('______')
print(chromosome1, len(chromosome1))

# Population storage and population size.
population_c = []
population_f = []

size = 10

def populate(size, population_c, population_f):
    """Create and evaluate ``size`` random individuals.

    BUG FIX: the original drew a single random chromosome *outside* the loop,
    so the whole "population" was the same individual evaluated ``size``
    times. A fresh chromosome is now drawn per individual.

    Returns (population_c, population_f): a (size, 803232) array of
    chromosomes and the matching 1-D fitness array.
    """
    for i in range(size):
        chromosome_init = np.random.rand(803232)

        curr_fitness, curr_chromosome = single_run(None, None, num_epochs,
                                                   chromosome_init, fitness)

        population_f = np.append(population_f, curr_fitness)
        if i == 0:
            population_c = np.append(population_c, curr_chromosome)
        else:
            population_c = np.vstack((population_c, curr_chromosome))

    return population_c, population_f

# Build and evaluate the initial random population of `size` individuals.
population_c1, population_f1 = populate(size, population_c, population_f)

import math

def selection(population_c, population_f):
    """Fitness-proportionate (roulette-wheel) selection of two parents.

    Returns (elite_c, elite_f): the two selected chromosome rows of
    ``population_c`` and their fitness values (with replacement, so the same
    individual can be drawn twice).
    """
    # BUG FIX: the original sampled a hard-coded arange(10) weighted by the
    # *global* population_f1 instead of the population actually passed in.
    indices = np.arange(len(population_f))
    random_ind = random.choices(indices, weights=population_f, k=2)

    elite_c = np.take(population_c, random_ind, 0)
    elite_f = np.take(population_f, random_ind, 0)

    return elite_c, elite_f

# Pick two parents from the initial population by roulette-wheel selection.
elite_c1, elite_f1 = selection(population_c1, population_f1)

def crossover(parent_1, parent_2):
    """Single-point crossover: the child takes parent_2's genes up to a
    random cut point and parent_1's genes after it.

    BUG FIX: the original wrote parent_2's genes directly into parent_1,
    corrupting the parent for any later use; the child is now a copy.
    """
    index = random.randint(0, len(parent_1))  # inclusive cut point

    child = np.array(parent_1, copy=True)
    child[0:index] = parent_2[0:index]

    return child

# NOTE(review): both arguments are the same 2-row elite array, so this
# crosses an individual with itself — presumably elite_c1[0] and elite_c1[1]
# (the two selected parents) were intended; verify.
child = crossover(elite_c1, elite_c1)

def new_populate(size, population_c, population_f):
    """Breed and evaluate the next generation of ``size`` individuals.

    BUG FIX: the original computed one crossover child, never used it, and
    evaluated the stale global ``chromosome_init`` ``size`` times — so no
    evolution could happen. Each individual is now bred from two
    roulette-selected parents of the incoming population.

    Returns (population_c, population_f) for the new generation.
    """
    new_c = None
    new_f = np.array([])

    for i in range(size):
        # Select two parents and breed one child per slot.
        parents_c, parents_f = selection(population_c, population_f)
        child = crossover(parents_c[0], parents_c[1])

        curr_fitness, curr_chromosome = single_run(None, None, num_epochs,
                                                   child, fitness)

        new_f = np.append(new_f, curr_fitness)
        new_c = curr_chromosome if i == 0 else np.vstack((new_c, curr_chromosome))

    return new_c, new_f

# Evaluate the single crossover child produced above.
# NOTE(review): `images`/`labels` are undefined names at this point in the
# script (single_run ignores them, but the lookup itself raises NameError).
fitness2, chromosome2 = single_run(images, labels, num_epochs, child, fitness)

print(fitness2)

# Main GA loop: evolve the population for 25 generations.
# BUG FIX: `over_fitness` was re-created *inside* the loop, discarding the
# history it was meant to accumulate — initialize it once, before the loop.
over_fitness = []

for generation in range(25):
    # BUG FIX: the original always re-populated from the untouched initial
    # lists and always bred from the first generation's elites (elite_c1),
    # so no evolutionary progress could accumulate. Carry the population
    # forward and select elites from the *current* generation.
    population_c1, population_f1 = new_populate(size, population_c1, population_f1)
    elite_c2, elite_f2 = selection(population_c1, population_f1)

    child = crossover(elite_c2[0], elite_c2[1])

    fitness2, chromosome2 = single_run(None, None, num_epochs, child, fitness)

    # np.append replaces the bare `append` pulled in by `from pylab import *`.
    over_fitness = np.append(over_fitness, fitness2)
    print(fitness2)
```

P.S.: I am not using backpropagation on purpose. I need a gradient-free algorithm.