@dariocazzani
Last active September 1, 2017 19:15
Genetic Algorithm Engine
from __future__ import division
import sys
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from Engine import Engine  # external GA engine module, not included in this gist
def get_iris_data():
    """ Read the iris data set and split it into training and test sets """
    RANDOM_SEED = 42
    iris = datasets.load_iris()
    data = iris["data"]
    target = iris["target"]

    # Prepend the column of 1s for bias
    N, M = data.shape
    all_X = np.ones((N, M + 1))
    all_X[:, 1:] = data

    # Convert into one-hot vectors
    num_labels = len(np.unique(target))
    all_Y = np.eye(num_labels)[target]
    return train_test_split(all_X, all_Y, test_size=0.33, random_state=RANDOM_SEED)
"""
Global variables
"""
input_size = 5
num_hidden_neurons = 8
num_classes = 3
train_X, test_X, train_y, test_y = get_iris_data()
def softmax(x):
    num = np.exp(x)
    den = np.sum(np.exp(x), axis=1)[:, None]  # add extra axis so the division broadcasts row-wise
    return num / den
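# The hidden layer below uses a sign/step activation (outputs -1 or +1).
# A non-differentiable activation is fine here because the weights are evolved
# by the genetic algorithm rather than trained with gradients.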
def step(x):
    return 2. * (x > 0) - 1
def forward(data, encoded_genes):
    W1, b1, W2, b2 = encoded_genes
    h1 = step(np.dot(data, W1) + b1)
    logits = np.dot(h1, W2) + b2
    probs = softmax(logits)
    return probs, logits
def cross_entropy(label, probs):
    # https://en.wikipedia.org/wiki/Cross_entropy
    # Note: this returns the mean log-likelihood term (the negative of the usual
    # cross-entropy loss), so larger values are better and the GA can maximize it directly.
    epsilon = 1E-6
    return np.mean(np.nan_to_num(label * np.log(probs + epsilon) + (1 - label) * np.log(1 - probs + epsilon)))
def pop_from_list(list_in, n):
    # Pop n genes off the end of the list and return them together with what is left
    out = []
    for _ in range(n):
        out.append(list_in.pop())
    return out, list_in
def encode_genes(genes_in):
    genes = copy.deepcopy(genes_in)
    W1, genes = pop_from_list(genes, input_size * num_hidden_neurons)
    b1, genes = pop_from_list(genes, num_hidden_neurons)
    W2, genes = pop_from_list(genes, num_hidden_neurons * num_classes)
    b2, genes = pop_from_list(genes, num_classes)
    # Sanity check: every gene should have been consumed
    if genes:
        print(len(genes))
        sys.exit()
    W1 = np.reshape(np.asarray(list(map(float, W1))), [input_size, num_hidden_neurons])
    b1 = np.asarray(list(map(float, b1)))
    W2 = np.reshape(np.asarray(list(map(float, W2))), [num_hidden_neurons, num_classes])
    b2 = np.asarray(list(map(float, b2)))
    return W1, b1, W2, b2
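# Gene layout consumed above: W1 takes 5*8 = 40 genes, b1 takes 8, W2 takes 8*3 = 24
# and b2 takes 3, i.e. 75 genes per individual (the tot_neurons value computed in main()).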
def fitness(encoded_genes):
    """
    Instead of minimizing the negative log likelihood,
    we maximize the log likelihood
    """
    probs, logits = forward(train_X, encoded_genes)
    accuracy = np.mean((np.argmax(probs, axis=1) == np.argmax(train_y, axis=1)) * 1)
    # return accuracy
    reversed_loss = cross_entropy(train_y, probs)
    return reversed_loss
def main():
    tot_neurons = input_size*num_hidden_neurons + num_hidden_neurons + num_hidden_neurons*num_classes + num_classes
    genes_alphabet = ['-1', '1', '2']
    mutation_probability = 0.03
    population_size = 100
    num_genes = tot_neurons
    iterations = 100
    checkpoints = 100

    engine = Engine(genes_alphabet,
                    encode_genes,
                    population_size,
                    num_genes,
                    fitness,
                    mutation_probability,
                    iterations,
                    checkpoints)
    best_individual, best_fitness = engine.run()

    encoded_genes = encode_genes(best_individual)
    train_results, _ = forward(train_X, encoded_genes)
    accuracy_train = np.mean((np.argmax(train_results, axis=1) == np.argmax(train_y, axis=1)) * 1)
    test_results, _ = forward(test_X, encoded_genes)
    accuracy_test = np.mean((np.argmax(test_results, axis=1) == np.argmax(test_y, axis=1)) * 1)
    print('Accuracy for training set: {:.2f}%'.format(accuracy_train * 100))
    print('Accuracy for test set: {:.2f}%'.format(accuracy_test * 100))


if __name__ == '__main__':
    main()
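# ---------------------------------------------------------------------------
# Note on the Engine dependency: Engine.py is not included in this gist.  The
# commented sketch below is only a guess at the interface the script expects,
# inferred from how Engine is constructed and called in main(); the selection,
# crossover and mutation details are illustrative assumptions, not the
# author's implementation.  Saving something along these lines as Engine.py
# should be enough to run the script end to end.
#
#   import numpy as np
#
#   class Engine(object):
#       def __init__(self, alphabet, encode, population_size, num_genes,
#                    fitness, mutation_probability, iterations, checkpoints):
#           self.alphabet = list(alphabet)
#           self.encode = encode
#           self.population_size = population_size
#           self.num_genes = num_genes
#           self.fitness = fitness
#           self.mutation_probability = mutation_probability
#           self.iterations = iterations
#           self.checkpoints = checkpoints  # accepted but unused in this sketch
#
#       def run(self):
#           # Random initial population: each individual is a list of genes
#           # drawn from the alphabet.
#           population = [list(np.random.choice(self.alphabet, self.num_genes))
#                         for _ in range(self.population_size)]
#           best_individual, best_fitness = None, -np.inf
#           for _ in range(self.iterations):
#               scores = np.array([self.fitness(self.encode(ind)) for ind in population])
#               order = np.argsort(scores)[::-1]  # fitness is maximized
#               if scores[order[0]] > best_fitness:
#                   best_fitness = scores[order[0]]
#                   best_individual = list(population[order[0]])
#               # Keep the top half, refill with mutated single-point crossovers
#               survivors = [population[i] for i in order[:self.population_size // 2]]
#               children = []
#               while len(survivors) + len(children) < self.population_size:
#                   p1 = survivors[np.random.randint(len(survivors))]
#                   p2 = survivors[np.random.randint(len(survivors))]
#                   cut = np.random.randint(1, self.num_genes)
#                   child = p1[:cut] + p2[cut:]
#                   for g in range(self.num_genes):
#                       if np.random.rand() < self.mutation_probability:
#                           child[g] = np.random.choice(self.alphabet)
#                   children.append(child)
#               population = survivors + children
#           return best_individual, best_fitness
# ---------------------------------------------------------------------------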