├── NEAT.py
├── NEAT_multiclass.py
├── algorithm_runs
│   └── xor_full
│       └── run_1
│           ├── average_population_fitness_generation_5.png
│           ├── avg_num_disjoint_generation_5.png
│           ├── avg_num_excess_generation_5.png
│           ├── avg_weight_diff_generation_5.png
│           ├── best_all_time_genome_accuracy_generation_5.png
│           ├── best_all_time_genome_f1_score_generation_5.png
│           ├── best_all_time_genome_fitness_generation_5.png
│           ├── best_genome_pickle
│           ├── generation_tracker
│           ├── mean_compatibility_distance_generation_5.png
│           ├── mean_number_connections_enabled_generation_5.png
│           ├── mean_number_connections_overall_generation_5.png
│           ├── mean_number_nodes_enabled_generation_5.png
│           ├── num_generation_add_connection_generation_5.png
│           ├── num_generation_add_node_generation_5.png
│           ├── num_generation_delete_connection_generation_5.png
│           ├── num_generation_delete_node_generation_5.png
│           ├── num_generation_weight_mutations_generation_5.png
│           └── num_species_generation_5.png
├── config.py
├── config_multiclass.py
├── data_storage.py
├── data_visualisation.py
├── data_visualisation_averaging.py
├── deconstruct_genome.py
├── deconstruct_genome_multiclass.py
├── f_test.py
├── gene.py
├── generation_statistics.py
├── genome.py
├── genome_multiclass.py
├── genome_neural_network.py
├── genome_neural_network_multiclass.py
├── graph_algorithm.py
├── graph_algorithm_mutliclass.py
├── graphs
│   ├── average_population_fitness_generation_5.jpg
│   ├── avg_num_disjoint_generation_5.jpg
│   ├── avg_num_excess_generation_5.jpg
│   ├── avg_weight_diff_generation_5.jpg
│   ├── best_all_time_genome_f1_score_generation_5.jpg
│   ├── best_all_time_genome_fitness_generation_5.jpg
│   ├── mean_compatibility_distance_generation_5.jpg
│   ├── mean_number_connections_enabled_generation_5.jpg
│   ├── mean_number_connections_overall_generation_5.jpg
│   ├── mean_number_nodes_enabled_generation_5.jpg
│   ├── num_generation_add_connection_generation_5.jpg
│   ├── num_generation_add_node_generation_5.jpg
│   ├── num_generation_delete_connection_generation_5.jpg
│   ├── num_generation_delete_node_generation_5.jpg
│   ├── num_generation_weight_mutations_generation_5.jpg
│   └── num_species_generation_5.jpg
├── main.py
├── main_multiclass.py
├── multi_processing.py
├── multi_threading.py
├── neural_network.py
├── neural_network_components.py
├── pickles
│   ├── best_genome_pickle
│   ├── best_genome_pickle_-1.7547243063454208
│   ├── best_genome_pickle_0.0328201636785844
│   ├── best_genome_pickle_0.40751628299650783
│   ├── best_genome_pickle_circle_data_8
│   ├── best_genome_pickle_shm_two_class_618056
│   ├── best_genome_pickle_shm_two_class_8
│   ├── best_genome_pickle_xor_full_132164
│   ├── best_genome_pickle_xor_full_351148
│   ├── genome_circle_data
│   └── genome_noise_trained
├── read_mat_files.py
├── reproduce.py
├── reproduce_multiclass.py
├── species.py
├── stagnation.py
├── testing_multiclass.py
└── tests.py

/NEAT.py:
--------------------------------------------------------------------------------
from generation_statistics import GenerationStatistics
import matplotlib.pyplot as plt
import time
import numpy as np
from genome_neural_network import GenomeNeuralNetwork
from gene import NodeGene, ConnectionGene
from reproduce import Reproduce
from genome import Genome
from species import SpeciesSet
import sklearn.metrics
import pickle
import os

# Exception used to check if there are no more species
from stagnation import Stagnation


class CompleteExtinctionException(Exception):
    pass


class NEAT:
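    """
    Runs the NEAT algorithm: holds the population and its species set, evaluates
    every genome as a neural network (optionally optimised with backprop), then
    reproduces and re-speciates the population generation by generation until a
    break point is reached.
    """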

    def __init__(self, x_training_data, y_training_data, x_test_data, y_test_data, config, fitness_threshold,
                 f1_score_threshold, algorithm_running=None):
        # Where all the parameters are saved
        self.config = config
        # Takes care of reproduction of populations
        self.reproduction = Reproduce(stagnation=Stagnation, config=config)
        self.generation_tracker = GenerationStatistics()
        # Track the best genome across generations
        self.best_all_time_genome = None
        # If the fitness threshold is met it will stop the algorithm (if set)
        self.fitness_threshold = fitness_threshold
        self.f1_score_threshold = f1_score_threshold
        # A class containing the different species within the population
        self.species_set = SpeciesSet(config=config, generation_tracker=self.generation_tracker)
        self.x_train = x_training_data
        self.y_train = y_training_data
        self.x_test = x_test_data
        self.y_test = y_test_data

        # Keep track of best genome through generations
        self.best_genome_history = {}

        # Keeps information of population complexity for each generation
        self.population_complexity_tracker = {}

        if algorithm_running:
            # Defines which of the algorithms is currently being tested (e.g. xor with 5000 examples or xor with 200
            # examples and noise)
            self.algorithm_running = algorithm_running

        # Initialise the starting population
        self.population = self.reproduction.create_new_population(population_size=self.config.population_size,
                                                                  num_features=x_training_data.shape[1])

        # Speciate the initial population
        self.species_set.speciate(population=self.population, compatibility_threshold=3, generation=0)

    @staticmethod
    def create_genome_nn(genome, x_data, y_data, algorithm_running=None):
        # TODO: I encountered a bug where I trained a genome with a relu activation function, but when I recreated it
        #  using this function I had problems because I forgot that everything defined inside here uses sigmoid.
        #  Should improve the implementation of this
        # TODO: The x_data and y_data aren't always used, particularly if we only create the network to get a
        #  prediction. This implementation should be improved for clarity
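        # Per-dataset hyperparameters used when optimising a genome's weights with backprop. Note that every
        # branch below currently uses a sigmoid activation (see the TODO above)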
        if algorithm_running == 'xor_full':
            learning_rate = 0.1
            num_epochs = 1000
            batch_size = 64
            activation_type = 'sigmoid'
        elif algorithm_running == 'xor_small_noise':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 10
            activation_type = 'sigmoid'
        elif algorithm_running == 'circle_data':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'shm_two_class':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'spiral_data':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        # TODO: Choose a more suitable default
        else:
            learning_rate = 0.1
            num_epochs = 500
            batch_size = 64
            activation_type = 'sigmoid'

        return GenomeNeuralNetwork(genome=genome, x_train=x_data, y_train=y_data,
                                   create_weights_bias_from_genome=True, activation_type=activation_type,
                                   learning_rate=learning_rate, num_epochs=num_epochs, batch_size=batch_size)

    def evaluate_population(self, use_backprop, generation):
        """
        Calculates the fitness value for each individual genome in the population
        :param use_backprop: True or False depending on whether the fitness is calculated using backprop or not
        :param generation: Which generation number it currently is
        :return: The best and worst genomes of the population
        """

        # Should return the best genome
        current_best_genome = None
        current_worst_genome = None

        for genome in self.population.values():

            genome_nn = self.create_genome_nn(genome=genome, x_data=self.x_train, y_data=self.y_train,
                                              algorithm_running=self.algorithm_running)

            # Optimise the neural network first. However, the first generation only allows one pass so that we are
            # not just optimising all of the same topologies
            genome_fitness_before = genome.fitness
            if use_backprop and generation > 1:
                print('\n')
                print('OPTIMISING GENOME')
                genome_nn.optimise(print_epoch=False)

            # We use genome_nn.x_train instead of self.x_train because the genome_nn might have deleted a row if there
            # is no connection to one of the sources
            cost = genome_nn.run_one_pass(input_data=genome_nn.x_train, labels=self.y_train, return_cost_only=True)

            # The fitness is the negative of the cost, because a lower cost means a greater fitness
            genome.fitness = -cost

            # Only print genome fitness if backprop is used, since backprop takes a long time, so this can be a
            # way of tracking progress in the meantime
            if use_backprop and generation > 1:
                # NOTE: Genome fitness can be None due to crossover because the fitness value is not carried over
                print('Genome Fitness Before: {}'.format(genome_fitness_before))
                print('Genome Fitness After: {}'.format(genome.fitness))

            if current_best_genome is None or genome.fitness > current_best_genome.fitness:
                current_best_genome = genome
            if current_worst_genome is None or genome.fitness < current_worst_genome.fitness:
                current_worst_genome = genome

        return current_best_genome, current_worst_genome

    def update_population_topology_info(self, current_gen):
        num_nodes_overall = []
        num_nodes_enabled = []
        num_connections_overall = []
        num_connections_enabled = []
        all_fitnesses = []
        for genome in self.population.values():
            num_nodes_overall.append(len(genome.nodes))
            num_nodes_enabled.append(len(genome.get_active_nodes()))
            num_connections_overall.append(len(genome.connections))
            num_connections_enabled.append(genome.check_connection_enabled_amount())
            if genome.fitness:
                all_fitnesses.append(genome.fitness)

        avg_num_connections_enabled = np.mean(num_connections_enabled)
        avg_num_connections_overall = np.mean(num_connections_overall)
        avg_num_nodes_enabled = np.mean(num_nodes_enabled)
        avg_num_nodes_overall = np.mean(num_nodes_overall)

        complexity_tracker = {'num_connections_enabled': avg_num_connections_enabled,
                              'num_connections_overall': avg_num_connections_overall,
                              'num_nodes_enabled': avg_num_nodes_enabled, 'num_nodes_overall': avg_num_nodes_overall}
        self.population_complexity_tracker[current_gen] = complexity_tracker

        self.generation_tracker.mean_number_connections_enabled = avg_num_connections_enabled
        self.generation_tracker.mean_number_connections_overall = avg_num_connections_overall
        self.generation_tracker.mean_number_nodes_enabled = avg_num_nodes_enabled
        self.generation_tracker.mean_number_nodes_overall = avg_num_nodes_overall
        self.generation_tracker.average_population_fitness = np.mean(all_fitnesses)

    def add_successful_genome_for_test(self, current_gen, use_this_genome):
        """
        This function adds a pre-programmed genome which is known to converge for the XOR dataset.
        :param current_gen: The current generation number
        :param use_this_genome: Whether this genome should be added to the population or not
        :return:
        """
        # Wait for current_gen > 1 because if using backprop the first gen skips using backprop.
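        # Hand-built topology used below: source nodes 0 and 1 each feed hidden nodes 3 and 4, which each feed
        # hidden nodes 5 and 6, which both feed output node 2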
        if current_gen > 1 and use_this_genome:
            node_list = [
                NodeGene(node_id=0, node_type='source'),
                NodeGene(node_id=1, node_type='source'),
                NodeGene(node_id=2, node_type='output', bias=0.5),
                NodeGene(node_id=3, node_type='hidden', bias=1),
                NodeGene(node_id=4, node_type='hidden', bias=1),
                NodeGene(node_id=5, node_type='hidden', bias=1),
                NodeGene(node_id=6, node_type='hidden', bias=1),
            ]

            connection_list = [ConnectionGene(input_node=0, output_node=3, innovation_number=1, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=3, innovation_number=2, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=0, output_node=4, innovation_number=3, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=4, innovation_number=4, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=5, innovation_number=5, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=5, innovation_number=6, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=6, innovation_number=7, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=6, innovation_number=8, enabled=True,
                                              weight=np.random.rand()),
                               ConnectionGene(input_node=5, output_node=2, innovation_number=9, enabled=True,
                                              weight=np.random.rand()),
                               ConnectionGene(input_node=6, output_node=2, innovation_number=10, enabled=True,
                                              weight=np.random.randn())
                               ]

            test_genome = Genome(connections=connection_list, nodes=node_list, key=1)
            test_genome.fitness = -99999999999
            self.population[32131231] = test_genome

    @staticmethod
    def calculate_f_statistic(genome, x_test_data, y_test_data):
        genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True).round()
        return sklearn.metrics.f1_score(y_test_data, prediction)

    @staticmethod
    def calculate_accuracy(genome, x_test_data, y_test_data):
        genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True).round()
        num_correct = 0
        for row in range(y_test_data.shape[0]):
            if np.array_equal(prediction[row, :], y_test_data[row, :]):
                num_correct += 1

        percentage_correct = (num_correct / y_test_data.shape[0]) * 100
        return percentage_correct

    def save_run_information(self, current_gen):
        base_filepath = 'algorithm_runs'
        run_group_filepath = '{}/{}'.format(base_filepath, self.algorithm_running)
        if not os.path.exists(run_group_filepath):
            # Make the directory (and any missing parents) before saving graphs
            os.makedirs(run_group_filepath)

        folders = len(os.listdir(run_group_filepath))

        # Folders + 1 because it will be the next folder in the sub directory
        file_path_for_run = '{}/run_{}'.format(run_group_filepath, (folders + 1))

        # Make the directory before saving all other files
        os.makedirs(file_path_for_run)

        # Save best genome in pickle
        outfile = open('{}/best_genome_pickle'.format(file_path_for_run), 'wb')
        pickle.dump(self.best_all_time_genome, outfile)
        outfile.close()

        # Save graph information
        self.generation_tracker.plot_graphs(current_gen=current_gen, save_plots=True,
                                            file_path=file_path_for_run)

        # Save generation tracker in pickle
        outfile = open('{}/generation_tracker'.format(file_path_for_run), 'wb')
        pickle.dump(self.generation_tracker, outfile)
        outfile.close()

        # Save NEAT class instance so we can access the population again later
        outfile = open('{}/NEAT_instance'.format(file_path_for_run), 'wb')
        pickle.dump(self, outfile)
        outfile.close()

    def check_algorithm_break_point(self, current_gen, f1_score_of_best_all_time_genome, max_num_generations):
        break_point_reached = False
        # If the fitness threshold is met, stop the algorithm
        if self.fitness_threshold and self.best_all_time_genome.fitness > self.fitness_threshold:
            break_point_reached = True
        if self.f1_score_threshold and f1_score_of_best_all_time_genome > self.f1_score_threshold:
            break_point_reached = True
        if current_gen > max_num_generations:
            break_point_reached = True

        if break_point_reached:
            self.save_run_information(current_gen=current_gen)

            return True
        return False

    def run(self, max_num_generations, use_backprop, print_generation_information, show_population_weight_distribution):
        """
        Run the algorithm
        """

        current_gen = 0
        # Break condition now in function
        while True:
            # Every generation increment
            current_gen += 1

            self.add_successful_genome_for_test(current_gen=current_gen, use_this_genome=False)

            self.generation_tracker.population_size = len(self.population)

            start_evaluate_time = time.time()
            # Evaluate the current generation and get the best genome in the current generation
            best_current_genome, worst_current_genome = self.evaluate_population(use_backprop=use_backprop,
                                                                                 generation=current_gen)
            print('WORST CURRENT GENOME FITNESS: {}'.format(worst_current_genome.fitness))
            end_evaluate_time = time.time()
            self.update_population_topology_info(current_gen=current_gen)
            self.generation_tracker.evaluate_execute_time = end_evaluate_time - start_evaluate_time

            # Keep track of the best genome across generations
            if self.best_all_time_genome is None or best_current_genome.fitness > self.best_all_time_genome.fitness:
                # Keep track of the best genome through generations
                self.best_genome_history[current_gen] = best_current_genome

                self.best_all_time_genome = best_current_genome

            self.generation_tracker.best_all_time_genome_fitness = self.best_all_time_genome.fitness

            start_reproduce_time = time.time()

            # Reset attributes for the current generation
            self.generation_tracker.reset_tracker_attributes()

            # Reproduce and get the next generation
            self.population = self.reproduction.reproduce(species_set=self.species_set,
                                                          population_size=self.config.population_size,
                                                          generation=current_gen,
                                                          generation_tracker=self.generation_tracker,
                                                          # current_gen should be greater than one to use
                                                          # backprop_mutation because we let the first generation
                                                          # mutate just as if it was the normal genetic algorithm,
                                                          # so that we're not optimising all of the same structure
                                                          backprop_mutation=(use_backprop and current_gen > 1))
            end_reproduce_time = time.time()
            self.generation_tracker.reproduce_execute_time = end_reproduce_time - start_reproduce_time

            # Check to ensure no genes share the same connection gene addresses. (This problem has been fixed but is
            # here just in case now).
            self.ensure_no_duplicate_genes()

            # Check if there are any species, if not raise an exception. TODO: Let user reset population if extinction
            if not self.species_set.species:
                raise CompleteExtinctionException()

            start_speciate_time = time.time()
            # Speciate the current generation
            self.species_set.speciate(population=self.population, generation=current_gen,
                                      compatibility_threshold=self.config.compatibility_threshold,
                                      generation_tracker=self.generation_tracker)
            end_speciate_time = time.time()
            self.generation_tracker.species_execute_time = end_speciate_time - start_speciate_time

            f1_score_of_best_all_time_genome = self.calculate_f_statistic(
                self.best_all_time_genome, self.x_test, self.y_test)

            best_all_time_genome_accuracy = self.calculate_accuracy(genome=self.best_all_time_genome,
                                                                    x_test_data=self.x_test, y_test_data=self.y_test)

            self.generation_tracker.best_all_time_genome_f1_score = f1_score_of_best_all_time_genome
            self.generation_tracker.best_all_time_genome_accuracy = best_all_time_genome_accuracy
            self.generation_tracker.update_generation_information(generation=current_gen)

            if print_generation_information:
                self.generation_tracker.print_generation_information(generation_interval_for_graph=1,
                                                                     plot_graphs_every_gen=False)

            if self.check_algorithm_break_point(f1_score_of_best_all_time_genome=f1_score_of_best_all_time_genome,
                                                current_gen=current_gen, max_num_generations=max_num_generations):
                break

            # Gives distribution of the weights in the population connections
            if show_population_weight_distribution:
                self.reproduction.show_population_weight_distribution(population=self.population)

        print('f1 score for best genome after optimising is: {}'.format(f1_score_of_best_all_time_genome))
        return self.best_all_time_genome

    def ensure_no_duplicate_genes(self):
        connection_gene_dict = {}
        for genome in self.population.values():
            for connection in genome.connections.values():
                if connection not in connection_gene_dict:
                    connection_gene_dict[connection] = 1
                else:
                    connection_gene_dict[connection] += 1

        for connection_gene, amount in connection_gene_dict.items():
            if amount > 1:
                raise Exception('You have duplicated a connection gene')

--------------------------------------------------------------------------------
/NEAT_multiclass.py:
--------------------------------------------------------------------------------
from generation_statistics import GenerationStatistics
import time
import numpy as np
from genome_neural_network_multiclass import GenomeNeuralNetworkMultiClass
from gene import NodeGene, ConnectionGene
from reproduce_multiclass import ReproduceMultiClass
from genome import Genome
from species import SpeciesSet
import sklearn.metrics
import pickle

# Exception used to check if there are no more species
from stagnation import Stagnation
import os


class CompleteExtinctionException(Exception):
    pass


class NEATMultiClass:
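    """
    Multi-class variant of the NEAT class. The starting population is created
    with outputs for num_classes classes, and predictions are taken as the
    argmax over the network outputs rather than a rounded single output.
    """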

    def __init__(self, x_training_data, y_training_data, x_test_data, y_test_data, config, fitness_threshold,
                 f1_score_threshold, algorithm_running=None):
        # Where all the parameters are saved
        self.config = config
        # Takes care of reproduction of populations
        self.reproduction = ReproduceMultiClass(stagnation=Stagnation, config=config)
        self.generation_tracker = GenerationStatistics()
        # Track the best genome across generations
        self.best_all_time_genome = None
        # If the fitness threshold is met it will stop the algorithm (if set)
        self.fitness_threshold = fitness_threshold
        self.f1_score_threshold = f1_score_threshold
        # A class containing the different species within the population
        self.species_set = SpeciesSet(config=config, generation_tracker=self.generation_tracker)
        self.x_train = x_training_data
        self.y_train = y_training_data
        self.x_test = x_test_data
        self.y_test = y_test_data

        # Keep track of best genome through generations
        self.best_genome_history = {}

        # Keeps information of population complexity for each generation
        self.population_complexity_tracker = {}

        if algorithm_running:
            # Defines which of the algorithms is currently being tested (e.g. xor with 5000 examples or xor with 200
            # examples and noise)
            self.algorithm_running = algorithm_running

        # Initialise the starting population
        self.population = self.reproduction.create_new_population(population_size=self.config.population_size,
                                                                  num_features=x_training_data.shape[1],
                                                                  num_classes=y_training_data.shape[1])

        # Speciate the initial population
        self.species_set.speciate(population=self.population, compatibility_threshold=3, generation=0)

    @staticmethod
    def create_genome_nn(genome, x_data, y_data, algorithm_running=None):
        # TODO: I encountered a bug where I trained a genome with a relu activation function, but when I recreated it
        #  using this function I had problems because I forgot that everything defined inside here uses sigmoid.
        #  Should improve the implementation of this
        # TODO: The x_data and y_data aren't always used, particularly if we only create the network to get a
        #  prediction. This implementation should be improved for clarity
        if algorithm_running == 'xor_full':
            learning_rate = 0.1
            num_epochs = 1000
            batch_size = 64
            activation_type = 'sigmoid'
        elif algorithm_running == 'xor_small_noise':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 10
            activation_type = 'sigmoid'
        elif algorithm_running == 'circle_data':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'shm_two_class':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'shm_multi_class':
            learning_rate = 0.1
            num_epochs = 250
            # num_epochs = 500
            batch_size = 64
            activation_type = 'sigmoid'
        # TODO: Choose a more suitable default
        else:
            learning_rate = 0.1
            num_epochs = 500
            batch_size = 64
            activation_type = 'sigmoid'

        return GenomeNeuralNetworkMultiClass(genome=genome, x_train=x_data, y_train=y_data,
                                             create_weights_bias_from_genome=True, activation_type=activation_type,
                                             learning_rate=learning_rate, num_epochs=num_epochs, batch_size=batch_size)

    def evaluate_population(self, use_backprop, generation):
        """
        Calculates the fitness value for each individual genome in the population
        :param use_backprop: True or False depending on whether the fitness is calculated using backprop or not
        :param generation: Which generation number it currently is
        :return: The best and worst genomes of the population
        """

        # Should return the best genome
        current_best_genome = None
        current_worst_genome = None

        for genome in self.population.values():

            genome_nn = self.create_genome_nn(genome=genome, x_data=self.x_train, y_data=self.y_train,
                                              algorithm_running=self.algorithm_running)

            # Optimise the neural network first. However, the first generation only allows one pass so that we are
            # not just optimising all of the same topologies
            genome_fitness_before = genome.fitness
            if use_backprop and generation > 1:
                print('\n')
                print('OPTIMISING GENOME')
                genome_nn.optimise(print_epoch=False)

            # We use genome_nn.x_train instead of self.x_train because the genome_nn might have deleted a row if there
            # is no connection to one of the sources
            cost = genome_nn.run_one_pass(input_data=genome_nn.x_train, labels=self.y_train, return_cost_only=True)

            # The fitness is the negative of the cost, because a lower cost means a greater fitness
            genome.fitness = -cost

            # Only print genome fitness if backprop is used, since backprop takes a long time, so this can be a
            # way of tracking progress in the meantime
            if use_backprop and generation > 1:
                # NOTE: Genome fitness can be None due to crossover because the fitness value is not carried over
                print('Genome Fitness Before: {}'.format(genome_fitness_before))
                print('Genome Fitness After: {}'.format(genome.fitness))

            if current_best_genome is None or genome.fitness > current_best_genome.fitness:
                current_best_genome = genome
            if current_worst_genome is None or genome.fitness < current_worst_genome.fitness:
                current_worst_genome = genome

        return current_best_genome, current_worst_genome

    def update_population_topology_info(self, current_gen):
        num_nodes_overall = []
        num_nodes_enabled = []
        num_connections_overall = []
        num_connections_enabled = []
        all_fitnesses = []
        for genome in self.population.values():
            num_nodes_overall.append(len(genome.nodes))
            num_nodes_enabled.append(len(genome.get_active_nodes()))
            num_connections_overall.append(len(genome.connections))
            num_connections_enabled.append(genome.check_connection_enabled_amount())
            if genome.fitness:
                all_fitnesses.append(genome.fitness)

        avg_num_connections_enabled = np.mean(num_connections_enabled)
        avg_num_connections_overall = np.mean(num_connections_overall)
        avg_num_nodes_enabled = np.mean(num_nodes_enabled)
        avg_num_nodes_overall = np.mean(num_nodes_overall)

        complexity_tracker = {'num_connections_enabled': avg_num_connections_enabled,
                              'num_connections_overall': avg_num_connections_overall,
                              'num_nodes_enabled': avg_num_nodes_enabled, 'num_nodes_overall': avg_num_nodes_overall}
        self.population_complexity_tracker[current_gen] = complexity_tracker

        self.generation_tracker.mean_number_connections_enabled = avg_num_connections_enabled
        self.generation_tracker.mean_number_connections_overall = avg_num_connections_overall
        self.generation_tracker.mean_number_nodes_enabled = avg_num_nodes_enabled
        self.generation_tracker.mean_number_nodes_overall = avg_num_nodes_overall

        self.generation_tracker.average_population_fitness = np.mean(all_fitnesses)

    def add_successful_genome_for_test(self, current_gen, use_this_genome):
        """
        This function adds a pre-programmed genome which is known to converge for the XOR dataset.
        :param current_gen: The current generation number
        :param use_this_genome: Whether this genome should be added to the population or not
        :return:
        """
        # Wait for current_gen > 1 because if using backprop the first gen skips using backprop.
        if current_gen > 1 and use_this_genome:
            node_list = [
                NodeGene(node_id=0, node_type='source'),
                NodeGene(node_id=1, node_type='source'),
                NodeGene(node_id=2, node_type='output', bias=0.5),
                NodeGene(node_id=3, node_type='hidden', bias=1),
                NodeGene(node_id=4, node_type='hidden', bias=1),
                NodeGene(node_id=5, node_type='hidden', bias=1),
                NodeGene(node_id=6, node_type='hidden', bias=1),
            ]

            connection_list = [ConnectionGene(input_node=0, output_node=3, innovation_number=1, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=3, innovation_number=2, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=0, output_node=4, innovation_number=3, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=4, innovation_number=4, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=5, innovation_number=5, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=5, innovation_number=6, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=6, innovation_number=7, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=6, innovation_number=8, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=5, output_node=2, innovation_number=9, enabled=True,
                                              weight=np.random.rand()),
                               ConnectionGene(input_node=6, output_node=2, innovation_number=10, enabled=True,
                                              weight=np.random.randn())
                               ]

            test_genome = Genome(connections=connection_list, nodes=node_list, key=1)
            test_genome.fitness = -99999999999
            self.population[32131231] = test_genome

    @staticmethod
    def calculate_f_statistic(genome, x_test_data, y_test_data):
        genome_nn = NEATMultiClass.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction_array = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True)
        prediction_real = np.zeros((y_test_data.shape[0], y_test_data.shape[1]))
        for row in range(prediction_array.shape[0]):
            prediction_index = np.argmax(prediction_array[row, :])
            prediction_real[row, prediction_index] = 1.0
        return sklearn.metrics.f1_score(y_test_data, prediction_real, average='samples')

    @staticmethod
    def calculate_accuracy(genome, x_test_data, y_test_data):
        genome_nn = NEATMultiClass.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction_array = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True)
        prediction_real = np.zeros((y_test_data.shape[0], y_test_data.shape[1]))
        for row in range(prediction_array.shape[0]):
            prediction_index = np.argmax(prediction_array[row, :])
            prediction_real[row, prediction_index] = 1.0
        num_correct = 0
        for row in range(y_test_data.shape[0]):
            if np.array_equal(prediction_real[row, :], y_test_data[row, :]):
                num_correct += 1

        percentage_correct = (num_correct / y_test_data.shape[0]) * 100
        return percentage_correct

    def save_run_information(self, current_gen):
        base_filepath = 'algorithm_runs_multi'
        run_group_filepath = '{}/{}'.format(base_filepath, self.algorithm_running)
        if not os.path.exists(run_group_filepath):
            # Make the directory (and any missing parents) before saving graphs
            os.makedirs(run_group_filepath)

        folders = len(os.listdir(run_group_filepath))

        # Folders + 1 because it will be the next folder in the sub directory
        file_path_for_run = '{}/run_{}'.format(run_group_filepath, (folders + 1))

        # Make the directory before saving all other files
        os.makedirs(file_path_for_run)

        # Save best genome in pickle
        outfile = open('{}/best_genome_pickle'.format(file_path_for_run), 'wb')
        pickle.dump(self.best_all_time_genome, outfile)
        outfile.close()

        # Save graph information
        self.generation_tracker.plot_graphs(current_gen=current_gen, save_plots=True,
                                            file_path=file_path_for_run)

        # Save generation tracker in pickle
        outfile = open('{}/generation_tracker'.format(file_path_for_run), 'wb')
        pickle.dump(self.generation_tracker, outfile)
        outfile.close()

        # Save NEAT class instance so we can access the population again later
        outfile = open('{}/NEAT_instance'.format(file_path_for_run), 'wb')
        pickle.dump(self, outfile)
        outfile.close()

    def check_algorithm_break_point(self, current_gen, f1_score_of_best_all_time_genome, max_num_generations):
        break_point_reached = False
        if self.fitness_threshold and self.best_all_time_genome.fitness > self.fitness_threshold:
            break_point_reached = True
        if self.f1_score_threshold and f1_score_of_best_all_time_genome > self.f1_score_threshold:
            break_point_reached = True
        if current_gen > max_num_generations:
            break_point_reached = True

        if break_point_reached:
            self.save_run_information(current_gen=current_gen)

            return True
        return False

    def run(self, max_num_generations, use_backprop, print_generation_information, show_population_weight_distribution):
        """
        Run the algorithm
        """

        current_gen = 0
        # Break condition now in function
        while True:
            # Every generation increment
            current_gen += 1

            self.add_successful_genome_for_test(current_gen=current_gen, use_this_genome=False)

            self.generation_tracker.population_size = len(self.population)

            start_evaluate_time = time.time()
            # Evaluate the current generation and get the best genome in the current generation
            best_current_genome, worst_current_genome = self.evaluate_population(use_backprop=use_backprop,
                                                                                 generation=current_gen)
            print('WORST CURRENT GENOME FITNESS: {}'.format(worst_current_genome.fitness))
            end_evaluate_time = time.time()
            self.update_population_topology_info(current_gen=current_gen)
            self.generation_tracker.evaluate_execute_time = end_evaluate_time - start_evaluate_time

            # Keep track of the best genome across generations
            if self.best_all_time_genome is None or best_current_genome.fitness > self.best_all_time_genome.fitness:
                # Keep track of the best genome through generations
                self.best_genome_history[current_gen] = best_current_genome
                self.best_all_time_genome = best_current_genome

            self.generation_tracker.best_all_time_genome_fitness = self.best_all_time_genome.fitness

            start_reproduce_time = time.time()

            # Reset attributes for the current generation
            self.generation_tracker.reset_tracker_attributes()

            # Reproduce and get the next generation
            self.population = self.reproduction.reproduce(species_set=self.species_set,
                                                          population_size=self.config.population_size,
                                                          generation=current_gen,
                                                          generation_tracker=self.generation_tracker,
                                                          # current_gen should be greater than one to use
                                                          # backprop_mutation because we let the first generation
                                                          # mutate just as if it was the normal genetic algorithm,
                                                          # so that we're not optimising all of the same structure
                                                          backprop_mutation=(use_backprop and current_gen > 1))
            end_reproduce_time = time.time()
            self.generation_tracker.reproduce_execute_time = end_reproduce_time - start_reproduce_time

            # Check to ensure no genes share the same connection gene addresses. (This problem has been fixed but is
            # here just in case now).
            self.ensure_no_duplicate_genes()

            # Check if there are any species, if not raise an exception. TODO: Let user reset population if extinction
            if not self.species_set.species:
                raise CompleteExtinctionException()

            start_speciate_time = time.time()
            # Speciate the current generation
            self.species_set.speciate(population=self.population, generation=current_gen,
                                      compatibility_threshold=self.config.compatibility_threshold,
                                      generation_tracker=self.generation_tracker)
            end_speciate_time = time.time()
            self.generation_tracker.species_execute_time = end_speciate_time - start_speciate_time

            f1_score_of_best_all_time_genome = self.calculate_f_statistic(
                self.best_all_time_genome, self.x_test, self.y_test)

            best_all_time_genome_accuracy = self.calculate_accuracy(genome=self.best_all_time_genome,
                                                                    x_test_data=self.x_test, y_test_data=self.y_test)

            self.generation_tracker.best_all_time_genome_f1_score = f1_score_of_best_all_time_genome
            self.generation_tracker.best_all_time_genome_accuracy = best_all_time_genome_accuracy
            self.generation_tracker.update_generation_information(generation=current_gen)

            if print_generation_information:
                self.generation_tracker.print_generation_information(generation_interval_for_graph=1,
                                                                     plot_graphs_every_gen=False)

            if self.check_algorithm_break_point(f1_score_of_best_all_time_genome=f1_score_of_best_all_time_genome,
                                                current_gen=current_gen, max_num_generations=max_num_generations):
                break

            # Gives distribution of the weights in the population connections
            if show_population_weight_distribution:
                self.reproduction.show_population_weight_distribution(population=self.population)

        print('f1 score for best genome after optimising is: {}'.format(f1_score_of_best_all_time_genome))

        return self.best_all_time_genome

    def ensure_no_duplicate_genes(self):
        connection_gene_dict = {}
        for genome in self.population.values():
            for connection in genome.connections.values():
                if connection not in connection_gene_dict:
                    connection_gene_dict[connection] = 1
                else:
                    connection_gene_dict[connection] += 1

        for connection_gene, amount in connection_gene_dict.items():
            if amount > 1:
                raise Exception('You have duplicated a connection gene')
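
The files under algorithm_runs below are the artefacts written out by save_run_information for a finished run. As a minimal sketch of how a saved run can be inspected afterwards (mirroring initialise_genome in data_visualisation.py; the run directory and variable names here are illustrative):

import pickle

run_dir = 'algorithm_runs/xor_full/run_1'

# Best genome found during the run
with open('{}/best_genome_pickle'.format(run_dir), 'rb') as infile:
    best_genome = pickle.load(infile)

# Per-generation statistics recorded by GenerationStatistics
with open('{}/generation_tracker'.format(run_dir), 'rb') as infile:
    generation_tracker = pickle.load(infile)

Note that unpickling requires the project's classes (e.g. Genome, GenerationStatistics) to be importable from the working directory.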
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/average_population_fitness_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/average_population_fitness_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/avg_num_disjoint_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/avg_num_disjoint_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/avg_num_excess_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/avg_num_excess_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/avg_weight_diff_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/avg_weight_diff_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_all_time_genome_accuracy_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_all_time_genome_accuracy_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_all_time_genome_f1_score_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_all_time_genome_f1_score_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_all_time_genome_fitness_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_all_time_genome_fitness_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_genome_pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_genome_pickle
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/generation_tracker:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/generation_tracker
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_compatibility_distance_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_compatibility_distance_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_number_connections_enabled_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_number_connections_enabled_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_number_connections_overall_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_number_connections_overall_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_number_nodes_enabled_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_number_nodes_enabled_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_add_connection_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_add_connection_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_add_node_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_add_node_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_delete_connection_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_delete_connection_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_delete_node_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_delete_node_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_weight_mutations_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_weight_mutations_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_species_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_species_generation_5.png
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
class Config:
    """
    Population
    """

    population_size = 15

    # TODO: Ensure each of these are used somewhere in the code
    """
    Compatibility distance
    """
    # The coefficients used for calculating the compatibility distance between two genomes
    excess_coefficient = 1
    disjoint_coefficient = 1
    # This is for when the genes are the same so they check the similarity of the weights
    matching_genes_coefficient = 0.4

    # How close they have to be to be considered in the same species
    compatibility_threshold = 3

    """
    Mutation
    """
    # Whether only one type of mutation can happen at any time.
    single_mutation_only = False

    # Weight changes
    weight_mutation_chance = 0.8
    weight_mutation_perturbe_chance = 0.9
    weight_mutation_reset_connection_chance = 0.1

    weight_mutation_perturbe_chance_backprop = 0.9
    weight_mutation_reset_connection_chance_backprop = 0.1
    weight_mutation_reset_all_connections_chance_backprop = 0.1

    # Standard deviation for the distribution from which we pick the perturbation value
    weight_mutation_sigma = 0.5
    # Mean for the distribution from which we pick the perturbation value
    weight_mutation_mean = 0.0

    # Backprop versions
    weight_mutation_sigma_backprop = 1
    weight_mutation_mean_backprop = 0.0

    # This is the chance a gene is disabled if it was disabled in either parent
    change_to_disable_gene_if_either_parent_disabled = 0.75

    chance_for_mutation_without_crossover = 0.25

    inter_species_mating_rate = 0.001


    add_node_mutation_chance = 0.4
    add_connection_mutation_chance = 0.5
    remove_node_mutation_chance = 0.3
    remove_connection_mutation_chance = 0.2

    ## These chances are used when we're performing mutation whilst using backprop optimisation
    add_node_mutation_chance_backprop = 0.7
    add_connection_mutation_chance_backprop = 0.5
    remove_node_mutation_chance_backprop = 0.05
    remove_connection_mutation_chance_backprop = 0.05
    reset_all_weights_mutation_chance_backprop = 0.05
    # add_node_mutation_chance_backprop = 0.4
    # add_connection_mutation_chance_backprop = 0.7
    # remove_node_mutation_chance_backprop = 0.05
    # remove_connection_mutation_chance_backprop = 0.05
    # reset_all_weights_mutation_chance_backprop = 0.05

    ## OLD VALUES FOR WHEN WE WERE JUST USING GENETIC ALGORITHM
    # add_node_mutation_chance = 0.03
    # add_connection_mutation_chance = 0.05
    # remove_node_mutation_chance = 0.01
    # remove_connection_mutation_chance = 0.01

    """
    Speciation
    """

    # Parameters used when checking stagnation
    # Allowable number of generations before being considered stagnant
    max_stagnation_generations = 15
    # Min number of species required before throwing out due to stagnation
    num_species_min = 2

    """
    Reproduction
    """
    # Minimum species size
    # TODO: Change back to default value of 2
    min_species_size = 0

    """
    Survival
    """
    # Percentage of the population which carries on unchanged(?)
    survival_threshold = 0.2
    # This means that a certain percentage of the top elite genomes will carry over to the next population unchanged
    keep_unmutated_top_percentage = True  # (Default should be False)

--------------------------------------------------------------------------------
/config_multiclass.py:
--------------------------------------------------------------------------------
class ConfigMultiClass:
    """
    Population
    """

    population_size = 15

    # TODO: Ensure each of these are used somewhere in the code
    """
    Compatibility distance
    """
    # The coefficients used for calculating the compatibility distance between two genomes
    excess_coefficient = 1
    disjoint_coefficient = 1
    # This is for when the genes are the same so they check the similarity of the weights
    matching_genes_coefficient = 0.4

    # How close they have to be to be considered in the same species
    compatibility_threshold = 3

    """
    Mutation
    """
    # Whether only one type of mutation can happen at any time.
    single_mutation_only = False

    # Weight changes
    weight_mutation_chance = 0.8
    weight_mutation_perturbe_chance = 0.9
    weight_mutation_reset_connection_chance = 0.1

    weight_mutation_perturbe_chance_backprop = 0.9
    weight_mutation_reset_connection_chance_backprop = 0.1
    weight_mutation_reset_all_connections_chance_backprop = 0.1

    # Standard deviation for the distribution from which we pick the perturbation value
    weight_mutation_sigma = 0.5
    # Mean for the distribution from which we pick the perturbation value
    weight_mutation_mean = 0.0

    # Backprop versions
    weight_mutation_sigma_backprop = 1
    weight_mutation_mean_backprop = 0.0

    # This is the chance a gene is disabled if it was disabled in either parent
    change_to_disable_gene_if_either_parent_disabled = 0.75

    chance_for_mutation_without_crossover = 0.25

    inter_species_mating_rate = 0.001


    add_node_mutation_chance = 0.4
    add_connection_mutation_chance = 0.5
    remove_node_mutation_chance = 0.15
    remove_connection_mutation_chance = 0.15

    ## These chances are used when we're performing mutation whilst using backprop optimisation
    add_node_mutation_chance_backprop = 0.7
    add_connection_mutation_chance_backprop = 0.5
    remove_node_mutation_chance_backprop = 0.05
    remove_connection_mutation_chance_backprop = 0.05
    reset_all_weights_mutation_chance_backprop = 0.05

    ## OLD VALUES FOR WHEN WE WERE JUST USING GENETIC ALGORITHM
    # add_node_mutation_chance = 0.03
    # add_connection_mutation_chance = 0.05
    # remove_node_mutation_chance = 0.01
    # remove_connection_mutation_chance = 0.01

    """
    Speciation
    """

    # Parameters used when checking stagnation
    # Allowable number of generations before being considered stagnant
    max_stagnation_generations = 15
    # Min number of species required before throwing out due to stagnation
    num_species_min = 2

| """ 82 | Reproduction 83 | """ 84 | # Minimum species size 85 | min_species_size = 0 86 | 87 | """ 88 | Survival 89 | """ 90 | # Percentage of the population which carries on un-changed(?) 91 | survival_threshold = 0.2 92 | # This means that a certain percentage of the top elite genomes will carry over to the next population un changed 93 | keep_unmutated_top_percentage = True # (Default should be False) 94 | -------------------------------------------------------------------------------- /data_storage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_circle_data(): 5 | # Skip first row because headings 6 | circle_data = np.loadtxt('C:/Users/tsdev/Desktop/circle_25/input.txt', skiprows=1) 7 | y_data = circle_data[:, circle_data.shape[1] - 1] 8 | x_data = circle_data[:, 1:(circle_data.shape[1] - 1)] 9 | return x_data, y_data 10 | 11 | 12 | def get_spiral_data(): 13 | # Skip first row because headings 14 | spiral_data = np.loadtxt('C:/Users/tsdev/Desktop/spiral_25/input.txt', skiprows=1) 15 | y_data = spiral_data[:, spiral_data.shape[1] - 1] 16 | x_data = spiral_data[:, 1:(spiral_data.shape[1] - 1)] 17 | return x_data, y_data 18 | -------------------------------------------------------------------------------- /data_visualisation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | from numpy.random import rand 6 | import pickle 7 | import matplotlib.pyplot as plt 8 | import copy 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | from NEAT import NEAT 12 | from genome_neural_network import GenomeNeuralNetwork 13 | from neural_network import create_data 14 | from data_storage import get_circle_data, get_spiral_data 15 | from read_mat_files import get_shm_two_class_data 16 | 17 | 18 | def initialise_genome(genome_pickle_filepath): 19 | """ 20 | Function to intialise a genome from a pickle file 21 | :param genome_pickle_filepath: File path to pickle 22 | :return: the intialised genome 23 | """ 24 | infile = open(genome_pickle_filepath, 'rb') 25 | genome = pickle.load(infile) 26 | infile.close() 27 | return genome 28 | 29 | 30 | def get_genome_predictions(genome, x_data): 31 | """ 32 | Function to return predictions for a given genome 33 | :param genome: The genome class instance 34 | :param x_data: The data to be predicted on 35 | :param y_data: The true labels for the data 36 | :return: the predictions for the given x_data 37 | """ 38 | # y_data isn't important but it's needed as a parameter 39 | _, y_data = create_data(n_generated=500) 40 | genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_data, y_data=y_data) 41 | return genome_nn.run_one_pass(input_data=x_data, return_prediction_only=True).round() 42 | 43 | 44 | def plot_decision_boundary(genome, data_being_used): 45 | assert (data_being_used in {'circle_data', 'xor_data', 'spiral_data', 'shm_two_class'}) 46 | 47 | number_of_data_points = 50 48 | if data_being_used == 'xor_data': 49 | x_values = np.linspace(0, 1, number_of_data_points).tolist() 50 | elif data_being_used == 'circle_data': 51 | x_values = np.linspace(-4, 4, number_of_data_points).tolist() 52 | elif data_being_used == 'shm_two_class': 53 | x_values = np.linspace(-29, 1, number_of_data_points).tolist() 54 | y_values = np.linspace(-34, 4, number_of_data_points).tolist() 55 | z_values = np.linspace(-31, 11, number_of_data_points).tolist() 56 | 57 | 
-------------------------------------------------------------------------------- /data_visualisation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | from numpy.random import rand 6 | import pickle 7 | import matplotlib.pyplot as plt 8 | import copy 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | from NEAT import NEAT 12 | from genome_neural_network import GenomeNeuralNetwork 13 | from neural_network import create_data 14 | from data_storage import get_circle_data, get_spiral_data 15 | from read_mat_files import get_shm_two_class_data 16 | 17 | 18 | def initialise_genome(genome_pickle_filepath): 19 | """ 20 | Function to initialise a genome from a pickle file 21 | :param genome_pickle_filepath: File path to pickle 22 | :return: the initialised genome 23 | """ 24 | infile = open(genome_pickle_filepath, 'rb') 25 | genome = pickle.load(infile) 26 | infile.close() 27 | return genome 28 | 29 | 30 | def get_genome_predictions(genome, x_data): 31 | """ 32 | Function to return predictions for a given genome 33 | :param genome: The genome class instance 34 | :param x_data: The data to be predicted on 35 | (y_data is generated internally because create_genome_nn expects labels) 36 | :return: the predictions for the given x_data 37 | """ 38 | # y_data isn't important but it's needed as a parameter 39 | _, y_data = create_data(n_generated=500) 40 | genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_data, y_data=y_data) 41 | return genome_nn.run_one_pass(input_data=x_data, return_prediction_only=True).round() 42 | 43 | 44 | def plot_decision_boundary(genome, data_being_used): 45 | assert (data_being_used in {'circle_data', 'xor_data', 'spiral_data', 'shm_two_class'}) 46 | 47 | number_of_data_points = 50 48 | if data_being_used == 'xor_data': 49 | x_values = np.linspace(0, 1, number_of_data_points).tolist() 50 | elif data_being_used == 'circle_data': 51 | x_values = np.linspace(-4, 4, number_of_data_points).tolist() 52 | elif data_being_used == 'shm_two_class': 53 | x_values = np.linspace(-29, 1, number_of_data_points).tolist() 54 | y_values = np.linspace(-34, 4, number_of_data_points).tolist() 55 | z_values = np.linspace(-31, 11, number_of_data_points).tolist() 56 | 57 | prediction_list = [] 58 | if data_being_used != 'shm_two_class': 59 | x_values_reverse = copy.deepcopy(x_values) 60 | x_values_reverse.reverse() 61 | current_x = [] 62 | current_y = [] 63 | for x in x_values: 64 | for y in x_values_reverse: 65 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 66 | x_data = np.array([[x, y]]) 67 | current_x.append(x) 68 | current_y.append(y) 69 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 70 | prediction_list += predictions[0].tolist() 71 | for x in x_values: 72 | for y in x_values_reverse: 73 | # x_data = np.array([[y, x, y ** 2, x ** 2, y * x, np.sin(y), np.sin(x)]]) 74 | x_data = np.array([[y, x]]) 75 | # This is intentional: the reverse of the previous loop 76 | current_x.append(y) 77 | current_y.append(x) 78 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 79 | prediction_list += predictions[0].tolist() 80 | plt.scatter(current_x, current_y, color=create_label_colours(labels=np.array(prediction_list))) 81 | plt.title('Decision boundary for optimised genome') 82 | plt.xlabel('X1') 83 | plt.ylabel('X2') 84 | plt.show() 85 | 86 | else: 87 | # NOTE: when plotting SHM data, remember to subtract and divide by 10 88 | current_x = [] 89 | current_y = [] 90 | current_z = [] 91 | 92 | x_values_reverse = copy.deepcopy(x_values) 93 | x_values_reverse.reverse() 94 | 95 | y_values_reverse = copy.deepcopy(y_values) 96 | y_values_reverse.reverse() 97 | 98 | z_values_reverse = copy.deepcopy(z_values) 99 | z_values_reverse.reverse() 100 | for x in x_values: 101 | for y in y_values: 102 | for z in z_values: 103 | print(x, y, z) 104 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 105 | x_data = np.array([[x, y, z]]) 106 | current_x.append(x) 107 | current_y.append(y) 108 | current_z.append(z) 109 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 110 | prediction_list += predictions[0].tolist() 111 | for x in x_values_reverse: 112 | for y in y_values: 113 | for z in z_values: 114 | print(x, y, z) 115 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 116 | x_data = np.array([[x, y, z]]) 117 | current_x.append(x) 118 | current_y.append(y) 119 | current_z.append(z) 120 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 121 | prediction_list += predictions[0].tolist() 122 | for x in x_values: 123 | for y in y_values_reverse: 124 | for z in z_values: 125 | print(x, y, z) 126 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 127 | x_data = np.array([[x, y, z]]) 128 | current_x.append(x) 129 | current_y.append(y) 130 | current_z.append(z) 131 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 132 | prediction_list += predictions[0].tolist() 133 | for x in x_values: 134 | for y in y_values: 135 | for z in z_values_reverse: 136 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 137 | x_data = np.array([[x, y, z]]) 138 | current_x.append(x) 139 | current_y.append(y) 140 | current_z.append(z) 141 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 142 | prediction_list += predictions[0].tolist() 143 | 144 | fig = plt.figure() 145 | ax = Axes3D(fig) 146 | ax.scatter(current_x, current_y, current_z, color=create_label_colours(np.array(prediction_list))) 147 | ax.view_init(-140, 30) 148 | plt.show() 149 | 150 |
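The nested loops above call the network once per grid point. Since run_one_pass accepts a whole matrix (the per-point calls above already pass it 2-D arrays), the 2-D case can usually be evaluated in one batched call. A minimal sketch, assuming a genome whose network takes two input features and predicts both classes somewhere on the grid (create_label_colours below requires two distinct labels); plot_decision_boundary_vectorised is a hypothetical helper:

import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary_vectorised(genome, low=-4.0, high=4.0, n=50):
    # Hypothetical batched alternative to the per-point loops above.
    xs, ys = np.meshgrid(np.linspace(low, high, n), np.linspace(low, high, n))
    grid = np.column_stack([xs.ravel(), ys.ravel()])  # shape (n * n, 2)
    predictions = get_genome_predictions(genome=genome, x_data=grid)
    plt.scatter(grid[:, 0], grid[:, 1], color=create_label_colours(labels=np.array(predictions)))
    plt.title('Decision boundary for optimised genome')
    plt.show()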
151 | def create_label_colours(labels): 152 | """ 153 | Function turns the binary classification classes into two separate colours, e.g. 1, 0 => 'green', 'red' 154 | :param labels: numpy array of labels in shape (n, 1) 155 | :return: 156 | """ 157 | main_features = np.unique(labels) 158 | if main_features.shape[0] != 2: 159 | raise ValueError('There can only be two class labels') 160 | try: 161 | if labels.shape[1] != 1: 162 | raise ValueError('Labels must be in shape (num_examples, 1)') 163 | labels_list = labels[:, 0] 164 | except IndexError: 165 | labels_list = labels[:, ] 166 | coloured_labels = ['red' if label == main_features[0] else 'green' for label in labels_list] 167 | return coloured_labels 168 | 169 | 170 | def plot_shm_data(elevation, rotation_angle): 171 | x_data, y_data = get_shm_two_class_data(normalise_x=False) 172 | 173 | x_vals = x_data[:, 0].tolist() 174 | y_vals = x_data[:, 1].tolist() 175 | z_vals = x_data[:, 2].tolist() 176 | 177 | x_min, x_max = min(x_vals), max(x_vals) 178 | y_min, y_max = min(y_vals), max(y_vals) 179 | z_min, z_max = min(z_vals), max(z_vals) 180 | 181 | labels = create_label_colours(labels=y_data) 182 | 183 | fig = plt.figure() 184 | ax = Axes3D(fig) 185 | ax.scatter(x_vals, y_vals, z_vals, color=labels) 186 | ax.view_init(elevation, rotation_angle) 187 | plt.show() 188 | 189 | fig = plt.figure() 190 | ax = Axes3D(fig) 191 | x1_reds = [] 192 | x2_reds = [] 193 | x3_reds = [] 194 | x1_greens = [] 195 | x2_greens = [] 196 | x3_greens = [] 197 | for index in range(len(labels)): 198 | if labels[index] == 'green': 199 | x1_greens.append(x_vals[index]) 200 | x2_greens.append(y_vals[index]) 201 | x3_greens.append(z_vals[index]) 202 | else: 203 | x1_reds.append(x_vals[index]) 204 | x2_reds.append(y_vals[index]) 205 | x3_reds.append(z_vals[index]) 206 | ax.scatter(x1_greens, x2_greens, x3_greens, c='green', label='Undamaged', 207 | ) 208 | ax.scatter(x1_reds, x2_reds, x3_reds, c='red', label='Damaged', 209 | ) 210 | ax.legend(loc='upper right') 211 | ax.view_init(elevation, rotation_angle) 212 | plt.show() 213 | 214 | 215 | def plot_generation_graph(*args, same_axis=None, generation_information, y_label=None, title): 216 | """ 217 | Generic function to plot data 218 | :param title: String for the title 219 | :param y_label: String for the y label 220 | :param same_axis: Defines whether two or more datasets should be plotted on the same y axis 221 | """ 222 | # Plus one because of how the range function works 223 | generations_to_go_through = list(range(1, max(generation_information) + 1)) 224 | 225 | if len(args) > 1: 226 | 227 | # Can't plot more than two items on a 2d plot 228 | assert (len(args) == 2) 229 | assert (same_axis is not None) 230 | if same_axis: 231 | # Need a common y_label 232 | assert (y_label is not None) 233 | 234 | y_data_list = [] 235 | y_labels = [] 236 | for information in args: 237 | information_type = information[0] 238 | information_plot_type = information[1] 239 | if not same_axis: 240 | y_label = information[2] 241 | y_labels.append(y_label) 242 | 243 | y_data = [] 244 | for generation in generations_to_go_through: 245 | y_data.append(generation_information[generation][information_type]) 246 | if information_plot_type == 'line' and same_axis: 247 | plt.plot(generations_to_go_through, y_data) 248 | elif information_plot_type == 'bar' and same_axis: 249 | plt.bar(generations_to_go_through, y_data) 250 | y_data_list.append(y_data) 251 | 252 | if not same_axis: 253 | plt.plot(generations_to_go_through, y_data_list[0], color='r') 254 | plt.ylabel(y_labels[0]) 255 | axes2 = plt.twinx() 256 | axes2.plot(generations_to_go_through, y_data_list[1], color='g') 257 | axes2.set_ylabel(y_labels[1]) 258 | else: 259 | plt.ylabel(y_label) 260 | plt.xticks(generations_to_go_through) 261 | plt.xlabel('Generation') 262 | plt.title(title) 263 | plt.show() 264 | 265 | else: 266 | y_data = [] 267 | information = args[0] 268 | information_type = information[0] 269 | information_plot_type = information[1] 270 | for generation in generations_to_go_through: 271 | y_data.append(generation_information[generation][information_type]) 272 | if information_plot_type == 'line': 273 | plt.plot(generations_to_go_through, y_data) 274 | elif information_plot_type == 'bar': 275 | plt.bar(generations_to_go_through, y_data) 276 | plt.xticks(generations_to_go_through) 277 | plt.xlabel('Generation') 278 | plt.ylabel(y_label) 279 | plt.title(title) 280 | plt.show() 281 | 282 |
283 | def visualise_generation_tracker(filepath_to_genome): 284 | infile = open(filepath_to_genome, 'rb') 285 | generation_tracker_instance = pickle.load(infile) 286 | generation_information_dict = generation_tracker_instance.generation_information 287 | 288 | # If more than one information type is specified, MUST define the same_axis variable 289 | plot_generation_graph(('best_all_time_genome_fitness', 'line'), 290 | ('average_population_fitness', 'line'), 291 | same_axis=True, 292 | y_label='Fitness value', 293 | generation_information=generation_information_dict, 294 | title='Best All Time Genome Fitness through generations') 295 | 296 | plot_generation_graph(('best_all_time_genome_accuracy', 'line', 'Best Genome Accuracy (%)'), 297 | ('best_all_time_genome_f1_score', 'line', 'Best Genome F1 score'), 298 | same_axis=False, 299 | generation_information=generation_information_dict, 300 | title='Best All Time Genome Accuracy and F1 Score through generations') 301 | 302 | plot_generation_graph(('best_all_time_genome_accuracy', 'line'), 303 | generation_information=generation_information_dict, y_label='Best Genome Accuracy (%)', 304 | title='Best All Time Genome Accuracy through generations') 305 | infile.close() 306 | 307 | 308 | def plot_population_complexity(filepath_to_neat_instance, font_size): 309 | infile = open(filepath_to_neat_instance, 'rb') 310 | neat_instance = pickle.load(infile) 311 | x_data = [] 312 | connection_count = [] 313 | node_count = [] 314 | counter = 0 315 | for population_member in neat_instance.population.values(): 316 | counter += 1 317 | x_data.append(counter) 318 | node_count.append(len(population_member.nodes)) 319 | connection_count.append(len(population_member.connections)) 320 | 321 | test = [11 for i in range(len(x_data))] # placeholder reference line 322 | 323 | connection_count.sort() # NOTE: after sorting, the x axis no longer identifies individuals 324 | 325 | plt.bar(x_data, connection_count) 326 | plt.xticks(x_data) 327 | if font_size: 328 | plt.xlabel('Individual', fontsize=font_size) 329 | plt.ylabel('Test label', fontsize=font_size) 330 | plt.title('Test title', fontsize=font_size) 331 | plt.xticks(fontsize=font_size) 332 | plt.yticks(fontsize=font_size) 333 | else: 334 | plt.xlabel('Individual') 335 | plt.ylabel('Test label') 336 | plt.title('Test title') 337 | axes2 = plt.twinx() 338 | axes2.plot(x_data, test, color='r') 339 | # axes2.plot(x_data, node_count, color='r') 340 | 341 | if font_size: 342 | plt.xlabel('Individual', fontsize=font_size) 343 | plt.ylabel('Test label', fontsize=font_size) 344 | plt.title('Test title', fontsize=font_size) 345 | plt.xticks(fontsize=font_size) 346 | plt.yticks(fontsize=font_size) 347 | else: 348 | plt.xlabel('Individual') 349 | plt.ylabel('Test label') 350 | plt.title('Test title') 351 | plt.show() 352 | 353 | test = [7 for i in range(len(x_data))] 354 | 355 | plt.bar(x_data, node_count) 356 | plt.xticks(x_data) 357 | plt.xlabel('Individual') 358 | plt.ylabel('Test label') 359 | plt.title('Test title') 360 | plt.plot(x_data, test, color='r') 361 | 362 | plt.xlabel('Individual') 363 | plt.ylabel('Test label') 364 | plt.title('Test title') 365 | plt.show() 366 | 367 | best_genome = neat_instance.best_all_time_genome 368 | 369 | print(len(best_genome.connections)) 370 | print(len(best_genome.nodes)) 371 | 372 | infile.close() 373 | 374 |
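Several helpers above repeat the open/pickle.load/close pattern by hand; a with block is the idiomatic form and closes the file even if unpickling raises. A minimal sketch; load_pickle is a hypothetical helper, not part of the repo:

import pickle

def load_pickle(filepath):
    # Hypothetical context-manager version of the pattern used by
    # initialise_genome, visualise_generation_tracker and
    # plot_population_complexity above.
    with open(filepath, 'rb') as infile:
        return pickle.load(infile)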
375 | def create_confusion_matrix(): 376 | import pandas as pd 377 | import seaborn as sn 378 | y_predicted = np.random.randint(low=0, high=2, size=(300, 1)) # randint's high bound is exclusive 379 | y_actual = np.random.randint(low=0, high=2, size=(300, 1)) 380 | 381 | y_predicted = y_predicted[:, 0] 382 | y_actual = y_actual[:, 0] 383 | data = {'y_Predicted': y_predicted, 384 | 'y_Actual': y_actual, 385 | } 386 | 387 | df = pd.DataFrame(data, columns=['y_Actual', 'y_Predicted']) 388 | confusion_matrix = pd.crosstab(df['y_Actual'], df['y_Predicted'], rownames=['Actual'], colnames=['Predicted']) 389 | 390 | sn.heatmap(confusion_matrix, annot=True) 391 | plt.show() 392 | 393 |
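(The deprecated np.random.random_integers calls above have been replaced with np.random.randint.) Since the project already depends on sklearn.metrics, the same heatmap can also be produced directly from two label vectors without pandas.crosstab. A sketch, assuming binary 0/1 labels; plot_confusion_matrix_sklearn is a hypothetical helper:

import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.metrics

def plot_confusion_matrix_sklearn(y_actual, y_predicted):
    # Hypothetical alternative to create_confusion_matrix above.
    matrix = sklearn.metrics.confusion_matrix(y_actual, y_predicted)
    sns.heatmap(matrix, annot=True, fmt='d')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()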
394 | def main(): 395 | # DATA 396 | x_data, y_data = create_data(n_generated=200, add_noise=True) 397 | x_circle, y_circle = get_circle_data() 398 | x_spiral, y_spiral = get_spiral_data() 399 | 400 | # X1, X2 for all datasets 401 | feature_1_xor = x_data[:, 0] 402 | feature_2_xor = x_data[:, 1] 403 | feature_1_circle = x_circle[:, 0] 404 | feature_2_circle = x_circle[:, 1] 405 | feature_1_spiral = x_spiral[:, 0] 406 | feature_2_spiral = x_spiral[:, 1] 407 | 408 | plot_data = False 409 | show_decision_boundary = False 410 | visualise_generation = False 411 | plot_confusion_matrix = False 412 | visualise_population_complexity = False 413 | plot_shm_data_figure = True 414 | 415 | font_size = 20 416 | # PLOT DATA 417 | if plot_data: 418 | # TODO: Add legends 419 | plt.scatter(feature_1_xor, feature_2_xor, color=create_label_colours(labels=y_data)) 420 | plt.title('XOR Data', fontsize=font_size) 421 | plt.xlabel('X1', fontsize=font_size) 422 | plt.ylabel('X2', fontsize=font_size) 423 | plt.tick_params(axis='both', which='major', labelsize=10) 424 | plt.xticks(fontsize=font_size) 425 | plt.yticks(fontsize=font_size) 426 | plt.show() 427 | 428 | fig, ax = plt.subplots() 429 | label_colours = create_label_colours(labels=y_data) 430 | x1_reds = [] 431 | x2_reds = [] 432 | x1_greens = [] 433 | x2_greens = [] 434 | for index in range(len(label_colours)): 435 | if label_colours[index] == 'green': 436 | x1_greens.append(feature_1_xor[index]) 437 | x2_greens.append(feature_2_xor[index]) 438 | else: 439 | x1_reds.append(feature_1_xor[index]) 440 | x2_reds.append(feature_2_xor[index]) 441 | 442 | ax.scatter(x1_greens, x2_greens, c='green', label='Class 1', 443 | alpha=1, edgecolors='none') 444 | ax.scatter(x1_reds, x2_reds, c='red', label='Class 0', 445 | alpha=1, edgecolors='none') 446 | ax.legend(loc='upper right') 447 | plt.xlabel('X1') 448 | plt.ylabel('X2') 449 | plt.show() 450 | 451 | # plt.scatter(feature_1_circle, feature_2_circle, color=create_label_colours(labels=y_circle)) 452 | # plt.title('Circle Data') 453 | # plt.xlabel('X1') 454 | # plt.ylabel('X2') 455 | # plt.show() 456 | # plt.scatter(feature_1_spiral, feature_2_spiral, color=create_label_colours(labels=y_spiral)) 457 | # plt.title('Spiral Data') 458 | # plt.xlabel('X1') 459 | # plt.ylabel('X2') 460 | # plt.show() 461 | 462 | if show_decision_boundary: 463 | # Test genome accuracy 464 | genome = initialise_genome(genome_pickle_filepath='pickles/best_genome_pickle_shm_two_class_618056') 465 | plot_decision_boundary(genome=genome, data_being_used='shm_two_class') 466 | 467 | if visualise_generation: 468 | visualise_generation_tracker(filepath_to_genome='algorithm_runs/xor_small_noise/run_1/generation_tracker') 469 | if visualise_population_complexity: 470 | plot_population_complexity(filepath_to_neat_instance='algorithm_runs/xor_small_noise/run_1/NEAT_instance', 471 | font_size=None) 472 | 473 | if plot_confusion_matrix: 474 | create_confusion_matrix() 475 | if plot_shm_data_figure: 476 | plot_shm_data(rotation_angle=30, elevation=-160) 477 | # 478 | # plt.figure() 479 | # N = 5 480 | # menMeans = (20, 35, 30, 35, 27) 481 | # menStd = (2, 3, 4, 1, 2) 482 | # width = 0.35 # the width of the bars 483 | # womenMeans = (25, 32, 34, 20, 25) 484 | # womenStd = (3, 5, 2, 3, 3) 485 | # ind = np.arange(N) 486 | # plt.ylim(0.0, 65.0) 487 | # plt.bar(ind, menMeans, width, color='r', yerr=menStd, label='Men means') 488 | # plt.bar(ind + width, womenMeans, width, color='y', yerr=womenStd, label='Women means') 489 | # # plt.plot(ind + width, womenMeans, color='k', label='Sine') 490 | # plt.ylabel('Bar plot') 491 | # 492 | # x = np.linspace(0, N) 493 | # y = np.sin(x) 494 | # axes2 = plt.twinx() 495 | # # axes2.plot(ind+width, womenMeans, color='k', label='Sine') 496 | # axes2.plot(x, y, color='k', label='Sine') 497 | # # axes2.set_ylim(-1, 1) 498 | # # axes2.set_ylabel('Line plot') 499 | # 500 | # plt.show() 501 | 502 | 503 | if __name__ == "__main__": 504 | main() 505 | -------------------------------------------------------------------------------- /f_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | import sklearn.metrics 4 | 5 | 6 | def calculate_f_score(real_data, prediction): 7 | predicted_num_1 = prediction.count(1) 8 | true_positives = sum(1 for real, pred in zip(real_data, prediction) if real == 1 and pred == 1) 9 | real_num_1 = real_data.count(1) 10 | 11 | precision = true_positives / predicted_num_1 12 | recall = true_positives / real_num_1 13 | 14 | return 2 * ((precision * recall) / (precision + recall)) 15 | 16 | 17 | def main(): 18 | real_data = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0] 19 | prediction = [1, 0, 1, 1, 1, 0, 1, 0, 0, 0] 20 | 21 | f1_score_2 = sklearn.metrics.f1_score(real_data, prediction) 22 | f1_score = calculate_f_score(real_data, prediction) 23 | 24 | print(f1_score_2) 25 | print(f1_score) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 |
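(calculate_f_score above originally counted predicted positives twice; it now counts true positives, so its output matches sklearn.metrics.f1_score in main.) It still divides by zero when nothing is predicted positive or no positives exist. A guarded sketch; calculate_f_score_safe is a hypothetical variant:

def calculate_f_score_safe(real_data, prediction):
    # Hypothetical zero-division-safe variant of calculate_f_score above.
    true_positives = sum(1 for real, pred in zip(real_data, prediction)
                         if real == 1 and pred == 1)
    predicted_positives = prediction.count(1)
    actual_positives = real_data.count(1)
    if true_positives == 0:
        # Covers predicted_positives == 0 and actual_positives == 0 as well.
        return 0.0
    precision = true_positives / predicted_positives
    recall = true_positives / actual_positives
    return 2 * precision * recall / (precision + recall)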
-------------------------------------------------------------------------------- /gene.py: -------------------------------------------------------------------------------- 1 | class ConnectionGene: 2 | 3 | def __init__(self, input_node, output_node, innovation_number=None, enabled=True, weight=None, keep_constant_weight=False): 4 | self.innovation_number = innovation_number 5 | self.input_node = input_node 6 | self._output_node = output_node 7 | self.enabled = enabled 8 | self.weight = weight 9 | # This attribute is used in ghost nodes 10 | self.keep_constant_weight = keep_constant_weight 11 | 12 | @property 13 | def output_node(self): 14 | return self._output_node 15 | 16 | @output_node.setter 17 | def output_node(self, value): 18 | # You can't have a node which loops back to itself 19 | assert (value != self.input_node) 20 | self._output_node = value 21 | 22 | def __str__(self): 23 | return 'Input: {}, Output: {}, Enabled: {}'.format(self.input_node, self._output_node, self.enabled) 24 | 25 | def __repr__(self): 26 | return '{}-->{}'.format(self.input_node, self._output_node) 27 | 28 | 29 | class NodeGene: 30 | 31 | def __init__(self, node_type, node_id, bias=None): 32 | # Specifies the type of node 33 | self._node_type = node_type 34 | # This is to keep track of which node is which 35 | self.node_id = node_id 36 | self.bias = bias 37 | 38 | @property 39 | def node_type(self): 40 | return self._node_type 41 | 42 | @node_type.setter 43 | def node_type(self, value): 44 | # There are only 3 possible types for a node gene 45 | assert (value in {'source', 'hidden', 'output'}) 46 | self._node_type = value 47 | 48 | def __str__(self): 49 | return 'This is node number {} which is a {} node with a bias of {}'.format(self.node_id, self._node_type, 50 | self.bias) 51 | 52 | def __repr__(self): 53 | return '{}:{}'.format(self.node_id, self._node_type) 54 | 55 | # def __add__(self, other): 56 | # return self._node_number + other 57 |
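A quick illustration of how the two gene classes above compose: nodes carry ids, types and biases, while connections carry innovation numbers and weights. All concrete ids and weights below are made-up example values:

from gene import NodeGene, ConnectionGene

# Minimal sketch (hypothetical values) of the gene lists a genome is built from:
# two source nodes feeding one output node.
nodes = [NodeGene(node_type='source', node_id=1),
         NodeGene(node_type='source', node_id=2),
         NodeGene(node_type='output', node_id=3, bias=0.5)]
connections = [ConnectionGene(input_node=1, output_node=3, innovation_number=1, weight=0.7),
               ConnectionGene(input_node=2, output_node=3, innovation_number=2, weight=-0.3)]
print(connections[0])  # Input: 1, Output: 3, Enabled: True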
-------------------------------------------------------------------------------- /generation_statistics.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import collections 4 | import os 5 | 6 | 7 | class GenerationStatistics: 8 | 9 | def __init__(self): 10 | self.generation_information = {} 11 | self.mean_compatibility_distance = None 12 | self.std_dev_compatibility_distance = None 13 | self.best_all_time_genome_fitness = None 14 | self.average_population_fitness = None 15 | self.num_species = None 16 | self.mean_number_connections_overall = None 17 | self.mean_number_connections_enabled = None 18 | self.population_size = None 19 | self.mean_number_nodes_overall = None 20 | self.mean_number_nodes_enabled = None 21 | self.species_execute_time = None 22 | self.reproduce_execute_time = None 23 | self.evaluate_execute_time = None 24 | self.num_generation_add_node = None 25 | self.num_generation_delete_node = None 26 | self.num_generation_add_connection = None 27 | self.num_generation_delete_connection = None 28 | self.num_generation_weight_mutations = None 29 | self.perturbation_values_max = None 30 | self.perturbation_values_min = None 31 | self.perturbation_values_list = None 32 | self.num_disjoint_list = None 33 | self.num_excess_list = None 34 | self.weight_diff_list = None 35 | self.avg_num_disjoint = None 36 | self.avg_num_excess = None 37 | self.avg_weight_diff = None 38 | self.best_all_time_genome_f1_score = None 39 | self.best_all_time_genome_accuracy = None 40 | 41 | def update_generation_information(self, generation): 42 | 43 | # Update min and max values of perturbation to weights 44 | self.perturbation_values_max = max(self.perturbation_values_list) 45 | self.perturbation_values_min = min(self.perturbation_values_list) 46 | self.avg_num_disjoint = np.mean(self.num_disjoint_list) 47 | self.avg_num_excess = np.mean(self.num_excess_list) 48 | self.avg_weight_diff = np.mean(self.weight_diff_list) 49 | 50 | information = {} 51 | for info_type, info_value in self.__dict__.items(): 52 | if isinstance(info_value, float) or isinstance(info_value, np.float64): 53 | information[info_type] = round(info_value, 6) 54 | else: 55 | information[info_type] = info_value 56 | 57 | self.generation_information[generation] = information 58 | 59 | def reset_tracker_attributes(self): 60 | """ 61 | Reset the number of mutations which have occurred for the current generation. 62 | :return: 63 | """ 64 | self.num_generation_add_connection = 0 65 | self.num_generation_add_node = 0 66 | self.num_generation_delete_connection = 0 67 | self.num_generation_delete_node = 0 68 | self.num_generation_weight_mutations = 0 69 | self.perturbation_values_list = [] 70 | self.num_excess_list = [] 71 | self.num_disjoint_list = [] 72 | self.weight_diff_list = [] 73 | 74 | def plot_graphs(self, current_gen, save_plots=False, file_path=None): 75 | 76 | if (save_plots and not file_path) or (file_path and not save_plots): 77 | raise Exception('save_plots and file_path must be specified together') 78 | 79 | important_information_keys = { 80 | 'num_species', 'num_generation_add_node', 'num_generation_delete_node', 'num_generation_add_connection', 81 | 'num_generation_delete_connection', 'num_generation_weight_mutations', 'average_population_fitness', 82 | 'best_all_time_genome_fitness', 'mean_number_connections_enabled', 'mean_number_nodes_enabled', 83 | 'mean_compatibility_distance', 'avg_num_disjoint', 'avg_num_excess', 'avg_weight_diff', 84 | 'mean_number_connections_overall', 'best_all_time_genome_f1_score', 'best_all_time_genome_accuracy' 85 | } 86 | 87 | # Plot the tracked information to a graph every certain number of generations 88 | # for information_type, information in self.generation_information[current_gen].items(): 89 | for information_type in important_information_keys: 90 | # Don't need to print the dictionary 91 | if information_type != 'generation_information': 92 | # print(information_type, ':', ' {}'.format(information)) 93 | # if current_gen % generation_interval_for_graph == 0 and current_gen != 1: 94 | generations_to_go_through = list(range(1, current_gen + 1)) 95 | y_data = [] 96 | for generation in generations_to_go_through: 97 | y_data.append(self.generation_information[generation][information_type]) 98 | 99 | plt.plot(generations_to_go_through, y_data) 100 | plt.title(information_type) 101 | if save_plots: 102 | graphs_filepath = '{}/graphs'.format(file_path) 103 | if not os.path.exists(graphs_filepath): 104 | # Make the directory before saving graphs 105 | os.makedirs(graphs_filepath) 106 | plt.savefig('{}/{}_generation_{}.png'.format(graphs_filepath, information_type, current_gen)) 107 | plt.show() 108 | 109 | def print_generation_information(self, generation_interval_for_graph, plot_graphs_every_gen): 110 | current_gen = max(self.generation_information.keys()) 111 | print('**************************** Generation {} *******************************'.format(current_gen)) 112 | 113 | important_information = [ 114 | ('Number of Species', self.generation_information[current_gen]['num_species']), 115 | ('Added Node Mutations', self.generation_information[current_gen]['num_generation_add_node']), 116 | ('Delete Node Mutations', self.generation_information[current_gen]['num_generation_delete_node']), 117 | ('Add Connection Mutations', self.generation_information[current_gen]['num_generation_add_connection']), 118 | ('Delete Connection Mutations', 119 | self.generation_information[current_gen]['num_generation_delete_connection']), 120 | ('Weight Mutations', self.generation_information[current_gen]['num_generation_weight_mutations']), 121 | ('Average Fitness', self.generation_information[current_gen]['average_population_fitness']), 122 | ('Best All Time Genome Fitness', self.generation_information[current_gen]['best_all_time_genome_fitness']), 123 | ( 124 | 'Best All Time Genome f1 score', 125 | self.generation_information[current_gen]['best_all_time_genome_f1_score']),
126 | ( 127 | 'Best All Time Genome Accuracy Percent', 128 | self.generation_information[current_gen]['best_all_time_genome_accuracy']), 129 | 130 | ('Average Number of Connections Per Genome', 131 | self.generation_information[current_gen]['mean_number_connections_enabled']), 132 | ('Average Number of Nodes Per Genome', 133 | self.generation_information[current_gen]['mean_number_nodes_enabled']), 134 | ('Average Compatibility Distance', self.generation_information[current_gen]['mean_compatibility_distance']), 135 | ('Perturbation Max Value', self.generation_information[current_gen]['perturbation_values_max']), 136 | ('Perturbation Min Value', self.generation_information[current_gen]['perturbation_values_min']), 137 | ('Average Number of Disjoint Genes', self.generation_information[current_gen]['avg_num_disjoint']), 138 | ('Average Number of Excess Genes', self.generation_information[current_gen]['avg_num_excess']), 139 | ('Average Weight Difference', self.generation_information[current_gen]['avg_weight_diff']), 140 | ('Average Number of Connections', 141 | self.generation_information[current_gen]['mean_number_connections_overall']), 142 | # ('Average Number of Nodes', self.generation_information[current_gen]['avg_weight_diff']), 143 | ] 144 | 145 | # Make it an OrderedDict to keep the order above. 146 | important_information = collections.OrderedDict(important_information) 147 | 148 | # Print the information 149 | for info_type, info_value in important_information.items(): 150 | print('{}:{}'.format(info_type, info_value)) 151 | print('\n') 152 | 153 | if current_gen % generation_interval_for_graph == 0 and current_gen != 1 and plot_graphs_every_gen: 154 | self.plot_graphs(current_gen=current_gen) 155 | -------------------------------------------------------------------------------- /graph_algorithm.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | 4 | class Graph: 5 | """ 6 | Class used to find the number of paths between two nodes in a graph 7 | """ 8 | 9 | def __init__(self): 10 | # Keeps track of the vertices in the graph 11 | self.vertex_list = [] 12 | # Keeps track of the connections for each node 13 | self.connections = {} 14 | self.max_layer_for_node = {} 15 | 16 | def count_path_utils(self, current_node, destination, visited, path_count, path, overall_paths, layer_number): 17 | """ 18 | Checks if we're at the destination and adds one to the count if we are; otherwise checks all the neighbours of the current node 19 | :param layer_number: Which layer number we're on 20 | :param overall_paths: List containing all the possible paths 21 | :param path: The current path (list) 22 | :param current_node: The node we're currently at 23 | :param destination: The end of the expected path 24 | :param visited: A dict which tracks whether each node has been visited 25 | :param path_count: Counts the number of paths from the designated start node to the end node 26 | :return: 27 | """ 28 | # We've visited the current node since we're at it 29 | visited[current_node] = True 30 | path.append(current_node) 31 | layer_number.append(1) 32 | if current_node not in self.max_layer_for_node: 33 | self.max_layer_for_node[current_node] = sum(layer_number) 34 | else: 35 | if sum(layer_number) > self.max_layer_for_node[current_node]: 36 | self.max_layer_for_node[current_node] = sum(layer_number) 37 | 38 | # If the current node is the destination then we can increase the path_count number 39 | if current_node == destination: 40 | path_count.append(1)
41 | overall_paths.append(copy.deepcopy(path)) 42 | else: 43 | # Go through all the neighbours looking for the destination 44 | if current_node in self.connections: 45 | for neighbour in self.connections[current_node]: 46 | # If we haven't visited the neighbour, look through the neighbour for the destination 47 | if neighbour in visited and not visited[neighbour]: 48 | # Call the function recursively 49 | self.count_path_utils(neighbour, destination, visited, path_count, path, overall_paths, 50 | layer_number) 51 | 52 | layer_number.pop() 53 | # Remove current vertex from path[] and mark it as unvisited 54 | path.pop() 55 | # Once we've checked all the neighbours we can set the visited flag to false again 56 | visited[current_node] = False 57 | 58 | def add_edge(self, start_node, end_node): 59 | # TODO: Check if the multiclass version of this is the better way of adding an edge 60 | connection_dict = self.connections.get(start_node) 61 | if connection_dict: 62 | connection_dict.append(end_node) 63 | else: 64 | self.connections[start_node] = [end_node] 65 | self.vertex_list.append(start_node) 66 | self.vertex_list.append(end_node) 67 | 68 | def count_paths(self, start_node, end_node, return_paths=False): 69 | """ 70 | Count paths from start_node to end_node 71 | :param start_node: Where the path starts 72 | :param end_node: Where the end of the path is 73 | :return: 74 | """ 75 | 76 | # Keeps track of whether each node has been visited 77 | visited = {node: False for node in self.vertex_list} 78 | paths = [] 79 | overall_paths = [] 80 | path_count = [] 81 | layer_number = [] 82 | self.count_path_utils(start_node, end_node, visited, path_count, paths, overall_paths, layer_number) 83 | if return_paths: 84 | return sum(path_count), overall_paths 85 | else: 86 | return sum(path_count) 87 | 88 | 89 | def main(): 90 | g = Graph() 91 | g.add_edge(2, 3) 92 | g.add_edge(3, 5) 93 | g.add_edge(1, 3) 94 | g.add_edge(1, 5) 95 | 96 | print(g.count_paths(2, 5, True)) 97 | print(g.count_paths(1, 5, True)) 98 | 99 | print(g.max_layer_for_node) 100 | 101 | 102 | if __name__ == "__main__": 103 | main() 104 |
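While enumerating paths, max_layer_for_node records the longest path length (in nodes) from the start node to every node reached, which is what a NEAT-style network needs to place nodes into feed-forward layers. A small demonstration on a made-up DAG; the dict values shown are what the traversal above produces:

from graph_algorithm import Graph

# Sketch: derive layer assignments from max_layer_for_node after counting
# paths from a source node (node 1) to the output (node 5).
g = Graph()
g.add_edge(1, 3)
g.add_edge(3, 5)
g.add_edge(1, 5)
g.count_paths(start_node=1, end_node=5)
print(g.max_layer_for_node)  # {1: 1, 3: 2, 5: 3} -- node 5 sits on the deepest layer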
-------------------------------------------------------------------------------- /graph_algorithm_mutliclass.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | 4 | class GraphMultiClass: 5 | """ 6 | Class used to find the number of paths between two nodes in a graph 7 | """ 8 | 9 | def __init__(self): 10 | # Keeps track of the vertices in the graph 11 | self.vertex_list = [] 12 | # Keeps track of the connections for each node 13 | self.connections = {} 14 | self.max_layer_for_node = {} 15 | 16 | def count_path_utils(self, current_node, destination, visited, path_count, path, overall_paths, layer_number): 17 | """ 18 | Checks if we're at the destination and adds one to the count if we are; otherwise checks all the neighbours of the current node 19 | :param layer_number: Which layer number we're on 20 | :param overall_paths: List containing all the possible paths 21 | :param path: The current path (list) 22 | :param current_node: The node we're currently at 23 | :param destination: The end of the expected path 24 | :param visited: A dict which tracks whether each node has been visited 25 | :param path_count: Counts the number of paths from the designated start node to the end node 26 | :return: 27 | """ 28 | # We've visited the current node since we're at it 29 | visited[current_node] = True 30 | path.append(current_node) 31 | layer_number.append(1) 32 | if current_node not in self.max_layer_for_node: 33 | self.max_layer_for_node[current_node] = sum(layer_number) 34 | else: 35 | if sum(layer_number) > self.max_layer_for_node[current_node]: 36 | self.max_layer_for_node[current_node] = sum(layer_number) 37 | 38 | # If the current node is the destination then we can increase the path_count number 39 | if current_node == destination: 40 | path_count.append(1) 41 | overall_paths.append(copy.deepcopy(path)) 42 | else: 43 | # Go through all the neighbours looking for the destination 44 | if current_node in self.connections: 45 | for neighbour in self.connections[current_node]: 46 | # If we haven't visited the neighbour, look through the neighbour for the destination 47 | if neighbour in visited and not visited[neighbour]: 48 | # Call the function recursively 49 | self.count_path_utils(neighbour, destination, visited, path_count, path, overall_paths, 50 | layer_number) 51 | 52 | layer_number.pop() 53 | # Remove current vertex from path[] and mark it as unvisited 54 | path.pop() 55 | # Once we've checked all the neighbours we can set the visited flag to false again 56 | visited[current_node] = False 57 | 58 | def add_edge(self, start_node, end_node): 59 | connection_dict = self.connections.get(start_node) 60 | if connection_dict: 61 | connection_dict.append(end_node) 62 | else: 63 | self.connections[start_node] = [end_node] 64 | self.vertex_list.append(start_node) 65 | self.vertex_list.append(end_node) 66 | # Remove duplicates 67 | self.vertex_list = list(set(self.vertex_list)) 68 | 69 | def count_paths(self, start_node, end_node, return_paths=False): 70 | """ 71 | Count paths from start_node to end_node 72 | :param start_node: Where the path starts 73 | :param end_node: Where the end of the path is 74 | :return: 75 | """ 76 | 77 | # Keeps track of whether each node has been visited 78 | visited = {node: False for node in self.vertex_list} 79 | paths = [] 80 | overall_paths = [] 81 | path_count = [] 82 | layer_number = [] 83 | self.count_path_utils(start_node, end_node, visited, path_count, paths, overall_paths, layer_number) 84 | if return_paths: 85 | return sum(path_count), overall_paths 86 | else: 87 | return sum(path_count) 88 | 89 | 90 | def main(): 91 | g = GraphMultiClass() 92 | g.add_edge(2, 3) 93 | g.add_edge(3, 5) 94 | g.add_edge(1, 3) 95 | g.add_edge(1, 5) 96 | 97 | print(g.count_paths(2, 5, True)) 98 | print(g.count_paths(1, 5, True)) 99 | 100 | print(g.max_layer_for_node) 101 | 102 | 103 | if __name__ == "__main__": 104 | main() 105 | -------------------------------------------------------------------------------- /graphs/average_population_fitness_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/average_population_fitness_generation_5.jpg -------------------------------------------------------------------------------- /graphs/avg_num_disjoint_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/avg_num_disjoint_generation_5.jpg -------------------------------------------------------------------------------- /graphs/avg_num_excess_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/avg_num_excess_generation_5.jpg
-------------------------------------------------------------------------------- /graphs/avg_weight_diff_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/avg_weight_diff_generation_5.jpg -------------------------------------------------------------------------------- /graphs/best_all_time_genome_f1_score_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/best_all_time_genome_f1_score_generation_5.jpg -------------------------------------------------------------------------------- /graphs/best_all_time_genome_fitness_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/best_all_time_genome_fitness_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_compatibility_distance_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_compatibility_distance_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_number_connections_enabled_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_number_connections_enabled_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_number_connections_overall_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_number_connections_overall_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_number_nodes_enabled_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_number_nodes_enabled_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_add_connection_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_add_connection_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_add_node_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_add_node_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_delete_connection_generation_5.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_delete_connection_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_delete_node_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_delete_node_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_weight_mutations_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_weight_mutations_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_species_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_species_generation_5.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from NEAT import NEAT 2 | import time 3 | from config import Config 4 | from data_storage import get_circle_data, get_spiral_data 5 | from neural_network import create_data 6 | import numpy as np 7 | 8 | from read_mat_files import get_shm_two_class_data 9 | 10 | 11 | def main(): 12 | # Keep a consistent seed to make debugging easier TODO: Check if this works across files 13 | np.random.seed(1) 14 | 15 | algorithm_options = {0: 'xor_full', 1: 'xor_small_noise', 2: 'circle_data', 3: 'shm_two_class', 4: 'spiral_data'} 16 | # Choose which algorithm is running using keys 17 | algorithm_running = algorithm_options[3] 18 | 19 | if algorithm_running == algorithm_options[0]: 20 | num_data_to_generate = 6250 21 | 22 | # Create data 23 | x_data, y_data = create_data(n_generated=num_data_to_generate, add_noise=False) 24 | elif algorithm_running == algorithm_options[1]: 25 | num_data_to_generate = 300 26 | 27 | # Create data 28 | x_data, y_data = create_data(n_generated=num_data_to_generate, add_noise=True) 29 | elif algorithm_running == algorithm_options[2]: 30 | x_data, y_data = get_circle_data() 31 | x_data = x_data[:, 0:2] 32 | y_data.shape = (len(x_data), 1) 33 | for row in range(y_data.shape[0]): 34 | if y_data[row, 0] == -1: 35 | y_data[row, 0] = 0 36 | num_data_to_generate = len(x_data) 37 | elif algorithm_running == algorithm_options[3]: 38 | x_data, y_data = get_shm_two_class_data() 39 | num_data_to_generate = len(x_data) 40 | elif algorithm_running == algorithm_options[4]: 41 | x_data, y_data = get_spiral_data() 42 | x_data = x_data[:, 0:2] 43 | y_data.shape = (len(x_data), 1) 44 | for row in range(y_data.shape[0]): 45 | if y_data[row, 0] == -1: 46 | y_data[row, 0] = 0 47 | num_data_to_generate = len(x_data) 48 | 49 | # Training data 50 | training_percentage = 0.8 51 | training_upper_limit_index = round(num_data_to_generate * training_percentage) 52 | x_training = x_data[0:training_upper_limit_index] 53 | y_training = y_data[0:training_upper_limit_index] 54 | 55 | # Test data 56 | x_test = x_data[training_upper_limit_index:] 57 | y_test = y_data[training_upper_limit_index:] 58 | 59 | neat =
NEAT(x_training_data=x_training, y_training_data=y_training, x_test_data=x_test, y_test_data=y_test, 60 | config=Config, fitness_threshold=-0.000001, f1_score_threshold=0.95, algorithm_running=algorithm_running) 61 | 62 | start_evaluate_time = time.time() 63 | neat.run(max_num_generations=10000, use_backprop=True, print_generation_information=True, 64 | show_population_weight_distribution=False) 65 | end_evaluate_time = time.time() 66 | total_time = end_evaluate_time - start_evaluate_time 67 | print(total_time) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /main_multiclass.py: -------------------------------------------------------------------------------- 1 | from NEAT_multiclass import NEATMultiClass 2 | from config_multiclass import ConfigMultiClass 3 | from data_storage import get_circle_data 4 | from neural_network import create_data 5 | import numpy as np 6 | from read_mat_files import get_shm_two_class_data, get_shm_multi_class_data 7 | 8 | 9 | def main(): 10 | np.random.seed(1) 11 | 12 | # Choose which algorithm is running using keys 13 | algorithm_options = {0: 'xor_full', 1: 'shm_multi_class'} 14 | algorithm_running = algorithm_options[1] 15 | 16 | if algorithm_running == algorithm_options[0]: 17 | num_data_to_generate = 6250 18 | 19 | # Create data 20 | x_data, y_data = create_data(n_generated=num_data_to_generate, add_noise=False, use_one_hot=True) 21 | elif algorithm_running == algorithm_options[1]: 22 | # Create data 23 | x_data, y_data = get_shm_multi_class_data() 24 | num_data_to_generate = len(x_data) 25 | 26 | # Training data 27 | training_percentage = 0.8 28 | training_upper_limit_index = round(num_data_to_generate * training_percentage) 29 | x_training = x_data[0:training_upper_limit_index] 30 | y_training = y_data[0:training_upper_limit_index] 31 | 32 | # Test data 33 | x_test = x_data[training_upper_limit_index:] 34 | y_test = y_data[training_upper_limit_index:] 35 | 36 | f1_score_threshold = 0.95 if algorithm_running != algorithm_options[1] else None 37 | fitness_threshold = -0.1 if algorithm_running != algorithm_options[1] else None 38 | 39 | neat = NEATMultiClass(x_training_data=x_training, y_training_data=y_training, x_test_data=x_test, 40 | y_test_data=y_test, 41 | config=ConfigMultiClass, fitness_threshold=fitness_threshold, 42 | f1_score_threshold=f1_score_threshold, 43 | algorithm_running=algorithm_running) 44 | 45 | neat.run(max_num_generations=250, use_backprop=True, print_generation_information=True, 46 | show_population_weight_distribution=False) 47 | 48 | 49 | if __name__ == "__main__": 50 | main() 51 | -------------------------------------------------------------------------------- /multi_processing.py: -------------------------------------------------------------------------------- 1 | import time 2 | import multiprocessing 3 | 4 | 5 | def calc_square(numbers, result): 6 | print('Calculating square numbers') 7 | for number in numbers: 8 | time.sleep(0.2) 9 | print('square', number * number) 10 | 11 | 12 | def calc_cube(numbers, result, value, queue): 13 | print('Calculate cube of numbers') 14 | value.value = 5 15 | for index, number in enumerate(numbers): 16 | time.sleep(0.2) 17 | queue.put(number * number * number) 18 | result[index] = number * number * number 19 | 20 | 21 | def main(): 22 | # SHARED MEMORY CONCEPT 23 | arr = [2, 3, 8, 9] 24 | start_time = time.time() 25 | # We have to create a shared memory variable. 
Specify the data type and size 26 | result = multiprocessing.Array('i', 4) 27 | value = multiprocessing.Value('i', 0) 28 | queue = multiprocessing.Queue() 29 | # p1 = multiprocessing.Process(target=calc_square, args=(arr,)) 30 | p2 = multiprocessing.Process(target=calc_cube, args=(arr, result, value, queue)) 31 | 32 | # p1.start() 33 | p2.start() 34 | 35 | # p1.join() 36 | p2.join() 37 | 38 | while not queue.empty(): 39 | print('Getting') 40 | print(queue.get()) 41 | print(result[:]) 42 | print(value.value) 43 | print("done in: {}".format(time.time() - start_time)) 44 | print('Done with everything') 45 | 46 | 47 | import time 48 | import multiprocessing 49 | 50 | 51 | def deposit(balance, lock): 52 | for i in range(100): 53 | time.sleep(0.01) 54 | lock.acquire() 55 | balance.value = balance.value + 1 56 | lock.release() 57 | 58 | 59 | def withdraw(balance, lock): 60 | for i in range(100): 61 | time.sleep(0.01) 62 | lock.acquire() 63 | balance.value = balance.value - 1 64 | lock.release() 65 | 66 | 67 | def main2(): 68 | # LOCK CONCEPT 69 | balance = multiprocessing.Value('i', 200) 70 | # The lock ensures only one process reads and updates the shared balance at a time; without it, 71 | # both processes could read the same stale value and one of the updates would be lost 72 | lock = multiprocessing.Lock() 73 | d = multiprocessing.Process(target=deposit, args=(balance, lock)) 74 | w = multiprocessing.Process(target=withdraw, args=(balance, lock)) 75 | d.start() 76 | w.start() 77 | d.join() 78 | w.join() 79 | print(balance.value) 80 | 81 | 82 | from multiprocessing import Pool 83 | 84 | 85 | def f(n): 86 | return n * n 87 | 88 | 89 | def main3(): 90 | # MAP AND REDUCE CONCEPT 91 | array = [1, 2, 3, 4, 5, 6] 92 | 93 | start_time = time.time() 94 | p = Pool() 95 | result = p.map(f, array) 96 | end_time = time.time() - start_time 97 | print(result, end_time) 98 | 99 | start_time = time.time() 100 | squared = [] 101 | for n in array: 102 | time.sleep(5) # artificial delay; remove for a fair serial-vs-pool comparison 103 | squared.append(n * n) 104 | end_time = time.time() - start_time 105 | print(squared, end_time) 106 | 107 | 108 | if __name__ == '__main__': 109 | # main() 110 | # main2() 111 | main3() 112 | -------------------------------------------------------------------------------- /multi_threading.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | 4 | 5 | def calc_square(numbers): 6 | print('Calculating square numbers') 7 | for number in numbers: 8 | time.sleep(0.2) 9 | print('square', number * number) 10 | 11 | 12 | def calc_cube(numbers): 13 | print('Calculate cube of numbers') 14 | for number in numbers: 15 | time.sleep(0.2) 16 | print('cube', number * number * number) 17 | 18 | 19 | def main(): 20 | arr = [2, 3, 8, 9] 21 | start_time = time.time() 22 | t1 = threading.Thread(target=calc_square, args=(arr,)) 23 | t2 = threading.Thread(target=calc_cube, args=(arr,)) 24 | 25 | t1.start() 26 | t2.start() 27 | 28 | t1.join() 29 | t2.join() 30 | print("done in: {}".format(time.time() - start_time)) 31 | print('Done with everything') 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 |
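Both scratch files above time the same sleep-based toy workload, which threads handle well because time.sleep releases the GIL. The usual rule of thumb is that threads help I/O-bound work while multiprocessing is needed for CPU-bound work, since the GIL lets only one thread execute Python bytecode at a time. A minimal sketch contrasting the two on a CPU-bound loop (timings are indicative only; busy and compare are hypothetical names):

import time
import threading
import multiprocessing

def busy(n):
    # CPU-bound work: the GIL prevents two threads from overlapping this.
    total = 0
    for i in range(n):
        total += i * i
    return total

def compare():
    threads = [threading.Thread(target=busy, args=(5_000_000,)) for _ in range(2)]
    start = time.time()
    for t in threads: t.start()
    for t in threads: t.join()
    print('threads:', time.time() - start)

    processes = [multiprocessing.Process(target=busy, args=(5_000_000,)) for _ in range(2)]
    start = time.time()
    for p in processes: p.start()
    for p in processes: p.join()
    print('processes:', time.time() - start)

if __name__ == '__main__':
    compare()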
-------------------------------------------------------------------------------- /neural_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from neural_network_components import * 4 | 5 | 6 | class NeuralNetwork: 7 | 8 | def __init__(self, x_train, y_train, layer_sizes, activation_function_dict, learning_rate=0.0001, 9 | num_epochs=1000, batch_size=64): 10 | self.x_train = x_train 11 | self.y_train = y_train 12 | self.batch_size = batch_size 13 | self.weights_dict = {} 14 | self.bias_dict = {} 15 | self.layer_sizes = layer_sizes 16 | self.activation_function_dict = activation_function_dict 17 | self.learning_rate = learning_rate 18 | self.num_epochs = num_epochs 19 | self.num_layers = len(self.layer_sizes) - 1 20 | 21 | # Layer sizes should be a list with the number of hidden nodes per layer. The first number in the list should be the number of 22 | # features, and the last number should always be one because there is one output. 23 | assert (layer_sizes[len(layer_sizes) - 1] == 1) 24 | assert (layer_sizes[0] == x_train.shape[1]) 25 | 26 | # This is to check that there is an activation function specified for each layer 27 | assert (len(layer_sizes[1:len(layer_sizes)]) == len(activation_function_dict)) 28 | 29 | # The activation function for the last layer should be a sigmoid due to how the gradients were calculated 30 | assert (activation_function_dict[len(layer_sizes) - 1] == ActivationFunctions.sigmoid) 31 | 32 | self.initialise_parameters(have_bias=True) 33 | 34 | @staticmethod 35 | def xavier_initalizer(num_inputs, num_outputs): 36 | """ 37 | NOTE: if using ReLU then use the constant 2 instead of 1 inside the square root (He initialisation) 38 | """ 39 | np.random.seed(7) # NOTE: re-seeding on every call means layers with the same shape get identical weights 40 | weights = np.random.randn(num_inputs, num_outputs) * np.sqrt(1 / num_inputs) 41 | 42 | return weights 43 | 44 | def initialise_parameters(self, have_bias=False): 45 | """ 46 | :param have_bias: Indicates whether to initialise a bias parameter as well 47 | """ 48 | # Initialise parameters 49 | for index in range(1, self.num_layers + 1): 50 | # Index + 1 because we want layer numbers to start at 1. Index - 1 because the number of inputs is the number of 51 | # features from the last layer.
52 | self.weights_dict[index] = self.xavier_initalizer(num_inputs=self.layer_sizes[index - 1], 53 | num_outputs=self.layer_sizes[index]) 54 | 55 | if have_bias: 56 | # Shape is (1, num_outputs) for the layer 57 | self.bias_dict[index] = np.zeros((1, self.layer_sizes[index])) 58 | 59 | def run_one_pass(self, input_data, labels): 60 | """ 61 | One pass counts as one forward propagation and one backward propagation, including the optimisation of the 62 | parameters 63 | :return: The cost for the current step 64 | """ 65 | 66 | n_examples = input_data.shape[0] 67 | 68 | prediction, layer_input_dict = ForwardProp.forward_prop(num_layers=self.num_layers, initial_input=input_data, 69 | layer_weights=self.weights_dict, 70 | layer_activation_functions=self.activation_function_dict) 71 | 72 | # Asserting that the prediction gives the same number of outputs as expected 73 | assert (labels.shape[0] == prediction.shape[0]) 74 | 75 | # Back-propagate to get the weight and bias gradients 76 | weight_gradients, bias_gradients = BackProp.back_prop(num_layers=self.num_layers, 77 | layer_inputs=layer_input_dict, 78 | layer_weights=self.weights_dict, 79 | layer_activation_functions=self.activation_function_dict, 80 | expected_y=labels, predicted_y=prediction) 81 | 82 | self.optimise_parameters(weight_gradients=weight_gradients, bias_gradients=bias_gradients) 83 | 84 | # Define the cost function (the epsilon inside the logs guards against log(0)) 85 | loss = -((labels * np.log(prediction + 1e-8)) + ((1 - labels) * np.log(1 - prediction + 1e-8))) 86 | cost = (1 / n_examples) * np.sum(loss, axis=0) 87 | 88 | return cost[0] 89 | 90 | def optimise_parameters(self, weight_gradients, bias_gradients=None): 91 | """ 92 | :param weight_gradients: Dictionary containing weight gradients for each layer 93 | :param bias_gradients: Dictionary containing bias gradients for each layer 94 | """ 95 | 96 | for layer_number in weight_gradients: 97 | self.weights_dict[layer_number] = self.weights_dict[layer_number] - ( 98 | self.learning_rate * weight_gradients[layer_number]) 99 | 100 | if bias_gradients is not None: 101 | self.bias_dict[layer_number] = self.bias_dict[layer_number] - ( 102 | self.learning_rate * bias_gradients[layer_number]) 103 | 104 | def optimise(self, print_epoch_cost, error_stop=None): 105 | """ 106 | Train the neural network 107 | :return: a list of each epoch with the cost associated with it 108 | """ 109 | 110 | epoch_list = [] 111 | cost_list = [] 112 | 113 | for epoch in range(self.num_epochs): 114 | 115 | for batch_start in range(0, self.x_train.shape[0], self.batch_size): 116 | current_batch = self.x_train[batch_start:batch_start + self.batch_size, :] 117 | current_labels = self.y_train[batch_start:batch_start + self.batch_size, :] 118 | 119 | epoch_cost = self.run_one_pass(input_data=current_batch, labels=current_labels) 120 | 121 | epoch_list.append(epoch) 122 | cost_list.append(epoch_cost) 123 | 124 | # Finish early if it is optimised to a certain error 125 | if error_stop and epoch_cost < error_stop: 126 | break 127 | 128 | if print_epoch_cost: 129 | print('EPOCH:', epoch, 'Cost:', round(epoch_cost, 3)) # note: this is the cost of the final batch in the epoch 130 | 131 | return epoch_list, cost_list 132 | 133 |
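Hand-rolled backprop like run_one_pass above is easiest to validate with a central-difference gradient check. The sketch below is generic and assumes a loss(weights_dict) callable that runs a forward pass and returns the scalar cost, plus the analytic gradients from back_prop; neither the helper name nor the callables are part of the repo:

import numpy as np

def numerical_gradient_check(loss, weights_dict, analytic_grads, epsilon=1e-5):
    # Hypothetical central-difference check: perturb each weight in turn and
    # compare the numerical slope with the analytic gradient from back-prop.
    for layer, weights in weights_dict.items():
        numeric = np.zeros_like(weights)
        for index in np.ndindex(weights.shape):
            original = weights[index]
            weights[index] = original + epsilon
            cost_plus = loss(weights_dict)
            weights[index] = original - epsilon
            cost_minus = loss(weights_dict)
            weights[index] = original
            numeric[index] = (cost_plus - cost_minus) / (2 * epsilon)
        max_diff = np.max(np.abs(numeric - analytic_grads[layer]))
        print('layer', layer, 'max gradient difference:', max_diff)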
134 | def create_architecture(num_features_training, hidden_nodes_per_layer): 135 | assert (isinstance(hidden_nodes_per_layer, list)) 136 | # See NeuralNetwork class for reasoning for this layout 137 | return [num_features_training] + hidden_nodes_per_layer + [1] 138 | 139 | 140 | def create_data(n_generated, add_noise=False, use_one_hot=False): 141 | if add_noise: 142 | x_data = np.random.uniform(low=0.0, high=1.0, size=(n_generated, 2)) 143 | y_data = np.empty((n_generated, 1)) 144 | # Label is 1 when the two (rounded) features match -- note this is XNOR, the inverse of XOR 145 | for column in range(x_data.shape[0]): 146 | x_feature_1 = round(x_data[column, 0]) 147 | x_feature_2 = round(x_data[column, 1]) 148 | y_data[column] = (x_feature_1 == 1 and x_feature_2 == 1) or ( 149 | x_feature_1 == 0 and x_feature_2 == 0) 150 | else: 151 | x_data = np.random.randint(2, size=(n_generated, 2)) 152 | y_data = np.empty((n_generated, 1)) 153 | # Label is 1 when the two features match -- note this is XNOR, the inverse of XOR 154 | for column in range(x_data.shape[0]): 155 | y_data[column] = ((x_data[column, 0] == 1 and x_data[column, 1] == 1) or ( 156 | x_data[column, 0] == 0 and x_data[column, 1] == 0)) 157 | 158 | if use_one_hot: 159 | y_data_new = np.empty((y_data.shape[0], 2)) 160 | for row in range(y_data.shape[0]): 161 | if y_data[row, 0] == 0: 162 | y_data_new[row, 0] = 1 163 | y_data_new[row, 1] = 0 164 | else: 165 | y_data_new[row, 0] = 0 166 | y_data_new[row, 1] = 1 167 | 168 | return x_data, y_data_new 169 | return x_data, y_data 170 | 171 | 172 | def main(): 173 | # Test and Train data 174 | data_train, labels_train = create_data(n_generated=5000) 175 | 176 | num_features = data_train.shape[1] 177 | 178 | # Two hidden layers with 2 nodes each (the full architecture becomes [num_features, 2, 2, 1]) 179 | desired_architecture = [2, 2] 180 | nn_architecture = create_architecture(num_features, desired_architecture) 181 | 182 | # Defines the activation functions used for each layer 183 | activations_dict = {1: ActivationFunctions.relu, 2: ActivationFunctions.relu, 3: ActivationFunctions.sigmoid} 184 | 185 | neural_network = NeuralNetwork(x_train=data_train, y_train=labels_train, layer_sizes=nn_architecture, 186 | activation_function_dict=activations_dict, learning_rate=0.1, num_epochs=1000) 187 | 188 | epochs, cost = neural_network.optimise(error_stop=0.09, print_epoch_cost=True) 189 | 190 | plt.plot(epochs, cost) 191 | plt.show() 192 | 193 | 194 | if __name__ == '__main__': 195 | main() 196 |
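As the comments in create_data above note, the labelling is actually XNOR: the label is 1 exactly when the two binary features are equal. A quick self-contained check of that convention (to be run separately):

import numpy as np
from neural_network import create_data

# Verify the inputs-match (XNOR) labelling convention for the noiseless case.
x, y = create_data(n_generated=8)
for features, label in zip(x, y):
    assert label[0] == float(features[0] == features[1])
print('labels follow the inputs-match (XNOR) convention')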
46 | class ForwardProp:
47 | 
48 |     @staticmethod
49 |     def compute_layer(input_data, weights, bias=None):
50 |         # Need to ensure there are enough weights for the number of features in the input data
51 |         assert (input_data.shape[1] == weights.shape[0])
52 | 
53 |         if bias is not None:
54 |             # Need to ensure that there is a bias term for each hidden node
55 |             assert (weights.shape[1] == bias.shape[1])
56 |             # Broadcast the (1, num_outputs) bias row across every example in the batch
57 |             broadcasted_bias = np.broadcast_to(bias, (input_data.shape[0], bias.shape[1]))
58 | 
59 |         return np.dot(input_data, weights) + broadcasted_bias if bias is not None else np.dot(input_data, weights)
60 | 
61 |     @staticmethod
62 |     def ensure_no_activation_applied(output_without_activation, output_with_activation, constant_connections,
63 |                                      current_layer, node_map):
64 |         """
65 |         Ensures the activation function isn't applied to the nodes which are dummy nodes
66 |         :param output_without_activation: The output before the activation function was applied
67 |         :param output_with_activation: The output with the activation function applied
68 |         :param constant_connections: The connections for which NO activation function should be applied
69 |         :param current_layer: The current layer we're calculating in
70 |         :param node_map: A dictionary of which number each node is in their respective layer
71 |         :return: The activated output, with the constant-connection columns restored to their pre-activation values
72 |         """
73 |         # Keep the pre-activation values for the output nodes of the constant connections
74 |         for connection in constant_connections[current_layer]:
75 |             # Need to convert to their position in the layer. Minus one because of python indexing
76 |             output_position_within_layer = node_map[connection.output_node] - 1
77 |             # The output node position is the node which shouldn't have any activations applied. So we use all the
78 |             # values from before the activation was applied
79 |             output_with_activation[:, output_position_within_layer] = \
80 |                 output_without_activation[
81 |                 :, output_position_within_layer]
82 | 
83 |         return output_with_activation
84 | 
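    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): compute_layer
    # above evaluates input_data @ weights (+ bias broadcast over the rows).
    # A hypothetical worked example:
    #
    #     >>> X = np.array([[1.0, 2.0]])       # one example, two features
    #     >>> W = np.array([[0.5], [0.25]])    # two inputs -> one output node
    #     >>> ForwardProp.compute_layer(X, W)  # 1*0.5 + 2*0.25
    #     array([[1.]])
    # ---------------------------------------------------------------------------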
85 |     @staticmethod
86 |     def genome_forward_prop(num_layers, initial_input, layer_weights, keep_constant_connections, node_map,
87 |                             layer_activation_functions, layer_biases):
88 |         """
89 |         Forward propagation for genome networks, where some 'dummy' node outputs must pass through unactivated
90 |         :param node_map: A dict for each node which shows which number node they are in their respective layer
91 |         :param keep_constant_connections: The connections, grouped by layer, which should remain constant with no activation function applied
92 |         :param layer_activation_functions: The activation functions to be used on each layer. Should be a reference to the function at each key.
93 |         :param layer_biases: the biases associated with every layer. Is of type dict
94 |         :param num_layers: number of layers for the neural network
95 |         :param initial_input: the input data
96 |         :param layer_weights: the weights associated with every layer contained in a dictionary with the key being which layer they are for (starting at 1)
97 |         :return: the output vector after the forward propagation
98 |         """
99 | 
100 |         # This is done to ensure I can use the .get function later to return None
101 |         if layer_biases is None:
102 |             layer_biases = {}
103 |         else:
104 |             assert (len(layer_biases) == num_layers)
105 | 
106 |         if layer_activation_functions is None:
107 |             layer_activation_functions = {}
108 |         else:
109 |             assert (len(layer_activation_functions) == num_layers)
110 | 
111 |         assert (isinstance(layer_weights, dict))
112 |         assert (len(layer_weights) == num_layers)
113 | 
114 |         # Dictionary to keep track of inputs for each layer
115 |         layer_input_dict = {}
116 | 
117 |         current_input = initial_input
118 |         for current_layer_number in range(1, num_layers + 1):
119 |             # Get weights for current layer
120 |             current_weights = layer_weights[current_layer_number]
121 | 
122 |             # Get bias vector for current layer. If there is no bias for that layer this returns None
123 |             current_bias = layer_biases.get(current_layer_number, None)
124 | 
125 |             # Get current activation function for the layer
126 |             current_activation_function = layer_activation_functions.get(current_layer_number, None)
127 | 
128 |             # Get output matrix for current_layer
129 |             output = ForwardProp.compute_layer(current_input, current_weights, current_bias)
130 | 
131 |             # If there is an activation function for the layer
132 |             if current_activation_function:
133 |                 saved_output = output
134 |                 output = current_activation_function(output)
135 |                 output = ForwardProp.ensure_no_activation_applied(output_without_activation=saved_output,
136 |                                                                   output_with_activation=output, node_map=node_map,
137 |                                                                   constant_connections=keep_constant_connections,
138 |                                                                   current_layer=current_layer_number)
139 | 
140 |             layer_input_dict[current_layer_number] = current_input
141 | 
142 |             # The input into the next layer becomes the output from the previous layer
143 |             current_input = output
144 | 
145 |         return current_input, layer_input_dict
146 | 
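    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): a hypothetical
    # call of the plain forward_prop defined below, for a 2-input -> 1-output
    # network with a single sigmoid layer:
    #
    #     >>> weights = {1: np.array([[0.5], [0.5]])}
    #     >>> activations = {1: ActivationFunctions.sigmoid}
    #     >>> out, inputs = ForwardProp.forward_prop(num_layers=1,
    #     ...                                        initial_input=np.array([[1.0, 1.0]]),
    #     ...                                        layer_weights=weights,
    ...                                        
    #     ...                                        layer_activation_functions=activations)
    #     >>> out                              # sigmoid(1.0) ~= 0.731
    #     array([[0.73105858]])
    # ---------------------------------------------------------------------------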
147 |     @staticmethod
148 |     def forward_prop(num_layers, initial_input, layer_weights, layer_activation_functions=None, layer_biases=None,
149 |                      return_number_before_last_activation=False):
150 |         """
151 |         :param return_number_before_last_activation: If you want the raw output number instead of the sigmoid applied to it
152 |         :param layer_activation_functions: The activation functions to be used on each layer. Should be a reference to the function at each key.
153 |         :param layer_biases: the biases associated with every layer. Is of type dict
154 |         :param num_layers: number of layers for the neural network
155 |         :param initial_input: the input data
156 |         :param layer_weights: the weights associated with every layer contained in a dictionary with the key being which layer they are for (starting at 1)
157 |         :return: the output vector after the forward propagation
158 |         """
159 | 
160 |         # This is done to ensure I can use the .get function later to return None
161 |         if layer_biases is None:
162 |             layer_biases = {}
163 |         else:
164 |             assert (len(layer_biases) == num_layers)
165 | 
166 |         if layer_activation_functions is None:
167 |             layer_activation_functions = {}
168 |         else:
169 |             assert (len(layer_activation_functions) == num_layers)
170 | 
171 |         assert (isinstance(layer_weights, dict))
172 |         assert (len(layer_weights) == num_layers)
173 | 
174 |         # Dictionary to keep track of inputs for each layer
175 |         layer_input_dict = {}
176 | 
177 |         current_input = initial_input
178 |         for current_layer_number in range(1, num_layers + 1):
179 |             # Get weights for current layer
180 |             current_weights = layer_weights[current_layer_number]
181 | 
182 |             # Get bias vector for current layer. If there is no bias for that layer this returns None
183 |             current_bias = layer_biases.get(current_layer_number, None)
184 | 
185 |             # Get current activation function for the layer
186 |             current_activation_function = layer_activation_functions.get(current_layer_number, None)
187 |             # Get output matrix for current_layer
188 |             output = ForwardProp.compute_layer(current_input, current_weights, current_bias)
189 | 
190 |             # If you want to return the output before the sigmoid is applied on the last layer
191 |             if return_number_before_last_activation and current_layer_number == num_layers:
192 |                 return output
193 | 
194 |             # If there is an activation function for the layer
195 |             if current_activation_function:
196 |                 output = current_activation_function(output)
197 | 
198 |             layer_input_dict[current_layer_number] = current_input
199 | 
200 |             # The input into the next layer becomes the output from the previous layer
201 |             current_input = output
202 | 
203 |         return current_input, layer_input_dict
204 | 
205 | 
206 | class BackProp:
207 | 
208 |     def compute_layer_gradients(self):
209 |         pass
210 | 
211 |     @staticmethod
212 |     def back_prop(num_layers, layer_inputs, layer_weights, layer_activation_functions, expected_y, predicted_y):
213 |         """
214 |         :param layer_activation_functions: activation functions for each layer
215 |         :param num_layers: number of layers
216 |         :param layer_inputs: the inputs calculated for each layer. The key 1 should return the initial data we put in.
217 |         :param layer_weights: the weights used in each layer of the neural network
218 |         :param expected_y: what the expected value of the output should be. I.e. the real data
219 |         :param predicted_y: What the neural network put out
220 |         :return: two dicts, one for the weights and one for the biases, which contain the gradients for each layer
221 |         """
222 | 
223 |         assert (isinstance(layer_inputs, dict))
224 |         assert (isinstance(layer_weights, dict))
225 |         assert (isinstance(layer_activation_functions, dict))
226 | 
227 |         # Ensure information is defined for every layer
228 |         assert (len(layer_activation_functions) == num_layers)
229 |         assert (len(layer_weights) == num_layers)
230 |         assert (len(layer_inputs) == num_layers)
231 | 
232 |         weight_gradients = {}
233 |         bias_gradients = {}
234 | 
235 |         # Key 1 in layer_inputs should contain the initial data input. So the shape 0 will return number of rows,
236 |         # hence the number of examples
237 |         n_examples = layer_inputs[1].shape[0]
238 | 
239 |         # This assumes that we will always use a SIGMOID activation function for the last output
240 |         dz_last = (predicted_y - expected_y) * (1 / n_examples)
241 |         # num_layers because we want the inputs into the last layer
242 |         dw_last = np.dot(layer_inputs[num_layers].T, dz_last)
243 |         db_last = np.sum(dz_last)
244 | 
245 |         # Set gradients for final layer
246 |         weight_gradients[num_layers] = dw_last
247 |         bias_gradients[num_layers] = db_last
248 | 
249 |         current_dz = dz_last
250 |         # Have to go backwards in layer numbers. Start at num_layers - 1 because the last layer is always the same
251 |         # code as above
252 |         for current_layer_number in range(num_layers - 1, 0, -1):
253 |             # Get the activation gradient function for the current activation function
254 |             current_activation_gradient_function = ActivationFunctions.get_activation_gradient(
255 |                 layer_activation_functions[current_layer_number])
256 | 
257 |             current_dz, current_dw, current_db = BackProp.compute_gradient(next_layer_dz=current_dz,
258 |                                                                            next_layer_weights=layer_weights[
259 |                                                                                current_layer_number + 1],
260 |                                                                            activation_gradient_function=current_activation_gradient_function,
261 |                                                                            current_layer_input=layer_inputs[
262 |                                                                                current_layer_number],
263 |                                                                            next_layer_inputs=layer_inputs[
264 |                                                                                current_layer_number + 1])
265 | 
266 |             # Store information of gradients for each layer
267 |             weight_gradients[current_layer_number] = current_dw
268 |             bias_gradients[current_layer_number] = current_db
269 | 
270 |         return weight_gradients, bias_gradients
271 | 
272 |     @staticmethod
273 |     def compute_gradient(next_layer_dz, next_layer_weights, activation_gradient_function, current_layer_input,
274 |                          next_layer_inputs):
275 |         # Calculate dZ for the current layer (the caller passes in the weights and inputs of the next layer)
276 |         dz_current = np.dot(next_layer_dz,
277 |                             next_layer_weights.T) * activation_gradient_function(next_layer_inputs)
278 | 
279 |         current_dw = np.dot(current_layer_input.T, dz_current)
280 |         current_db = np.sum(dz_current, axis=0)
281 | 
282 |         return dz_current, current_dw, current_db
--------------------------------------------------------------------------------
/pickles/best_genome_pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_-1.7547243063454208:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_-1.7547243063454208
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_0.0328201636785844:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_0.0328201636785844
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_0.40751628299650783:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_0.40751628299650783
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_circle_data_8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_circle_data_8
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_shm_two_class_618056:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_shm_two_class_618056
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_shm_two_class_8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_shm_two_class_8
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_xor_full_132164:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_xor_full_132164
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_xor_full_351148:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_xor_full_351148
--------------------------------------------------------------------------------
/pickles/genome_circle_data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/genome_circle_data
--------------------------------------------------------------------------------
/pickles/genome_noise_trained:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/genome_noise_trained
--------------------------------------------------------------------------------
/read_mat_files.py:
--------------------------------------------------------------------------------
1 | import scipy.io as sio
2 | from sklearn.decomposition import PCA
3 | import numpy as np
4 | 
5 | 
6 | def get_shm_multi_class_data():
7 |     mat_contents = sio.loadmat('C:/Users/tsdev/Desktop/SHM DATA/4dof_features.mat')
8 |     y_data = mat_contents['labels'][:, 0]
9 |     y_data.shape = (y_data.shape[0])
10 |     x_data = mat_contents['multi_class_feats']
11 | 
12 |     shuffled_data = np.empty([x_data.shape[0], 11])
13 |     shuffled_data[:, 0:10] = x_data
14 |     shuffled_data[:, 10] = y_data
15 |     # Shuffle data because it was ordered before by class
16 |     np.random.shuffle(shuffled_data)
17 | 
18 |     x_data = shuffled_data[:, 0:10]
19 |     y_data = shuffled_data[:, 10]
20 |     y_data.shape = (y_data.shape[0], 1)
21 | 
22 |     y_data_one_hot = np.zeros((y_data.shape[0], 17))
23 |     for row in range(y_data.shape[0]):
24 |         label = int(y_data[row, 0])
25 |         # label - 1 for indexing reasons, for example label = 1 means that the first column (index = 0) is the one with the value one
26 |         y_data_one_hot[row, label - 1] = 1
27 | 
28 |     # if normalise_x:
29 |     #     # We perform these operations because for this data, the values are too large or negative, causing issues
30 |     #     # during optimisation otherwise
31 |     #     x_data = x_data * -1
32 |     #     x_data = x_data / 100
33 | 
34 |     return x_data, y_data_one_hot
35 | 
36 | 
37 | def get_shm_two_class_data(normalise_x=True):
38 |     mat_contents = sio.loadmat('C:/Users/tsdev/Desktop/SHM DATA/4dof_features.mat')
39 |     y_data = mat_contents['labels'][:, 1]
40 |     y_data.shape = (y_data.shape[0])
41 |     x_data = mat_contents['two_class_feats']
42 | 
43 |     shuffled_data = np.empty([x_data.shape[0], 4])
44 |     shuffled_data[:, 0:3] = x_data
45 |     shuffled_data[:, 3] = y_data
46 |     # Shuffle data because it was ordered before by class
47 |     np.random.shuffle(shuffled_data)
48 | 
49 |     x_data = shuffled_data[:, 0:3]
50 |     y_data = shuffled_data[:, 3]
51 |     y_data.shape = (y_data.shape[0], 1)
52 | 
53 |     shuffle_check = y_data[0:400, :]
54 |     unique, counts = np.unique(shuffle_check, return_counts=True)
55 |     shuffle_check_length = len(shuffle_check)
56 |     class_1_percentage = counts[0] / shuffle_check_length * 100
57 |     class_2_percentage = counts[1] / shuffle_check_length * 100
58 | 
59 |     if normalise_x:
60 |         # We perform these operations because for this data, the values are too large or negative, causing issues
61 |         # during optimisation otherwise
62 |         x_data = x_data * -1
63 |         x_data = x_data / 100
64 | 
65 |     if class_1_percentage < 40 or class_2_percentage < 40:
66 |         raise ValueError('Imbalanced classes due to shuffle, please re-initialise')
67 | 
68 |     return x_data, y_data
69 | 
70 | 
71 | def main():
72 |     x_data, y_data = get_shm_two_class_data()
73 |     assert (y_data.shape[1] == 1)
74 | 
75 |     x_data_multi, y_data_multi = get_shm_multi_class_data()
76 |     print(x_data_multi.shape)
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 
--------------------------------------------------------------------------------
/reproduce.py:
--------------------------------------------------------------------------------
1 | from gene import NodeGene, ConnectionGene
2 | from genome import Genome
3 | from species import SpeciesSet
4 | import random
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import seaborn as sns
8 | import math
9 | import copy
10 | 
11 | 
12 | 
13 | class Reproduce:
14 |     """
15 |     Will handle reproduction of the genomes
16 |     """
17 | 
18 |     def __init__(self, stagnation, config):
19 |         self.global_innovation_number = 0
20 |         self.stagnation = stagnation
21 |         self.ancestors = {}
22 |         self.genome_indexer = 0
23 |         self.config = config
24 |         # Key: The tuple of the connection e.g. (1,3) value: the innovation number
25 |         self.innovation_tracker = {}
26 | 
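    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the
    # innovation_tracker gives every structural innovation a stable id, so the
    # same connection tuple always maps to the same innovation number across
    # genomes. A hypothetical trace:
    #
    #     tracker = {}; innovation = 0
    #     for conn in [(0, 2), (1, 2), (0, 2)]:
    #         if conn not in tracker:
    #             innovation += 1
    #             tracker[conn] = innovation
    #     # tracker == {(0, 2): 1, (1, 2): 2} - the repeated (0, 2) re-uses id 1
    # ---------------------------------------------------------------------------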
27 |     def create_new_population(self, population_size, num_features):
28 |         population = {}
29 | 
30 |         node_list = []
31 |         connection_list = []
32 |         # Create the source nodes
33 |         for node in range(num_features):
34 |             node_list.append(NodeGene(node_id=node, node_type='source'))
35 | 
36 |         # Add the output node (There is only one in this case)
37 |         node_list.append(NodeGene(node_id=num_features, node_type='output', bias=1))
38 | 
39 |         # Save the innovations for the first generation
40 |         for source_node_id in range(num_features):
41 |             # Increment for the new innovation
42 |             self.global_innovation_number += 1
43 |             # The output node will always have the node_id equal to the number of features
44 |             self.innovation_tracker[(source_node_id, num_features)] = self.global_innovation_number
45 | 
46 |         # For each feature there will be a connection to the output
47 |         for i in range(num_features):
48 |             connection = (i, num_features)
49 |             # The connection was already saved, so this should return true
50 |             assert (connection in self.innovation_tracker)
51 |             connection_list.append(ConnectionGene(input_node=i, output_node=num_features,
52 |                                                   innovation_number=self.innovation_tracker[connection], enabled=True))
53 | 
54 |         # Create a population of size population_size
55 |         for index in range(population_size):
56 |             # Deep copies, otherwise changing the connection weight changes it for every genome that has the same
57 |             # reference to the class
58 |             deep_copy_connections = copy.deepcopy(connection_list)
59 |             deep_copy_nodes = copy.deepcopy(node_list)
60 |             # Set all the connections to a random weight for each genome
61 |             for connection in deep_copy_connections:
62 |                 connection.weight = np.random.randn()
63 |             # Increment since the index value has been assigned
64 |             self.genome_indexer += 1
65 | 
66 |             # Create the genome
67 |             population[index] = Genome(connections=deep_copy_connections, nodes=deep_copy_nodes,
68 |                                        key=self.genome_indexer)
69 | 
70 |         self.show_population_weight_distribution(population=population)
71 | 
72 |         return population
73 | 
74 |     @staticmethod
75 |     def show_population_weight_distribution(population):
76 |         # See the spread of starting weights
77 |         list_of_weights = []
78 |         for genome in population.values():
79 |             for connection in genome.connections.values():
80 |                 list_of_weights.append(connection.weight)
81 | 
82 |         sns.distplot(list_of_weights)
83 |         plt.title('Weight distribution of connections in each population member')
84 |         plt.xlabel('Connection weight values')
85 |         plt.show()
86 | 
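    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the method
    # below allocates offspring roughly proportionally to each species'
    # adjusted fitness, then rescales so the total matches population_size.
    # Hypothetical worked numbers, with population_size = 10 and
    # min_species_size = 2:
    #
    #     adjusted fitnesses [0.75, 0.25] -> raw sizes [7.5, 2.5]
    #     -> rounded [8, 2] (total 10, so the normalisation step is a no-op)
    # ---------------------------------------------------------------------------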
87 |     @staticmethod
88 |     def compute_adjusted_species_sizes(adjusted_species_fitnesses, previous_species_sizes, population_size,
89 |                                        min_species_size):
90 |         """
91 |         Compute the number of offspring per species, proportional to their fitnesses (See page 110 of NEAT paper)
92 |         :param adjusted_species_fitnesses:
93 |         :param previous_species_sizes:
94 |         :param population_size:
95 |         :param min_species_size:
96 |         :return:
97 |         """
98 | 
99 |         # Sum all the remaining adjusted species fitnesses
100 |         adjusted_fitness_sum = sum(adjusted_species_fitnesses)
101 | 
102 |         adjusted_species_sizes = []
103 | 
104 |         for adjusted_fitness, previous_size in zip(adjusted_species_fitnesses, previous_species_sizes):
105 |             if adjusted_fitness_sum > 0:
106 |                 # Calculate the adjusted species size for how much of the overall fitness they account for. If this
107 |                 # value is less than the min_species_size then we set it to that instead
108 |                 species_size = max(min_species_size, ((adjusted_fitness / adjusted_fitness_sum) * population_size))
109 | 
110 |             else:
111 |                 species_size = min_species_size
112 | 
113 |             # difference = (species_size - previous_size) * 0.5
114 |             # rounded_difference = int(round(difference))
115 |             # adjusted_size = previous_size
116 |             # if abs(rounded_difference) > 0:
117 |             #     adjusted_size += rounded_difference
118 |             # elif difference > 0:
119 |             #     adjusted_size += 1
120 |             # elif difference < 0:
121 |             #     adjusted_size -= 1
122 |             # adjusted_species_sizes.append(adjusted_size)
123 | 
124 |             # TODO: This lets fitter species claim population share more aggressively. For the damped behaviour, uncomment the block above and remove the line below
125 |             adjusted_species_sizes.append(round(species_size))
126 | 
127 |         # Normalize the spawn amounts so that the next generation is roughly
128 |         # the population size requested by the user.
129 |         total_adjusted_size = sum(adjusted_species_sizes)
130 |         norm = population_size / total_adjusted_size
131 |         adjusted_species_sizes = [max(min_species_size, int(round(n * norm))) for n in adjusted_species_sizes]
132 | 
133 |         print('NEW POPULATION SIZE: {}'.format(sum(adjusted_species_sizes)))
134 | 
135 |         return adjusted_species_sizes
136 | 
137 |     def get_non_stagnant_species(self, species_set, generation):
138 |         """
139 |         Checks which species are stagnant and returns the ones which aren't
140 |         :param generation: Which generation number it is
141 |         :param species_set: The species set instance which stores all the species
142 |         :return: A list of non stagnant species
143 |         """
144 |         # Keeps track of all the fitnesses for the genomes in the population
145 |         all_fitnesses = []
146 |         # Keeps track of the species which aren't stagnant
147 |         remaining_species = []
148 | 
149 |         # (Id, species instance, boolean)
150 |         for species_id, species, is_stagnant in self.stagnation.update(species_set=species_set, generation=generation,
151 |                                                                        config=self.config):
152 |             # Only save species if it is not stagnant
153 |             if not is_stagnant:
154 |                 # Save all the fitness in the species that isn't stagnant
155 |                 all_fitnesses += [member.fitness for member in species.members.values()]
156 |                 remaining_species.append(species)
157 | 
158 |         # The case where there are no species left
159 |         if not remaining_species:
160 |             # TODO: Would this ever come here?
161 |             raise Exception('There are no remaining species in the reproduce function')
162 | 
163 |         return all_fitnesses, remaining_species
164 | 
165 |     def get_adjusted_species_sizes(self, all_fitnesses, remaining_species, population_size):
166 |         """
167 |         Adjusts the size of the species for their fitness values
168 |         :param all_fitnesses: A list of all fitness values for all genomes in the population
169 |         :param remaining_species: A list of species which aren't stagnant
170 |         :param population_size: The population size
171 |         :return: A list of sizes for the new remaining species, adjusted for their respective fitness values
172 |         """
173 | 
174 |         # Find min and max fitness across the entire population. We use this for explicit fitness sharing.
175 |         min_genome_fitness = min(all_fitnesses)
176 |         max_genome_fitness = max(all_fitnesses)
177 | 
178 |         # TODO: The value 1.0 is arbitrary, taken from the neat-python package. Should it be configurable?
179 |         fitness_range = max(1.0, max_genome_fitness - min_genome_fitness)
180 | 
181 |         # TODO: Not sure if this is the right method to do adjusted fitness
182 |         for species in remaining_species:
183 |             # The adjusted fitness is the mean of the species members' fitnesses TODO: Is this correct?
184 |             mean_species_fitness = np.mean([member.fitness for member in species.members.values()])
185 |             adjusted_fitness = (mean_species_fitness - min_genome_fitness) / fitness_range
186 |             species.adjusted_fitness = adjusted_fitness
187 | 
188 |         adjusted_species_fitnesses = [species.adjusted_fitness for species in remaining_species]
189 | 
190 |         # Get a list of the amount of members in each of the remaining species
191 |         previous_species_sizes = [len(species.members) for species in remaining_species]
192 | 
193 |         # If the sum of the adjusted species fitnesses is less than 0.1, it suggests there isn't much fitness variation
194 |         # in the population. Thus we put an artificial barrier on the min species size because there is no species that
195 |         # entirely beats all other species
196 |         # TODO: 0.1 is an arbitrary number and should be configurable
197 |         if sum(adjusted_species_fitnesses) < 0.1:
198 |             min_species_size = 2
199 |         else:
200 |             min_species_size = self.config.min_species_size
201 | 
202 |         adjusted_species_sizes = self.compute_adjusted_species_sizes(
203 |             adjusted_species_fitnesses=adjusted_species_fitnesses, min_species_size=min_species_size,
204 |             previous_species_sizes=previous_species_sizes, population_size=population_size)
205 | 
206 |         return adjusted_species_sizes
207 | 
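    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the adjusted
    # fitness above rescales each species' mean fitness into [0, 1] relative to
    # the whole population. Hypothetical numbers:
    #
    #     member fitnesses: species A = [1.0, 2.0], species B = [4.0]
    #     fitness_range = max(1.0, 4.0 - 1.0) = 3.0
    #     adjusted A = (1.5 - 1.0) / 3.0 = 0.1666...
    #     adjusted B = (4.0 - 1.0) / 3.0 = 1.0
    # ---------------------------------------------------------------------------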
208 |     def get_new_population(self, adjusted_species_sizes, remaining_species, species_set, generation_tracker,
209 |                            backprop_mutation):
210 |         """
211 |         Creates the dictionary of the new genomes for the next generation population
212 |         :param generation_tracker:
213 |         :param adjusted_species_sizes:
214 |         :param remaining_species:
215 |         :param species_set:
216 |         :param new_population:
217 |         :return:
218 |         """
219 |         new_population = {}
220 | 
221 |         for species_size, species in zip(adjusted_species_sizes, remaining_species):
222 | 
223 |             # TODO: Uncomment if you removed min_species_size
224 |             # assert (species_size > 0)
225 |             if species_size > 0:
226 | 
227 |                 # List of old species members
228 |                 old_species_members = list(species.members.values())
229 |                 # Reset the members for the current species
230 |                 species.members = {}
231 |                 # Save the species in the species set object
232 |                 species_set.species[species.key] = species
233 | 
234 |                 # Sort the members into descending fitness order
235 |                 old_species_members.sort(reverse=True, key=lambda x: x.fitness)
236 | 
237 |                 # Double check that it is descending
238 |                 if len(old_species_members) > 1:
239 |                     assert (old_species_members[0].fitness >= old_species_members[1].fitness)
240 | 
241 |                 # If we have specified a number of genomes to carry over, carry them over to the new population
242 |                 num_genomes_without_crossover = int(
243 |                     round(species_size * self.config.chance_for_mutation_without_crossover))
244 |                 if num_genomes_without_crossover > 0:
245 | 
246 |                     for member in old_species_members[:num_genomes_without_crossover]:
247 | 
248 |                         # Check if we should carry over a member un-mutated or not
249 |                         if not self.config.keep_unmutated_top_percentage:
250 |                             child = copy.deepcopy(member)
251 | 
252 |                             child.mutate(reproduction_instance=self,
253 |                                          innovation_tracker=self.innovation_tracker, config=self.config,
254 |                                          backprop_mutation=backprop_mutation)
255 | 
256 |                             if not child.check_connection_enabled_amount() and not child.check_num_paths(
257 |                                     only_add_enabled_connections=True):
258 |                                 raise Exception('This child has no enabled connections')
259 | 
260 |                             new_population[child.key] = child
261 |                             self.ancestors[child.key] = ()
262 |                             # new_population[member.key] = member
263 |                             species_size -= 1
264 |                             assert (species_size >= 0)
265 |                         else:
266 |                             # Else we just add the current member to the new population
267 |                             new_population[member.key] = member
268 |                             species_size -= 1
269 |                             assert (species_size >= 0)
270 | 
271 |                 # If there are no more genomes for the current species, then restart the loop for the next species
272 |                 if species_size <= 0:
273 |                     continue
274 | 
275 |                 # Only use the survival threshold fraction to use as parents for the next generation.
276 |                 reproduction_cutoff = int(math.ceil((1 - self.config.chance_for_mutation_without_crossover) *
277 |                                                     len(old_species_members)))
278 | 
279 |                 # Need at least two parents no matter what the previous result
280 |                 reproduction_cutoff = max(reproduction_cutoff, 2)
281 |                 old_species_members = old_species_members[:reproduction_cutoff]
282 | 
283 |                 # Randomly choose parents and choose whilst there can still be additional genomes for the given species
284 |                 while species_size > 0:
285 |                     species_size -= 1
286 | 
287 |                     # TODO: If you don't allow them to mate with themselves then it's a problem because if the species previous
288 |                     # TODO: size is 1, then how can you do with or without crossover?
289 |                     parent_1 = copy.deepcopy(random.choice(old_species_members))
290 |                     parent_2 = copy.deepcopy(random.choice(old_species_members))
291 | 
292 |                     # These have to be deep copies (hence the deepcopy around random.choice above), otherwise the
293 |                     # connections which are crossed over are also modified if mutation occurs on the child, since
294 |                     # parent and child would share the same ConnectionGene objects
295 | 
296 | 
297 |                     self.genome_indexer += 1
298 |                     genome_id = self.genome_indexer
299 | 
300 |                     child = Genome(key=genome_id)
301 |                     # TODO: Save the parent_1 and parent_2 mutation history as well as what connections they had
302 |                     # Create the genome from the parents
303 |                     num_connections_enabled = child.crossover(genome_1=parent_1, genome_2=parent_2, config=self.config)
304 | 
305 |                     # If there are no connections enabled we forget about this child and don't add it to the existing
306 |                     # population
307 |                     if num_connections_enabled:
308 |                         child.mutate(reproduction_instance=self,
309 |                                      innovation_tracker=self.innovation_tracker, config=self.config,
310 |                                      generation_tracker=generation_tracker, backprop_mutation=backprop_mutation)
311 | 
312 |                         if not child.check_connection_enabled_amount() and not child.check_num_paths(
313 |                                 only_add_enabled_connections=True):
314 |                             raise Exception('This child has no enabled connections')
315 | 
316 |                         new_population[child.key] = child
317 |                         self.ancestors[child.key] = (parent_1.key, parent_2.key)
318 |                     else:
319 |                         # Else the crossover resulted in an invalid genome
320 |                         assert num_connections_enabled == 0
321 |                         species_size += 1
322 |                         self.genome_indexer -= 1
323 | 
324 |         return new_population
325 | 
326 |     def reproduce(self, species_set, population_size, generation, generation_tracker, backprop_mutation=False):
327 |         """
328 |         Handles reproduction of a population
329 |         :param generation_tracker: A class instance which keeps track of certain parameters for each generation
330 |         :param generation: Which generation number it is
331 |         :param species_set: The SpeciesSet instance which keeps track of species
332 |         :param population_size: The population size
333 |         :return: A new population
334 |         """
335 |         # Check it is a class instance
336 |         assert (isinstance(species_set, SpeciesSet))
337 | 
338 |         all_fitnesses, remaining_species = self.get_non_stagnant_species(species_set=species_set, generation=generation)
339 | 
340 |         adjusted_species_sizes = self.get_adjusted_species_sizes(all_fitnesses=all_fitnesses,
341 |                                                                  population_size=population_size,
342 |                                                                  remaining_species=remaining_species)
343 | 
344 |         # Set the species dict to an empty one for now as the new species will be configured later
345 |         species_set.species = {}
346 | 
347 |         # Keeps track of the new population (key, object)
348 |         new_population = self.get_new_population(adjusted_species_sizes=adjusted_species_sizes, species_set=species_set,
349 |                                                  remaining_species=remaining_species,
350 |                                                  generation_tracker=generation_tracker,
351 |                                                  backprop_mutation=backprop_mutation)
352 | 
353 |         return new_population
--------------------------------------------------------------------------------
/reproduce_multiclass.py:
--------------------------------------------------------------------------------
1 | from gene import NodeGene, ConnectionGene
2 | from genome_multiclass import GenomeMultiClass
3 | from species import SpeciesSet
4 | import random
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import seaborn as sns
8 | import math
9 | import copy
10 | 
11 | 
12 | 
13 | class ReproduceMultiClass:
14 |     """
15 |     Will handle reproduction of the genomes
16 |     """
17 | 
18 |     def __init__(self, stagnation, config):
19 |         self.global_innovation_number = 0
20 |         self.stagnation = stagnation
21 |         self.ancestors = {}
22 |         self.genome_indexer = 0
23 |         self.config = config
24 |         # Key: The tuple of the connection e.g. (1,3) value: the innovation number
25 |         self.innovation_tracker = {}
26 | 
27 |     def create_new_population(self, population_size, num_features, num_classes):
28 |         population = {}
29 | 
30 |         source_node_list = []
31 |         output_node_list = []
32 | 
33 |         # Create the source and output nodes
34 |         for node_id in range(num_features + num_classes):
35 |             if node_id < num_features:
36 |                 source_node_list.append(NodeGene(node_id=node_id, node_type='source'))
37 |             else:
38 |                 output_node_list.append(NodeGene(node_id=node_id, node_type='output', bias=1))
39 | 
40 |         # Save innovations on population creation
41 |         for source_node in source_node_list:
42 |             for output_node in output_node_list:
43 |                 # Increment for the new innovation
44 |                 self.global_innovation_number += 1
45 |                 # The output nodes always have node_ids starting at the number of features
46 |                 self.innovation_tracker[(source_node.node_id, output_node.node_id)] = self.global_innovation_number
47 | 
48 |         connection_list = []
49 |         # For each feature there will be a connection to each output
50 |         for source_node in source_node_list:
51 |             for output_node in output_node_list:
52 |                 connection = (source_node.node_id, output_node.node_id)
53 |                 # The connection was already saved, so this should return true
54 |                 assert (connection in self.innovation_tracker)
55 |                 connection_list.append(ConnectionGene(input_node=source_node.node_id, output_node=output_node.node_id,
56 |                                                       innovation_number=self.innovation_tracker[connection],
57 |                                                       enabled=True))
58 |         all_nodes_list = source_node_list + output_node_list
59 |         # Create a population of size population_size
60 |         for index in range(population_size):
61 |             # Deep copies, otherwise changing the connection weight changes it for every genome that has the same
62 |             # reference to the class
63 |             deep_copy_connections = copy.deepcopy(connection_list)
64 |             deep_copy_nodes = copy.deepcopy(all_nodes_list)
65 |             # Set all the connections to a random weight for each genome
66 |             for connection in deep_copy_connections:
67 |                 connection.weight = np.random.randn()
68 |             # Increment since the index value has been assigned
69 |             self.genome_indexer += 1
70 | 
71 |             # Create the genome
72 |             population[index] = GenomeMultiClass(connections=deep_copy_connections, nodes=deep_copy_nodes,
73 |                                                  key=self.genome_indexer)
74 | 
75 |         self.show_population_weight_distribution(population=population)
76 | 
77 |         return population
78 | 
79 |     @staticmethod
80 |     def show_population_weight_distribution(population):
81 |         # See the spread of starting weights
82 |         list_of_weights = []
83 |         for genome in population.values():
84 |             for connection in genome.connections.values():
85 |                 list_of_weights.append(connection.weight)
86 | 
87 |         sns.distplot(list_of_weights)
88 |         plt.title('Weight distribution of connections in each population member')
89 |         plt.xlabel('Connection weight values')
90 |         plt.show()
91 | 
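    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): with
    # num_features source nodes and num_classes output nodes, the initial
    # population above is fully connected, so every genome starts with
    # num_features * num_classes connections and the innovation ids
    # 1 .. num_features * num_classes are handed out row by row. E.g. for
    # 2 features and 3 classes: (0, 2) -> 1, (0, 3) -> 2, (0, 4) -> 3,
    # (1, 2) -> 4, (1, 3) -> 5, (1, 4) -> 6.
    # ---------------------------------------------------------------------------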
92 |     @staticmethod
93 |     def compute_adjusted_species_sizes(adjusted_species_fitnesses, previous_species_sizes, population_size,
94 |                                        min_species_size):
95 |         """
96 |         Compute the number of offspring per species, proportional to their fitnesses (See page 110 of NEAT paper)
97 |         :param adjusted_species_fitnesses:
98 |         :param previous_species_sizes:
99 |         :param population_size:
100 |         :param min_species_size:
101 |         :return:
102 |         """
103 | 
104 |         # Sum all the remaining adjusted species fitnesses
105 |         adjusted_fitness_sum = sum(adjusted_species_fitnesses)
106 | 
107 |         adjusted_species_sizes = []
108 | 
109 |         for adjusted_fitness, previous_size in zip(adjusted_species_fitnesses, previous_species_sizes):
110 |             if adjusted_fitness_sum > 0:
111 |                 # Calculate the adjusted species size for how much of the overall fitness they account for. If this
112 |                 # value is less than the min_species_size then we set it to that instead
113 |                 species_size = max(min_species_size, ((adjusted_fitness / adjusted_fitness_sum) * population_size))
114 | 
115 |             else:
116 |                 species_size = min_species_size
117 | 
118 |             # difference = (species_size - previous_size) * 0.5
119 |             # rounded_difference = int(round(difference))
120 |             # adjusted_size = previous_size
121 |             # if abs(rounded_difference) > 0:
122 |             #     adjusted_size += rounded_difference
123 |             # elif difference > 0:
124 |             #     adjusted_size += 1
125 |             # elif difference < 0:
126 |             #     adjusted_size -= 1
127 |             #
128 |             # adjusted_species_sizes.append(adjusted_size)
129 |             # TODO: This lets fitter species claim population share more aggressively. For the damped behaviour, uncomment the block above and remove the line below
130 |             adjusted_species_sizes.append(round(species_size))
131 | 
132 |         # Normalize the spawn amounts so that the next generation is roughly
133 |         # the population size requested by the user.
134 |         total_adjusted_size = sum(adjusted_species_sizes)
135 |         norm = population_size / total_adjusted_size
136 |         adjusted_species_sizes = [max(min_species_size, int(round(n * norm))) for n in adjusted_species_sizes]
137 | 
138 |         print('NEW POPULATION SIZE: {}'.format(sum(adjusted_species_sizes)))
139 | 
140 |         return adjusted_species_sizes
141 | 
142 |     def get_non_stagnant_species(self, species_set, generation):
143 |         """
144 |         Checks which species are stagnant and returns the ones which aren't
145 |         :param generation: Which generation number it is
146 |         :param species_set: The species set instance which stores all the species
147 |         :return: A list of non stagnant species
148 |         """
149 |         # Keeps track of all the fitnesses for the genomes in the population
150 |         all_fitnesses = []
151 |         # Keeps track of the species which aren't stagnant
152 |         remaining_species = []
153 | 
154 |         # (Id, species instance, boolean)
155 |         for species_id, species, is_stagnant in self.stagnation.update(species_set=species_set, generation=generation,
156 |                                                                        config=self.config):
157 |             # if is_stagnant:
158 |             #     # TODO: What to do here??
159 |             #     pass
160 |             # else:
161 |             #     # Save all the fitness in the species that isn't stagnant
162 |             #     all_fitnesses += [member.fitness for member in species.members.values()]
163 |             #     remaining_species.append(species)
164 | 
165 |             # Only save species if it is not stagnant
166 |             if not is_stagnant:
167 |                 # Save all the fitness in the species that isn't stagnant
168 |                 all_fitnesses += [member.fitness for member in species.members.values()]
169 |                 remaining_species.append(species)
170 | 
171 |         # The case where there are no species left
172 |         if not remaining_species:
173 |             # TODO: Would this ever come here?
174 |             raise Exception('There are no remaining species in the reproduce function')
175 | 
176 |         return all_fitnesses, remaining_species
177 | 
178 |     def get_adjusted_species_sizes(self, all_fitnesses, remaining_species, population_size):
179 |         """
180 |         Adjusts the size of the species for their fitness values
181 |         :param all_fitnesses: A list of all fitness values for all genomes in the population
182 |         :param remaining_species: A list of species which aren't stagnant
183 |         :param population_size: The population size
184 |         :return: A list of sizes for the new remaining species, adjusted for their respective fitness values
185 |         """
186 | 
187 |         # Find min and max fitness across the entire population. We use this for explicit fitness sharing.
188 |         min_genome_fitness = min(all_fitnesses)
189 |         max_genome_fitness = max(all_fitnesses)
190 | 
191 |         # TODO: The value 1.0 is arbitrary, taken from the neat-python package. Should it be configurable?
192 |         fitness_range = max(1.0, max_genome_fitness - min_genome_fitness)
193 | 
194 |         # TODO: Not sure if this is the right method to do adjusted fitness
195 |         for species in remaining_species:
196 |             # The adjusted fitness is the mean of the species members' fitnesses TODO: Is this correct?
197 |             mean_species_fitness = np.mean([member.fitness for member in species.members.values()])
198 |             species.adjusted_fitness = (mean_species_fitness - min_genome_fitness) / fitness_range
199 | 
200 |         adjusted_species_fitnesses = [species.adjusted_fitness for species in remaining_species]
201 | 
202 |         # Get a list of the amount of members in each of the remaining species
203 |         previous_species_sizes = [len(species.members) for species in remaining_species]
204 | 
205 |         # If the sum of the adjusted species fitnesses is less than 0.1, it suggests there isn't much fitness variation
206 |         # in the population. Thus we put an artificial barrier on the min species size because there is no species that
207 |         # entirely beats all other species
208 |         # TODO: 0.1 is an arbitrary number and should be configurable
209 |         adjusted_species_sum = sum(adjusted_species_fitnesses)
210 |         if adjusted_species_sum < 0.1:
211 |             min_species_size = 2
212 |         else:
213 |             min_species_size = self.config.min_species_size
214 | 
215 |         adjusted_species_sizes = self.compute_adjusted_species_sizes(
216 |             adjusted_species_fitnesses=adjusted_species_fitnesses, min_species_size=min_species_size,
217 |             previous_species_sizes=previous_species_sizes, population_size=population_size)
218 | 
219 |         return adjusted_species_sizes
220 | 
221 |     def get_new_population(self, adjusted_species_sizes, remaining_species, species_set, generation_tracker,
222 |                            backprop_mutation):
223 |         """
224 |         Creates the dictionary of the new genomes for the next generation population
225 |         :param generation_tracker:
226 |         :param adjusted_species_sizes:
227 |         :param remaining_species:
228 |         :param species_set:
229 |         :param new_population:
230 |         :return:
231 |         """
232 |         new_population = {}
233 | 
234 |         for species_size, species in zip(adjusted_species_sizes, remaining_species):
235 | 
236 |             # TODO: Uncomment if you removed min_species_size
237 |             # assert (species_size > 0)
238 |             if species_size > 0:
239 | 
240 |                 # List of old species members
241 |                 old_species_members = list(species.members.values())
242 |                 # Reset the members for the current species
243 |                 species.members = {}
244 |                 # Save the species in the species set object
245 |                 species_set.species[species.key] = species
246 | 
247 |                 # Sort the members into descending fitness order
248 |                 old_species_members.sort(reverse=True, key=lambda x: x.fitness)
249 | 
250 |                 # Double check that it is descending
251 |                 if len(old_species_members) > 1:
252 |                     assert (old_species_members[0].fitness >= old_species_members[1].fitness)
253 | 
254 |                 # If we have specified a number of genomes to carry over, carry them over to the new population
255 |                 num_genomes_without_crossover = int(
256 |                     round(species_size * self.config.chance_for_mutation_without_crossover))
257 |                 if num_genomes_without_crossover > 0:
258 | 
259 |                     for member in old_species_members[:num_genomes_without_crossover]:
260 | 
261 |                         # Check if we should carry over a member un-mutated or not
262 |                         if not self.config.keep_unmutated_top_percentage:
263 |                             child = copy.deepcopy(member)
264 | 
265 |                             child.mutate(reproduction_instance=self,
266 |                                          innovation_tracker=self.innovation_tracker, config=self.config,
267 |                                          backprop_mutation=backprop_mutation)
268 | 
269 |                             if not child.check_connection_enabled_amount() and not child.check_num_paths(
270 |                                     only_add_enabled_connections=True):
271 |                                 raise Exception('This child has no enabled connections')
272 | 
273 |                             new_population[child.key] = child
274 |                             self.ancestors[child.key] = ()
275 |                             # new_population[member.key] = member
276 |                             species_size -= 1
277 |                             assert (species_size >= 0)
278 |                         else:
279 |                             # Else we just add the current member to the new population
280 |                             new_population[member.key] = member
281 |                             species_size -= 1
282 |                             assert (species_size >= 0)
283 | 
284 |                 # If there are no more genomes for the current species, then restart the loop for the next species
285 |                 if species_size <= 0:
286 |                     continue
287 | 
288 |                 # Only use the survival threshold fraction to use as parents for the next generation.
289 |                 reproduction_cutoff = int(math.ceil((1 - self.config.chance_for_mutation_without_crossover) *
290 |                                                     len(old_species_members)))
291 | 
292 |                 # Need at least two parents no matter what the previous result
293 |                 reproduction_cutoff = max(reproduction_cutoff, 2)
294 |                 old_species_members = old_species_members[:reproduction_cutoff]
295 | 
296 |                 # Randomly choose parents and choose whilst there can still be additional genomes for the given species
297 |                 while species_size > 0:
298 |                     species_size -= 1
299 | 
300 |                     # TODO: If you don't allow them to mate with themselves then it's a problem because if the species previous
301 |                     # TODO: size is 1, then how can you do with or without crossover?
302 |                     parent_1 = copy.deepcopy(random.choice(old_species_members))
303 |                     parent_2 = copy.deepcopy(random.choice(old_species_members))
304 | 
305 |                     # These have to be deep copies (hence the deepcopy around random.choice above), otherwise the
306 |                     # connections which are crossed over are also modified if mutation occurs on the child, since
307 |                     # parent and child would share the same ConnectionGene objects
308 | 
309 | 
310 |                     self.genome_indexer += 1
311 |                     genome_id = self.genome_indexer
312 | 
313 |                     child = GenomeMultiClass(key=genome_id)
314 |                     # TODO: Save the parent_1 and parent_2 mutation history as well as what connections they had
315 |                     # Create the genome from the parents
316 |                     num_connections_enabled = child.crossover(genome_1=parent_1, genome_2=parent_2, config=self.config)
317 | 
318 |                     # If there are no connections enabled we forget about this child and don't add it to the existing
319 |                     # population
320 |                     if num_connections_enabled:
321 |                         child.mutate(reproduction_instance=self,
322 |                                      innovation_tracker=self.innovation_tracker, config=self.config,
323 |                                      generation_tracker=generation_tracker, backprop_mutation=backprop_mutation)
324 | 
325 |                         if not child.check_connection_enabled_amount() and not child.check_num_paths(
326 |                                 only_add_enabled_connections=True):
327 |                             raise Exception('This child has no enabled connections')
328 | 
329 |                         new_population[child.key] = child
330 |                         self.ancestors[child.key] = (parent_1.key, parent_2.key)
331 |                     else:
332 |                         # Else the crossover resulted in an invalid genome
333 |                         assert num_connections_enabled == 0
334 |                         species_size += 1
335 |                         self.genome_indexer -= 1
336 | 
337 |         return new_population
338 | 
339 |     def reproduce(self, species_set, population_size, generation, generation_tracker, backprop_mutation=False):
340 |         """
341 |         Handles reproduction of a population
342 |         :param generation_tracker: A class instance which keeps track of certain parameters for each generation
343 |         :param generation: Which generation number it is
344 |         :param species_set: The SpeciesSet instance which keeps track of species
345 |         :param population_size: The population size
346 |         :return: A new population
347 |         """
348 |         # Check it is a class instance
349 |         assert (isinstance(species_set, SpeciesSet))
350 | 
351 |         all_fitnesses, remaining_species = self.get_non_stagnant_species(species_set=species_set, generation=generation)
352 | 
353 |         adjusted_species_sizes = self.get_adjusted_species_sizes(all_fitnesses=all_fitnesses,
354 |                                                                  population_size=population_size,
355 |                                                                  remaining_species=remaining_species)
356 | 
357 |         # Set the species dict to an empty one for now as the new species will be configured later
358 |         species_set.species = {}
359 | 
360 |         # Keeps track of the new population (key, object)
361 |         new_population = self.get_new_population(adjusted_species_sizes=adjusted_species_sizes, species_set=species_set,
362 |                                                  remaining_species=remaining_species,
363 |                                                  generation_tracker=generation_tracker,
364 |                                                  backprop_mutation=backprop_mutation)
365 | 
366 |         return new_population
--------------------------------------------------------------------------------
/species.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class Species:
5 |     def __init__(self, key, generation):
6 |         self.key = key
7 |         # Which generation the species was created
8 |         self.created = generation
9 |         # Keeps track of what generation the fitness improved
10 |         self.last_improved = generation
11 |         self.representative = None
12 |         # All members of the species
13 |         self.members = {}
14 |         self.fitness = None
15 |         self.adjusted_fitness = None
16 |         # History of the fitness for the species
17 |         self.fitness_history = []
18 | 
19 |         # Keeps track of the unique characteristics of its members
20 |         self.species_info = None
21 | 
22 |     def update(self, representative, members):
23 |         self.representative = representative
24 |         self.members = members
25 | 
26 | 
27 | class SpeciesSet:
28 | 
29 |     def __init__(self, config, generation_tracker):
30 |         self.config = config
31 |         self.species_indexer = 0
32 |         self.species = {}
33 |         # For each genome if you index the dict it will return which species it is a part of
34 |         self.genome_species = {}
35 |         self.generation_tracker = generation_tracker
36 | 
37 |     @staticmethod
38 |     def species_fitness_function(species_members, function_type):
39 |         """
40 |         Finds the fitness for a species by aggregating the fitnesses of its members.
41 |         :param species_members: The members of the species
42 |         :param function_type: What type of function you want to apply, e.g. mean, max, min, median
43 |         :return: The fitness value for the species
44 |         """
45 |         # Dispatch to the matching numpy aggregation function
46 |         if function_type not in {'mean', 'max', 'median', 'min'}:
47 |             raise Exception('Invalid function type specified for species fitness function')
48 | 
49 |         species_member_fitnesses = [member.fitness for member in species_members.values()]
50 | 
51 |         function_map = {'mean': np.mean, 'max': np.max, 'median': np.median, 'min': np.min}
52 |         return function_map[function_type](species_member_fitnesses)
53 | 
54 |     def calculate_compatibility_distance(self, species_representative, genome, generation_tracker=None):
55 |         compatibility_distance_1 = species_representative.compute_compatibility_distance(other_genome=genome,
56 |                                                                                          config=self.config,
57 |                                                                                          generation_tracker=generation_tracker)
58 |         compatibility_distance_2 = genome.compute_compatibility_distance(other_genome=species_representative,
59 |                                                                          config=self.config)
60 | 
61 |         # There's no reason for this to be different depending on who you choose to be the other genome
62 |         if round(compatibility_distance_1, 3) != round(compatibility_distance_2, 3):
63 |             raise Exception('There is an error in how compatibility distance is calculated')
64 | 
65 |         return compatibility_distance_1
66 | 
67 |     def find_new_species_representative(self, unspeciated, population, dict_of_compatibility_distances,
68 |                                         new_representatives, new_members):
69 |         """
70 |         :param unspeciated: Set of genome_id's which haven't been assigned a species
71 |         :param population: A dict of (genome_id, genome_class) for the population
72 |         :param dict_of_compatibility_distances: An empty dict to store the distance between different genomes
73 |         :param new_representatives: A dict to save the new representative for a species
74 |         :param new_members: A dict to save the new members for each of the species
75 |         """
76 |         # For each species we find the new representative
77 |         for species_id, species_object in self.species.items():
78 |             candidates = []
79 |             for genome_id in unspeciated:
80 |                 genome = population[genome_id]
81 |                 compatibility_distance = self.calculate_compatibility_distance(
82 |                     species_representative=species_object.representative,
83 |                     genome=genome)
84 |                 dict_of_compatibility_distances[(species_object.representative, genome)] = compatibility_distance
85 |                 candidates.append((compatibility_distance, genome))
86 | 
87 |             _, new_rep = min(candidates, key=lambda x: x[0])
88 |             # Set the new representative for the species for the genome with the lowest distance
89 |             new_rep_id = new_rep.key
90 |             new_representatives[species_id] = new_rep_id
91 |             new_members[species_id] = [new_rep_id]
92 |             unspeciated.remove(new_rep_id)
93 | 
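    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the method
    # below implements the standard NEAT assignment rule - a genome joins the
    # species whose representative is closest, provided that distance is under
    # compatibility_threshold; otherwise it founds a new species. Hypothetical
    # numbers with compatibility_threshold = 3.0:
    #
    #     distances to representatives {species 1: 2.5, species 2: 0.9}
    #         -> joins species 2 (the closest candidate under the threshold)
    #     distances to representatives {species 1: 4.2}
    #         -> no candidates, so a new species id is created for the genome
    # ---------------------------------------------------------------------------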
94 |     def find_species_members(self, unspeciated, population, dict_of_compatibility_distances,
95 |                              new_representatives, new_members, compatibility_threshold, generation_tracker):
96 |         while unspeciated:
97 |             genome_id = unspeciated.pop()
98 |             genome = population[genome_id]
99 | 
100 |             candidates = []
101 | 
102 |             # Keeps track of the distances with each representative
103 |             compatibility_distances_dict = {}
104 |             representative_genomes_list = []
105 | 
106 |             for species_id, representative_id in new_representatives.items():
107 |                 representative_genome = population[representative_id]
108 |                 compatibility_distance = self.calculate_compatibility_distance(
109 |                     species_representative=representative_genome, genome=genome, generation_tracker=generation_tracker)
110 | 
111 |                 compatibility_distances_dict[representative_genome] = compatibility_distance
112 |                 representative_genomes_list.append(representative_genome)
113 | 
114 |                 dict_of_compatibility_distances[(representative_genome, genome)] = compatibility_distance
115 |                 if compatibility_distance < compatibility_threshold:
116 |                     candidates.append((compatibility_distance, species_id))
117 | 
118 |             if candidates:
119 |                 _, species_id = min(candidates, key=lambda x: x[0])
120 |                 new_members[species_id].append(genome_id)
121 | 
122 |             # We have to create a new species for the genome since it's not compatible
123 |             else:
124 |                 # increment for a new species
125 |                 self.species_indexer += 1
126 | 
127 |                 species_id = self.species_indexer
128 |                 new_representatives[species_id] = genome_id
129 |                 new_members[species_id] = [genome_id]
130 | 
131 |     def save_species_info(self, new_representatives, generation, new_members, population):
132 |         """
133 |         Saves the species to a species object and then within the species set instance
134 |         :param new_representatives: The representatives for the new species
135 |         :param generation: Which generation number it is
136 |         :param new_members: The members for each species
137 |         :param population: A dict of (key: genome instance)
138 |         """
139 |         # For each genome_id keeps track of which species_id it belongs to
140 |         self.genome_species = {}
141 | 
142 |         for species_id, representative_id in new_representatives.items():
143 |             species_object = self.species.get(species_id)
144 | 
145 |             if species_object is None:
146 |                 species_object = Species(key=species_id, generation=generation)
147 |                 self.species[species_id] = species_object
148 | 
149 |             members = new_members[species_id]
150 | 
151 |             for genome_id in members:
152 |                 self.genome_species[genome_id] = species_id
153 | 
154 |             members_dict = dict((genome_id, population[genome_id]) for genome_id in members)
155 |             species_object.update(representative=population[representative_id], members=members_dict)
156 | 
157 |     def speciate(self, population, compatibility_threshold, generation, generation_tracker=None):
158 |         """
159 | 
160 |         :param generation: Which generation number it is
161 |         :param compatibility_threshold:
162 |         :param population: A dict of (genome_id, genome_class)
163 |         :param generation_tracker: Tracks attributes for the current generation
164 |         :return:
165 |         """
166 |         unspeciated = set(population)
167 |         new_representatives = {}
168 |         new_members = {}
169 |         dict_of_compatibility_distances = {}
170 | 
171 |         self.find_new_species_representative(unspeciated=unspeciated,
172 |                                              dict_of_compatibility_distances=dict_of_compatibility_distances,
173 |                                              new_members=new_members, new_representatives=new_representatives,
174 |                                              population=population)
175 | 
176 |         self.find_species_members(unspeciated=unspeciated,
177 |                                   dict_of_compatibility_distances=dict_of_compatibility_distances,
178 |                                   new_members=new_members, new_representatives=new_representatives,
179 |                                   compatibility_threshold=compatibility_threshold, population=population,
180 |                                   generation_tracker=generation_tracker)
181 | 
182 |         self.save_species_info(new_representatives=new_representatives, new_members=new_members, population=population,
183 |                                generation=generation)
184 | 
185 |         # Mean compatibility distance
186 |         self.generation_tracker.mean_compatibility_distance = np.mean(list(dict_of_compatibility_distances.values()))
187 |         # Standard deviation
188 |         self.generation_tracker.std_dev_compatibility_distance = np.std(list(dict_of_compatibility_distances.values()))
189 |         self.generation_tracker.num_species = len(self.species)
--------------------------------------------------------------------------------
/stagnation.py:
--------------------------------------------------------------------------------
import sys
import numpy as np


class Stagnation:

    @staticmethod
    def update(species_set, generation, config):
        """
        Required interface method. Updates species fitness history information,
        checks which species have not improved in max_stagnation_generations
        generations, and returns a list with stagnant species marked for removal.
        Species are spared (the highest-fitness ones first) if removing them would
        drop the number of species below the configured num_species_min parameter.
        """

        species_data = []
        for species_id, species in species_set.species.items():
            if species.fitness_history:
                # If there is a fitness history, use the best fitness achieved so far as the baseline
                prev_fitness = max(species.fitness_history)
            else:
                # Else just set it to the lowest possible value for now
                prev_fitness = -sys.float_info.max

            # Calculate the fitness for the species
            species.fitness = species_set.species_fitness_function(species_members=species.members,
                                                                   function_type='mean')

            num_nodes_overall = []
            num_nodes_enabled = []
            num_connections_overall = []
            num_connections_enabled = []
            all_fitnesses = []
            species_member_fitnesses = [member.fitness for member in species.members.values()]
            for genome in species.members.values():
                num_nodes_overall.append(len(genome.nodes))
                num_nodes_enabled.append(len(genome.get_active_nodes()))
                num_connections_overall.append(len(genome.connections))
                num_connections_enabled.append(genome.check_connection_enabled_amount())
                if genome.fitness:
                    all_fitnesses.append(genome.fitness)

            avg_num_connections_enabled = np.mean(num_connections_enabled)
            avg_num_connections_overall = np.mean(num_connections_overall)
            avg_num_nodes_enabled = np.mean(num_nodes_enabled)
            avg_num_nodes_overall = np.mean(num_nodes_overall)

            # NOTE: despite the key name, 'mean_weight' stores the mean member fitness for the species
            complexity_tracker = {'num_connections_enabled': avg_num_connections_enabled,
                                  'num_connections_overall': avg_num_connections_overall,
                                  'num_nodes_enabled': avg_num_nodes_enabled,
                                  'num_nodes_overall': avg_num_nodes_overall,
                                  'mean_weight': np.mean(species_member_fitnesses)}
            species.species_info = complexity_tracker

            # Keep track of historical fitness
            species.fitness_history.append(species.fitness)

            species.adjusted_fitness = None

            # Keep track of when the species last improved
            if species.fitness > prev_fitness:
                species.last_improved = generation

            species_data.append((species_id, species))

        # Sort the species data into ascending fitness order
        species_data.sort(key=lambda x: x[1].fitness)

        # Keeps track of which species are stagnant or not
        results = []
        # Keeps track of each species's fitness
        species_fitnesses = []
        num_non_stagnant = len(species_data)

        for index, (species_id, species) in enumerate(species_data):
            # Override the stagnant state if marking this species as stagnant would
            # result in the total number of species dropping below the limit.
            # Because species are in ascending fitness order, less fit species
            # will be marked as stagnant first.
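            # Worked example (values illustrative): if config.max_stagnation_generations were 20 and
            # config.num_species_min were 2, a species whose fitness last improved at generation 5
            # becomes stagnant at generation 25, unless it is one of the two fittest species still
            # alive, in which case it is always spared by the check below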
            stagnant_time = generation - species.last_improved
            is_stagnant = False

            if num_non_stagnant > config.num_species_min:
                # Check if the stagnation time for the species is greater than the max set in the config
                is_stagnant = (stagnant_time >= config.max_stagnation_generations)

            # Ensure the num_species_min fittest species are never marked stagnant, even if they are.
            # Example: if config.num_species_min = 2, then the last two entries of species_data (the
            # two fittest species) always have is_stagnant set back to False
            if (len(species_data) - index) <= config.num_species_min:
                is_stagnant = False

            if is_stagnant:
                num_non_stagnant -= 1

            results.append((species_id, species, is_stagnant))
            species_fitnesses.append(species.fitness)

        return results
--------------------------------------------------------------------------------
/testing_multiclass.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt


class LazyProperty:

    def __init__(self, func):
        self._func = func
        self.__name__ = func.__name__

    def __get__(self, instance, owner):
        if instance is None:
            # Accessed on the class rather than an instance
            return self
        # Compute once, then cache the result on the instance so the descriptor is bypassed next time
        result = instance.__dict__[self.__name__] = self._func(instance)

        return result


class Model:

    def __init__(self, train_data, train_labels, test_data, test_labels, batch_size, learning_rate, keep_prob,
                 num_layers, num_epochs, layer_sizes):

        # Data
        self.x_train = train_data  # TODO: consider whether this should contain the test data as well
        self.y_train = train_labels
        self.x_test = test_data
        self.y_test = test_labels

        # Network architecture
        self.num_layers = num_layers
        self.num_epochs = num_epochs
        self.layer_sizes = layer_sizes

        # Parameters
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.keep_prob = keep_prob
        self.parameters = {}  # Empty dictionary to hold weights and biases once initialised
        self.initialise_params()  # Initialise parameters

        # Hold data
        self.epoch_cost = []
        self.epoch_counter = []

    @staticmethod
    def relu(x):
        return x * (x > 0)

    @staticmethod
    def sigmoid(x):
        activation = 1 / (1 + np.exp(-x))

        return activation

    @staticmethod
    def softmax(x):
        """Compute softmax values for each set of scores in x."""
        sum_ = np.sum(np.exp(x), axis=1)
        sum_.shape = (sum_.shape[0], 1)

        return np.exp(x) / sum_

    @staticmethod
    def xavier_initializer(num_inputs, num_outputs):
        """
        NOTE: if using ReLU then use constant 2 instead of 1 inside the sqrt (He initialisation)
        """
        np.random.seed(7)
        weights = np.random.randn(num_inputs, num_outputs) * np.sqrt(1 / num_inputs)

        return weights

    @staticmethod
    def sigmoid_gradient(a):
        gradient = (a * (1 - a))

        return gradient

    @staticmethod
    def relu_gradient(x):
        return 1. * (x > 0)

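    # Sketch (additional helper, not part of the original script): softmax above applies np.exp to
    # raw scores and can overflow for large inputs. The usual numerically stable variant shifts each
    # row by its max first, which leaves the softmax output unchanged:
    @staticmethod
    def stable_softmax(x):
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)
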
    @staticmethod
    def cost_fn(logits, labels, n_examples, loss_type='single_class'):

        if loss_type == 'single_class':
            # Binary cross-entropy; the module-level eps keeps the logs numerically stable
            loss = -((labels * np.log(logits + eps)) + ((1 - labels) * np.log(1 - logits + eps)))
            cost_ = (1 / n_examples) * np.sum(loss, axis=0)

        elif loss_type == 'multi_class':
            # Categorical cross-entropy
            loss = -np.sum(labels * np.log(logits + eps))
            cost_ = loss * (1 / n_examples)

        else:
            raise ValueError('Invalid loss_type specified')

        return cost_

    def plot_graph(self):
        plt.plot(self.epoch_counter, self.epoch_cost)
        plt.show()

    @staticmethod
    def batchnorm_forward(x, gamma, beta):
        # Compute per-feature mean and variance
        mean = np.mean(x, axis=0)
        var = np.var(x, axis=0)

        # Normalize and zero-center (kept explicit for caching purposes)
        x_mu = x - mean
        inv_var = 1.0 / np.sqrt(var + eps)
        x_hat = x_mu * inv_var

        # Squash
        out = gamma * x_hat + beta

        # Cache variables for the backward pass
        cache = x_mu, inv_var, x_hat, gamma

        return out, cache

    @staticmethod
    def batchnorm_backward(dout, cache):

        N, D = dout.shape
        x_mu, inv_var, x_hat, gamma = cache

        dbeta = np.sum(dout, axis=0)
        dgamma = np.sum(x_hat * dout, axis=0)
        # Elementwise scale by gamma, which broadcasts per feature (np.dot with gamma.T would
        # incorrectly collapse the feature dimension)
        dx_hat = dout * gamma

        dvar = np.sum((dx_hat * x_mu * (-0.5) * inv_var ** 3), axis=0)
        dmu = (np.sum((dx_hat * -inv_var), axis=0)) + (dvar * (-2.0 / N) * np.sum(x_mu, axis=0))

        dx1 = dx_hat * inv_var
        dx2 = dvar * (2.0 / N) * x_mu
        dx3 = (1.0 / N) * dmu

        # Final partial derivatives
        dx = dx1 + dx2 + dx3

        return dx, dgamma, dbeta

    def bias_correction(self, variable_name, timestep, moment=None):
        assert (isinstance(variable_name, str))
        if moment == 1:
            bias_corrected = self.parameters['moment1_{}'.format(variable_name)] / (1 - beta1 ** timestep)

        elif moment == 2:
            bias_corrected = self.parameters['moment2_{}'.format(variable_name)] / (1 - beta2 ** timestep)
        else:
            raise ValueError('moment must be 1 or 2')

        return bias_corrected

    def update_moment(self, variable_name, gradient, moment=None):
        assert (isinstance(variable_name, str))
        if moment == 1:
            self.parameters['moment1_{}'.format(variable_name)] = \
                (beta1 * self.parameters['moment1_{}'.format(variable_name)]) + ((1 - beta1) * gradient)

        elif moment == 2:
            self.parameters['moment2_{}'.format(variable_name)] = \
                (beta2 * self.parameters['moment2_{}'.format(variable_name)]) + ((1 - beta2) * np.power(gradient, 2))

    def create_layer(self, input_matrix, weights_matrix, bias_matrix, activation_fn='relu', batch_norm=False):

        assert (isinstance(activation_fn, str))

        z = np.dot(input_matrix, weights_matrix) + bias_matrix

        # NOTE: batch norm is hard-coded to the first layer's gamma/beta parameters
        if batch_norm is True:
            gamma = self.parameters['gamma1']
            beta = self.parameters['beta1']
            z, cache = self.batchnorm_forward(z, gamma, beta)

        if activation_fn == 'sigmoid':
            activation = self.sigmoid(z)
        elif activation_fn == 'relu':
            activation = self.relu(z)
        elif activation_fn == 'softmax':
            activation = self.softmax(z)

        if batch_norm is True:
            return activation, cache
        else:
            return activation

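    # Sketch (additional helper, not part of the original script): a quick sanity check that
    # batchnorm_forward produces roughly zero-mean, unit-variance features before the gamma/beta
    # squash. Relies on the module-level eps defined in the parameter block at the bottom of the file
    @staticmethod
    def check_batchnorm_stats(x):
        out, _ = Model.batchnorm_forward(x, gamma=1.0, beta=0.0)
        means_ok = np.allclose(np.mean(out, axis=0), 0.0, atol=1e-6)
        vars_ok = np.allclose(np.var(out, axis=0), 1.0, atol=1e-3)
        return means_ok, vars_ok
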
    def initialise_params(self):

        if self.layer_sizes[0, 0] != self.x_train.shape[1]:
            raise ValueError('Number of inputs must match first entry in layer_sizes')

        if self.layer_sizes.shape[0] != self.num_layers:
            raise ValueError('Number of layers defined must be equal to number of layers set')

        if self.layer_sizes.shape[1] != 1:
            raise ValueError('layer_sizes must be a column vector')

        # Iterate through each layer to find num inputs and outputs
        for i in range(self.num_layers - 1):
            index = i + 1  # Keep indexing of parameters to begin from 1 for convenience
            num_inputs = self.layer_sizes[i, 0]  # Current layer number of inputs
            num_outputs = self.layer_sizes[i + 1, 0]  # Next layer expected number of inputs

            self.parameters['w{}'.format(index)] = self.xavier_initializer(num_inputs, num_outputs)

            # Initialise moment vectors
            self.parameters['moment1_w{}'.format(index)] = 0
            self.parameters['moment2_w{}'.format(index)] = 0

            self.parameters['moment1_b{}'.format(index)] = 0
            self.parameters['moment2_b{}'.format(index)] = 0

            # Only need one bias entry per class for the last layer (uses the module-level n_classes)
            if i == self.num_layers - 2:
                self.parameters['b{}'.format(index)] = np.zeros((1, n_classes))
            else:
                self.parameters['b{}'.format(index)] = np.zeros((1, num_outputs))

            if i != 0:
                # Parameters for batch norm
                n_parameters = self.layer_sizes[1, 0]  # Number of neurons on the second (hidden) layer
                self.parameters['gamma{}'.format(index - 1)] = np.ones((1, n_parameters))
                self.parameters['moment1_gamma{}'.format(index - 1)] = 0
                self.parameters['moment2_gamma{}'.format(index - 1)] = 0

                self.parameters['beta{}'.format(index - 1)] = np.zeros((1, n_parameters))
                self.parameters['moment1_beta{}'.format(index - 1)] = 0
                self.parameters['moment2_beta{}'.format(index - 1)] = 0

        return self.parameters

    def calc_train_accuracy(self, train=True):

        if train is True:
            data = self.x_train
            labels = self.y_train
        else:
            data = self.x_test
            labels = self.y_test

        # Uses the module-level activation_fn_list defined in the parameter block below
        prediction = self.predict(data, activation_fn_list)

        accuracy = (prediction == labels)  # Returns bool array
        accuracy = accuracy * 1  # Turns bools into ints
        accuracy = np.average(accuracy)

        return accuracy * 100

    def calc_gradients(self, data_batch, labels_batch, n_examples, batch_norm):

        if batch_norm is True:
            prediction, a1, cache = self.predict(data_batch, activation_fn_list, optimise=True)

            # Compute gradients for the last (output) layer
            dZ2 = (prediction - labels_batch) * (1 / n_examples)
            dW2 = np.dot(a1.T, dZ2)
            dB2 = np.sum(dZ2, axis=0)

            # Compute gradients for batch norm
            if activation_fn_list[0] == 'sigmoid':
                dY = np.dot(dZ2, self.parameters['w2'].T) * self.sigmoid_gradient(a1)
            elif activation_fn_list[0] == 'relu':
                dY = np.dot(dZ2, self.parameters['w2'].T) * self.relu_gradient(a1)

            dZ1, dgamma, dbeta = self.batchnorm_backward(dY, cache)

            # Compute gradients for the first (hidden) layer
            dW1 = np.dot(data_batch.T, dZ1)
            dB1 = np.sum(dZ1, axis=0)

            gradients_dict = {'w1': dW1, 'w2': dW2, 'b1': dB1, 'b2': dB2, 'beta1': dbeta, 'gamma1': dgamma}

        elif batch_norm is False:
            prediction, a1 = self.predict(data_batch, activation_fn_list, optimise=True)

            # Compute gradients for the last (output) layer
            dZ2 = (prediction - labels_batch) * (1 / n_examples)
            dW2 = np.dot(a1.T, dZ2)
            dB2 = np.sum(dZ2, axis=0)

            # Compute gradients for the first (hidden) layer
            if activation_fn_list[0] == 'sigmoid':
                dZ1 = np.dot(dZ2, self.parameters['w2'].T) * self.sigmoid_gradient(a1)
            elif activation_fn_list[0] == 'relu':
                dZ1 = np.dot(dZ2, self.parameters['w2'].T) * self.relu_gradient(a1)

            dW1 = np.dot(data_batch.T, dZ1)
            dB1 = np.sum(dZ1, axis=0)

            gradients_dict = {'w1': dW1, 'w2': dW2, 'b1': dB1, 'b2': dB2}

        dB2.shape = (1, dB2.shape[0])
        dB1.shape = (1, dB1.shape[0])

        assert (self.parameters['w1'].shape == dW1.shape)
        assert (self.parameters['w2'].shape == dW2.shape)
        assert (self.parameters['b1'].shape == dB1.shape)
        assert (self.parameters['b2'].shape == dB2.shape)

        return gradients_dict, prediction

    def predict(self, current_batch, activation_fn_list, optimise=False):

        # Define parameters
        w1 = self.parameters['w1']
        b1 = self.parameters['b1']
        w2 = self.parameters['w2']
        b2 = self.parameters['b2']

        # Define network and prediction (batch_norm here is the module-level flag set in the
        # parameter block below)
        if batch_norm is True:
            a1, cache = self.create_layer(current_batch, w1, b1, activation_fn=activation_fn_list[0],
                                          batch_norm=True)  # First layer
            prediction = self.create_layer(a1, w2, b2, activation_fn=activation_fn_list[1],
                                           batch_norm=False)  # Second layer

        elif batch_norm is False:
            a1 = self.create_layer(current_batch, w1, b1, activation_fn=activation_fn_list[0])  # First layer
            prediction = self.create_layer(a1, w2, b2, activation_fn=activation_fn_list[1])  # Second layer

        if optimise is True and batch_norm is True:
            return prediction, a1, cache  # Gives raw values to calculate loss during training
        if optimise is True and batch_norm is False:
            return prediction, a1
        if optimise is False:
            return np.around(prediction)  # Gives values rounded to 0 or 1 to see prediction result on test set

    def optimise(self):

        timestep = 0

        for batch_start in range(0, self.x_train.shape[0], self.batch_size):

            timestep += 1

            current_batch = self.x_train[batch_start:batch_start + self.batch_size, :]
            current_labels = self.y_train[batch_start:batch_start + self.batch_size, :]

            n_examples = current_batch.shape[0]

            gradients_dict, prediction = self.calc_gradients(current_batch, current_labels, n_examples,
                                                             batch_norm=batch_norm)

            cost = self.cost_fn(prediction, current_labels, n_examples, loss_type='multi_class')

            # Update parameters
            if adam_optimizer is False:
                # Plain mini-batch gradient descent
                for variable in gradients_dict:
                    self.parameters[variable] = self.parameters[variable] - \
                                                (self.learning_rate * gradients_dict[variable])

            elif adam_optimizer is True:

                for variable in gradients_dict:

                    self.update_moment(variable, gradients_dict[variable], 1)
                    self.update_moment(variable, gradients_dict[variable], 2)

                    # Bias correction
                    bias_corr_m1 = self.bias_correction(variable, timestep, 1)
                    bias_corr_m2 = self.bias_correction(variable, timestep, 2)

                    # eps guards against division by zero early in training
                    self.parameters[variable] = self.parameters[variable] - \
                                                (self.learning_rate * (bias_corr_m1 / (np.sqrt(bias_corr_m2) + eps)))

        return cost.item()


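# Reference sketch (illustrative, not used by the script): a standalone restatement of the Adam
# update that optimise() performs per parameter, to make the moment and bias-correction
# bookkeeping explicit. The defaults mirror the beta1/beta2/eps values set below
def adam_step(param, grad, m1, m2, t, lr=0.1, b1=0.9, b2=0.999, e=1e-8):
    # Exponential moving averages of the gradient and squared gradient
    m1 = b1 * m1 + (1 - b1) * grad
    m2 = b2 * m2 + (1 - b2) * grad ** 2
    # Bias correction compensates for the zero-initialised moments at small t
    m1_hat = m1 / (1 - b1 ** t)
    m2_hat = m2 / (1 - b2 ** t)
    # e keeps the division away from zero early in training
    return param - lr * m1_hat / (np.sqrt(m2_hat) + e), m1, m2

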
# Test and Train data
n_generated = 5000  # How many training examples to be generated
use_onehot = True

if use_onehot is True:
    # XNOR labelling: class [1, 0] when the two inputs are equal, class [0, 1] when they differ
    data_train = np.random.randint(2, size=(n_generated, 2))
    labels_train = np.empty((n_generated, 2))
    for row in range(data_train.shape[0]):
        if data_train[row, 0] == data_train[row, 1]:
            labels_train[row, 0] = 1
            labels_train[row, 1] = 0
        else:
            labels_train[row, 0] = 0
            labels_train[row, 1] = 1

    data_test = np.array(
        [[1, 1], [0, 1], [1, 1], [0, 0], [1, 0], [0, 1], [1, 1], [0, 0], [1, 1], [0, 1], [1, 1], [0, 0], [1, 0],
         [0, 1], [1, 1], [0, 0]])
    labels_test = np.array(
        [[1, 0], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [0, 1],
         [0, 1], [1, 0], [1, 0]])

elif use_onehot is False:
    data_train = np.random.randint(2, size=(n_generated, 2))
    labels_train = np.empty((n_generated, 1))
    for row in range(data_train.shape[0]):
        if data_train[row, 0] == data_train[row, 1]:
            labels_train[row] = 1
        else:
            labels_train[row] = 0

    data_test = np.array(
        [[1, 1], [0, 1], [1, 1], [0, 0], [1, 0], [0, 1], [1, 1], [0, 0], [1, 1], [0, 1], [1, 1], [0, 0], [1, 0],
         [0, 1], [1, 1], [0, 0]])
    labels_test = np.array([[1], [0], [1], [1], [0], [0], [1], [1], [1], [0], [1], [1], [0], [0], [1], [1]])

# Parameters
batch_size = 32
batch_norm = False
adam_optimizer = False
beta1 = 0.9
beta2 = 0.999
eps = 1e-8
learning_rate = 0.1
keep_prob = 0.5
num_layers = 3
num_epochs = 125
n_features = data_train.shape[1]
n_classes = 2
activation_fn_list = ['sigmoid', 'sigmoid']  # Specify which activation function you want for a given layer

# Architecture for network
layer_sizes = np.array([[n_features], [2], [n_classes]])

# Initialise model
model = Model(data_train, labels_train, data_test, labels_test, batch_size, learning_rate, keep_prob, num_layers,
              num_epochs, layer_sizes)

for i in range(model.num_epochs):
    cost = model.optimise()

    # Keep track of costs
    model.epoch_counter.append(i)
    model.epoch_cost.append(cost)

    # Check cost and accuracy at every quarter and last epoch
    if i % int(round(num_epochs * 0.25, 0)) == 0 or i == model.num_epochs - 1:
        accuracy = round(model.calc_train_accuracy(train=True), 0)

        print('EPOCH:', i, '\t', 'Cost:', round(cost, 3), '\t',
              'Accuracy:', '%{}'.format(accuracy))

model.plot_graph()

# Test model on validation data
test_prediction = model.predict(model.x_test, activation_fn_list)
accuracy = round(model.calc_train_accuracy(train=False), 0)

print(test_prediction, '\n', 'Test Accuracy:', '%{}'.format(accuracy))


# NOTE: the training script above runs at module level; main is a placeholder
def main():
    pass


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------