├── NEAT.py
├── NEAT_multiclass.py
├── algorithm_runs
│   └── xor_full
│       └── run_1
│           ├── average_population_fitness_generation_5.png
│           ├── avg_num_disjoint_generation_5.png
│           ├── avg_num_excess_generation_5.png
│           ├── avg_weight_diff_generation_5.png
│           ├── best_all_time_genome_accuracy_generation_5.png
│           ├── best_all_time_genome_f1_score_generation_5.png
│           ├── best_all_time_genome_fitness_generation_5.png
│           ├── best_genome_pickle
│           ├── generation_tracker
│           ├── mean_compatibility_distance_generation_5.png
│           ├── mean_number_connections_enabled_generation_5.png
│           ├── mean_number_connections_overall_generation_5.png
│           ├── mean_number_nodes_enabled_generation_5.png
│           ├── num_generation_add_connection_generation_5.png
│           ├── num_generation_add_node_generation_5.png
│           ├── num_generation_delete_connection_generation_5.png
│           ├── num_generation_delete_node_generation_5.png
│           ├── num_generation_weight_mutations_generation_5.png
│           └── num_species_generation_5.png
├── config.py
├── config_multiclass.py
├── data_storage.py
├── data_visualisation.py
├── data_visualisation_averaging.py
├── deconstruct_genome.py
├── deconstruct_genome_multiclass.py
├── f_test.py
├── gene.py
├── generation_statistics.py
├── genome.py
├── genome_multiclass.py
├── genome_neural_network.py
├── genome_neural_network_multiclass.py
├── graph_algorithm.py
├── graph_algorithm_mutliclass.py
├── graphs
│   ├── average_population_fitness_generation_5.jpg
│   ├── avg_num_disjoint_generation_5.jpg
│   ├── avg_num_excess_generation_5.jpg
│   ├── avg_weight_diff_generation_5.jpg
│   ├── best_all_time_genome_f1_score_generation_5.jpg
│   ├── best_all_time_genome_fitness_generation_5.jpg
│   ├── mean_compatibility_distance_generation_5.jpg
│   ├── mean_number_connections_enabled_generation_5.jpg
│   ├── mean_number_connections_overall_generation_5.jpg
│   ├── mean_number_nodes_enabled_generation_5.jpg
│   ├── num_generation_add_connection_generation_5.jpg
│   ├── num_generation_add_node_generation_5.jpg
│   ├── num_generation_delete_connection_generation_5.jpg
│   ├── num_generation_delete_node_generation_5.jpg
│   ├── num_generation_weight_mutations_generation_5.jpg
│   └── num_species_generation_5.jpg
├── main.py
├── main_multiclass.py
├── multi_processing.py
├── multi_threading.py
├── neural_network.py
├── neural_network_components.py
├── pickles
│   ├── best_genome_pickle
│   ├── best_genome_pickle_-1.7547243063454208
│   ├── best_genome_pickle_0.0328201636785844
│   ├── best_genome_pickle_0.40751628299650783
│   ├── best_genome_pickle_circle_data_8
│   ├── best_genome_pickle_shm_two_class_618056
│   ├── best_genome_pickle_shm_two_class_8
│   ├── best_genome_pickle_xor_full_132164
│   ├── best_genome_pickle_xor_full_351148
│   ├── genome_circle_data
│   └── genome_noise_trained
├── read_mat_files.py
├── reproduce.py
├── reproduce_multiclass.py
├── species.py
├── stagnation.py
├── testing_multiclass.py
└── tests.py

/NEAT.py:
--------------------------------------------------------------------------------
from generation_statistics import GenerationStatistics
import matplotlib.pyplot as plt
import time
import numpy as np
from genome_neural_network import GenomeNeuralNetwork
from gene import NodeGene, ConnectionGene
from reproduce import Reproduce
from genome import Genome
from species import SpeciesSet
import sklearn.metrics
import pickle
import os

# Exception used to check if there are no more species
from stagnation import Stagnation


class CompleteExtinctionException(Exception):
    pass


class NEAT:
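    """
    Runs the NEAT algorithm: holds the population and its species set, evaluates
    every genome as a neural network (optionally optimised with backprop), then
    reproduces and re-speciates the population generation by generation until a
    break point is reached.
    """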

    def __init__(self, x_training_data, y_training_data, x_test_data, y_test_data, config, fitness_threshold,
                 f1_score_threshold, algorithm_running=None):
        # Where all the parameters are saved
        self.config = config
        # Takes care of reproduction of populations
        self.reproduction = Reproduce(stagnation=Stagnation, config=config)
        self.generation_tracker = GenerationStatistics()
        # Track the best genome across generations
        self.best_all_time_genome = None
        # If the fitness threshold is met it will stop the algorithm (if set)
        self.fitness_threshold = fitness_threshold
        self.f1_score_threshold = f1_score_threshold
        # A class containing the different species within the population
        self.species_set = SpeciesSet(config=config, generation_tracker=self.generation_tracker)
        self.x_train = x_training_data
        self.y_train = y_training_data
        self.x_test = x_test_data
        self.y_test = y_test_data

        # Keep track of best genome through generations
        self.best_genome_history = {}

        # Keeps information of population complexity for each generation
        self.population_complexity_tracker = {}

        if algorithm_running:
            # Defines which of the algorithms is currently being tested (e.g. xor with 5000 examples or xor with 200
            # examples and noise)
            self.algorithm_running = algorithm_running

        # Initialise the starting population
        self.population = self.reproduction.create_new_population(population_size=self.config.population_size,
                                                                  num_features=x_training_data.shape[1])

        # Speciate the initial population
        self.species_set.speciate(population=self.population, compatibility_threshold=3, generation=0)

    @staticmethod
    def create_genome_nn(genome, x_data, y_data, algorithm_running=None):
        # TODO: I encountered a bug where I trained a genome with a relu activation function, but when I recreated it
        #  using this function I had problems because I forgot that everything defined inside here uses sigmoid.
        #  Should improve the implementation of this
        # TODO: The x_data and y_data aren't always used, particularly if we only create the network to get a
        #  prediction. This implementation should be improved for clarity
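        # Per-dataset hyperparameters used when optimising a genome's weights with backprop. Note that every
        # branch below currently uses a sigmoid activation (see the TODO above)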
        if algorithm_running == 'xor_full':
            learning_rate = 0.1
            num_epochs = 1000
            batch_size = 64
            activation_type = 'sigmoid'
        elif algorithm_running == 'xor_small_noise':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 10
            activation_type = 'sigmoid'
        elif algorithm_running == 'circle_data':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'shm_two_class':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'spiral_data':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        # TODO: Choose a more suitable default
        else:
            learning_rate = 0.1
            num_epochs = 500
            batch_size = 64
            activation_type = 'sigmoid'

        return GenomeNeuralNetwork(genome=genome, x_train=x_data, y_train=y_data,
                                   create_weights_bias_from_genome=True, activation_type=activation_type,
                                   learning_rate=learning_rate, num_epochs=num_epochs, batch_size=batch_size)

    def evaluate_population(self, use_backprop, generation):
        """
        Calculates the fitness value for each individual genome in the population
        :param use_backprop: True or False depending on whether the fitness is calculated using backprop or not
        :param generation: Which generation number it currently is
        :return: The best and worst genomes of the population
        """

        # Should return the best genome
        current_best_genome = None
        current_worst_genome = None

        for genome in self.population.values():

            genome_nn = self.create_genome_nn(genome=genome, x_data=self.x_train, y_data=self.y_train,
                                              algorithm_running=self.algorithm_running)

            # Optimise the neural network first. However, the first generation only allows one pass so that we are
            # not just optimising all of the same topologies
            genome_fitness_before = genome.fitness
            if use_backprop and generation > 1:
                print('\n')
                print('OPTIMISING GENOME')
                genome_nn.optimise(print_epoch=False)

            # We use genome_nn.x_train instead of self.x_train because the genome_nn might have deleted a row if there
            # is no connection to one of the sources
            cost = genome_nn.run_one_pass(input_data=genome_nn.x_train, labels=self.y_train, return_cost_only=True)

            # The fitness is the negative of the cost, because a lower cost means a greater fitness
            genome.fitness = -cost

            # Only print genome fitness if backprop is used, since backprop takes a long time, so this can be a
            # way of tracking progress in the meantime
            if use_backprop and generation > 1:
                # NOTE: Genome fitness can be None due to crossover because the fitness value is not carried over
                print('Genome Fitness Before: {}'.format(genome_fitness_before))
                print('Genome Fitness After: {}'.format(genome.fitness))

            if current_best_genome is None or genome.fitness > current_best_genome.fitness:
                current_best_genome = genome
            if current_worst_genome is None or genome.fitness < current_worst_genome.fitness:
                current_worst_genome = genome

        return current_best_genome, current_worst_genome

    def update_population_topology_info(self, current_gen):
        num_nodes_overall = []
        num_nodes_enabled = []
        num_connections_overall = []
        num_connections_enabled = []
        all_fitnesses = []
        for genome in self.population.values():
            num_nodes_overall.append(len(genome.nodes))
            num_nodes_enabled.append(len(genome.get_active_nodes()))
            num_connections_overall.append(len(genome.connections))
            num_connections_enabled.append(genome.check_connection_enabled_amount())
            if genome.fitness:
                all_fitnesses.append(genome.fitness)

        avg_num_connections_enabled = np.mean(num_connections_enabled)
        avg_num_connections_overall = np.mean(num_connections_overall)
        avg_num_nodes_enabled = np.mean(num_nodes_enabled)
        avg_num_nodes_overall = np.mean(num_nodes_overall)

        complexity_tracker = {'num_connections_enabled': avg_num_connections_enabled,
                              'num_connections_overall': avg_num_connections_overall,
                              'num_nodes_enabled': avg_num_nodes_enabled, 'num_nodes_overall': avg_num_nodes_overall}
        self.population_complexity_tracker[current_gen] = complexity_tracker

        self.generation_tracker.mean_number_connections_enabled = avg_num_connections_enabled
        self.generation_tracker.mean_number_connections_overall = avg_num_connections_overall
        self.generation_tracker.mean_number_nodes_enabled = avg_num_nodes_enabled
        self.generation_tracker.mean_number_nodes_overall = avg_num_nodes_overall
        self.generation_tracker.average_population_fitness = np.mean(all_fitnesses)

    def add_successful_genome_for_test(self, current_gen, use_this_genome):
        """
        This function adds a pre-programmed genome which is known to converge for the XOR dataset.
        :param current_gen: The current generation number
        :param use_this_genome: Whether this genome should be added to the population or not
        :return:
        """
        # Wait for current_gen > 1 because if using backprop the first gen skips using backprop.
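        # Hand-built topology used below: source nodes 0 and 1 each feed hidden nodes 3 and 4, which each feed
        # hidden nodes 5 and 6, which both feed output node 2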
        if current_gen > 1 and use_this_genome:
            node_list = [
                NodeGene(node_id=0, node_type='source'),
                NodeGene(node_id=1, node_type='source'),
                NodeGene(node_id=2, node_type='output', bias=0.5),
                NodeGene(node_id=3, node_type='hidden', bias=1),
                NodeGene(node_id=4, node_type='hidden', bias=1),
                NodeGene(node_id=5, node_type='hidden', bias=1),
                NodeGene(node_id=6, node_type='hidden', bias=1),
            ]

            connection_list = [ConnectionGene(input_node=0, output_node=3, innovation_number=1, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=3, innovation_number=2, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=0, output_node=4, innovation_number=3, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=4, innovation_number=4, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=5, innovation_number=5, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=5, innovation_number=6, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=6, innovation_number=7, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=6, innovation_number=8, enabled=True,
                                              weight=np.random.rand()),
                               ConnectionGene(input_node=5, output_node=2, innovation_number=9, enabled=True,
                                              weight=np.random.rand()),
                               ConnectionGene(input_node=6, output_node=2, innovation_number=10, enabled=True,
                                              weight=np.random.randn())
                               ]

            test_genome = Genome(connections=connection_list, nodes=node_list, key=1)
            test_genome.fitness = -99999999999
            self.population[32131231] = test_genome

    @staticmethod
    def calculate_f_statistic(genome, x_test_data, y_test_data):
        genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True).round()
        return sklearn.metrics.f1_score(y_test_data, prediction)

    @staticmethod
    def calculate_accuracy(genome, x_test_data, y_test_data):
        genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True).round()
        num_correct = 0
        for row in range(y_test_data.shape[0]):
            if np.array_equal(prediction[row, :], y_test_data[row, :]):
                num_correct += 1

        percentage_correct = (num_correct / y_test_data.shape[0]) * 100
        return percentage_correct

    def save_run_information(self, current_gen):
        base_filepath = 'algorithm_runs'
        run_group_filepath = '{}/{}'.format(base_filepath, self.algorithm_running)
        if not os.path.exists(run_group_filepath):
            # Make the directory (and any missing parents) before saving graphs
            os.makedirs(run_group_filepath)

        folders = len(os.listdir(run_group_filepath))

        # Folders + 1 because it will be the next folder in the sub directory
        file_path_for_run = '{}/run_{}'.format(run_group_filepath, (folders + 1))

        # Make the directory before saving all other files
        os.makedirs(file_path_for_run)

        # Save best genome in pickle
        outfile = open('{}/best_genome_pickle'.format(file_path_for_run), 'wb')
        pickle.dump(self.best_all_time_genome, outfile)
        outfile.close()

        # Save graph information
        self.generation_tracker.plot_graphs(current_gen=current_gen, save_plots=True,
                                            file_path=file_path_for_run)

        # Save generation tracker in pickle
        outfile = open('{}/generation_tracker'.format(file_path_for_run), 'wb')
        pickle.dump(self.generation_tracker, outfile)
        outfile.close()

        # Save NEAT class instance so we can access the population again later
        outfile = open('{}/NEAT_instance'.format(file_path_for_run), 'wb')
        pickle.dump(self, outfile)
        outfile.close()

    def check_algorithm_break_point(self, current_gen, f1_score_of_best_all_time_genome, max_num_generations):
        break_point_reached = False
        # If the fitness threshold is met, stop the algorithm
        if self.fitness_threshold and self.best_all_time_genome.fitness > self.fitness_threshold:
            break_point_reached = True
        if self.f1_score_threshold and f1_score_of_best_all_time_genome > self.f1_score_threshold:
            break_point_reached = True
        if current_gen > max_num_generations:
            break_point_reached = True

        if break_point_reached:
            self.save_run_information(current_gen=current_gen)

            return True
        return False

    def run(self, max_num_generations, use_backprop, print_generation_information, show_population_weight_distribution):
        """
        Run the algorithm
        """

        current_gen = 0
        # Break condition now in function
        while True:
            # Every generation increment
            current_gen += 1

            self.add_successful_genome_for_test(current_gen=current_gen, use_this_genome=False)

            self.generation_tracker.population_size = len(self.population)

            start_evaluate_time = time.time()
            # Evaluate the current generation and get the best genome in the current generation
            best_current_genome, worst_current_genome = self.evaluate_population(use_backprop=use_backprop,
                                                                                 generation=current_gen)
            print('WORST CURRENT GENOME FITNESS: {}'.format(worst_current_genome.fitness))
            end_evaluate_time = time.time()
            self.update_population_topology_info(current_gen=current_gen)
            self.generation_tracker.evaluate_execute_time = end_evaluate_time - start_evaluate_time

            # Keep track of the best genome across generations
            if self.best_all_time_genome is None or best_current_genome.fitness > self.best_all_time_genome.fitness:
                # Keep track of the best genome through generations
                self.best_genome_history[current_gen] = best_current_genome

                self.best_all_time_genome = best_current_genome

            self.generation_tracker.best_all_time_genome_fitness = self.best_all_time_genome.fitness

            start_reproduce_time = time.time()

            # Reset attributes for the current generation
            self.generation_tracker.reset_tracker_attributes()

            # Reproduce and get the next generation
            self.population = self.reproduction.reproduce(species_set=self.species_set,
                                                          population_size=self.config.population_size,
                                                          generation=current_gen,
                                                          generation_tracker=self.generation_tracker,
                                                          # current_gen should be greater than one to use
                                                          # backprop_mutation because we let the first generation
                                                          # mutate just as if it was the normal genetic algorithm,
                                                          # so that we're not optimising all of the same structure
                                                          backprop_mutation=(use_backprop and current_gen > 1))
            end_reproduce_time = time.time()
            self.generation_tracker.reproduce_execute_time = end_reproduce_time - start_reproduce_time

            # Check to ensure no genes share the same connection gene addresses. (This problem has been fixed but is
            # here just in case now).
            self.ensure_no_duplicate_genes()

            # Check if there are any species, if not raise an exception. TODO: Let user reset population if extinction
            if not self.species_set.species:
                raise CompleteExtinctionException()

            start_speciate_time = time.time()
            # Speciate the current generation
            self.species_set.speciate(population=self.population, generation=current_gen,
                                      compatibility_threshold=self.config.compatibility_threshold,
                                      generation_tracker=self.generation_tracker)
            end_speciate_time = time.time()
            self.generation_tracker.species_execute_time = end_speciate_time - start_speciate_time

            f1_score_of_best_all_time_genome = self.calculate_f_statistic(
                self.best_all_time_genome, self.x_test, self.y_test)

            best_all_time_genome_accuracy = self.calculate_accuracy(genome=self.best_all_time_genome,
                                                                    x_test_data=self.x_test, y_test_data=self.y_test)

            self.generation_tracker.best_all_time_genome_f1_score = f1_score_of_best_all_time_genome
            self.generation_tracker.best_all_time_genome_accuracy = best_all_time_genome_accuracy
            self.generation_tracker.update_generation_information(generation=current_gen)

            if print_generation_information:
                self.generation_tracker.print_generation_information(generation_interval_for_graph=1,
                                                                     plot_graphs_every_gen=False)

            if self.check_algorithm_break_point(f1_score_of_best_all_time_genome=f1_score_of_best_all_time_genome,
                                                current_gen=current_gen, max_num_generations=max_num_generations):
                break

            # Gives distribution of the weights in the population connections
            if show_population_weight_distribution:
                self.reproduction.show_population_weight_distribution(population=self.population)

        print('f1 score for best genome after optimising is: {}'.format(f1_score_of_best_all_time_genome))
        return self.best_all_time_genome

    def ensure_no_duplicate_genes(self):
        connection_gene_dict = {}
        for genome in self.population.values():
            for connection in genome.connections.values():
                if connection not in connection_gene_dict:
                    connection_gene_dict[connection] = 1
                else:
                    connection_gene_dict[connection] += 1

        for connection_gene, amount in connection_gene_dict.items():
            if amount > 1:
                raise Exception('You have duplicated a connection gene')

--------------------------------------------------------------------------------
/NEAT_multiclass.py:
--------------------------------------------------------------------------------
from generation_statistics import GenerationStatistics
import time
import numpy as np
from genome_neural_network_multiclass import GenomeNeuralNetworkMultiClass
from gene import NodeGene, ConnectionGene
from reproduce_multiclass import ReproduceMultiClass
from genome import Genome
from species import SpeciesSet
import sklearn.metrics
import pickle

# Exception used to check if there are no more species
from stagnation import Stagnation
import os


class CompleteExtinctionException(Exception):
    pass


class NEATMultiClass:
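    """
    Multi-class variant of the NEAT class. The starting population is created
    with outputs for num_classes classes, and predictions are taken as the
    argmax over the network outputs rather than a rounded single output.
    """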

    def __init__(self, x_training_data, y_training_data, x_test_data, y_test_data, config, fitness_threshold,
                 f1_score_threshold, algorithm_running=None):
        # Where all the parameters are saved
        self.config = config
        # Takes care of reproduction of populations
        self.reproduction = ReproduceMultiClass(stagnation=Stagnation, config=config)
        self.generation_tracker = GenerationStatistics()
        # Track the best genome across generations
        self.best_all_time_genome = None
        # If the fitness threshold is met it will stop the algorithm (if set)
        self.fitness_threshold = fitness_threshold
        self.f1_score_threshold = f1_score_threshold
        # A class containing the different species within the population
        self.species_set = SpeciesSet(config=config, generation_tracker=self.generation_tracker)
        self.x_train = x_training_data
        self.y_train = y_training_data
        self.x_test = x_test_data
        self.y_test = y_test_data

        # Keep track of best genome through generations
        self.best_genome_history = {}

        # Keeps information of population complexity for each generation
        self.population_complexity_tracker = {}

        if algorithm_running:
            # Defines which of the algorithms is currently being tested (e.g. xor with 5000 examples or xor with 200
            # examples and noise)
            self.algorithm_running = algorithm_running

        # Initialise the starting population
        self.population = self.reproduction.create_new_population(population_size=self.config.population_size,
                                                                  num_features=x_training_data.shape[1],
                                                                  num_classes=y_training_data.shape[1])

        # Speciate the initial population
        self.species_set.speciate(population=self.population, compatibility_threshold=3, generation=0)

    @staticmethod
    def create_genome_nn(genome, x_data, y_data, algorithm_running=None):
        # TODO: I encountered a bug where I trained a genome with a relu activation function, but when I recreated it
        #  using this function I had problems because I forgot that everything defined inside here uses sigmoid.
        #  Should improve the implementation of this
        # TODO: The x_data and y_data aren't always used, particularly if we only create the network to get a
        #  prediction. This implementation should be improved for clarity
        if algorithm_running == 'xor_full':
            learning_rate = 0.1
            num_epochs = 1000
            batch_size = 64
            activation_type = 'sigmoid'
        elif algorithm_running == 'xor_small_noise':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 10
            activation_type = 'sigmoid'
        elif algorithm_running == 'circle_data':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'shm_two_class':
            learning_rate = 0.1
            num_epochs = 5000
            batch_size = 50
            activation_type = 'sigmoid'
        elif algorithm_running == 'shm_multi_class':
            learning_rate = 0.1
            num_epochs = 250
            # num_epochs = 500
            batch_size = 64
            activation_type = 'sigmoid'
        # TODO: Choose a more suitable default
        else:
            learning_rate = 0.1
            num_epochs = 500
            batch_size = 64
            activation_type = 'sigmoid'

        return GenomeNeuralNetworkMultiClass(genome=genome, x_train=x_data, y_train=y_data,
                                             create_weights_bias_from_genome=True, activation_type=activation_type,
                                             learning_rate=learning_rate, num_epochs=num_epochs, batch_size=batch_size)

    def evaluate_population(self, use_backprop, generation):
        """
        Calculates the fitness value for each individual genome in the population
        :param use_backprop: True or False depending on whether the fitness is calculated using backprop or not
        :param generation: Which generation number it currently is
        :return: The best and worst genomes of the population
        """

        # Should return the best genome
        current_best_genome = None
        current_worst_genome = None

        for genome in self.population.values():

            genome_nn = self.create_genome_nn(genome=genome, x_data=self.x_train, y_data=self.y_train,
                                              algorithm_running=self.algorithm_running)

            # Optimise the neural network first. However, the first generation only allows one pass so that we are
            # not just optimising all of the same topologies
            genome_fitness_before = genome.fitness
            if use_backprop and generation > 1:
                print('\n')
                print('OPTIMISING GENOME')
                genome_nn.optimise(print_epoch=False)

            # We use genome_nn.x_train instead of self.x_train because the genome_nn might have deleted a row if there
            # is no connection to one of the sources
            cost = genome_nn.run_one_pass(input_data=genome_nn.x_train, labels=self.y_train, return_cost_only=True)

            # The fitness is the negative of the cost, because a lower cost means a greater fitness
            genome.fitness = -cost

            # Only print genome fitness if backprop is used, since backprop takes a long time, so this can be a
            # way of tracking progress in the meantime
            if use_backprop and generation > 1:
                # NOTE: Genome fitness can be None due to crossover because the fitness value is not carried over
                print('Genome Fitness Before: {}'.format(genome_fitness_before))
                print('Genome Fitness After: {}'.format(genome.fitness))

            if current_best_genome is None or genome.fitness > current_best_genome.fitness:
                current_best_genome = genome
            if current_worst_genome is None or genome.fitness < current_worst_genome.fitness:
                current_worst_genome = genome

        return current_best_genome, current_worst_genome

    def update_population_topology_info(self, current_gen):
        num_nodes_overall = []
        num_nodes_enabled = []
        num_connections_overall = []
        num_connections_enabled = []
        all_fitnesses = []
        for genome in self.population.values():
            num_nodes_overall.append(len(genome.nodes))
            num_nodes_enabled.append(len(genome.get_active_nodes()))
            num_connections_overall.append(len(genome.connections))
            num_connections_enabled.append(genome.check_connection_enabled_amount())
            if genome.fitness:
                all_fitnesses.append(genome.fitness)

        avg_num_connections_enabled = np.mean(num_connections_enabled)
        avg_num_connections_overall = np.mean(num_connections_overall)
        avg_num_nodes_enabled = np.mean(num_nodes_enabled)
        avg_num_nodes_overall = np.mean(num_nodes_overall)

        complexity_tracker = {'num_connections_enabled': avg_num_connections_enabled,
                              'num_connections_overall': avg_num_connections_overall,
                              'num_nodes_enabled': avg_num_nodes_enabled, 'num_nodes_overall': avg_num_nodes_overall}
        self.population_complexity_tracker[current_gen] = complexity_tracker

        self.generation_tracker.mean_number_connections_enabled = avg_num_connections_enabled
        self.generation_tracker.mean_number_connections_overall = avg_num_connections_overall
        self.generation_tracker.mean_number_nodes_enabled = avg_num_nodes_enabled
        self.generation_tracker.mean_number_nodes_overall = avg_num_nodes_overall

        self.generation_tracker.average_population_fitness = np.mean(all_fitnesses)

    def add_successful_genome_for_test(self, current_gen, use_this_genome):
        """
        This function adds a pre-programmed genome which is known to converge for the XOR dataset.
        :param current_gen: The current generation number
        :param use_this_genome: Whether this genome should be added to the population or not
        :return:
        """
        # Wait for current_gen > 1 because if using backprop the first gen skips using backprop.
        if current_gen > 1 and use_this_genome:
            node_list = [
                NodeGene(node_id=0, node_type='source'),
                NodeGene(node_id=1, node_type='source'),
                NodeGene(node_id=2, node_type='output', bias=0.5),
                NodeGene(node_id=3, node_type='hidden', bias=1),
                NodeGene(node_id=4, node_type='hidden', bias=1),
                NodeGene(node_id=5, node_type='hidden', bias=1),
                NodeGene(node_id=6, node_type='hidden', bias=1),
            ]

            connection_list = [ConnectionGene(input_node=0, output_node=3, innovation_number=1, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=3, innovation_number=2, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=0, output_node=4, innovation_number=3, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=4, innovation_number=4, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=5, innovation_number=5, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=5, innovation_number=6, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=6, innovation_number=7, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=6, innovation_number=8, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=5, output_node=2, innovation_number=9, enabled=True,
                                              weight=np.random.rand()),
                               ConnectionGene(input_node=6, output_node=2, innovation_number=10, enabled=True,
                                              weight=np.random.randn())
                               ]

            test_genome = Genome(connections=connection_list, nodes=node_list, key=1)
            test_genome.fitness = -99999999999
            self.population[32131231] = test_genome

    @staticmethod
    def calculate_f_statistic(genome, x_test_data, y_test_data):
        genome_nn = NEATMultiClass.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction_array = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True)
        prediction_real = np.zeros((y_test_data.shape[0], y_test_data.shape[1]))
        for row in range(prediction_array.shape[0]):
            prediction_index = np.argmax(prediction_array[row, :])
            prediction_real[row, prediction_index] = 1.0
        return sklearn.metrics.f1_score(y_test_data, prediction_real, average='samples')

    @staticmethod
    def calculate_accuracy(genome, x_test_data, y_test_data):
        genome_nn = NEATMultiClass.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        prediction_array = genome_nn.run_one_pass(input_data=x_test_data, return_prediction_only=True)
        prediction_real = np.zeros((y_test_data.shape[0], y_test_data.shape[1]))
        for row in range(prediction_array.shape[0]):
            prediction_index = np.argmax(prediction_array[row, :])
            prediction_real[row, prediction_index] = 1.0
        num_correct = 0
        for row in range(y_test_data.shape[0]):
            if np.array_equal(prediction_real[row, :], y_test_data[row, :]):
                num_correct += 1

        percentage_correct = (num_correct / y_test_data.shape[0]) * 100
        return percentage_correct

    def save_run_information(self, current_gen):
        base_filepath = 'algorithm_runs_multi'
        run_group_filepath = '{}/{}'.format(base_filepath, self.algorithm_running)
        if not os.path.exists(run_group_filepath):
            # Make the directory (and any missing parents) before saving graphs
            os.makedirs(run_group_filepath)

        folders = len(os.listdir(run_group_filepath))

        # Folders + 1 because it will be the next folder in the sub directory
        file_path_for_run = '{}/run_{}'.format(run_group_filepath, (folders + 1))

        # Make the directory before saving all other files
        os.makedirs(file_path_for_run)

        # Save best genome in pickle
        outfile = open('{}/best_genome_pickle'.format(file_path_for_run), 'wb')
        pickle.dump(self.best_all_time_genome, outfile)
        outfile.close()

        # Save graph information
        self.generation_tracker.plot_graphs(current_gen=current_gen, save_plots=True,
                                            file_path=file_path_for_run)

        # Save generation tracker in pickle
        outfile = open('{}/generation_tracker'.format(file_path_for_run), 'wb')
        pickle.dump(self.generation_tracker, outfile)
        outfile.close()

        # Save NEAT class instance so we can access the population again later
        outfile = open('{}/NEAT_instance'.format(file_path_for_run), 'wb')
        pickle.dump(self, outfile)
        outfile.close()

    def check_algorithm_break_point(self, current_gen, f1_score_of_best_all_time_genome, max_num_generations):
        break_point_reached = False
        if self.fitness_threshold and self.best_all_time_genome.fitness > self.fitness_threshold:
            break_point_reached = True
        if self.f1_score_threshold and f1_score_of_best_all_time_genome > self.f1_score_threshold:
            break_point_reached = True
        if current_gen > max_num_generations:
            break_point_reached = True

        if break_point_reached:
            self.save_run_information(current_gen=current_gen)

            return True
        return False

    def run(self, max_num_generations, use_backprop, print_generation_information, show_population_weight_distribution):
        """
        Run the algorithm
        """

        current_gen = 0
        # Break condition now in function
        while True:
            # Every generation increment
            current_gen += 1

            self.add_successful_genome_for_test(current_gen=current_gen, use_this_genome=False)

            self.generation_tracker.population_size = len(self.population)

            start_evaluate_time = time.time()
            # Evaluate the current generation and get the best genome in the current generation
            best_current_genome, worst_current_genome = self.evaluate_population(use_backprop=use_backprop,
                                                                                 generation=current_gen)
            print('WORST CURRENT GENOME FITNESS: {}'.format(worst_current_genome.fitness))
            end_evaluate_time = time.time()
            self.update_population_topology_info(current_gen=current_gen)
            self.generation_tracker.evaluate_execute_time = end_evaluate_time - start_evaluate_time

            # Keep track of the best genome across generations
            if self.best_all_time_genome is None or best_current_genome.fitness > self.best_all_time_genome.fitness:
                # Keep track of the best genome through generations
                self.best_genome_history[current_gen] = best_current_genome
                self.best_all_time_genome = best_current_genome

            self.generation_tracker.best_all_time_genome_fitness = self.best_all_time_genome.fitness

            start_reproduce_time = time.time()

            # Reset attributes for the current generation
            self.generation_tracker.reset_tracker_attributes()

            # Reproduce and get the next generation
            self.population = self.reproduction.reproduce(species_set=self.species_set,
                                                          population_size=self.config.population_size,
                                                          generation=current_gen,
                                                          generation_tracker=self.generation_tracker,
                                                          # current_gen should be greater than one to use
                                                          # backprop_mutation because we let the first generation
                                                          # mutate just as if it was the normal genetic algorithm,
                                                          # so that we're not optimising all of the same structure
                                                          backprop_mutation=(use_backprop and current_gen > 1))
            end_reproduce_time = time.time()
            self.generation_tracker.reproduce_execute_time = end_reproduce_time - start_reproduce_time

            # Check to ensure no genes share the same connection gene addresses. (This problem has been fixed but is
            # here just in case now).
            self.ensure_no_duplicate_genes()

            # Check if there are any species, if not raise an exception. TODO: Let user reset population if extinction
            if not self.species_set.species:
                raise CompleteExtinctionException()

            start_speciate_time = time.time()
            # Speciate the current generation
            self.species_set.speciate(population=self.population, generation=current_gen,
                                      compatibility_threshold=self.config.compatibility_threshold,
                                      generation_tracker=self.generation_tracker)
            end_speciate_time = time.time()
            self.generation_tracker.species_execute_time = end_speciate_time - start_speciate_time

            f1_score_of_best_all_time_genome = self.calculate_f_statistic(
                self.best_all_time_genome, self.x_test, self.y_test)

            best_all_time_genome_accuracy = self.calculate_accuracy(genome=self.best_all_time_genome,
                                                                    x_test_data=self.x_test, y_test_data=self.y_test)

            self.generation_tracker.best_all_time_genome_f1_score = f1_score_of_best_all_time_genome
            self.generation_tracker.best_all_time_genome_accuracy = best_all_time_genome_accuracy
            self.generation_tracker.update_generation_information(generation=current_gen)

            if print_generation_information:
                self.generation_tracker.print_generation_information(generation_interval_for_graph=1,
                                                                     plot_graphs_every_gen=False)

            if self.check_algorithm_break_point(f1_score_of_best_all_time_genome=f1_score_of_best_all_time_genome,
                                                current_gen=current_gen, max_num_generations=max_num_generations):
                break

            # Gives distribution of the weights in the population connections
            if show_population_weight_distribution:
                self.reproduction.show_population_weight_distribution(population=self.population)

        print('f1 score for best genome after optimising is: {}'.format(f1_score_of_best_all_time_genome))

        return self.best_all_time_genome

    def ensure_no_duplicate_genes(self):
        connection_gene_dict = {}
        for genome in self.population.values():
            for connection in genome.connections.values():
                if connection not in connection_gene_dict:
                    connection_gene_dict[connection] = 1
                else:
                    connection_gene_dict[connection] += 1

        for connection_gene, amount in connection_gene_dict.items():
            if amount > 1:
                raise Exception('You have duplicated a connection gene')
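
The files under algorithm_runs below are the artefacts written out by save_run_information for a finished run. As a minimal sketch of how a saved run can be inspected afterwards (mirroring initialise_genome in data_visualisation.py; the run directory and variable names here are illustrative):

import pickle

run_dir = 'algorithm_runs/xor_full/run_1'

# Best genome found during the run
with open('{}/best_genome_pickle'.format(run_dir), 'rb') as infile:
    best_genome = pickle.load(infile)

# Per-generation statistics recorded by GenerationStatistics
with open('{}/generation_tracker'.format(run_dir), 'rb') as infile:
    generation_tracker = pickle.load(infile)

Note that unpickling requires the project's classes (e.g. Genome, GenerationStatistics) to be importable from the working directory.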
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/average_population_fitness_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/average_population_fitness_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/avg_num_disjoint_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/avg_num_disjoint_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/avg_num_excess_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/avg_num_excess_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/avg_weight_diff_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/avg_weight_diff_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_all_time_genome_accuracy_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_all_time_genome_accuracy_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_all_time_genome_f1_score_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_all_time_genome_f1_score_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_all_time_genome_fitness_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_all_time_genome_fitness_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/best_genome_pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/best_genome_pickle
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/generation_tracker:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/generation_tracker
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_compatibility_distance_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_compatibility_distance_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_number_connections_enabled_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_number_connections_enabled_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_number_connections_overall_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_number_connections_overall_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/mean_number_nodes_enabled_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/mean_number_nodes_enabled_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_add_connection_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_add_connection_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_add_node_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_add_node_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_delete_connection_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_delete_connection_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_delete_node_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_delete_node_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_generation_weight_mutations_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_generation_weight_mutations_generation_5.png
--------------------------------------------------------------------------------
/algorithm_runs/xor_full/run_1/num_species_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/algorithm_runs/xor_full/run_1/num_species_generation_5.png
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
class Config:
    """
    Population
    """

    population_size = 15

    # TODO: Ensure each of these are used somewhere in the code
    """
    Compatibility distance
    """
    # The coefficients used for calculating the compatibility distance between two genomes
    excess_coefficient = 1
    disjoint_coefficient = 1
    # This is for when the genes are the same so they check the similarity of the weights
    matching_genes_coefficient = 0.4

    # How close they have to be to be considered in the same species
    compatibility_threshold = 3

    """
    Mutation
    """
    # Whether only one type of mutation can happen at any time.
    single_mutation_only = False

    # Weight changes
    weight_mutation_chance = 0.8
    weight_mutation_perturbe_chance = 0.9
    weight_mutation_reset_connection_chance = 0.1

    weight_mutation_perturbe_chance_backprop = 0.9
    weight_mutation_reset_connection_chance_backprop = 0.1
    weight_mutation_reset_all_connections_chance_backprop = 0.1

    # Standard deviation for the distribution from which we pick the perturbation value
    weight_mutation_sigma = 0.5
    # Mean for the distribution from which we pick the perturbation value
    weight_mutation_mean = 0.0

    # Backprop versions
    weight_mutation_sigma_backprop = 1
    weight_mutation_mean_backprop = 0.0

    # This is the chance a gene is disabled if it was disabled in either parent
    change_to_disable_gene_if_either_parent_disabled = 0.75

    chance_for_mutation_without_crossover = 0.25

    inter_species_mating_rate = 0.001


    add_node_mutation_chance = 0.4
    add_connection_mutation_chance = 0.5
    remove_node_mutation_chance = 0.3
    remove_connection_mutation_chance = 0.2

    ## These chances are used when we're performing mutation whilst using backprop optimisation
    add_node_mutation_chance_backprop = 0.7
    add_connection_mutation_chance_backprop = 0.5
    remove_node_mutation_chance_backprop = 0.05
    remove_connection_mutation_chance_backprop = 0.05
    reset_all_weights_mutation_chance_backprop = 0.05
    # add_node_mutation_chance_backprop = 0.4
    # add_connection_mutation_chance_backprop = 0.7
    # remove_node_mutation_chance_backprop = 0.05
    # remove_connection_mutation_chance_backprop = 0.05
    # reset_all_weights_mutation_chance_backprop = 0.05

    ## OLD VALUES FOR WHEN WE WERE JUST USING GENETIC ALGORITHM
    # add_node_mutation_chance = 0.03
    # add_connection_mutation_chance = 0.05
    # remove_node_mutation_chance = 0.01
    # remove_connection_mutation_chance = 0.01

    """
    Speciation
    """

    # Parameters used when checking stagnation
    # Allowable number of generations before being considered stagnant
    max_stagnation_generations = 15
    # Min number of species required before throwing out due to stagnation
    num_species_min = 2

    """
    Reproduction
    """
    # Minimum species size
    # TODO: Change back to default value of 2
    min_species_size = 0

    """
    Survival
    """
    # Percentage of the population which carries on unchanged(?)
    survival_threshold = 0.2
    # This means that a certain percentage of the top elite genomes will carry over to the next population unchanged
    keep_unmutated_top_percentage = True  # (Default should be False)

--------------------------------------------------------------------------------
/config_multiclass.py:
--------------------------------------------------------------------------------
class ConfigMultiClass:
    """
    Population
    """

    population_size = 15

    # TODO: Ensure each of these are used somewhere in the code
    """
    Compatibility distance
    """
    # The coefficients used for calculating the compatibility distance between two genomes
    excess_coefficient = 1
    disjoint_coefficient = 1
    # This is for when the genes are the same so they check the similarity of the weights
    matching_genes_coefficient = 0.4

    # How close they have to be to be considered in the same species
    compatibility_threshold = 3

    """
    Mutation
    """
    # Whether only one type of mutation can happen at any time.
    single_mutation_only = False

    # Weight changes
    weight_mutation_chance = 0.8
    weight_mutation_perturbe_chance = 0.9
    weight_mutation_reset_connection_chance = 0.1

    weight_mutation_perturbe_chance_backprop = 0.9
    weight_mutation_reset_connection_chance_backprop = 0.1
    weight_mutation_reset_all_connections_chance_backprop = 0.1

    # Standard deviation for the distribution from which we pick the perturbation value
    weight_mutation_sigma = 0.5
    # Mean for the distribution from which we pick the perturbation value
    weight_mutation_mean = 0.0

    # Backprop versions
    weight_mutation_sigma_backprop = 1
    weight_mutation_mean_backprop = 0.0

    # This is the chance a gene is disabled if it was disabled in either parent
    change_to_disable_gene_if_either_parent_disabled = 0.75

    chance_for_mutation_without_crossover = 0.25

    inter_species_mating_rate = 0.001


    add_node_mutation_chance = 0.4
    add_connection_mutation_chance = 0.5
    remove_node_mutation_chance = 0.15
    remove_connection_mutation_chance = 0.15

    ## These chances are used when we're performing mutation whilst using backprop optimisation
    add_node_mutation_chance_backprop = 0.7
    add_connection_mutation_chance_backprop = 0.5
    remove_node_mutation_chance_backprop = 0.05
    remove_connection_mutation_chance_backprop = 0.05
    reset_all_weights_mutation_chance_backprop = 0.05

    ## OLD VALUES FOR WHEN WE WERE JUST USING GENETIC ALGORITHM
    # add_node_mutation_chance = 0.03
    # add_connection_mutation_chance = 0.05
    # remove_node_mutation_chance = 0.01
    # remove_connection_mutation_chance = 0.01

    """
    Speciation
    """

    # Parameters used when checking stagnation
    # Allowable number of generations before being considered stagnant
    max_stagnation_generations = 15
    # Min number of species required before throwing out due to stagnation
    num_species_min = 2

| """ 82 | Reproduction 83 | """ 84 | # Minimum species size 85 | min_species_size = 0 86 | 87 | """ 88 | Survival 89 | """ 90 | # Percentage of the population which carries on un-changed(?) 91 | survival_threshold = 0.2 92 | # This means that a certain percentage of the top elite genomes will carry over to the next population un changed 93 | keep_unmutated_top_percentage = True # (Default should be False) 94 | -------------------------------------------------------------------------------- /data_storage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_circle_data(): 5 | # Skip first row because headings 6 | circle_data = np.loadtxt('C:/Users/tsdev/Desktop/circle_25/input.txt', skiprows=1) 7 | y_data = circle_data[:, circle_data.shape[1] - 1] 8 | x_data = circle_data[:, 1:(circle_data.shape[1] - 1)] 9 | return x_data, y_data 10 | 11 | 12 | def get_spiral_data(): 13 | # Skip first row because headings 14 | spiral_data = np.loadtxt('C:/Users/tsdev/Desktop/spiral_25/input.txt', skiprows=1) 15 | y_data = spiral_data[:, spiral_data.shape[1] - 1] 16 | x_data = spiral_data[:, 1:(spiral_data.shape[1] - 1)] 17 | return x_data, y_data 18 | -------------------------------------------------------------------------------- /data_visualisation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | from numpy.random import rand 6 | import pickle 7 | import matplotlib.pyplot as plt 8 | import copy 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | from NEAT import NEAT 12 | from genome_neural_network import GenomeNeuralNetwork 13 | from neural_network import create_data 14 | from data_storage import get_circle_data, get_spiral_data 15 | from read_mat_files import get_shm_two_class_data 16 | 17 | 18 | def initialise_genome(genome_pickle_filepath): 19 | """ 20 | Function to intialise a genome from a pickle file 21 | :param genome_pickle_filepath: File path to pickle 22 | :return: the intialised genome 23 | """ 24 | infile = open(genome_pickle_filepath, 'rb') 25 | genome = pickle.load(infile) 26 | infile.close() 27 | return genome 28 | 29 | 30 | def get_genome_predictions(genome, x_data): 31 | """ 32 | Function to return predictions for a given genome 33 | :param genome: The genome class instance 34 | :param x_data: The data to be predicted on 35 | :param y_data: The true labels for the data 36 | :return: the predictions for the given x_data 37 | """ 38 | # y_data isn't important but it's needed as a parameter 39 | _, y_data = create_data(n_generated=500) 40 | genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_data, y_data=y_data) 41 | return genome_nn.run_one_pass(input_data=x_data, return_prediction_only=True).round() 42 | 43 | 44 | def plot_decision_boundary(genome, data_being_used): 45 | assert (data_being_used in {'circle_data', 'xor_data', 'spiral_data', 'shm_two_class'}) 46 | 47 | number_of_data_points = 50 48 | if data_being_used == 'xor_data': 49 | x_values = np.linspace(0, 1, number_of_data_points).tolist() 50 | elif data_being_used == 'circle_data': 51 | x_values = np.linspace(-4, 4, number_of_data_points).tolist() 52 | elif data_being_used == 'shm_two_class': 53 | x_values = np.linspace(-29, 1, number_of_data_points).tolist() 54 | y_values = np.linspace(-34, 4, number_of_data_points).tolist() 55 | z_values = np.linspace(-31, 11, number_of_data_points).tolist() 56 | 57 | 
-------------------------------------------------------------------------------- /data_visualisation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | from numpy.random import rand 6 | import pickle 7 | import matplotlib.pyplot as plt 8 | import copy 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | from NEAT import NEAT 12 | from genome_neural_network import GenomeNeuralNetwork 13 | from neural_network import create_data 14 | from data_storage import get_circle_data, get_spiral_data 15 | from read_mat_files import get_shm_two_class_data 16 | 17 | 18 | def initialise_genome(genome_pickle_filepath): 19 | """ 20 | Function to initialise a genome from a pickle file 21 | :param genome_pickle_filepath: File path to pickle 22 | :return: the initialised genome 23 | """ 24 | infile = open(genome_pickle_filepath, 'rb') 25 | genome = pickle.load(infile) 26 | infile.close() 27 | return genome 28 | 29 | 30 | def get_genome_predictions(genome, x_data): 31 | """ 32 | Function to return predictions for a given genome 33 | :param genome: The genome class instance 34 | :param x_data: The data to be predicted on 35 | (y_data is generated internally because create_genome_nn expects labels) 36 | :return: the predictions for the given x_data 37 | """ 38 | # y_data isn't important but it's needed as a parameter 39 | _, y_data = create_data(n_generated=500) 40 | genome_nn = NEAT.create_genome_nn(genome=genome, x_data=x_data, y_data=y_data) 41 | return genome_nn.run_one_pass(input_data=x_data, return_prediction_only=True).round() 42 | 43 | 44 | def plot_decision_boundary(genome, data_being_used): 45 | assert (data_being_used in {'circle_data', 'xor_data', 'spiral_data', 'shm_two_class'}) 46 | 47 | number_of_data_points = 50 48 | if data_being_used == 'xor_data': 49 | x_values = np.linspace(0, 1, number_of_data_points).tolist() 50 | elif data_being_used == 'circle_data': 51 | x_values = np.linspace(-4, 4, number_of_data_points).tolist() 52 | elif data_being_used == 'shm_two_class': 53 | x_values = np.linspace(-29, 1, number_of_data_points).tolist() 54 | y_values = np.linspace(-34, 4, number_of_data_points).tolist() 55 | z_values = np.linspace(-31, 11, number_of_data_points).tolist() 56 | 57 | prediction_list = [] 58 | if data_being_used != 'shm_two_class': 59 | x_values_reverse = copy.deepcopy(x_values) 60 | x_values_reverse.reverse() 61 | current_x = [] 62 | current_y = [] 63 | for x in x_values: 64 | for y in x_values_reverse: 65 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 66 | x_data = np.array([[x, y]]) 67 | current_x.append(x) 68 | current_y.append(y) 69 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 70 | prediction_list += predictions[0].tolist() 71 | for x in x_values: 72 | for y in x_values_reverse: 73 | # x_data = np.array([[y, x, y ** 2, x ** 2, y * x, np.sin(y), np.sin(x)]]) 74 | x_data = np.array([[y, x]]) 75 | # This is intentional: the reverse of the previous loop 76 | current_x.append(y) 77 | current_y.append(x) 78 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 79 | prediction_list += predictions[0].tolist() 80 | plt.scatter(current_x, current_y, color=create_label_colours(labels=np.array(prediction_list))) 81 | plt.title('Decision boundary for optimised genome') 82 | plt.xlabel('X1') 83 | plt.ylabel('X2') 84 | plt.show() 85 | 86 | else: 87 | # NOTE: when plotting SHM data, remember to subtract and divide by 10 88 | current_x = [] 89 | current_y = [] 90 | current_z = [] 91 | 92 | x_values_reverse = copy.deepcopy(x_values) 93 | x_values_reverse.reverse() 94 | 95 | y_values_reverse = copy.deepcopy(y_values) 96 | y_values_reverse.reverse() 97 | 98 | z_values_reverse = copy.deepcopy(z_values) 99 | z_values_reverse.reverse() 100 | for x in x_values: 101 | for y in y_values: 102 | for z in z_values: 103 | print(x, y, z) 104 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 105 | x_data = np.array([[x, y, z]]) 106 | current_x.append(x) 107 | current_y.append(y) 108 | current_z.append(z) 109 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 110 | prediction_list += predictions[0].tolist() 111 | for x in x_values_reverse: 112 | for y in y_values: 113 | for z in z_values: 114 | print(x, y, z) 115 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 116 | x_data = np.array([[x, y, z]]) 117 | current_x.append(x) 118 | current_y.append(y) 119 | current_z.append(z) 120 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 121 | prediction_list += predictions[0].tolist() 122 | for x in x_values: 123 | for y in y_values_reverse: 124 | for z in z_values: 125 | print(x, y, z) 126 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 127 | x_data = np.array([[x, y, z]]) 128 | current_x.append(x) 129 | current_y.append(y) 130 | current_z.append(z) 131 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 132 | prediction_list += predictions[0].tolist() 133 | for x in x_values: 134 | for y in y_values: 135 | for z in z_values_reverse: 136 | # x_data = np.array([[x, y, x ** 2, y ** 2, x * y, np.sin(x), np.sin(y)]]) 137 | x_data = np.array([[x, y, z]]) 138 | current_x.append(x) 139 | current_y.append(y) 140 | current_z.append(z) 141 | predictions = get_genome_predictions(genome=genome, x_data=x_data) 142 | prediction_list += predictions[0].tolist() 143 | 144 | fig = plt.figure() 145 | ax = Axes3D(fig) 146 | ax.scatter(current_x, current_y, current_z, color=create_label_colours(np.array(prediction_list))) 147 | ax.view_init(-140, 30) 148 | plt.show() 149 | 150 |
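The nested loops above call the network once per grid point. Since run_one_pass accepts a whole matrix (the per-point calls above already pass it 2-D arrays), the 2-D case can usually be evaluated in one batched call. A minimal sketch, assuming a genome whose network takes two input features and predicts both classes somewhere on the grid (create_label_colours below requires two distinct labels); plot_decision_boundary_vectorised is a hypothetical helper:

import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary_vectorised(genome, low=-4.0, high=4.0, n=50):
    # Hypothetical batched alternative to the per-point loops above.
    xs, ys = np.meshgrid(np.linspace(low, high, n), np.linspace(low, high, n))
    grid = np.column_stack([xs.ravel(), ys.ravel()])  # shape (n * n, 2)
    predictions = get_genome_predictions(genome=genome, x_data=grid)
    plt.scatter(grid[:, 0], grid[:, 1], color=create_label_colours(labels=np.array(predictions)))
    plt.title('Decision boundary for optimised genome')
    plt.show()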
151 | def create_label_colours(labels): 152 | """ 153 | Function turns the binary classification classes into two separate colours, e.g. 1, 0 => 'green', 'red' 154 | :param labels: numpy array of labels in shape (n, 1) 155 | :return: 156 | """ 157 | main_features = np.unique(labels) 158 | if main_features.shape[0] != 2: 159 | raise ValueError('There can only be two class labels') 160 | try: 161 | if labels.shape[1] != 1: 162 | raise ValueError('Labels must be in shape (num_examples, 1)') 163 | labels_list = labels[:, 0] 164 | except IndexError: 165 | labels_list = labels[:, ] 166 | coloured_labels = ['red' if label == main_features[0] else 'green' for label in labels_list] 167 | return coloured_labels 168 | 169 | 170 | def plot_shm_data(elevation, rotation_angle): 171 | x_data, y_data = get_shm_two_class_data(normalise_x=False) 172 | 173 | x_vals = x_data[:, 0].tolist() 174 | y_vals = x_data[:, 1].tolist() 175 | z_vals = x_data[:, 2].tolist() 176 | 177 | x_min, x_max = min(x_vals), max(x_vals) 178 | y_min, y_max = min(y_vals), max(y_vals) 179 | z_min, z_max = min(z_vals), max(z_vals) 180 | 181 | labels = create_label_colours(labels=y_data) 182 | 183 | fig = plt.figure() 184 | ax = Axes3D(fig) 185 | ax.scatter(x_vals, y_vals, z_vals, color=labels) 186 | ax.view_init(elevation, rotation_angle) 187 | plt.show() 188 | 189 | fig = plt.figure() 190 | ax = Axes3D(fig) 191 | x1_reds = [] 192 | x2_reds = [] 193 | x3_reds = [] 194 | x1_greens = [] 195 | x2_greens = [] 196 | x3_greens = [] 197 | for index in range(len(labels)): 198 | if labels[index] == 'green': 199 | x1_greens.append(x_vals[index]) 200 | x2_greens.append(y_vals[index]) 201 | x3_greens.append(z_vals[index]) 202 | else: 203 | x1_reds.append(x_vals[index]) 204 | x2_reds.append(y_vals[index]) 205 | x3_reds.append(z_vals[index]) 206 | ax.scatter(x1_greens, x2_greens, x3_greens, c='green', label='Undamaged', 207 | ) 208 | ax.scatter(x1_reds, x2_reds, x3_reds, c='red', label='Damaged', 209 | ) 210 | ax.legend(loc='upper right') 211 | ax.view_init(elevation, rotation_angle) 212 | plt.show() 213 | 214 | 215 | def plot_generation_graph(*args, same_axis=None, generation_information, y_label=None, title): 216 | """ 217 | Generic function to plot data 218 | :param title: String for the title 219 | :param y_label: String for the y label 220 | :param same_axis: Defines whether two or more datasets should be plotted on the same y axis 221 | """ 222 | # Plus one because of how the range function works 223 | generations_to_go_through = list(range(1, max(generation_information) + 1)) 224 | 225 | if len(args) > 1: 226 | 227 | # Can't plot more than two items on a 2d plot 228 | assert (len(args) == 2) 229 | assert (same_axis is not None) 230 | if same_axis: 231 | # Need a common y_label 232 | assert (y_label is not None) 233 | 234 | y_data_list = [] 235 | y_labels = [] 236 | for information in args: 237 | information_type = information[0] 238 | information_plot_type = information[1] 239 | if not same_axis: 240 | y_label = information[2] 241 | y_labels.append(y_label) 242 | 243 | y_data = [] 244 | for generation in generations_to_go_through: 245 | y_data.append(generation_information[generation][information_type]) 246 | if information_plot_type == 'line' and same_axis: 247 | plt.plot(generations_to_go_through, y_data) 248 | elif information_plot_type == 'bar' and same_axis: 249 | plt.bar(generations_to_go_through, y_data) 250 | y_data_list.append(y_data) 251 | 252 | if not same_axis: 253 | plt.plot(generations_to_go_through, y_data_list[0], color='r') 254 | plt.ylabel(y_labels[0]) 255 | axes2 = plt.twinx() 256 | axes2.plot(generations_to_go_through, y_data_list[1], color='g') 257 | axes2.set_ylabel(y_labels[1]) 258 | else: 259 | plt.ylabel(y_label) 260 | plt.xticks(generations_to_go_through) 261 | plt.xlabel('Generation') 262 | plt.title(title) 263 | plt.show() 264 | 265 | else: 266 | y_data = [] 267 | information = args[0] 268 | information_type = information[0] 269 | information_plot_type = information[1] 270 | for generation in generations_to_go_through: 271 | y_data.append(generation_information[generation][information_type]) 272 | if information_plot_type == 'line': 273 | plt.plot(generations_to_go_through, y_data) 274 | elif information_plot_type == 'bar': 275 | plt.bar(generations_to_go_through, y_data) 276 | plt.xticks(generations_to_go_through) 277 | plt.xlabel('Generation') 278 | plt.ylabel(y_label) 279 | plt.title(title) 280 | plt.show() 281 | 282 |
283 | def visualise_generation_tracker(filepath_to_genome): 284 | infile = open(filepath_to_genome, 'rb') 285 | generation_tracker_instance = pickle.load(infile) 286 | generation_information_dict = generation_tracker_instance.generation_information 287 | 288 | # If more than one information type is specified, MUST define the same_axis variable 289 | plot_generation_graph(('best_all_time_genome_fitness', 'line'), 290 | ('average_population_fitness', 'line'), 291 | same_axis=True, 292 | y_label='Fitness value', 293 | generation_information=generation_information_dict, 294 | title='Best All Time Genome Fitness through generations') 295 | 296 | plot_generation_graph(('best_all_time_genome_accuracy', 'line', 'Best Genome Accuracy (%)'), 297 | ('best_all_time_genome_f1_score', 'line', 'Best Genome F1 score'), 298 | same_axis=False, 299 | generation_information=generation_information_dict, 300 | title='Best All Time Genome Accuracy and F1 Score through generations') 301 | 302 | plot_generation_graph(('best_all_time_genome_accuracy', 'line'), 303 | generation_information=generation_information_dict, y_label='Best Genome Accuracy (%)', 304 | title='Best All Time Genome Accuracy through generations') 305 | infile.close() 306 | 307 | 308 | def plot_population_complexity(filepath_to_neat_instance, font_size): 309 | infile = open(filepath_to_neat_instance, 'rb') 310 | neat_instance = pickle.load(infile) 311 | x_data = [] 312 | connection_count = [] 313 | node_count = [] 314 | counter = 0 315 | for population_member in neat_instance.population.values(): 316 | counter += 1 317 | x_data.append(counter) 318 | node_count.append(len(population_member.nodes)) 319 | connection_count.append(len(population_member.connections)) 320 | 321 | test = [11 for i in range(len(x_data))] # placeholder reference line 322 | 323 | connection_count.sort() # NOTE: after sorting, the x axis no longer identifies individuals 324 | 325 | plt.bar(x_data, connection_count) 326 | plt.xticks(x_data) 327 | if font_size: 328 | plt.xlabel('Individual', fontsize=font_size) 329 | plt.ylabel('Test label', fontsize=font_size) 330 | plt.title('Test title', fontsize=font_size) 331 | plt.xticks(fontsize=font_size) 332 | plt.yticks(fontsize=font_size) 333 | else: 334 | plt.xlabel('Individual') 335 | plt.ylabel('Test label') 336 | plt.title('Test title') 337 | axes2 = plt.twinx() 338 | axes2.plot(x_data, test, color='r') 339 | # axes2.plot(x_data, node_count, color='r') 340 | 341 | if font_size: 342 | plt.xlabel('Individual', fontsize=font_size) 343 | plt.ylabel('Test label', fontsize=font_size) 344 | plt.title('Test title', fontsize=font_size) 345 | plt.xticks(fontsize=font_size) 346 | plt.yticks(fontsize=font_size) 347 | else: 348 | plt.xlabel('Individual') 349 | plt.ylabel('Test label') 350 | plt.title('Test title') 351 | plt.show() 352 | 353 | test = [7 for i in range(len(x_data))] 354 | 355 | plt.bar(x_data, node_count) 356 | plt.xticks(x_data) 357 | plt.xlabel('Individual') 358 | plt.ylabel('Test label') 359 | plt.title('Test title') 360 | plt.plot(x_data, test, color='r') 361 | 362 | plt.xlabel('Individual') 363 | plt.ylabel('Test label') 364 | plt.title('Test title') 365 | plt.show() 366 | 367 | best_genome = neat_instance.best_all_time_genome 368 | 369 | print(len(best_genome.connections)) 370 | print(len(best_genome.nodes)) 371 | 372 | infile.close() 373 | 374 |
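Several helpers above repeat the open/pickle.load/close pattern by hand; a with block is the idiomatic form and closes the file even if unpickling raises. A minimal sketch; load_pickle is a hypothetical helper, not part of the repo:

import pickle

def load_pickle(filepath):
    # Hypothetical context-manager version of the pattern used by
    # initialise_genome, visualise_generation_tracker and
    # plot_population_complexity above.
    with open(filepath, 'rb') as infile:
        return pickle.load(infile)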
375 | def create_confusion_matrix(): 376 | import pandas as pd 377 | import seaborn as sn 378 | y_predicted = np.random.randint(low=0, high=2, size=(300, 1)) # randint's high bound is exclusive 379 | y_actual = np.random.randint(low=0, high=2, size=(300, 1)) 380 | 381 | y_predicted = y_predicted[:, 0] 382 | y_actual = y_actual[:, 0] 383 | data = {'y_Predicted': y_predicted, 384 | 'y_Actual': y_actual, 385 | } 386 | 387 | df = pd.DataFrame(data, columns=['y_Actual', 'y_Predicted']) 388 | confusion_matrix = pd.crosstab(df['y_Actual'], df['y_Predicted'], rownames=['Actual'], colnames=['Predicted']) 389 | 390 | sn.heatmap(confusion_matrix, annot=True) 391 | plt.show() 392 | 393 |
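(The deprecated np.random.random_integers calls above have been replaced with np.random.randint.) Since the project already depends on sklearn.metrics, the same heatmap can also be produced directly from two label vectors without pandas.crosstab. A sketch, assuming binary 0/1 labels; plot_confusion_matrix_sklearn is a hypothetical helper:

import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.metrics

def plot_confusion_matrix_sklearn(y_actual, y_predicted):
    # Hypothetical alternative to create_confusion_matrix above.
    matrix = sklearn.metrics.confusion_matrix(y_actual, y_predicted)
    sns.heatmap(matrix, annot=True, fmt='d')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()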
394 | def main(): 395 | # DATA 396 | x_data, y_data = create_data(n_generated=200, add_noise=True) 397 | x_circle, y_circle = get_circle_data() 398 | x_spiral, y_spiral = get_spiral_data() 399 | 400 | # X1, X2 for all datasets 401 | feature_1_xor = x_data[:, 0] 402 | feature_2_xor = x_data[:, 1] 403 | feature_1_circle = x_circle[:, 0] 404 | feature_2_circle = x_circle[:, 1] 405 | feature_1_spiral = x_spiral[:, 0] 406 | feature_2_spiral = x_spiral[:, 1] 407 | 408 | plot_data = False 409 | show_decision_boundary = False 410 | visualise_generation = False 411 | plot_confusion_matrix = False 412 | visualise_population_complexity = False 413 | plot_shm_data_figure = True 414 | 415 | font_size = 20 416 | # PLOT DATA 417 | if plot_data: 418 | # TODO: Add legends 419 | plt.scatter(feature_1_xor, feature_2_xor, color=create_label_colours(labels=y_data)) 420 | plt.title('XOR Data', fontsize=font_size) 421 | plt.xlabel('X1', fontsize=font_size) 422 | plt.ylabel('X2', fontsize=font_size) 423 | plt.tick_params(axis='both', which='major', labelsize=10) 424 | plt.xticks(fontsize=font_size) 425 | plt.yticks(fontsize=font_size) 426 | plt.show() 427 | 428 | fig, ax = plt.subplots() 429 | label_colours = create_label_colours(labels=y_data) 430 | x1_reds = [] 431 | x2_reds = [] 432 | x1_greens = [] 433 | x2_greens = [] 434 | for index in range(len(label_colours)): 435 | if label_colours[index] == 'green': 436 | x1_greens.append(feature_1_xor[index]) 437 | x2_greens.append(feature_2_xor[index]) 438 | else: 439 | x1_reds.append(feature_1_xor[index]) 440 | x2_reds.append(feature_2_xor[index]) 441 | 442 | ax.scatter(x1_greens, x2_greens, c='green', label='Class 1', 443 | alpha=1, edgecolors='none') 444 | ax.scatter(x1_reds, x2_reds, c='red', label='Class 0', 445 | alpha=1, edgecolors='none') 446 | ax.legend(loc='upper right') 447 | plt.xlabel('X1') 448 | plt.ylabel('X2') 449 | plt.show() 450 | 451 | # plt.scatter(feature_1_circle, feature_2_circle, color=create_label_colours(labels=y_circle)) 452 | # plt.title('Circle Data') 453 | # plt.xlabel('X1') 454 | # plt.ylabel('X2') 455 | # plt.show() 456 | # plt.scatter(feature_1_spiral, feature_2_spiral, color=create_label_colours(labels=y_spiral)) 457 | # plt.title('Spiral Data') 458 | # plt.xlabel('X1') 459 | # plt.ylabel('X2') 460 | # plt.show() 461 | 462 | if show_decision_boundary: 463 | # Test genome accuracy 464 | genome = initialise_genome(genome_pickle_filepath='pickles/best_genome_pickle_shm_two_class_618056') 465 | plot_decision_boundary(genome=genome, data_being_used='shm_two_class') 466 | 467 | if visualise_generation: 468 | visualise_generation_tracker(filepath_to_genome='algorithm_runs/xor_small_noise/run_1/generation_tracker') 469 | if visualise_population_complexity: 470 | plot_population_complexity(filepath_to_neat_instance='algorithm_runs/xor_small_noise/run_1/NEAT_instance', 471 | font_size=None) 472 | 473 | if plot_confusion_matrix: 474 | create_confusion_matrix() 475 | if plot_shm_data_figure: 476 | plot_shm_data(rotation_angle=30, elevation=-160) 477 | # 478 | # plt.figure() 479 | # N = 5 480 | # menMeans = (20, 35, 30, 35, 27) 481 | # menStd = (2, 3, 4, 1, 2) 482 | # width = 0.35 # the width of the bars 483 | # womenMeans = (25, 32, 34, 20, 25) 484 | # womenStd = (3, 5, 2, 3, 3) 485 | # ind = np.arange(N) 486 | # plt.ylim(0.0, 65.0) 487 | # plt.bar(ind, menMeans, width, color='r', yerr=menStd, label='Men means') 488 | # plt.bar(ind + width, womenMeans, width, color='y', yerr=womenStd, label='Women means') 489 | # # plt.plot(ind + width, womenMeans, color='k', label='Sine') 490 | # plt.ylabel('Bar plot') 491 | # 492 | # x = np.linspace(0, N) 493 | # y = np.sin(x) 494 | # axes2 = plt.twinx() 495 | # # axes2.plot(ind+width, womenMeans, color='k', label='Sine') 496 | # axes2.plot(x, y, color='k', label='Sine') 497 | # # axes2.set_ylim(-1, 1) 498 | # # axes2.set_ylabel('Line plot') 499 | # 500 | # plt.show() 501 | 502 | 503 | if __name__ == "__main__": 504 | main() 505 | -------------------------------------------------------------------------------- /f_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | import sklearn.metrics 4 | 5 | 6 | def calculate_f_score(real_data, prediction): 7 | predicted_num_1 = prediction.count(1) 8 | true_positives = sum(1 for real, pred in zip(real_data, prediction) if real == 1 and pred == 1) 9 | real_num_1 = real_data.count(1) 10 | 11 | precision = true_positives / predicted_num_1 12 | recall = true_positives / real_num_1 13 | 14 | return 2 * ((precision * recall) / (precision + recall)) 15 | 16 | 17 | def main(): 18 | real_data = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0] 19 | prediction = [1, 0, 1, 1, 1, 0, 1, 0, 0, 0] 20 | 21 | f1_score_2 = sklearn.metrics.f1_score(real_data, prediction) 22 | f1_score = calculate_f_score(real_data, prediction) 23 | 24 | print(f1_score_2) 25 | print(f1_score) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 |
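(calculate_f_score above originally counted predicted positives twice; it now counts true positives, so its output matches sklearn.metrics.f1_score in main.) It still divides by zero when nothing is predicted positive or no positives exist. A guarded sketch; calculate_f_score_safe is a hypothetical variant:

def calculate_f_score_safe(real_data, prediction):
    # Hypothetical zero-division-safe variant of calculate_f_score above.
    true_positives = sum(1 for real, pred in zip(real_data, prediction)
                         if real == 1 and pred == 1)
    predicted_positives = prediction.count(1)
    actual_positives = real_data.count(1)
    if true_positives == 0:
        # Covers predicted_positives == 0 and actual_positives == 0 as well.
        return 0.0
    precision = true_positives / predicted_positives
    recall = true_positives / actual_positives
    return 2 * precision * recall / (precision + recall)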
-------------------------------------------------------------------------------- /gene.py: -------------------------------------------------------------------------------- 1 | class ConnectionGene: 2 | 3 | def __init__(self, input_node, output_node, innovation_number=None, enabled=True, weight=None, keep_constant_weight=False): 4 | self.innovation_number = innovation_number 5 | self.input_node = input_node 6 | self._output_node = output_node 7 | self.enabled = enabled 8 | self.weight = weight 9 | # This attribute is used in ghost nodes 10 | self.keep_constant_weight = keep_constant_weight 11 | 12 | @property 13 | def output_node(self): 14 | return self._output_node 15 | 16 | @output_node.setter 17 | def output_node(self, value): 18 | # You can't have a node which loops back to itself 19 | assert (value != self.input_node) 20 | self._output_node = value 21 | 22 | def __str__(self): 23 | return 'Input: {}, Output: {}, Enabled: {}'.format(self.input_node, self._output_node, self.enabled) 24 | 25 | def __repr__(self): 26 | return '{}-->{}'.format(self.input_node, self._output_node) 27 | 28 | 29 | class NodeGene: 30 | 31 | def __init__(self, node_type, node_id, bias=None): 32 | # Specifies the type of node 33 | self._node_type = node_type 34 | # This is to keep track of which node is which 35 | self.node_id = node_id 36 | self.bias = bias 37 | 38 | @property 39 | def node_type(self): 40 | return self._node_type 41 | 42 | @node_type.setter 43 | def node_type(self, value): 44 | # There are only 3 possible types for a node gene 45 | assert (value in {'source', 'hidden', 'output'}) 46 | self._node_type = value 47 | 48 | def __str__(self): 49 | return 'This is node number {} which is a {} node with a bias of {}'.format(self.node_id, self._node_type, 50 | self.bias) 51 | 52 | def __repr__(self): 53 | return '{}:{}'.format(self.node_id, self._node_type) 54 | 55 | # def __add__(self, other): 56 | # return self._node_number + other 57 |
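A quick illustration of how the two gene classes above compose: nodes carry ids, types and biases, while connections carry innovation numbers and weights. All concrete ids and weights below are made-up example values:

from gene import NodeGene, ConnectionGene

# Minimal sketch (hypothetical values) of the gene lists a genome is built from:
# two source nodes feeding one output node.
nodes = [NodeGene(node_type='source', node_id=1),
         NodeGene(node_type='source', node_id=2),
         NodeGene(node_type='output', node_id=3, bias=0.5)]
connections = [ConnectionGene(input_node=1, output_node=3, innovation_number=1, weight=0.7),
               ConnectionGene(input_node=2, output_node=3, innovation_number=2, weight=-0.3)]
print(connections[0])  # Input: 1, Output: 3, Enabled: True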
-------------------------------------------------------------------------------- /generation_statistics.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import collections 4 | import os 5 | 6 | 7 | class GenerationStatistics: 8 | 9 | def __init__(self): 10 | self.generation_information = {} 11 | self.mean_compatibility_distance = None 12 | self.std_dev_compatibility_distance = None 13 | self.best_all_time_genome_fitness = None 14 | self.average_population_fitness = None 15 | self.num_species = None 16 | self.mean_number_connections_overall = None 17 | self.mean_number_connections_enabled = None 18 | self.population_size = None 19 | self.mean_number_nodes_overall = None 20 | self.mean_number_nodes_enabled = None 21 | self.species_execute_time = None 22 | self.reproduce_execute_time = None 23 | self.evaluate_execute_time = None 24 | self.num_generation_add_node = None 25 | self.num_generation_delete_node = None 26 | self.num_generation_add_connection = None 27 | self.num_generation_delete_connection = None 28 | self.num_generation_weight_mutations = None 29 | self.perturbation_values_max = None 30 | self.perturbation_values_min = None 31 | self.perturbation_values_list = None 32 | self.num_disjoint_list = None 33 | self.num_excess_list = None 34 | self.weight_diff_list = None 35 | self.avg_num_disjoint = None 36 | self.avg_num_excess = None 37 | self.avg_weight_diff = None 38 | self.best_all_time_genome_f1_score = None 39 | self.best_all_time_genome_accuracy = None 40 | 41 | def update_generation_information(self, generation): 42 | 43 | # Update min and max values of perturbation to weights 44 | self.perturbation_values_max = max(self.perturbation_values_list) 45 | self.perturbation_values_min = min(self.perturbation_values_list) 46 | self.avg_num_disjoint = np.mean(self.num_disjoint_list) 47 | self.avg_num_excess = np.mean(self.num_excess_list) 48 | self.avg_weight_diff = np.mean(self.weight_diff_list) 49 | 50 | information = {} 51 | for info_type, info_value in self.__dict__.items(): 52 | if isinstance(info_value, float) or isinstance(info_value, np.float64): 53 | information[info_type] = round(info_value, 6) 54 | else: 55 | information[info_type] = info_value 56 | 57 | self.generation_information[generation] = information 58 | 59 | def reset_tracker_attributes(self): 60 | """ 61 | Reset the number of mutations which have occurred for the current generation. 62 | :return: 63 | """ 64 | self.num_generation_add_connection = 0 65 | self.num_generation_add_node = 0 66 | self.num_generation_delete_connection = 0 67 | self.num_generation_delete_node = 0 68 | self.num_generation_weight_mutations = 0 69 | self.perturbation_values_list = [] 70 | self.num_excess_list = [] 71 | self.num_disjoint_list = [] 72 | self.weight_diff_list = [] 73 | 74 | def plot_graphs(self, current_gen, save_plots=False, file_path=None): 75 | 76 | if (save_plots and not file_path) or (file_path and not save_plots): 77 | raise Exception('save_plots and file_path must be specified together') 78 | 79 | important_information_keys = { 80 | 'num_species', 'num_generation_add_node', 'num_generation_delete_node', 'num_generation_add_connection', 81 | 'num_generation_delete_connection', 'num_generation_weight_mutations', 'average_population_fitness', 82 | 'best_all_time_genome_fitness', 'mean_number_connections_enabled', 'mean_number_nodes_enabled', 83 | 'mean_compatibility_distance', 'avg_num_disjoint', 'avg_num_excess', 'avg_weight_diff', 84 | 'mean_number_connections_overall', 'best_all_time_genome_f1_score', 'best_all_time_genome_accuracy' 85 | } 86 | 87 | # Plot the tracked information to a graph every certain number of generations 88 | # for information_type, information in self.generation_information[current_gen].items(): 89 | for information_type in important_information_keys: 90 | # Don't need to print the dictionary 91 | if information_type != 'generation_information': 92 | # print(information_type, ':', ' {}'.format(information)) 93 | # if current_gen % generation_interval_for_graph == 0 and current_gen != 1: 94 | generations_to_go_through = list(range(1, current_gen + 1)) 95 | y_data = [] 96 | for generation in generations_to_go_through: 97 | y_data.append(self.generation_information[generation][information_type]) 98 | 99 | plt.plot(generations_to_go_through, y_data) 100 | plt.title(information_type) 101 | if save_plots: 102 | graphs_filepath = '{}/graphs'.format(file_path) 103 | if not os.path.exists(graphs_filepath): 104 | # Make the directory before saving graphs 105 | os.makedirs(graphs_filepath) 106 | plt.savefig('{}/{}_generation_{}.png'.format(graphs_filepath, information_type, current_gen)) 107 | plt.show() 108 | 109 | def print_generation_information(self, generation_interval_for_graph, plot_graphs_every_gen): 110 | current_gen = max(self.generation_information.keys()) 111 | print('**************************** Generation {} *******************************'.format(current_gen)) 112 | 113 | important_information = [ 114 | ('Number of Species', self.generation_information[current_gen]['num_species']), 115 | ('Added Node Mutations', self.generation_information[current_gen]['num_generation_add_node']), 116 | ('Delete Node Mutations', self.generation_information[current_gen]['num_generation_delete_node']), 117 | ('Add Connection Mutations', self.generation_information[current_gen]['num_generation_add_connection']), 118 | ('Delete Connection Mutations', 119 | self.generation_information[current_gen]['num_generation_delete_connection']), 120 | ('Weight Mutations', self.generation_information[current_gen]['num_generation_weight_mutations']), 121 | ('Average Fitness', self.generation_information[current_gen]['average_population_fitness']), 122 | ('Best All Time Genome Fitness', self.generation_information[current_gen]['best_all_time_genome_fitness']), 123 | ( 124 | 'Best All Time Genome f1 score', 125 | self.generation_information[current_gen]['best_all_time_genome_f1_score']),
126 | ( 127 | 'Best All Time Genome Accuracy Percent', 128 | self.generation_information[current_gen]['best_all_time_genome_accuracy']), 129 | 130 | ('Average Number of Connections Per Genome', 131 | self.generation_information[current_gen]['mean_number_connections_enabled']), 132 | ('Average Number of Nodes Per Genome', 133 | self.generation_information[current_gen]['mean_number_nodes_enabled']), 134 | ('Average Compatibility Distance', self.generation_information[current_gen]['mean_compatibility_distance']), 135 | ('Perturbation Max Value', self.generation_information[current_gen]['perturbation_values_max']), 136 | ('Perturbation Min Value', self.generation_information[current_gen]['perturbation_values_min']), 137 | ('Average Number of Disjoint Genes', self.generation_information[current_gen]['avg_num_disjoint']), 138 | ('Average Number of Excess Genes', self.generation_information[current_gen]['avg_num_excess']), 139 | ('Average Weight Difference', self.generation_information[current_gen]['avg_weight_diff']), 140 | ('Average Number of Connections', 141 | self.generation_information[current_gen]['mean_number_connections_overall']), 142 | # ('Average Number of Nodes', self.generation_information[current_gen]['avg_weight_diff']), 143 | ] 144 | 145 | # Make it an OrderedDict to keep the order above. 146 | important_information = collections.OrderedDict(important_information) 147 | 148 | # Print the information 149 | for info_type, info_value in important_information.items(): 150 | print('{}:{}'.format(info_type, info_value)) 151 | print('\n') 152 | 153 | if current_gen % generation_interval_for_graph == 0 and current_gen != 1 and plot_graphs_every_gen: 154 | self.plot_graphs(current_gen=current_gen) 155 | -------------------------------------------------------------------------------- /graph_algorithm.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | 4 | class Graph: 5 | """ 6 | Class used to find the number of paths between two nodes in a graph 7 | """ 8 | 9 | def __init__(self): 10 | # Keeps track of the vertices in the graph 11 | self.vertex_list = [] 12 | # Keeps track of the connections for each node 13 | self.connections = {} 14 | self.max_layer_for_node = {} 15 | 16 | def count_path_utils(self, current_node, destination, visited, path_count, path, overall_paths, layer_number): 17 | """ 18 | Checks if we're at the destination and adds one to the count if we are; otherwise checks all the neighbours of the current node 19 | :param layer_number: Which layer number we're on 20 | :param overall_paths: List containing all the possible paths 21 | :param path: The current path (list) 22 | :param current_node: The node we're currently at 23 | :param destination: The end of the expected path 24 | :param visited: A dict which tracks whether each node has been visited 25 | :param path_count: Counts the number of paths from the designated start node to the end node 26 | :return: 27 | """ 28 | # We've visited the current node since we're at it 29 | visited[current_node] = True 30 | path.append(current_node) 31 | layer_number.append(1) 32 | if current_node not in self.max_layer_for_node: 33 | self.max_layer_for_node[current_node] = sum(layer_number) 34 | else: 35 | if sum(layer_number) > self.max_layer_for_node[current_node]: 36 | self.max_layer_for_node[current_node] = sum(layer_number) 37 | 38 | # If the current node is the destination then we can increase the path_count number 39 | if current_node == destination: 40 | path_count.append(1)
41 | overall_paths.append(copy.deepcopy(path)) 42 | else: 43 | # Go through all the neighbours looking for the destination 44 | if current_node in self.connections: 45 | for neighbour in self.connections[current_node]: 46 | # If we haven't visited the neighbour, look through the neighbour for the destination 47 | if neighbour in visited and not visited[neighbour]: 48 | # Call the function recursively 49 | self.count_path_utils(neighbour, destination, visited, path_count, path, overall_paths, 50 | layer_number) 51 | 52 | layer_number.pop() 53 | # Remove current vertex from path[] and mark it as unvisited 54 | path.pop() 55 | # Once we've checked all the neighbours we can set the visited flag to false again 56 | visited[current_node] = False 57 | 58 | def add_edge(self, start_node, end_node): 59 | # TODO: Check if the multiclass version of this is the better way of adding an edge 60 | connection_dict = self.connections.get(start_node) 61 | if connection_dict: 62 | connection_dict.append(end_node) 63 | else: 64 | self.connections[start_node] = [end_node] 65 | self.vertex_list.append(start_node) 66 | self.vertex_list.append(end_node) 67 | 68 | def count_paths(self, start_node, end_node, return_paths=False): 69 | """ 70 | Count paths from start_node to end_node 71 | :param start_node: Where the path starts 72 | :param end_node: Where the end of the path is 73 | :return: 74 | """ 75 | 76 | # Keeps track of whether each node has been visited 77 | visited = {node: False for node in self.vertex_list} 78 | paths = [] 79 | overall_paths = [] 80 | path_count = [] 81 | layer_number = [] 82 | self.count_path_utils(start_node, end_node, visited, path_count, paths, overall_paths, layer_number) 83 | if return_paths: 84 | return sum(path_count), overall_paths 85 | else: 86 | return sum(path_count) 87 | 88 | 89 | def main(): 90 | g = Graph() 91 | g.add_edge(2, 3) 92 | g.add_edge(3, 5) 93 | g.add_edge(1, 3) 94 | g.add_edge(1, 5) 95 | 96 | print(g.count_paths(2, 5, True)) 97 | print(g.count_paths(1, 5, True)) 98 | 99 | print(g.max_layer_for_node) 100 | 101 | 102 | if __name__ == "__main__": 103 | main() 104 |
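While enumerating paths, max_layer_for_node records the longest path length (in nodes) from the start node to every node reached, which is what a NEAT-style network needs to place nodes into feed-forward layers. A small demonstration on a made-up DAG; the dict values shown are what the traversal above produces:

from graph_algorithm import Graph

# Sketch: derive layer assignments from max_layer_for_node after counting
# paths from a source node (node 1) to the output (node 5).
g = Graph()
g.add_edge(1, 3)
g.add_edge(3, 5)
g.add_edge(1, 5)
g.count_paths(start_node=1, end_node=5)
print(g.max_layer_for_node)  # {1: 1, 3: 2, 5: 3} -- node 5 sits on the deepest layer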
-------------------------------------------------------------------------------- /graph_algorithm_mutliclass.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | 4 | class GraphMultiClass: 5 | """ 6 | Class used to find the number of paths between two nodes in a graph 7 | """ 8 | 9 | def __init__(self): 10 | # Keeps track of the vertices in the graph 11 | self.vertex_list = [] 12 | # Keeps track of the connections for each node 13 | self.connections = {} 14 | self.max_layer_for_node = {} 15 | 16 | def count_path_utils(self, current_node, destination, visited, path_count, path, overall_paths, layer_number): 17 | """ 18 | Checks if we're at the destination and adds one to the count if we are; otherwise checks all the neighbours of the current node 19 | :param layer_number: Which layer number we're on 20 | :param overall_paths: List containing all the possible paths 21 | :param path: The current path (list) 22 | :param current_node: The node we're currently at 23 | :param destination: The end of the expected path 24 | :param visited: A dict which tracks whether each node has been visited 25 | :param path_count: Counts the number of paths from the designated start node to the end node 26 | :return: 27 | """ 28 | # We've visited the current node since we're at it 29 | visited[current_node] = True 30 | path.append(current_node) 31 | layer_number.append(1) 32 | if current_node not in self.max_layer_for_node: 33 | self.max_layer_for_node[current_node] = sum(layer_number) 34 | else: 35 | if sum(layer_number) > self.max_layer_for_node[current_node]: 36 | self.max_layer_for_node[current_node] = sum(layer_number) 37 | 38 | # If the current node is the destination then we can increase the path_count number 39 | if current_node == destination: 40 | path_count.append(1) 41 | overall_paths.append(copy.deepcopy(path)) 42 | else: 43 | # Go through all the neighbours looking for the destination 44 | if current_node in self.connections: 45 | for neighbour in self.connections[current_node]: 46 | # If we haven't visited the neighbour, look through the neighbour for the destination 47 | if neighbour in visited and not visited[neighbour]: 48 | # Call the function recursively 49 | self.count_path_utils(neighbour, destination, visited, path_count, path, overall_paths, 50 | layer_number) 51 | 52 | layer_number.pop() 53 | # Remove current vertex from path[] and mark it as unvisited 54 | path.pop() 55 | # Once we've checked all the neighbours we can set the visited flag to false again 56 | visited[current_node] = False 57 | 58 | def add_edge(self, start_node, end_node): 59 | connection_dict = self.connections.get(start_node) 60 | if connection_dict: 61 | connection_dict.append(end_node) 62 | else: 63 | self.connections[start_node] = [end_node] 64 | self.vertex_list.append(start_node) 65 | self.vertex_list.append(end_node) 66 | # Remove duplicates 67 | self.vertex_list = list(set(self.vertex_list)) 68 | 69 | def count_paths(self, start_node, end_node, return_paths=False): 70 | """ 71 | Count paths from start_node to end_node 72 | :param start_node: Where the path starts 73 | :param end_node: Where the end of the path is 74 | :return: 75 | """ 76 | 77 | # Keeps track of whether each node has been visited 78 | visited = {node: False for node in self.vertex_list} 79 | paths = [] 80 | overall_paths = [] 81 | path_count = [] 82 | layer_number = [] 83 | self.count_path_utils(start_node, end_node, visited, path_count, paths, overall_paths, layer_number) 84 | if return_paths: 85 | return sum(path_count), overall_paths 86 | else: 87 | return sum(path_count) 88 | 89 | 90 | def main(): 91 | g = GraphMultiClass() 92 | g.add_edge(2, 3) 93 | g.add_edge(3, 5) 94 | g.add_edge(1, 3) 95 | g.add_edge(1, 5) 96 | 97 | print(g.count_paths(2, 5, True)) 98 | print(g.count_paths(1, 5, True)) 99 | 100 | print(g.max_layer_for_node) 101 | 102 | 103 | if __name__ == "__main__": 104 | main() 105 | -------------------------------------------------------------------------------- /graphs/average_population_fitness_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/average_population_fitness_generation_5.jpg -------------------------------------------------------------------------------- /graphs/avg_num_disjoint_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/avg_num_disjoint_generation_5.jpg -------------------------------------------------------------------------------- /graphs/avg_num_excess_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/avg_num_excess_generation_5.jpg
-------------------------------------------------------------------------------- /graphs/avg_weight_diff_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/avg_weight_diff_generation_5.jpg -------------------------------------------------------------------------------- /graphs/best_all_time_genome_f1_score_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/best_all_time_genome_f1_score_generation_5.jpg -------------------------------------------------------------------------------- /graphs/best_all_time_genome_fitness_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/best_all_time_genome_fitness_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_compatibility_distance_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_compatibility_distance_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_number_connections_enabled_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_number_connections_enabled_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_number_connections_overall_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_number_connections_overall_generation_5.jpg -------------------------------------------------------------------------------- /graphs/mean_number_nodes_enabled_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/mean_number_nodes_enabled_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_add_connection_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_add_connection_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_add_node_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_add_node_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_delete_connection_generation_5.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_delete_connection_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_delete_node_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_delete_node_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_generation_weight_mutations_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_generation_weight_mutations_generation_5.jpg -------------------------------------------------------------------------------- /graphs/num_species_generation_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/graphs/num_species_generation_5.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from NEAT import NEAT 2 | import time 3 | from config import Config 4 | from data_storage import get_circle_data, get_spiral_data 5 | from neural_network import create_data 6 | import numpy as np 7 | 8 | from read_mat_files import get_shm_two_class_data 9 | 10 | 11 | def main(): 12 | # Keep a consistent seed to make debugging easier TODO: Check if this works across files 13 | np.random.seed(1) 14 | 15 | algorithm_options = {0: 'xor_full', 1: 'xor_small_noise', 2: 'circle_data', 3: 'shm_two_class', 4: 'spiral_data'} 16 | # Choose which algorithm is running using keys 17 | algorithm_running = algorithm_options[3] 18 | 19 | if algorithm_running == algorithm_options[0]: 20 | num_data_to_generate = 6250 21 | 22 | # Create data 23 | x_data, y_data = create_data(n_generated=num_data_to_generate, add_noise=False) 24 | elif algorithm_running == algorithm_options[1]: 25 | num_data_to_generate = 300 26 | 27 | # Create data 28 | x_data, y_data = create_data(n_generated=num_data_to_generate, add_noise=True) 29 | elif algorithm_running == algorithm_options[2]: 30 | x_data, y_data = get_circle_data() 31 | x_data = x_data[:, 0:2] 32 | y_data.shape = (len(x_data), 1) 33 | for row in range(y_data.shape[0]): 34 | if y_data[row, 0] == -1: 35 | y_data[row, 0] = 0 36 | num_data_to_generate = len(x_data) 37 | elif algorithm_running == algorithm_options[3]: 38 | x_data, y_data = get_shm_two_class_data() 39 | num_data_to_generate = len(x_data) 40 | elif algorithm_running == algorithm_options[4]: 41 | x_data, y_data = get_spiral_data() 42 | x_data = x_data[:, 0:2] 43 | y_data.shape = (len(x_data), 1) 44 | for row in range(y_data.shape[0]): 45 | if y_data[row, 0] == -1: 46 | y_data[row, 0] = 0 47 | num_data_to_generate = len(x_data) 48 | 49 | # Training data 50 | training_percentage = 0.8 51 | training_upper_limit_index = round(num_data_to_generate * training_percentage) 52 | x_training = x_data[0:training_upper_limit_index] 53 | y_training = y_data[0:training_upper_limit_index] 54 | 55 | # Test data 56 | x_test = x_data[training_upper_limit_index:] 57 | y_test = y_data[training_upper_limit_index:] 58 | 59 | neat =
NEAT(x_training_data=x_training, y_training_data=y_training, x_test_data=x_test, y_test_data=y_test, 60 | config=Config, fitness_threshold=-0.000001, f1_score_threshold=0.95, algorithm_running=algorithm_running) 61 | 62 | start_evaluate_time = time.time() 63 | neat.run(max_num_generations=10000, use_backprop=True, print_generation_information=True, 64 | show_population_weight_distribution=False) 65 | end_evaluate_time = time.time() 66 | total_time = end_evaluate_time - start_evaluate_time 67 | print(total_time) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /main_multiclass.py: -------------------------------------------------------------------------------- 1 | from NEAT_multiclass import NEATMultiClass 2 | from config_multiclass import ConfigMultiClass 3 | from data_storage import get_circle_data 4 | from neural_network import create_data 5 | import numpy as np 6 | from read_mat_files import get_shm_two_class_data, get_shm_multi_class_data 7 | 8 | 9 | def main(): 10 | np.random.seed(1) 11 | 12 | # Choose which algorithm is running using keys 13 | algorithm_options = {0: 'xor_full', 1: 'shm_multi_class'} 14 | algorithm_running = algorithm_options[1] 15 | 16 | if algorithm_running == algorithm_options[0]: 17 | num_data_to_generate = 6250 18 | 19 | # Create data 20 | x_data, y_data = create_data(n_generated=num_data_to_generate, add_noise=False, use_one_hot=True) 21 | elif algorithm_running == algorithm_options[1]: 22 | # Create data 23 | x_data, y_data = get_shm_multi_class_data() 24 | num_data_to_generate = len(x_data) 25 | 26 | # Training data 27 | training_percentage = 0.8 28 | training_upper_limit_index = round(num_data_to_generate * training_percentage) 29 | x_training = x_data[0:training_upper_limit_index] 30 | y_training = y_data[0:training_upper_limit_index] 31 | 32 | # Test data 33 | x_test = x_data[training_upper_limit_index:] 34 | y_test = y_data[training_upper_limit_index:] 35 | 36 | f1_score_threshold = 0.95 if algorithm_running != algorithm_options[1] else None 37 | fitness_threshold = -0.1 if algorithm_running != algorithm_options[1] else None 38 | 39 | neat = NEATMultiClass(x_training_data=x_training, y_training_data=y_training, x_test_data=x_test, 40 | y_test_data=y_test, 41 | config=ConfigMultiClass, fitness_threshold=fitness_threshold, 42 | f1_score_threshold=f1_score_threshold, 43 | algorithm_running=algorithm_running) 44 | 45 | neat.run(max_num_generations=250, use_backprop=True, print_generation_information=True, 46 | show_population_weight_distribution=False) 47 | 48 | 49 | if __name__ == "__main__": 50 | main() 51 | -------------------------------------------------------------------------------- /multi_processing.py: -------------------------------------------------------------------------------- 1 | import time 2 | import multiprocessing 3 | 4 | 5 | def calc_square(numbers, result): 6 | print('Calculating square numbers') 7 | for number in numbers: 8 | time.sleep(0.2) 9 | print('square', number * number) 10 | 11 | 12 | def calc_cube(numbers, result, value, queue): 13 | print('Calculate cube of numbers') 14 | value.value = 5 15 | for index, number in enumerate(numbers): 16 | time.sleep(0.2) 17 | queue.put(number * number * number) 18 | result[index] = number * number * number 19 | 20 | 21 | def main(): 22 | # SHARED MEMORY CONCEPT 23 | arr = [2, 3, 8, 9] 24 | start_time = time.time() 25 | # We have to create a shared memory variable. 
Specify the data type and size 26 | result = multiprocessing.Array('i', 4) 27 | value = multiprocessing.Value('i', 0) 28 | queue = multiprocessing.Queue() 29 | # p1 = multiprocessing.Process(target=calc_square, args=(arr,)) 30 | p2 = multiprocessing.Process(target=calc_cube, args=(arr, result, value, queue)) 31 | 32 | # p1.start() 33 | p2.start() 34 | 35 | # p1.join() 36 | p2.join() 37 | 38 | while not queue.empty(): 39 | print('Getting') 40 | print(queue.get()) 41 | print(result[:]) 42 | print(value.value) 43 | print("done in: {}".format(time.time() - start_time)) 44 | print('Done with everything') 45 | 46 | 47 | import time 48 | import multiprocessing 49 | 50 | 51 | def deposit(balance, lock): 52 | for i in range(100): 53 | time.sleep(0.01) 54 | lock.acquire() 55 | balance.value = balance.value + 1 56 | lock.release() 57 | 58 | 59 | def withdraw(balance, lock): 60 | for i in range(100): 61 | time.sleep(0.01) 62 | lock.acquire() 63 | balance.value = balance.value - 1 64 | lock.release() 65 | 66 | 67 | def main2(): 68 | # LOCK CONCEPT 69 | balance = multiprocessing.Value('i', 200) 70 | # The lock ensures only one process reads and updates the shared balance at a time; without it, 71 | # both processes could read the same stale value and one of the updates would be lost 72 | lock = multiprocessing.Lock() 73 | d = multiprocessing.Process(target=deposit, args=(balance, lock)) 74 | w = multiprocessing.Process(target=withdraw, args=(balance, lock)) 75 | d.start() 76 | w.start() 77 | d.join() 78 | w.join() 79 | print(balance.value) 80 | 81 | 82 | from multiprocessing import Pool 83 | 84 | 85 | def f(n): 86 | return n * n 87 | 88 | 89 | def main3(): 90 | # MAP AND REDUCE CONCEPT 91 | array = [1, 2, 3, 4, 5, 6] 92 | 93 | start_time = time.time() 94 | p = Pool() 95 | result = p.map(f, array) 96 | end_time = time.time() - start_time 97 | print(result, end_time) 98 | 99 | start_time = time.time() 100 | squared = [] 101 | for n in array: 102 | time.sleep(5) # artificial delay; remove for a fair serial-vs-pool comparison 103 | squared.append(n * n) 104 | end_time = time.time() - start_time 105 | print(squared, end_time) 106 | 107 | 108 | if __name__ == '__main__': 109 | # main() 110 | # main2() 111 | main3() 112 | -------------------------------------------------------------------------------- /multi_threading.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | 4 | 5 | def calc_square(numbers): 6 | print('Calculating square numbers') 7 | for number in numbers: 8 | time.sleep(0.2) 9 | print('square', number * number) 10 | 11 | 12 | def calc_cube(numbers): 13 | print('Calculate cube of numbers') 14 | for number in numbers: 15 | time.sleep(0.2) 16 | print('cube', number * number * number) 17 | 18 | 19 | def main(): 20 | arr = [2, 3, 8, 9] 21 | start_time = time.time() 22 | t1 = threading.Thread(target=calc_square, args=(arr,)) 23 | t2 = threading.Thread(target=calc_cube, args=(arr,)) 24 | 25 | t1.start() 26 | t2.start() 27 | 28 | t1.join() 29 | t2.join() 30 | print("done in: {}".format(time.time() - start_time)) 31 | print('Done with everything') 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 |
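Both scratch files above time the same sleep-based toy workload, which threads handle well because time.sleep releases the GIL. The usual rule of thumb is that threads help I/O-bound work while multiprocessing is needed for CPU-bound work, since the GIL lets only one thread execute Python bytecode at a time. A minimal sketch contrasting the two on a CPU-bound loop (timings are indicative only; busy and compare are hypothetical names):

import time
import threading
import multiprocessing

def busy(n):
    # CPU-bound work: the GIL prevents two threads from overlapping this.
    total = 0
    for i in range(n):
        total += i * i
    return total

def compare():
    threads = [threading.Thread(target=busy, args=(5_000_000,)) for _ in range(2)]
    start = time.time()
    for t in threads: t.start()
    for t in threads: t.join()
    print('threads:', time.time() - start)

    processes = [multiprocessing.Process(target=busy, args=(5_000_000,)) for _ in range(2)]
    start = time.time()
    for p in processes: p.start()
    for p in processes: p.join()
    print('processes:', time.time() - start)

if __name__ == '__main__':
    compare()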
-------------------------------------------------------------------------------- /neural_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from neural_network_components import * 4 | 5 | 6 | class NeuralNetwork: 7 | 8 | def __init__(self, x_train, y_train, layer_sizes, activation_function_dict, learning_rate=0.0001, 9 | num_epochs=1000, batch_size=64): 10 | self.x_train = x_train 11 | self.y_train = y_train 12 | self.batch_size = batch_size 13 | self.weights_dict = {} 14 | self.bias_dict = {} 15 | self.layer_sizes = layer_sizes 16 | self.activation_function_dict = activation_function_dict 17 | self.learning_rate = learning_rate 18 | self.num_epochs = num_epochs 19 | self.num_layers = len(self.layer_sizes) - 1 20 | 21 | # Layer sizes should be a list with the number of hidden nodes per layer. The first number in the list should be the number of 22 | # features, and the last number should always be one because there is one output. 23 | assert (layer_sizes[len(layer_sizes) - 1] == 1) 24 | assert (layer_sizes[0] == x_train.shape[1]) 25 | 26 | # This is to check that there is an activation function specified for each layer 27 | assert (len(layer_sizes[1:len(layer_sizes)]) == len(activation_function_dict)) 28 | 29 | # The activation function for the last layer should be a sigmoid due to how the gradients were calculated 30 | assert (activation_function_dict[len(layer_sizes) - 1] == ActivationFunctions.sigmoid) 31 | 32 | self.initialise_parameters(have_bias=True) 33 | 34 | @staticmethod 35 | def xavier_initalizer(num_inputs, num_outputs): 36 | """ 37 | NOTE: if using ReLU then use the constant 2 instead of 1 inside the square root (He initialisation) 38 | """ 39 | np.random.seed(7) # NOTE: re-seeding on every call means layers with the same shape get identical weights 40 | weights = np.random.randn(num_inputs, num_outputs) * np.sqrt(1 / num_inputs) 41 | 42 | return weights 43 | 44 | def initialise_parameters(self, have_bias=False): 45 | """ 46 | :param have_bias: Indicates whether to initialise a bias parameter as well 47 | """ 48 | # Initialise parameters 49 | for index in range(1, self.num_layers + 1): 50 | # Index + 1 because we want layer numbers to start at 1. Index - 1 because the number of inputs is the number of 51 | # features from the last layer.
52 | self.weights_dict[index] = self.xavier_initalizer(num_inputs=self.layer_sizes[index - 1], 53 | num_outputs=self.layer_sizes[index]) 54 | 55 | if have_bias: 56 | # Shape is (1, num_outputs) for the layer 57 | self.bias_dict[index] = np.zeros((1, self.layer_sizes[index])) 58 | 59 | def run_one_pass(self, input_data, labels): 60 | """ 61 | One pass counts as one forward propagation and one backward propagation, including the optimisation of the 62 | parameters 63 | :return: The cost for the current step 64 | """ 65 | 66 | n_examples = input_data.shape[0] 67 | 68 | prediction, layer_input_dict = ForwardProp.forward_prop(num_layers=self.num_layers, initial_input=input_data, 69 | layer_weights=self.weights_dict, 70 | layer_activation_functions=self.activation_function_dict) 71 | 72 | # Asserting that the prediction gives the same number of outputs as expected 73 | assert (labels.shape[0] == prediction.shape[0]) 74 | 75 | # Back-propagate to get the weight and bias gradients 76 | weight_gradients, bias_gradients = BackProp.back_prop(num_layers=self.num_layers, 77 | layer_inputs=layer_input_dict, 78 | layer_weights=self.weights_dict, 79 | layer_activation_functions=self.activation_function_dict, 80 | expected_y=labels, predicted_y=prediction) 81 | 82 | self.optimise_parameters(weight_gradients=weight_gradients, bias_gradients=bias_gradients) 83 | 84 | # Define the cost function (the epsilon inside the logs guards against log(0)) 85 | loss = -((labels * np.log(prediction + 1e-8)) + ((1 - labels) * np.log(1 - prediction + 1e-8))) 86 | cost = (1 / n_examples) * np.sum(loss, axis=0) 87 | 88 | return cost[0] 89 | 90 | def optimise_parameters(self, weight_gradients, bias_gradients=None): 91 | """ 92 | :param weight_gradients: Dictionary containing weight gradients for each layer 93 | :param bias_gradients: Dictionary containing bias gradients for each layer 94 | """ 95 | 96 | for layer_number in weight_gradients: 97 | self.weights_dict[layer_number] = self.weights_dict[layer_number] - ( 98 | self.learning_rate * weight_gradients[layer_number]) 99 | 100 | if bias_gradients is not None: 101 | self.bias_dict[layer_number] = self.bias_dict[layer_number] - ( 102 | self.learning_rate * bias_gradients[layer_number]) 103 | 104 | def optimise(self, print_epoch_cost, error_stop=None): 105 | """ 106 | Train the neural network 107 | :return: a list of each epoch with the cost associated with it 108 | """ 109 | 110 | epoch_list = [] 111 | cost_list = [] 112 | 113 | for epoch in range(self.num_epochs): 114 | 115 | for batch_start in range(0, self.x_train.shape[0], self.batch_size): 116 | current_batch = self.x_train[batch_start:batch_start + self.batch_size, :] 117 | current_labels = self.y_train[batch_start:batch_start + self.batch_size, :] 118 | 119 | epoch_cost = self.run_one_pass(input_data=current_batch, labels=current_labels) 120 | 121 | epoch_list.append(epoch) 122 | cost_list.append(epoch_cost) 123 | 124 | # Finish early if it is optimised to a certain error 125 | if error_stop and epoch_cost < error_stop: 126 | break 127 | 128 | if print_epoch_cost: 129 | print('EPOCH:', epoch, 'Cost:', round(epoch_cost, 3)) # note: this is the cost of the final batch in the epoch 130 | 131 | return epoch_list, cost_list 132 | 133 |
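Hand-rolled backprop like run_one_pass above is easiest to validate with a central-difference gradient check. The sketch below is generic and assumes a loss(weights_dict) callable that runs a forward pass and returns the scalar cost, plus the analytic gradients from back_prop; neither the helper name nor the callables are part of the repo:

import numpy as np

def numerical_gradient_check(loss, weights_dict, analytic_grads, epsilon=1e-5):
    # Hypothetical central-difference check: perturb each weight in turn and
    # compare the numerical slope with the analytic gradient from back-prop.
    for layer, weights in weights_dict.items():
        numeric = np.zeros_like(weights)
        for index in np.ndindex(weights.shape):
            original = weights[index]
            weights[index] = original + epsilon
            cost_plus = loss(weights_dict)
            weights[index] = original - epsilon
            cost_minus = loss(weights_dict)
            weights[index] = original
            numeric[index] = (cost_plus - cost_minus) / (2 * epsilon)
        max_diff = np.max(np.abs(numeric - analytic_grads[layer]))
        print('layer', layer, 'max gradient difference:', max_diff)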
134 | def create_architecture(num_features_training, hidden_nodes_per_layer): 135 | assert (isinstance(hidden_nodes_per_layer, list)) 136 | # See NeuralNetwork class for reasoning for this layout 137 | return [num_features_training] + hidden_nodes_per_layer + [1] 138 | 139 | 140 | def create_data(n_generated, add_noise=False, use_one_hot=False): 141 | if add_noise: 142 | x_data = np.random.uniform(low=0.0, high=1.0, size=(n_generated, 2)) 143 | y_data = np.empty((n_generated, 1)) 144 | # Label is 1 when the two (rounded) features match -- note this is XNOR, the inverse of XOR 145 | for column in range(x_data.shape[0]): 146 | x_feature_1 = round(x_data[column, 0]) 147 | x_feature_2 = round(x_data[column, 1]) 148 | y_data[column] = (x_feature_1 == 1 and x_feature_2 == 1) or ( 149 | x_feature_1 == 0 and x_feature_2 == 0) 150 | else: 151 | x_data = np.random.randint(2, size=(n_generated, 2)) 152 | y_data = np.empty((n_generated, 1)) 153 | # Label is 1 when the two features match -- note this is XNOR, the inverse of XOR 154 | for column in range(x_data.shape[0]): 155 | y_data[column] = ((x_data[column, 0] == 1 and x_data[column, 1] == 1) or ( 156 | x_data[column, 0] == 0 and x_data[column, 1] == 0)) 157 | 158 | if use_one_hot: 159 | y_data_new = np.empty((y_data.shape[0], 2)) 160 | for row in range(y_data.shape[0]): 161 | if y_data[row, 0] == 0: 162 | y_data_new[row, 0] = 1 163 | y_data_new[row, 1] = 0 164 | else: 165 | y_data_new[row, 0] = 0 166 | y_data_new[row, 1] = 1 167 | 168 | return x_data, y_data_new 169 | return x_data, y_data 170 | 171 | 172 | def main(): 173 | # Test and Train data 174 | data_train, labels_train = create_data(n_generated=5000) 175 | 176 | num_features = data_train.shape[1] 177 | 178 | # Two hidden layers with 2 nodes each (the full architecture becomes [num_features, 2, 2, 1]) 179 | desired_architecture = [2, 2] 180 | nn_architecture = create_architecture(num_features, desired_architecture) 181 | 182 | # Defines the activation functions used for each layer 183 | activations_dict = {1: ActivationFunctions.relu, 2: ActivationFunctions.relu, 3: ActivationFunctions.sigmoid} 184 | 185 | neural_network = NeuralNetwork(x_train=data_train, y_train=labels_train, layer_sizes=nn_architecture, 186 | activation_function_dict=activations_dict, learning_rate=0.1, num_epochs=1000) 187 | 188 | epochs, cost = neural_network.optimise(error_stop=0.09, print_epoch_cost=True) 189 | 190 | plt.plot(epochs, cost) 191 | plt.show() 192 | 193 | 194 | if __name__ == '__main__': 195 | main() 196 |
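As the comments in create_data above note, the labelling is actually XNOR: the label is 1 exactly when the two binary features are equal. A quick self-contained check of that convention (to be run separately):

import numpy as np
from neural_network import create_data

# Verify the inputs-match (XNOR) labelling convention for the noiseless case.
x, y = create_data(n_generated=8)
for features, label in zip(x, y):
    assert label[0] == float(features[0] == features[1])
print('labels follow the inputs-match (XNOR) convention')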
46 | class ForwardProp:
47 | 
48 |     @staticmethod
49 |     def compute_layer(input_data, weights, bias=None):
50 |         # Need to ensure there are enough weights for the number of features in the input data
51 |         assert (input_data.shape[1] == weights.shape[0])
52 | 
53 |         if bias is not None:
54 |             # Need to ensure that there is a bias term for each hidden node
55 |             assert (weights.shape[1] == bias.shape[1])
56 |             # Broadcast the (1, num_outputs) bias row across every example in the batch
57 |             broadcasted_bias = np.broadcast_to(bias, (input_data.shape[0], bias.shape[1]))
58 | 
59 |         return np.dot(input_data, weights) + broadcasted_bias if bias is not None else np.dot(input_data, weights)
60 | 
61 |     @staticmethod
62 |     def ensure_no_activation_applied(output_without_activation, output_with_activation, constant_connections,
63 |                                      current_layer, node_map):
64 |         """
65 |         Ensures the activation function isn't applied to the nodes which are dummy nodes
66 |         :param output_without_activation: The output before the activation function was applied
67 |         :param output_with_activation: The output with the activation function applied
68 |         :param constant_connections: The connections for which NO activation function should be applied
69 |         :param current_layer: The current layer we're calculating in
70 |         :param node_map: A dictionary of which number each node is in their respective layer
71 |         :return: The activated output, with the constant-connection columns restored to their pre-activation values
72 |         """
73 |         # Keep the pre-activation values for the output nodes of the constant connections
74 |         for connection in constant_connections[current_layer]:
75 |             # Need to convert to their position in the layer. Minus one because of python indexing
76 |             output_position_within_layer = node_map[connection.output_node] - 1
77 |             # The output node position is the node which shouldn't have any activations applied. So we use all the
78 |             # values from before the activation was applied
79 |             output_with_activation[:, output_position_within_layer] = \
80 |                 output_without_activation[
81 |                 :, output_position_within_layer]
82 | 
83 |         return output_with_activation
84 | 
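    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): compute_layer
    # above evaluates input_data @ weights (+ bias broadcast over the rows).
    # A hypothetical worked example:
    #
    #     >>> X = np.array([[1.0, 2.0]])       # one example, two features
    #     >>> W = np.array([[0.5], [0.25]])    # two inputs -> one output node
    #     >>> ForwardProp.compute_layer(X, W)  # 1*0.5 + 2*0.25
    #     array([[1.]])
    # ---------------------------------------------------------------------------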
85 |     @staticmethod
86 |     def genome_forward_prop(num_layers, initial_input, layer_weights, keep_constant_connections, node_map,
87 |                             layer_activation_functions, layer_biases):
88 |         """
89 |         Forward propagation for genome networks, where some 'dummy' node outputs must pass through unactivated
90 |         :param node_map: A dict for each node which shows which number node they are in their respective layer
91 |         :param keep_constant_connections: The connections, grouped by layer, which should remain constant with no activation function applied
92 |         :param layer_activation_functions: The activation functions to be used on each layer. Should be a reference to the function at each key.
93 |         :param layer_biases: the biases associated with every layer. Is of type dict
94 |         :param num_layers: number of layers for the neural network
95 |         :param initial_input: the input data
96 |         :param layer_weights: the weights associated with every layer contained in a dictionary with the key being which layer they are for (starting at 1)
97 |         :return: the output vector after the forward propagation
98 |         """
99 | 
100 |         # This is done to ensure I can use the .get function later to return None
101 |         if layer_biases is None:
102 |             layer_biases = {}
103 |         else:
104 |             assert (len(layer_biases) == num_layers)
105 | 
106 |         if layer_activation_functions is None:
107 |             layer_activation_functions = {}
108 |         else:
109 |             assert (len(layer_activation_functions) == num_layers)
110 | 
111 |         assert (isinstance(layer_weights, dict))
112 |         assert (len(layer_weights) == num_layers)
113 | 
114 |         # Dictionary to keep track of inputs for each layer
115 |         layer_input_dict = {}
116 | 
117 |         current_input = initial_input
118 |         for current_layer_number in range(1, num_layers + 1):
119 |             # Get weights for current layer
120 |             current_weights = layer_weights[current_layer_number]
121 | 
122 |             # Get bias vector for current layer. If there is no bias for that layer this returns None
123 |             current_bias = layer_biases.get(current_layer_number, None)
124 | 
125 |             # Get current activation function for the layer
126 |             current_activation_function = layer_activation_functions.get(current_layer_number, None)
127 | 
128 |             # Get output matrix for current_layer
129 |             output = ForwardProp.compute_layer(current_input, current_weights, current_bias)
130 | 
131 |             # If there is an activation function for the layer
132 |             if current_activation_function:
133 |                 saved_output = output
134 |                 output = current_activation_function(output)
135 |                 output = ForwardProp.ensure_no_activation_applied(output_without_activation=saved_output,
136 |                                                                   output_with_activation=output, node_map=node_map,
137 |                                                                   constant_connections=keep_constant_connections,
138 |                                                                   current_layer=current_layer_number)
139 | 
140 |             layer_input_dict[current_layer_number] = current_input
141 | 
142 |             # The input into the next layer becomes the output from the previous layer
143 |             current_input = output
144 | 
145 |         return current_input, layer_input_dict
146 | 
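    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): a hypothetical
    # call of the plain forward_prop defined below, for a 2-input -> 1-output
    # network with a single sigmoid layer:
    #
    #     >>> weights = {1: np.array([[0.5], [0.5]])}
    #     >>> activations = {1: ActivationFunctions.sigmoid}
    #     >>> out, inputs = ForwardProp.forward_prop(num_layers=1,
    #     ...                                        initial_input=np.array([[1.0, 1.0]]),
    #     ...                                        layer_weights=weights,
    ...                                        
    #     ...                                        layer_activation_functions=activations)
    #     >>> out                              # sigmoid(1.0) ~= 0.731
    #     array([[0.73105858]])
    # ---------------------------------------------------------------------------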
147 |     @staticmethod
148 |     def forward_prop(num_layers, initial_input, layer_weights, layer_activation_functions=None, layer_biases=None,
149 |                      return_number_before_last_activation=False):
150 |         """
151 |         :param return_number_before_last_activation: If you want the raw output number instead of the sigmoid applied to it
152 |         :param layer_activation_functions: The activation functions to be used on each layer. Should be a reference to the function at each key.
153 |         :param layer_biases: the biases associated with every layer. Is of type dict
154 |         :param num_layers: number of layers for the neural network
155 |         :param initial_input: the input data
156 |         :param layer_weights: the weights associated with every layer contained in a dictionary with the key being which layer they are for (starting at 1)
157 |         :return: the output vector after the forward propagation
158 |         """
159 | 
160 |         # This is done to ensure I can use the .get function later to return None
161 |         if layer_biases is None:
162 |             layer_biases = {}
163 |         else:
164 |             assert (len(layer_biases) == num_layers)
165 | 
166 |         if layer_activation_functions is None:
167 |             layer_activation_functions = {}
168 |         else:
169 |             assert (len(layer_activation_functions) == num_layers)
170 | 
171 |         assert (isinstance(layer_weights, dict))
172 |         assert (len(layer_weights) == num_layers)
173 | 
174 |         # Dictionary to keep track of inputs for each layer
175 |         layer_input_dict = {}
176 | 
177 |         current_input = initial_input
178 |         for current_layer_number in range(1, num_layers + 1):
179 |             # Get weights for current layer
180 |             current_weights = layer_weights[current_layer_number]
181 | 
182 |             # Get bias vector for current layer. If there is no bias for that layer this returns None
183 |             current_bias = layer_biases.get(current_layer_number, None)
184 | 
185 |             # Get current activation function for the layer
186 |             current_activation_function = layer_activation_functions.get(current_layer_number, None)
187 |             # Get output matrix for current_layer
188 |             output = ForwardProp.compute_layer(current_input, current_weights, current_bias)
189 | 
190 |             # If you want to return the output before the sigmoid is applied on the last layer
191 |             if return_number_before_last_activation and current_layer_number == num_layers:
192 |                 return output
193 | 
194 |             # If there is an activation function for the layer
195 |             if current_activation_function:
196 |                 output = current_activation_function(output)
197 | 
198 |             layer_input_dict[current_layer_number] = current_input
199 | 
200 |             # The input into the next layer becomes the output from the previous layer
201 |             current_input = output
202 | 
203 |         return current_input, layer_input_dict
204 | 
205 | 
206 | class BackProp:
207 | 
208 |     def compute_layer_gradients(self):
209 |         pass
210 | 
211 |     @staticmethod
212 |     def back_prop(num_layers, layer_inputs, layer_weights, layer_activation_functions, expected_y, predicted_y):
213 |         """
214 |         :param layer_activation_functions: activation functions for each layer
215 |         :param num_layers: number of layers
216 |         :param layer_inputs: the inputs calculated for each layer. The key 1 should return the initial data we put in.
217 |         :param layer_weights: the weights used in each layer of the neural network
218 |         :param expected_y: what the expected value of the output should be. I.e. the real data
219 |         :param predicted_y: What the neural network put out
220 |         :return: two dicts, one for the weights and one for the biases, which contain the gradients for each layer
221 |         """
222 | 
223 |         assert (isinstance(layer_inputs, dict))
224 |         assert (isinstance(layer_weights, dict))
225 |         assert (isinstance(layer_activation_functions, dict))
226 | 
227 |         # Ensure information is defined for every layer
228 |         assert (len(layer_activation_functions) == num_layers)
229 |         assert (len(layer_weights) == num_layers)
230 |         assert (len(layer_inputs) == num_layers)
231 | 
232 |         weight_gradients = {}
233 |         bias_gradients = {}
234 | 
235 |         # Key 1 in layer_inputs should contain the initial data input. So the shape 0 will return number of rows,
236 |         # hence the number of examples
237 |         n_examples = layer_inputs[1].shape[0]
238 | 
239 |         # This assumes that we will always use a SIGMOID activation function for the last output
240 |         dz_last = (predicted_y - expected_y) * (1 / n_examples)
241 |         # num_layers because we want the inputs into the last layer
242 |         dw_last = np.dot(layer_inputs[num_layers].T, dz_last)
243 |         db_last = np.sum(dz_last)
244 | 
245 |         # Set gradients for final layer
246 |         weight_gradients[num_layers] = dw_last
247 |         bias_gradients[num_layers] = db_last
248 | 
249 |         current_dz = dz_last
250 |         # Have to go backwards in layer numbers. Start at num_layers - 1 because the last layer is always the same
251 |         # code as above
252 |         for current_layer_number in range(num_layers - 1, 0, -1):
253 |             # Get the activation gradient function for the current activation function
254 |             current_activation_gradient_function = ActivationFunctions.get_activation_gradient(
255 |                 layer_activation_functions[current_layer_number])
256 | 
257 |             current_dz, current_dw, current_db = BackProp.compute_gradient(next_layer_dz=current_dz,
258 |                                                                            next_layer_weights=layer_weights[
259 |                                                                                current_layer_number + 1],
260 |                                                                            activation_gradient_function=current_activation_gradient_function,
261 |                                                                            current_layer_input=layer_inputs[
262 |                                                                                current_layer_number],
263 |                                                                            next_layer_inputs=layer_inputs[
264 |                                                                                current_layer_number + 1])
265 | 
266 |             # Store information of gradients for each layer
267 |             weight_gradients[current_layer_number] = current_dw
268 |             bias_gradients[current_layer_number] = current_db
269 | 
270 |         return weight_gradients, bias_gradients
271 | 
272 |     @staticmethod
273 |     def compute_gradient(next_layer_dz, next_layer_weights, activation_gradient_function, current_layer_input,
274 |                          next_layer_inputs):
275 |         # Calculate dZ for the current layer (the caller passes in the weights and inputs of the next layer)
276 |         dz_current = np.dot(next_layer_dz,
277 |                             next_layer_weights.T) * activation_gradient_function(next_layer_inputs)
278 | 
279 |         current_dw = np.dot(current_layer_input.T, dz_current)
280 |         current_db = np.sum(dz_current, axis=0)
281 | 
282 |         return dz_current, current_dw, current_db
--------------------------------------------------------------------------------
/pickles/best_genome_pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_-1.7547243063454208:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_-1.7547243063454208
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_0.0328201636785844:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_0.0328201636785844
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_0.40751628299650783:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_0.40751628299650783
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_circle_data_8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_circle_data_8
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_shm_two_class_618056:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_shm_two_class_618056
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_shm_two_class_8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_shm_two_class_8
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_xor_full_132164:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_xor_full_132164
--------------------------------------------------------------------------------
/pickles/best_genome_pickle_xor_full_351148:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/best_genome_pickle_xor_full_351148
--------------------------------------------------------------------------------
/pickles/genome_circle_data:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/genome_circle_data
--------------------------------------------------------------------------------
/pickles/genome_noise_trained:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tsdevendra1/NEAT-Algorithm/dbfa7fd1c51d7407128524a4475bb16c95a8d8e3/pickles/genome_noise_trained
--------------------------------------------------------------------------------
/read_mat_files.py:
--------------------------------------------------------------------------------
1 | import scipy.io as sio
2 | from sklearn.decomposition import PCA
3 | import numpy as np
4 | 
5 | 
6 | def get_shm_multi_class_data():
7 |     mat_contents = sio.loadmat('C:/Users/tsdev/Desktop/SHM DATA/4dof_features.mat')
8 |     y_data = mat_contents['labels'][:, 0]
9 |     y_data.shape = (y_data.shape[0])
10 |     x_data = mat_contents['multi_class_feats']
11 | 
12 |     shuffled_data = np.empty([x_data.shape[0], 11])
13 |     shuffled_data[:, 0:10] = x_data
14 |     shuffled_data[:, 10] = y_data
15 |     # Shuffle data because it was ordered before by class
16 |     np.random.shuffle(shuffled_data)
17 | 
18 |     x_data = shuffled_data[:, 0:10]
19 |     y_data = shuffled_data[:, 10]
20 |     y_data.shape = (y_data.shape[0], 1)
21 | 
22 |     y_data_one_hot = np.zeros((y_data.shape[0], 17))
23 |     for row in range(y_data.shape[0]):
24 |         label = int(y_data[row, 0])
25 |         # label - 1 for indexing reasons, for example label = 1 means that the first column (index = 0) is the one with the value one
26 |         y_data_one_hot[row, label - 1] = 1
27 | 
28 |     # if normalise_x:
29 |     #     # We perform these operations because for this data, the values are too large or negative, causing issues
30 |     #     # during optimisation otherwise
31 |     #     x_data = x_data * -1
32 |     #     x_data = x_data / 100
33 | 
34 |     return x_data, y_data_one_hot
35 | 
36 | 
37 | def get_shm_two_class_data(normalise_x=True):
38 |     mat_contents = sio.loadmat('C:/Users/tsdev/Desktop/SHM DATA/4dof_features.mat')
39 |     y_data = mat_contents['labels'][:, 1]
40 |     y_data.shape = (y_data.shape[0])
41 |     x_data = mat_contents['two_class_feats']
42 | 
43 |     shuffled_data = np.empty([x_data.shape[0], 4])
44 |     shuffled_data[:, 0:3] = x_data
45 |     shuffled_data[:, 3] = y_data
46 |     # Shuffle data because it was ordered before by class
47 |     np.random.shuffle(shuffled_data)
48 | 
49 |     x_data = shuffled_data[:, 0:3]
50 |     y_data = shuffled_data[:, 3]
51 |     y_data.shape = (y_data.shape[0], 1)
52 | 
53 |     shuffle_check = y_data[0:400, :]
54 |     unique, counts = np.unique(shuffle_check, return_counts=True)
55 |     shuffle_check_length = len(shuffle_check)
56 |     class_1_percentage = counts[0] / shuffle_check_length * 100
57 |     class_2_percentage = counts[1] / shuffle_check_length * 100
58 | 
59 |     if normalise_x:
60 |         # We perform these operations because for this data, the values are too large or negative, causing issues
61 |         # during optimisation otherwise
62 |         x_data = x_data * -1
63 |         x_data = x_data / 100
64 | 
65 |     if class_1_percentage < 40 or class_2_percentage < 40:
66 |         raise ValueError('Imbalanced classes due to shuffle, please re-initialise')
67 | 
68 |     return x_data, y_data
69 | 
70 | 
71 | def main():
72 |     x_data, y_data = get_shm_two_class_data()
73 |     assert (y_data.shape[1] == 1)
74 | 
75 |     x_data_multi, y_data_multi = get_shm_multi_class_data()
76 |     print(x_data_multi.shape)
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     main()
81 | 
--------------------------------------------------------------------------------
/reproduce.py:
--------------------------------------------------------------------------------
1 | from gene import NodeGene, ConnectionGene
2 | from genome import Genome
3 | from species import SpeciesSet
4 | import random
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import seaborn as sns
8 | import math
9 | import copy
10 | 
11 | 
12 | 
13 | class Reproduce:
14 |     """
15 |     Will handle reproduction of the genomes
16 |     """
17 | 
18 |     def __init__(self, stagnation, config):
19 |         self.global_innovation_number = 0
20 |         self.stagnation = stagnation
21 |         self.ancestors = {}
22 |         self.genome_indexer = 0
23 |         self.config = config
24 |         # Key: The tuple of the connection e.g. (1,3) value: the innovation number
25 |         self.innovation_tracker = {}
26 | 
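    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the
    # innovation_tracker gives every structural innovation a stable id, so the
    # same connection tuple always maps to the same innovation number across
    # genomes. A hypothetical trace:
    #
    #     tracker = {}; innovation = 0
    #     for conn in [(0, 2), (1, 2), (0, 2)]:
    #         if conn not in tracker:
    #             innovation += 1
    #             tracker[conn] = innovation
    #     # tracker == {(0, 2): 1, (1, 2): 2} - the repeated (0, 2) re-uses id 1
    # ---------------------------------------------------------------------------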
27 |     def create_new_population(self, population_size, num_features):
28 |         population = {}
29 | 
30 |         node_list = []
31 |         connection_list = []
32 |         # Create the source nodes
33 |         for node in range(num_features):
34 |             node_list.append(NodeGene(node_id=node, node_type='source'))
35 | 
36 |         # Add the output node (There is only one in this case)
37 |         node_list.append(NodeGene(node_id=num_features, node_type='output', bias=1))
38 | 
39 |         # Save the innovations for the first generation
40 |         for source_node_id in range(num_features):
41 |             # Increment for the new innovation
42 |             self.global_innovation_number += 1
43 |             # The output node will always have the node_id equal to the number of features
44 |             self.innovation_tracker[(source_node_id, num_features)] = self.global_innovation_number
45 | 
46 |         # For each feature there will be a connection to the output
47 |         for i in range(num_features):
48 |             connection = (i, num_features)
49 |             # The connection was already saved, so this should return true
50 |             assert (connection in self.innovation_tracker)
51 |             connection_list.append(ConnectionGene(input_node=i, output_node=num_features,
52 |                                                   innovation_number=self.innovation_tracker[connection], enabled=True))
53 | 
54 |         # Create a population of size population_size
55 |         for index in range(population_size):
56 |             # Deep copies, otherwise changing the connection weight changes it for every genome that has the same
57 |             # reference to the class
58 |             deep_copy_connections = copy.deepcopy(connection_list)
59 |             deep_copy_nodes = copy.deepcopy(node_list)
60 |             # Set all the connections to a random weight for each genome
61 |             for connection in deep_copy_connections:
62 |                 connection.weight = np.random.randn()
63 |             # Increment since the index value has been assigned
64 |             self.genome_indexer += 1
65 | 
66 |             # Create the genome
67 |             population[index] = Genome(connections=deep_copy_connections, nodes=deep_copy_nodes,
68 |                                        key=self.genome_indexer)
69 | 
70 |         self.show_population_weight_distribution(population=population)
71 | 
72 |         return population
73 | 
74 |     @staticmethod
75 |     def show_population_weight_distribution(population):
76 |         # See the spread of starting weights
77 |         list_of_weights = []
78 |         for genome in population.values():
79 |             for connection in genome.connections.values():
80 |                 list_of_weights.append(connection.weight)
81 | 
82 |         sns.distplot(list_of_weights)
83 |         plt.title('Weight distribution of connections in each population member')
84 |         plt.xlabel('Connection weight values')
85 |         plt.show()
86 | 
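    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the method
    # below allocates offspring roughly proportionally to each species'
    # adjusted fitness, then rescales so the total matches population_size.
    # Hypothetical worked numbers, with population_size = 10 and
    # min_species_size = 2:
    #
    #     adjusted fitnesses [0.75, 0.25] -> raw sizes [7.5, 2.5]
    #     -> rounded [8, 2] (total 10, so the normalisation step is a no-op)
    # ---------------------------------------------------------------------------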
87 |     @staticmethod
88 |     def compute_adjusted_species_sizes(adjusted_species_fitnesses, previous_species_sizes, population_size,
89 |                                        min_species_size):
90 |         """
91 |         Compute the number of offspring per species, proportional to their fitnesses (See page 110 of NEAT paper)
92 |         :param adjusted_species_fitnesses:
93 |         :param previous_species_sizes:
94 |         :param population_size:
95 |         :param min_species_size:
96 |         :return:
97 |         """
98 | 
99 |         # Sum all the remaining adjusted species fitnesses
100 |         adjusted_fitness_sum = sum(adjusted_species_fitnesses)
101 | 
102 |         adjusted_species_sizes = []
103 | 
104 |         for adjusted_fitness, previous_size in zip(adjusted_species_fitnesses, previous_species_sizes):
105 |             if adjusted_fitness_sum > 0:
106 |                 # Calculate the adjusted species size for how much of the overall fitness they account for. If this
107 |                 # value is less than the min_species_size then we set it to that instead
108 |                 species_size = max(min_species_size, ((adjusted_fitness / adjusted_fitness_sum) * population_size))
109 | 
110 |             else:
111 |                 species_size = min_species_size
112 | 
113 |             # difference = (species_size - previous_size) * 0.5
114 |             # rounded_difference = int(round(difference))
115 |             # adjusted_size = previous_size
116 |             # if abs(rounded_difference) > 0:
117 |             #     adjusted_size += rounded_difference
118 |             # elif difference > 0:
119 |             #     adjusted_size += 1
120 |             # elif difference < 0:
121 |             #     adjusted_size -= 1
122 |             # adjusted_species_sizes.append(adjusted_size)
123 | 
124 |             # TODO: This lets fitter species claim population share more aggressively. For the damped behaviour, uncomment the block above and remove the line below
125 |             adjusted_species_sizes.append(round(species_size))
126 | 
127 |         # Normalize the spawn amounts so that the next generation is roughly
128 |         # the population size requested by the user.
129 |         total_adjusted_size = sum(adjusted_species_sizes)
130 |         norm = population_size / total_adjusted_size
131 |         adjusted_species_sizes = [max(min_species_size, int(round(n * norm))) for n in adjusted_species_sizes]
132 | 
133 |         print('NEW POPULATION SIZE: {}'.format(sum(adjusted_species_sizes)))
134 | 
135 |         return adjusted_species_sizes
136 | 
137 |     def get_non_stagnant_species(self, species_set, generation):
138 |         """
139 |         Checks which species are stagnant and returns the ones which aren't
140 |         :param generation: Which generation number it is
141 |         :param species_set: The species set instance which stores all the species
142 |         :return: A list of non stagnant species
143 |         """
144 |         # Keeps track of all the fitnesses for the genomes in the population
145 |         all_fitnesses = []
146 |         # Keeps track of the species which aren't stagnant
147 |         remaining_species = []
148 | 
149 |         # (Id, species instance, boolean)
150 |         for species_id, species, is_stagnant in self.stagnation.update(species_set=species_set, generation=generation,
151 |                                                                        config=self.config):
152 |             # Only save species if it is not stagnant
153 |             if not is_stagnant:
154 |                 # Save all the fitness in the species that isn't stagnant
155 |                 all_fitnesses += [member.fitness for member in species.members.values()]
156 |                 remaining_species.append(species)
157 | 
158 |         # The case where there are no species left
159 |         if not remaining_species:
160 |             # TODO: Would this ever come here?
161 |             raise Exception('There are no remaining species in the reproduce function')
162 | 
163 |         return all_fitnesses, remaining_species
164 | 
165 |     def get_adjusted_species_sizes(self, all_fitnesses, remaining_species, population_size):
166 |         """
167 |         Adjusts the size of the species for their fitness values
168 |         :param all_fitnesses: A list of all fitness values for all genomes in the population
169 |         :param remaining_species: A list of species which aren't stagnant
170 |         :param population_size: The population size
171 |         :return: A list of sizes for the new remaining species, adjusted for their respective fitness values
172 |         """
173 | 
174 |         # Find min and max fitness across the entire population. We use this for explicit fitness sharing.
175 |         min_genome_fitness = min(all_fitnesses)
176 |         max_genome_fitness = max(all_fitnesses)
177 | 
178 |         # TODO: The value 1.0 is arbitrary, taken from the neat-python package. Should it be configurable?
179 |         fitness_range = max(1.0, max_genome_fitness - min_genome_fitness)
180 | 
181 |         # TODO: Not sure if this is the right method to do adjusted fitness
182 |         for species in remaining_species:
183 |             # The adjusted fitness is the mean of the species members' fitnesses TODO: Is this correct?
184 |             mean_species_fitness = np.mean([member.fitness for member in species.members.values()])
185 |             adjusted_fitness = (mean_species_fitness - min_genome_fitness) / fitness_range
186 |             species.adjusted_fitness = adjusted_fitness
187 | 
188 |         adjusted_species_fitnesses = [species.adjusted_fitness for species in remaining_species]
189 | 
190 |         # Get a list of the amount of members in each of the remaining species
191 |         previous_species_sizes = [len(species.members) for species in remaining_species]
192 | 
193 |         # If the sum of the adjusted species fitnesses is less than 0.1, it suggests there isn't much fitness variation
194 |         # in the population. Thus we put an artificial barrier on the min species size because there is no species that
195 |         # entirely beats all other species
196 |         # TODO: 0.1 is an arbitrary number and should be configurable
197 |         if sum(adjusted_species_fitnesses) < 0.1:
198 |             min_species_size = 2
199 |         else:
200 |             min_species_size = self.config.min_species_size
201 | 
202 |         adjusted_species_sizes = self.compute_adjusted_species_sizes(
203 |             adjusted_species_fitnesses=adjusted_species_fitnesses, min_species_size=min_species_size,
204 |             previous_species_sizes=previous_species_sizes, population_size=population_size)
205 | 
206 |         return adjusted_species_sizes
207 | 
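    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the adjusted
    # fitness above rescales each species' mean fitness into [0, 1] relative to
    # the whole population. Hypothetical numbers:
    #
    #     member fitnesses: species A = [1.0, 2.0], species B = [4.0]
    #     fitness_range = max(1.0, 4.0 - 1.0) = 3.0
    #     adjusted A = (1.5 - 1.0) / 3.0 = 0.1666...
    #     adjusted B = (4.0 - 1.0) / 3.0 = 1.0
    # ---------------------------------------------------------------------------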
208 |     def get_new_population(self, adjusted_species_sizes, remaining_species, species_set, generation_tracker,
209 |                            backprop_mutation):
210 |         """
211 |         Creates the dictionary of the new genomes for the next generation population
212 |         :param generation_tracker:
213 |         :param adjusted_species_sizes:
214 |         :param remaining_species:
215 |         :param species_set:
216 |         :param new_population:
217 |         :return:
218 |         """
219 |         new_population = {}
220 | 
221 |         for species_size, species in zip(adjusted_species_sizes, remaining_species):
222 | 
223 |             # TODO: Uncomment if you removed min_species_size
224 |             # assert (species_size > 0)
225 |             if species_size > 0:
226 | 
227 |                 # List of old species members
228 |                 old_species_members = list(species.members.values())
229 |                 # Reset the members for the current species
230 |                 species.members = {}
231 |                 # Save the species in the species set object
232 |                 species_set.species[species.key] = species
233 | 
234 |                 # Sort the members into descending fitness order
235 |                 old_species_members.sort(reverse=True, key=lambda x: x.fitness)
236 | 
237 |                 # Double check that it is descending
238 |                 if len(old_species_members) > 1:
239 |                     assert (old_species_members[0].fitness >= old_species_members[1].fitness)
240 | 
241 |                 # If we have specified a number of genomes to carry over, carry them over to the new population
242 |                 num_genomes_without_crossover = int(
243 |                     round(species_size * self.config.chance_for_mutation_without_crossover))
244 |                 if num_genomes_without_crossover > 0:
245 | 
246 |                     for member in old_species_members[:num_genomes_without_crossover]:
247 | 
248 |                         # Check if we should carry over a member un-mutated or not
249 |                         if not self.config.keep_unmutated_top_percentage:
250 |                             child = copy.deepcopy(member)
251 | 
252 |                             child.mutate(reproduction_instance=self,
253 |                                          innovation_tracker=self.innovation_tracker, config=self.config,
254 |                                          backprop_mutation=backprop_mutation)
255 | 
256 |                             if not child.check_connection_enabled_amount() and not child.check_num_paths(
257 |                                     only_add_enabled_connections=True):
258 |                                 raise Exception('This child has no enabled connections')
259 | 
260 |                             new_population[child.key] = child
261 |                             self.ancestors[child.key] = ()
262 |                             # new_population[member.key] = member
263 |                             species_size -= 1
264 |                             assert (species_size >= 0)
265 |                         else:
266 |                             # Else we just add the current member to the new population
267 |                             new_population[member.key] = member
268 |                             species_size -= 1
269 |                             assert (species_size >= 0)
270 | 
271 |                 # If there are no more genomes for the current species, then restart the loop for the next species
272 |                 if species_size <= 0:
273 |                     continue
274 | 
275 |                 # Only use the survival threshold fraction to use as parents for the next generation.
276 |                 reproduction_cutoff = int(math.ceil((1 - self.config.chance_for_mutation_without_crossover) *
277 |                                                     len(old_species_members)))
278 | 
279 |                 # Need at least two parents no matter what the previous result
280 |                 reproduction_cutoff = max(reproduction_cutoff, 2)
281 |                 old_species_members = old_species_members[:reproduction_cutoff]
282 | 
283 |                 # Randomly choose parents and choose whilst there can still be additional genomes for the given species
284 |                 while species_size > 0:
285 |                     species_size -= 1
286 | 
287 |                     # TODO: If you don't allow them to mate with themselves then it's a problem because if the species previous
288 |                     # TODO: size is 1, then how can you do with or without crossover?
289 |                     parent_1 = copy.deepcopy(random.choice(old_species_members))
290 |                     parent_2 = copy.deepcopy(random.choice(old_species_members))
291 | 
292 |                     # These have to be deep copies (hence the deepcopy around random.choice above), otherwise the
293 |                     # connections which are crossed over are also modified if mutation occurs on the child, since
294 |                     # parent and child would share the same ConnectionGene objects
295 | 
296 | 
297 |                     self.genome_indexer += 1
298 |                     genome_id = self.genome_indexer
299 | 
300 |                     child = Genome(key=genome_id)
301 |                     # TODO: Save the parent_1 and parent_2 mutation history as well as what connections they had
302 |                     # Create the genome from the parents
303 |                     num_connections_enabled = child.crossover(genome_1=parent_1, genome_2=parent_2, config=self.config)
304 | 
305 |                     # If there are no connections enabled we forget about this child and don't add it to the existing
306 |                     # population
307 |                     if num_connections_enabled:
308 |                         child.mutate(reproduction_instance=self,
309 |                                      innovation_tracker=self.innovation_tracker, config=self.config,
310 |                                      generation_tracker=generation_tracker, backprop_mutation=backprop_mutation)
311 | 
312 |                         if not child.check_connection_enabled_amount() and not child.check_num_paths(
313 |                                 only_add_enabled_connections=True):
314 |                             raise Exception('This child has no enabled connections')
315 | 
316 |                         new_population[child.key] = child
317 |                         self.ancestors[child.key] = (parent_1.key, parent_2.key)
318 |                     else:
319 |                         # Else the crossover resulted in an invalid genome
320 |                         assert num_connections_enabled == 0
321 |                         species_size += 1
322 |                         self.genome_indexer -= 1
323 | 
324 |         return new_population
325 | 
326 |     def reproduce(self, species_set, population_size, generation, generation_tracker, backprop_mutation=False):
327 |         """
328 |         Handles reproduction of a population
329 |         :param generation_tracker: A class instance which keeps track of certain parameters for each generation
330 |         :param generation: Which generation number it is
331 |         :param species_set: The SpeciesSet instance which keeps track of species
332 |         :param population_size: The population size
333 |         :return: A new population
334 |         """
335 |         # Check it is a class instance
336 |         assert (isinstance(species_set, SpeciesSet))
337 | 
338 |         all_fitnesses, remaining_species = self.get_non_stagnant_species(species_set=species_set, generation=generation)
339 | 
340 |         adjusted_species_sizes = self.get_adjusted_species_sizes(all_fitnesses=all_fitnesses,
341 |                                                                  population_size=population_size,
342 |                                                                  remaining_species=remaining_species)
343 | 
344 |         # Set the species dict to an empty one for now as the new species will be configured later
345 |         species_set.species = {}
346 | 
347 |         # Keeps track of the new population (key, object)
348 |         new_population = self.get_new_population(adjusted_species_sizes=adjusted_species_sizes, species_set=species_set,
349 |                                                  remaining_species=remaining_species,
350 |                                                  generation_tracker=generation_tracker,
351 |                                                  backprop_mutation=backprop_mutation)
352 | 
353 |         return new_population
--------------------------------------------------------------------------------
/reproduce_multiclass.py:
--------------------------------------------------------------------------------
1 | from gene import NodeGene, ConnectionGene
2 | from genome_multiclass import GenomeMultiClass
3 | from species import SpeciesSet
4 | import random
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import seaborn as sns
8 | import math
9 | import copy
10 | 
11 | 
12 | 
13 | class ReproduceMultiClass:
14 |     """
15 |     Will handle reproduction of the genomes
16 |     """
17 | 
18 |     def __init__(self, stagnation, config):
19 |         self.global_innovation_number = 0
20 |         self.stagnation = stagnation
21 |         self.ancestors = {}
22 |         self.genome_indexer = 0
23 |         self.config = config
24 |         # Key: The tuple of the connection e.g. (1,3) value: the innovation number
25 |         self.innovation_tracker = {}
26 | 
27 |     def create_new_population(self, population_size, num_features, num_classes):
28 |         population = {}
29 | 
30 |         source_node_list = []
31 |         output_node_list = []
32 | 
33 |         # Create the source and output nodes
34 |         for node_id in range(num_features + num_classes):
35 |             if node_id < num_features:
36 |                 source_node_list.append(NodeGene(node_id=node_id, node_type='source'))
37 |             else:
38 |                 output_node_list.append(NodeGene(node_id=node_id, node_type='output', bias=1))
39 | 
40 |         # Save innovations on population creation
41 |         for source_node in source_node_list:
42 |             for output_node in output_node_list:
43 |                 # Increment for the new innovation
44 |                 self.global_innovation_number += 1
45 |                 # The output nodes always have node_ids starting at the number of features
46 |                 self.innovation_tracker[(source_node.node_id, output_node.node_id)] = self.global_innovation_number
47 | 
48 |         connection_list = []
49 |         # For each feature there will be a connection to each output
50 |         for source_node in source_node_list:
51 |             for output_node in output_node_list:
52 |                 connection = (source_node.node_id, output_node.node_id)
53 |                 # The connection was already saved, so this should return true
54 |                 assert (connection in self.innovation_tracker)
55 |                 connection_list.append(ConnectionGene(input_node=source_node.node_id, output_node=output_node.node_id,
56 |                                                       innovation_number=self.innovation_tracker[connection],
57 |                                                       enabled=True))
58 |         all_nodes_list = source_node_list + output_node_list
59 |         # Create a population of size population_size
60 |         for index in range(population_size):
61 |             # Deep copies, otherwise changing the connection weight changes it for every genome that has the same
62 |             # reference to the class
63 |             deep_copy_connections = copy.deepcopy(connection_list)
64 |             deep_copy_nodes = copy.deepcopy(all_nodes_list)
65 |             # Set all the connections to a random weight for each genome
66 |             for connection in deep_copy_connections:
67 |                 connection.weight = np.random.randn()
68 |             # Increment since the index value has been assigned
69 |             self.genome_indexer += 1
70 | 
71 |             # Create the genome
72 |             population[index] = GenomeMultiClass(connections=deep_copy_connections, nodes=deep_copy_nodes,
73 |                                                  key=self.genome_indexer)
74 | 
75 |         self.show_population_weight_distribution(population=population)
76 | 
77 |         return population
78 | 
79 |     @staticmethod
80 |     def show_population_weight_distribution(population):
81 |         # See the spread of starting weights
82 |         list_of_weights = []
83 |         for genome in population.values():
84 |             for connection in genome.connections.values():
85 |                 list_of_weights.append(connection.weight)
86 | 
87 |         sns.distplot(list_of_weights)
88 |         plt.title('Weight distribution of connections in each population member')
89 |         plt.xlabel('Connection weight values')
90 |         plt.show()
91 | 
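    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): with
    # num_features source nodes and num_classes output nodes, the initial
    # population above is fully connected, so every genome starts with
    # num_features * num_classes connections and the innovation ids
    # 1 .. num_features * num_classes are handed out row by row. E.g. for
    # 2 features and 3 classes: (0, 2) -> 1, (0, 3) -> 2, (0, 4) -> 3,
    # (1, 2) -> 4, (1, 3) -> 5, (1, 4) -> 6.
    # ---------------------------------------------------------------------------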
92 |     @staticmethod
93 |     def compute_adjusted_species_sizes(adjusted_species_fitnesses, previous_species_sizes, population_size,
94 |                                        min_species_size):
95 |         """
96 |         Compute the number of offspring per species, proportional to their fitnesses (See page 110 of NEAT paper)
97 |         :param adjusted_species_fitnesses:
98 |         :param previous_species_sizes:
99 |         :param population_size:
100 |         :param min_species_size:
101 |         :return:
102 |         """
103 | 
104 |         # Sum all the remaining adjusted species fitnesses
105 |         adjusted_fitness_sum = sum(adjusted_species_fitnesses)
106 | 
107 |         adjusted_species_sizes = []
108 | 
109 |         for adjusted_fitness, previous_size in zip(adjusted_species_fitnesses, previous_species_sizes):
110 |             if adjusted_fitness_sum > 0:
111 |                 # Calculate the adjusted species size for how much of the overall fitness they account for. If this
112 |                 # value is less than the min_species_size then we set it to that instead
113 |                 species_size = max(min_species_size, ((adjusted_fitness / adjusted_fitness_sum) * population_size))
114 | 
115 |             else:
116 |                 species_size = min_species_size
117 | 
118 |             # difference = (species_size - previous_size) * 0.5
119 |             # rounded_difference = int(round(difference))
120 |             # adjusted_size = previous_size
121 |             # if abs(rounded_difference) > 0:
122 |             #     adjusted_size += rounded_difference
123 |             # elif difference > 0:
124 |             #     adjusted_size += 1
125 |             # elif difference < 0:
126 |             #     adjusted_size -= 1
127 |             #
128 |             # adjusted_species_sizes.append(adjusted_size)
129 |             # TODO: This lets fitter species claim population share more aggressively. For the damped behaviour, uncomment the block above and remove the line below
130 |             adjusted_species_sizes.append(round(species_size))
131 | 
132 |         # Normalize the spawn amounts so that the next generation is roughly
133 |         # the population size requested by the user.
134 |         total_adjusted_size = sum(adjusted_species_sizes)
135 |         norm = population_size / total_adjusted_size
136 |         adjusted_species_sizes = [max(min_species_size, int(round(n * norm))) for n in adjusted_species_sizes]
137 | 
138 |         print('NEW POPULATION SIZE: {}'.format(sum(adjusted_species_sizes)))
139 | 
140 |         return adjusted_species_sizes
141 | 
142 |     def get_non_stagnant_species(self, species_set, generation):
143 |         """
144 |         Checks which species are stagnant and returns the ones which aren't
145 |         :param generation: Which generation number it is
146 |         :param species_set: The species set instance which stores all the species
147 |         :return: A list of non stagnant species
148 |         """
149 |         # Keeps track of all the fitnesses for the genomes in the population
150 |         all_fitnesses = []
151 |         # Keeps track of the species which aren't stagnant
152 |         remaining_species = []
153 | 
154 |         # (Id, species instance, boolean)
155 |         for species_id, species, is_stagnant in self.stagnation.update(species_set=species_set, generation=generation,
156 |                                                                        config=self.config):
157 |             # if is_stagnant:
158 |             #     # TODO: What to do here??
159 |             #     pass
160 |             # else:
161 |             #     # Save all the fitness in the species that isn't stagnant
162 |             #     all_fitnesses += [member.fitness for member in species.members.values()]
163 |             #     remaining_species.append(species)
164 | 
165 |             # Only save species if it is not stagnant
166 |             if not is_stagnant:
167 |                 # Save all the fitness in the species that isn't stagnant
168 |                 all_fitnesses += [member.fitness for member in species.members.values()]
169 |                 remaining_species.append(species)
170 | 
171 |         # The case where there are no species left
172 |         if not remaining_species:
173 |             # TODO: Would this ever come here?
174 |             raise Exception('There are no remaining species in the reproduce function')
175 | 
176 |         return all_fitnesses, remaining_species
177 | 
178 |     def get_adjusted_species_sizes(self, all_fitnesses, remaining_species, population_size):
179 |         """
180 |         Adjusts the size of the species for their fitness values
181 |         :param all_fitnesses: A list of all fitness values for all genomes in the population
182 |         :param remaining_species: A list of species which aren't stagnant
183 |         :param population_size: The population size
184 |         :return: A list of sizes for the new remaining species, adjusted for their respective fitness values
185 |         """
186 | 
187 |         # Find min and max fitness across the entire population. We use this for explicit fitness sharing.
188 |         min_genome_fitness = min(all_fitnesses)
189 |         max_genome_fitness = max(all_fitnesses)
190 | 
191 |         # TODO: The value 1.0 is arbitrary, taken from the neat-python package. Should it be configurable?
192 |         fitness_range = max(1.0, max_genome_fitness - min_genome_fitness)
193 | 
194 |         # TODO: Not sure if this is the right method to do adjusted fitness
195 |         for species in remaining_species:
196 |             # The adjusted fitness is the mean of the species members' fitnesses TODO: Is this correct?
197 |             mean_species_fitness = np.mean([member.fitness for member in species.members.values()])
198 |             species.adjusted_fitness = (mean_species_fitness - min_genome_fitness) / fitness_range
199 | 
200 |         adjusted_species_fitnesses = [species.adjusted_fitness for species in remaining_species]
201 | 
202 |         # Get a list of the amount of members in each of the remaining species
203 |         previous_species_sizes = [len(species.members) for species in remaining_species]
204 | 
205 |         # If the sum of the adjusted species fitnesses is less than 0.1, it suggests there isn't much fitness variation
206 |         # in the population. Thus we put an artificial barrier on the min species size because there is no species that
207 |         # entirely beats all other species
208 |         # TODO: 0.1 is an arbitrary number and should be configurable
209 |         adjusted_species_sum = sum(adjusted_species_fitnesses)
210 |         if adjusted_species_sum < 0.1:
211 |             min_species_size = 2
212 |         else:
213 |             min_species_size = self.config.min_species_size
214 | 
215 |         adjusted_species_sizes = self.compute_adjusted_species_sizes(
216 |             adjusted_species_fitnesses=adjusted_species_fitnesses, min_species_size=min_species_size,
217 |             previous_species_sizes=previous_species_sizes, population_size=population_size)
218 | 
219 |         return adjusted_species_sizes
220 | 
221 |     def get_new_population(self, adjusted_species_sizes, remaining_species, species_set, generation_tracker,
222 |                            backprop_mutation):
223 |         """
224 |         Creates the dictionary of the new genomes for the next generation population
225 |         :param generation_tracker:
226 |         :param adjusted_species_sizes:
227 |         :param remaining_species:
228 |         :param species_set:
229 |         :param new_population:
230 |         :return:
231 |         """
232 |         new_population = {}
233 | 
234 |         for species_size, species in zip(adjusted_species_sizes, remaining_species):
235 | 
236 |             # TODO: Uncomment if you removed min_species_size
237 |             # assert (species_size > 0)
238 |             if species_size > 0:
239 | 
240 |                 # List of old species members
241 |                 old_species_members = list(species.members.values())
242 |                 # Reset the members for the current species
243 |                 species.members = {}
244 |                 # Save the species in the species set object
245 |                 species_set.species[species.key] = species
246 | 
247 |                 # Sort the members into descending fitness order
248 |                 old_species_members.sort(reverse=True, key=lambda x: x.fitness)
249 | 
250 |                 # Double check that it is descending
251 |                 if len(old_species_members) > 1:
252 |                     assert (old_species_members[0].fitness >= old_species_members[1].fitness)
253 | 
254 |                 # If we have specified a number of genomes to carry over, carry them over to the new population
255 |                 num_genomes_without_crossover = int(
256 |                     round(species_size * self.config.chance_for_mutation_without_crossover))
257 |                 if num_genomes_without_crossover > 0:
258 | 
259 |                     for member in old_species_members[:num_genomes_without_crossover]:
260 | 
261 |                         # Check if we should carry over a member un-mutated or not
262 |                         if not self.config.keep_unmutated_top_percentage:
263 |                             child = copy.deepcopy(member)
264 | 
265 |                             child.mutate(reproduction_instance=self,
266 |                                          innovation_tracker=self.innovation_tracker, config=self.config,
267 |                                          backprop_mutation=backprop_mutation)
268 | 
269 |                             if not child.check_connection_enabled_amount() and not child.check_num_paths(
270 |                                     only_add_enabled_connections=True):
271 |                                 raise Exception('This child has no enabled connections')
272 | 
273 |                             new_population[child.key] = child
274 |                             self.ancestors[child.key] = ()
275 |                             # new_population[member.key] = member
276 |                             species_size -= 1
277 |                             assert (species_size >= 0)
278 |                         else:
279 |                             # Else we just add the current member to the new population
280 |                             new_population[member.key] = member
281 |                             species_size -= 1
282 |                             assert (species_size >= 0)
283 | 
284 |                 # If there are no more genomes for the current species, then restart the loop for the next species
285 |                 if species_size <= 0:
286 |                     continue
287 | 
288 |                 # Only use the survival threshold fraction to use as parents for the next generation.
289 |                 reproduction_cutoff = int(math.ceil((1 - self.config.chance_for_mutation_without_crossover) *
290 |                                                     len(old_species_members)))
291 | 
292 |                 # Need at least two parents no matter what the previous result
293 |                 reproduction_cutoff = max(reproduction_cutoff, 2)
294 |                 old_species_members = old_species_members[:reproduction_cutoff]
295 | 
296 |                 # Randomly choose parents and choose whilst there can still be additional genomes for the given species
297 |                 while species_size > 0:
298 |                     species_size -= 1
299 | 
300 |                     # TODO: If you don't allow them to mate with themselves then it's a problem because if the species previous
301 |                     # TODO: size is 1, then how can you do with or without crossover?
302 |                     parent_1 = copy.deepcopy(random.choice(old_species_members))
303 |                     parent_2 = copy.deepcopy(random.choice(old_species_members))
304 | 
305 |                     # These have to be deep copies (hence the deepcopy around random.choice above), otherwise the
306 |                     # connections which are crossed over are also modified if mutation occurs on the child, since
307 |                     # parent and child would share the same ConnectionGene objects
308 | 
309 | 
310 |                     self.genome_indexer += 1
311 |                     genome_id = self.genome_indexer
312 | 
313 |                     child = GenomeMultiClass(key=genome_id)
314 |                     # TODO: Save the parent_1 and parent_2 mutation history as well as what connections they had
315 |                     # Create the genome from the parents
316 |                     num_connections_enabled = child.crossover(genome_1=parent_1, genome_2=parent_2, config=self.config)
317 | 
318 |                     # If there are no connections enabled we forget about this child and don't add it to the existing
319 |                     # population
320 |                     if num_connections_enabled:
321 |                         child.mutate(reproduction_instance=self,
322 |                                      innovation_tracker=self.innovation_tracker, config=self.config,
323 |                                      generation_tracker=generation_tracker, backprop_mutation=backprop_mutation)
324 | 
325 |                         if not child.check_connection_enabled_amount() and not child.check_num_paths(
326 |                                 only_add_enabled_connections=True):
327 |                             raise Exception('This child has no enabled connections')
328 | 
329 |                         new_population[child.key] = child
330 |                         self.ancestors[child.key] = (parent_1.key, parent_2.key)
331 |                     else:
332 |                         # Else the crossover resulted in an invalid genome
333 |                         assert num_connections_enabled == 0
334 |                         species_size += 1
335 |                         self.genome_indexer -= 1
336 | 
337 |         return new_population
338 | 
339 |     def reproduce(self, species_set, population_size, generation, generation_tracker, backprop_mutation=False):
340 |         """
341 |         Handles reproduction of a population
342 |         :param generation_tracker: A class instance which keeps track of certain parameters for each generation
343 |         :param generation: Which generation number it is
344 |         :param species_set: The SpeciesSet instance which keeps track of species
345 |         :param population_size: The population size
346 |         :return: A new population
347 |         """
348 |         # Check it is a class instance
349 |         assert (isinstance(species_set, SpeciesSet))
350 | 
351 |         all_fitnesses, remaining_species = self.get_non_stagnant_species(species_set=species_set, generation=generation)
352 | 
353 |         adjusted_species_sizes = self.get_adjusted_species_sizes(all_fitnesses=all_fitnesses,
354 |                                                                  population_size=population_size,
355 |                                                                  remaining_species=remaining_species)
356 | 
357 |         # Set the species dict to an empty one for now as the new species will be configured later
358 |         species_set.species = {}
359 | 
360 |         # Keeps track of the new population (key, object)
361 |         new_population = self.get_new_population(adjusted_species_sizes=adjusted_species_sizes, species_set=species_set,
362 |                                                  remaining_species=remaining_species,
363 |                                                  generation_tracker=generation_tracker,
364 |                                                  backprop_mutation=backprop_mutation)
365 | 
366 |         return new_population
--------------------------------------------------------------------------------
/species.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class Species:
5 |     def __init__(self, key, generation):
6 |         self.key = key
7 |         # Which generation the species was created
8 |         self.created = generation
9 |         # Keeps track of what generation the fitness improved
10 |         self.last_improved = generation
11 |         self.representative = None
12 |         # All members of the species
13 |         self.members = {}
14 |         self.fitness = None
15 |         self.adjusted_fitness = None
16 |         # History of the fitness for the species
17 |         self.fitness_history = []
18 | 
19 |         # Keeps track of the unique characteristics of its members
20 |         self.species_info = None
21 | 
22 |     def update(self, representative, members):
23 |         self.representative = representative
24 |         self.members = members
25 | 
26 | 
27 | class SpeciesSet:
28 | 
29 |     def __init__(self, config, generation_tracker):
30 |         self.config = config
31 |         self.species_indexer = 0
32 |         self.species = {}
33 |         # For each genome if you index the dict it will return which species it is a part of
34 |         self.genome_species = {}
35 |         self.generation_tracker = generation_tracker
36 | 
37 |     @staticmethod
38 |     def species_fitness_function(species_members, function_type):
39 |         """
40 |         Finds the fitness for a species by aggregating the fitnesses of its members.
41 |         :param species_members: The members of the species
42 |         :param function_type: What type of function you want to apply, e.g. mean, max, min, median
43 |         :return: The fitness value for the species
44 |         """
45 |         # Dispatch to the matching numpy aggregation function
46 |         if function_type not in {'mean', 'max', 'median', 'min'}:
47 |             raise Exception('Invalid function type specified for species fitness function')
48 | 
49 |         species_member_fitnesses = [member.fitness for member in species_members.values()]
50 | 
51 |         function_map = {'mean': np.mean, 'max': np.max, 'median': np.median, 'min': np.min}
52 |         return function_map[function_type](species_member_fitnesses)
53 | 
54 |     def calculate_compatibility_distance(self, species_representative, genome, generation_tracker=None):
55 |         compatibility_distance_1 = species_representative.compute_compatibility_distance(other_genome=genome,
56 |                                                                                          config=self.config,
57 |                                                                                          generation_tracker=generation_tracker)
58 |         compatibility_distance_2 = genome.compute_compatibility_distance(other_genome=species_representative,
59 |                                                                          config=self.config)
60 | 
61 |         # There's no reason for this to be different depending on who you choose to be the other genome
62 |         if round(compatibility_distance_1, 3) != round(compatibility_distance_2, 3):
63 |             raise Exception('There is an error in how compatibility distance is calculated')
64 | 
65 |         return compatibility_distance_1
66 | 
67 |     def find_new_species_representative(self, unspeciated, population, dict_of_compatibility_distances,
68 |                                         new_representatives, new_members):
69 |         """
70 |         :param unspeciated: Set of genome_id's which haven't been assigned a species
71 |         :param population: A dict of (genome_id, genome_class) for the population
72 |         :param dict_of_compatibility_distances: An empty dict to store the distance between different genomes
73 |         :param new_representatives: A dict to save the new representative for a species
74 |         :param new_members: A dict to save the new members for each of the species
75 |         """
76 |         # For each species we find the new representative
77 |         for species_id, species_object in self.species.items():
78 |             candidates = []
79 |             for genome_id in unspeciated:
80 |                 genome = population[genome_id]
81 |                 compatibility_distance = self.calculate_compatibility_distance(
82 |                     species_representative=species_object.representative,
83 |                     genome=genome)
84 |                 dict_of_compatibility_distances[(species_object.representative, genome)] = compatibility_distance
85 |                 candidates.append((compatibility_distance, genome))
86 | 
87 |             _, new_rep = min(candidates, key=lambda x: x[0])
88 |             # Set the new representative for the species for the genome with the lowest distance
89 |             new_rep_id = new_rep.key
90 |             new_representatives[species_id] = new_rep_id
91 |             new_members[species_id] = [new_rep_id]
92 |             unspeciated.remove(new_rep_id)
93 | 
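    # ---------------------------------------------------------------------------
    # Editor's aside (illustrative, not part of the repository): the method
    # below implements the standard NEAT assignment rule - a genome joins the
    # species whose representative is closest, provided that distance is under
    # compatibility_threshold; otherwise it founds a new species. Hypothetical
    # numbers with compatibility_threshold = 3.0:
    #
    #     distances to representatives {species 1: 2.5, species 2: 0.9}
    #         -> joins species 2 (the closest candidate under the threshold)
    #     distances to representatives {species 1: 4.2}
    #         -> no candidates, so a new species id is created for the genome
    # ---------------------------------------------------------------------------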
94 |     def find_species_members(self, unspeciated, population, dict_of_compatibility_distances,
95 |                              new_representatives, new_members, compatibility_threshold, generation_tracker):
96 |         while unspeciated:
97 |             genome_id = unspeciated.pop()
98 |             genome = population[genome_id]
99 | 
100 |             candidates = []
101 | 
102 |             # Keeps track of the distances with each representative
103 |             compatibility_distances_dict = {}
104 |             representative_genomes_list = []
105 | 
106 |             for species_id, representative_id in new_representatives.items():
107 |                 representative_genome = population[representative_id]
108 |                 compatibility_distance = self.calculate_compatibility_distance(
109 |                     species_representative=representative_genome, genome=genome, generation_tracker=generation_tracker)
110 | 
111 |                 compatibility_distances_dict[representative_genome] = compatibility_distance
112 |                 representative_genomes_list.append(representative_genome)
113 | 
114 |                 dict_of_compatibility_distances[(representative_genome, genome)] = compatibility_distance
115 |                 if compatibility_distance < compatibility_threshold:
116 |                     candidates.append((compatibility_distance, species_id))
117 | 
118 |             if candidates:
119 |                 _, species_id = min(candidates, key=lambda x: x[0])
120 |                 new_members[species_id].append(genome_id)
121 | 
122 |             # We have to create a new species for the genome since it's not compatible
123 |             else:
124 |                 # increment for a new species
125 |                 self.species_indexer += 1
126 | 
127 |                 species_id = self.species_indexer
128 |                 new_representatives[species_id] = genome_id
129 |                 new_members[species_id] = [genome_id]
130 | 
131 |     def save_species_info(self, new_representatives, generation, new_members, population):
132 |         """
133 |         Saves the species to a species object and then within the species set instance
134 |         :param new_representatives: The representatives for the new species
135 |         :param generation: Which generation number it is
136 |         :param new_members: The members for each species
137 |         :param population: A dict of (key: genome instance)
138 |         """
139 |         # For each genome_id keeps track of which species_id it belongs to
140 |         self.genome_species = {}
141 | 
142 |         for species_id, representative_id in new_representatives.items():
143 |             species_object = self.species.get(species_id)
144 | 
145 |             if species_object is None:
146 |                 species_object = Species(key=species_id, generation=generation)
147 |                 self.species[species_id] = species_object
148 | 
149 |             members = new_members[species_id]
150 | 
151 |             for genome_id in members:
152 |                 self.genome_species[genome_id] = species_id
153 | 
154 |             members_dict = dict((genome_id, population[genome_id]) for genome_id in members)
155 |             species_object.update(representative=population[representative_id], members=members_dict)
156 | 
157 |     def speciate(self, population, compatibility_threshold, generation, generation_tracker=None):
158 |         """
159 | 
160 |         :param generation: Which generation number it is
161 |         :param compatibility_threshold:
162 |         :param population: A dict of (genome_id, genome_class)
163 |         :param generation_tracker: Tracks attributes for the current generation
164 |         :return:
165 |         """
166 |         unspeciated = set(population)
167 |         new_representatives = {}
168 |         new_members = {}
169 |         dict_of_compatibility_distances = {}
170 | 
171 |         self.find_new_species_representative(unspeciated=unspeciated,
172 |                                              dict_of_compatibility_distances=dict_of_compatibility_distances,
173 |                                              new_members=new_members, new_representatives=new_representatives,
174 |                                              population=population)
175 | 
176 |         self.find_species_members(unspeciated=unspeciated,
177 |                                   dict_of_compatibility_distances=dict_of_compatibility_distances,
178 |                                   new_members=new_members, new_representatives=new_representatives,
179 |                                   compatibility_threshold=compatibility_threshold, population=population,
180 |                                   generation_tracker=generation_tracker)
181 | 
182 |         self.save_species_info(new_representatives=new_representatives, new_members=new_members, population=population,
183 |                                generation=generation)
184 | 
185 |         # Mean compatibility distance
186 |         self.generation_tracker.mean_compatibility_distance = np.mean(list(dict_of_compatibility_distances.values()))
187 |         # Standard deviation
188 |         self.generation_tracker.std_dev_compatibility_distance = np.std(list(dict_of_compatibility_distances.values()))
189 |         self.generation_tracker.num_species = len(self.species)
--------------------------------------------------------------------------------
/stagnation.py:
--------------------------------------------------------------------------------
import sys
import numpy as np


class Stagnation:

    @staticmethod
    def update(species_set, generation, config):
        """
        Required interface method. Updates species fitness history information,
        checks which species have not improved in max_stagnation_generations
        generations, and returns a list with stagnant species marked for removal.
        Species are spared (the highest-fitness ones first) if removing them would
        drop the number of species below the configured num_species_min parameter.
        """

        species_data = []
        for species_id, species in species_set.species.items():
            if species.fitness_history:
                # If there is a fitness history, use the best fitness achieved so far as the baseline
                prev_fitness = max(species.fitness_history)
            else:
                # Else just set it to the lowest possible value for now
                prev_fitness = -sys.float_info.max

            # Calculate the fitness for the species
            species.fitness = species_set.species_fitness_function(species_members=species.members,
                                                                   function_type='mean')

            num_nodes_overall = []
            num_nodes_enabled = []
            num_connections_overall = []
            num_connections_enabled = []
            all_fitnesses = []
            species_member_fitnesses = [member.fitness for member in species.members.values()]
            for genome in species.members.values():
                num_nodes_overall.append(len(genome.nodes))
                num_nodes_enabled.append(len(genome.get_active_nodes()))
                num_connections_overall.append(len(genome.connections))
                num_connections_enabled.append(genome.check_connection_enabled_amount())
                if genome.fitness:
                    all_fitnesses.append(genome.fitness)

            avg_num_connections_enabled = np.mean(num_connections_enabled)
            avg_num_connections_overall = np.mean(num_connections_overall)
            avg_num_nodes_enabled = np.mean(num_nodes_enabled)
            avg_num_nodes_overall = np.mean(num_nodes_overall)

            # NOTE: despite the key name, 'mean_weight' stores the mean member fitness for the species
            complexity_tracker = {'num_connections_enabled': avg_num_connections_enabled,
                                  'num_connections_overall': avg_num_connections_overall,
                                  'num_nodes_enabled': avg_num_nodes_enabled,
                                  'num_nodes_overall': avg_num_nodes_overall,
                                  'mean_weight': np.mean(species_member_fitnesses)}
            species.species_info = complexity_tracker

            # Keep track of historical fitness
            species.fitness_history.append(species.fitness)

            species.adjusted_fitness = None

            # Keep track of when the species last improved
            if species.fitness > prev_fitness:
                species.last_improved = generation

            species_data.append((species_id, species))

        # Sort the species data into ascending fitness order
        species_data.sort(key=lambda x: x[1].fitness)

        # Keeps track of which species are stagnant or not
        results = []
        # Keeps track of each species's fitness
        species_fitnesses = []
        num_non_stagnant = len(species_data)

        for index, (species_id, species) in enumerate(species_data):
            # Override the stagnant state if marking this species as stagnant would
            # result in the total number of species dropping below the limit.
            # Because species are in ascending fitness order, less fit species
            # will be marked as stagnant first.
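            # Worked example (values illustrative): if config.max_stagnation_generations were 20 and
            # config.num_species_min were 2, a species whose fitness last improved at generation 5
            # becomes stagnant at generation 25, unless it is one of the two fittest species still
            # alive, in which case it is always spared by the check below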
            stagnant_time = generation - species.last_improved
            is_stagnant = False

            if num_non_stagnant > config.num_species_min:
                # Check if the stagnation time for the species is greater than the max set in the config
                is_stagnant = (stagnant_time >= config.max_stagnation_generations)

            # Ensure the num_species_min fittest species are never marked stagnant, even if they are.
            # Example: if config.num_species_min = 2, then the last two entries of species_data (the
            # two fittest species) always have is_stagnant set back to False
            if (len(species_data) - index) <= config.num_species_min:
                is_stagnant = False

            if is_stagnant:
                num_non_stagnant -= 1

            results.append((species_id, species, is_stagnant))
            species_fitnesses.append(species.fitness)

        return results
--------------------------------------------------------------------------------
/testing_multiclass.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt


class LazyProperty:

    def __init__(self, func):
        self._func = func
        self.__name__ = func.__name__

    def __get__(self, instance, owner):
        if instance is None:
            # Accessed on the class rather than an instance
            return self
        # Compute once, then cache the result on the instance so the descriptor is bypassed next time
        result = instance.__dict__[self.__name__] = self._func(instance)

        return result


class Model:

    def __init__(self, train_data, train_labels, test_data, test_labels, batch_size, learning_rate, keep_prob,
                 num_layers, num_epochs, layer_sizes):

        # Data
        self.x_train = train_data  # TODO: consider whether this should contain the test data as well
        self.y_train = train_labels
        self.x_test = test_data
        self.y_test = test_labels

        # Network architecture
        self.num_layers = num_layers
        self.num_epochs = num_epochs
        self.layer_sizes = layer_sizes

        # Parameters
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.keep_prob = keep_prob
        self.parameters = {}  # Empty dictionary to hold weights and biases once initialised
        self.initialise_params()  # Initialise parameters

        # Hold data
        self.epoch_cost = []
        self.epoch_counter = []

    @staticmethod
    def relu(x):
        return x * (x > 0)

    @staticmethod
    def sigmoid(x):
        activation = 1 / (1 + np.exp(-x))

        return activation

    @staticmethod
    def softmax(x):
        """Compute softmax values for each set of scores in x."""
        sum_ = np.sum(np.exp(x), axis=1)
        sum_.shape = (sum_.shape[0], 1)

        return np.exp(x) / sum_

    @staticmethod
    def xavier_initializer(num_inputs, num_outputs):
        """
        NOTE: if using ReLU then use constant 2 instead of 1 inside the sqrt (He initialisation)
        """
        np.random.seed(7)
        weights = np.random.randn(num_inputs, num_outputs) * np.sqrt(1 / num_inputs)

        return weights

    @staticmethod
    def sigmoid_gradient(a):
        gradient = (a * (1 - a))

        return gradient

    @staticmethod
    def relu_gradient(x):
        return 1. * (x > 0)

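    # Sketch (additional helper, not part of the original script): softmax above applies np.exp to
    # raw scores and can overflow for large inputs. The usual numerically stable variant shifts each
    # row by its max first, which leaves the softmax output unchanged:
    @staticmethod
    def stable_softmax(x):
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)
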
    @staticmethod
    def cost_fn(logits, labels, n_examples, loss_type='single_class'):

        if loss_type == 'single_class':
            # Binary cross-entropy; the module-level eps keeps the logs numerically stable
            loss = -((labels * np.log(logits + eps)) + ((1 - labels) * np.log(1 - logits + eps)))
            cost_ = (1 / n_examples) * np.sum(loss, axis=0)

        elif loss_type == 'multi_class':
            # Categorical cross-entropy
            loss = -np.sum(labels * np.log(logits + eps))
            cost_ = loss * (1 / n_examples)

        else:
            raise ValueError('Invalid loss_type specified')

        return cost_

    def plot_graph(self):
        plt.plot(self.epoch_counter, self.epoch_cost)
        plt.show()

    @staticmethod
    def batchnorm_forward(x, gamma, beta):
        # Compute per-feature mean and variance
        mean = np.mean(x, axis=0)
        var = np.var(x, axis=0)

        # Normalize and zero-center (kept explicit for caching purposes)
        x_mu = x - mean
        inv_var = 1.0 / np.sqrt(var + eps)
        x_hat = x_mu * inv_var

        # Squash
        out = gamma * x_hat + beta

        # Cache variables for the backward pass
        cache = x_mu, inv_var, x_hat, gamma

        return out, cache

    @staticmethod
    def batchnorm_backward(dout, cache):

        N, D = dout.shape
        x_mu, inv_var, x_hat, gamma = cache

        dbeta = np.sum(dout, axis=0)
        dgamma = np.sum(x_hat * dout, axis=0)
        # Elementwise scale by gamma, which broadcasts per feature (np.dot with gamma.T would
        # incorrectly collapse the feature dimension)
        dx_hat = dout * gamma

        dvar = np.sum((dx_hat * x_mu * (-0.5) * inv_var ** 3), axis=0)
        dmu = (np.sum((dx_hat * -inv_var), axis=0)) + (dvar * (-2.0 / N) * np.sum(x_mu, axis=0))

        dx1 = dx_hat * inv_var
        dx2 = dvar * (2.0 / N) * x_mu
        dx3 = (1.0 / N) * dmu

        # Final partial derivatives
        dx = dx1 + dx2 + dx3

        return dx, dgamma, dbeta

    def bias_correction(self, variable_name, timestep, moment=None):
        assert (isinstance(variable_name, str))
        if moment == 1:
            bias_corrected = self.parameters['moment1_{}'.format(variable_name)] / (1 - beta1 ** timestep)

        elif moment == 2:
            bias_corrected = self.parameters['moment2_{}'.format(variable_name)] / (1 - beta2 ** timestep)
        else:
            raise ValueError('moment must be 1 or 2')

        return bias_corrected

    def update_moment(self, variable_name, gradient, moment=None):
        assert (isinstance(variable_name, str))
        if moment == 1:
            self.parameters['moment1_{}'.format(variable_name)] = \
                (beta1 * self.parameters['moment1_{}'.format(variable_name)]) + ((1 - beta1) * gradient)

        elif moment == 2:
            self.parameters['moment2_{}'.format(variable_name)] = \
                (beta2 * self.parameters['moment2_{}'.format(variable_name)]) + ((1 - beta2) * np.power(gradient, 2))

    def create_layer(self, input_matrix, weights_matrix, bias_matrix, activation_fn='relu', batch_norm=False):

        assert (isinstance(activation_fn, str))

        z = np.dot(input_matrix, weights_matrix) + bias_matrix

        # NOTE: batch norm is hard-coded to the first layer's gamma/beta parameters
        if batch_norm is True:
            gamma = self.parameters['gamma1']
            beta = self.parameters['beta1']
            z, cache = self.batchnorm_forward(z, gamma, beta)

        if activation_fn == 'sigmoid':
            activation = self.sigmoid(z)
        elif activation_fn == 'relu':
            activation = self.relu(z)
        elif activation_fn == 'softmax':
            activation = self.softmax(z)

        if batch_norm is True:
            return activation, cache
        else:
            return activation

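    # Sketch (additional helper, not part of the original script): a quick sanity check that
    # batchnorm_forward produces roughly zero-mean, unit-variance features before the gamma/beta
    # squash. Relies on the module-level eps defined in the parameter block at the bottom of the file
    @staticmethod
    def check_batchnorm_stats(x):
        out, _ = Model.batchnorm_forward(x, gamma=1.0, beta=0.0)
        means_ok = np.allclose(np.mean(out, axis=0), 0.0, atol=1e-6)
        vars_ok = np.allclose(np.var(out, axis=0), 1.0, atol=1e-3)
        return means_ok, vars_ok
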
    def initialise_params(self):

        if self.layer_sizes[0, 0] != self.x_train.shape[1]:
            raise ValueError('Number of inputs must match first entry in layer_sizes')

        if self.layer_sizes.shape[0] != self.num_layers:
            raise ValueError('Number of layers defined must be equal to number of layers set')

        if self.layer_sizes.shape[1] != 1:
            raise ValueError('layer_sizes must be a column vector')

        # Iterate through each layer to find num inputs and outputs
        for i in range(self.num_layers - 1):
            index = i + 1  # Keep indexing of parameters to begin from 1 for convenience
            num_inputs = self.layer_sizes[i, 0]  # Current layer number of inputs
            num_outputs = self.layer_sizes[i + 1, 0]  # Next layer expected number of inputs

            self.parameters['w{}'.format(index)] = self.xavier_initializer(num_inputs, num_outputs)

            # Initialise moment vectors
            self.parameters['moment1_w{}'.format(index)] = 0
            self.parameters['moment2_w{}'.format(index)] = 0

            self.parameters['moment1_b{}'.format(index)] = 0
            self.parameters['moment2_b{}'.format(index)] = 0

            # Only need one bias entry per class for the last layer (uses the module-level n_classes)
            if i == self.num_layers - 2:
                self.parameters['b{}'.format(index)] = np.zeros((1, n_classes))
            else:
                self.parameters['b{}'.format(index)] = np.zeros((1, num_outputs))

            if i != 0:
                # Parameters for batch norm
                n_parameters = self.layer_sizes[1, 0]  # Number of neurons on the second (hidden) layer
                self.parameters['gamma{}'.format(index - 1)] = np.ones((1, n_parameters))
                self.parameters['moment1_gamma{}'.format(index - 1)] = 0
                self.parameters['moment2_gamma{}'.format(index - 1)] = 0

                self.parameters['beta{}'.format(index - 1)] = np.zeros((1, n_parameters))
                self.parameters['moment1_beta{}'.format(index - 1)] = 0
                self.parameters['moment2_beta{}'.format(index - 1)] = 0

        return self.parameters

    def calc_train_accuracy(self, train=True):

        if train is True:
            data = self.x_train
            labels = self.y_train
        else:
            data = self.x_test
            labels = self.y_test

        # Uses the module-level activation_fn_list defined in the parameter block below
        prediction = self.predict(data, activation_fn_list)

        accuracy = (prediction == labels)  # Returns bool array
        accuracy = accuracy * 1  # Turns bools into ints
        accuracy = np.average(accuracy)

        return accuracy * 100

    def calc_gradients(self, data_batch, labels_batch, n_examples, batch_norm):

        if batch_norm is True:
            prediction, a1, cache = self.predict(data_batch, activation_fn_list, optimise=True)

            # Compute gradients for the last (output) layer
            dZ2 = (prediction - labels_batch) * (1 / n_examples)
            dW2 = np.dot(a1.T, dZ2)
            dB2 = np.sum(dZ2, axis=0)

            # Compute gradients for batch norm
            if activation_fn_list[0] == 'sigmoid':
                dY = np.dot(dZ2, self.parameters['w2'].T) * self.sigmoid_gradient(a1)
            elif activation_fn_list[0] == 'relu':
                dY = np.dot(dZ2, self.parameters['w2'].T) * self.relu_gradient(a1)

            dZ1, dgamma, dbeta = self.batchnorm_backward(dY, cache)

            # Compute gradients for the first (hidden) layer
            dW1 = np.dot(data_batch.T, dZ1)
            dB1 = np.sum(dZ1, axis=0)

            gradients_dict = {'w1': dW1, 'w2': dW2, 'b1': dB1, 'b2': dB2, 'beta1': dbeta, 'gamma1': dgamma}

        elif batch_norm is False:
            prediction, a1 = self.predict(data_batch, activation_fn_list, optimise=True)

            # Compute gradients for the last (output) layer
            dZ2 = (prediction - labels_batch) * (1 / n_examples)
            dW2 = np.dot(a1.T, dZ2)
            dB2 = np.sum(dZ2, axis=0)

            # Compute gradients for the first (hidden) layer
            if activation_fn_list[0] == 'sigmoid':
                dZ1 = np.dot(dZ2, self.parameters['w2'].T) * self.sigmoid_gradient(a1)
            elif activation_fn_list[0] == 'relu':
                dZ1 = np.dot(dZ2, self.parameters['w2'].T) * self.relu_gradient(a1)

            dW1 = np.dot(data_batch.T, dZ1)
            dB1 = np.sum(dZ1, axis=0)

            gradients_dict = {'w1': dW1, 'w2': dW2, 'b1': dB1, 'b2': dB2}

        dB2.shape = (1, dB2.shape[0])
        dB1.shape = (1, dB1.shape[0])

        assert (self.parameters['w1'].shape == dW1.shape)
        assert (self.parameters['w2'].shape == dW2.shape)
        assert (self.parameters['b1'].shape == dB1.shape)
        assert (self.parameters['b2'].shape == dB2.shape)

        return gradients_dict, prediction

    def predict(self, current_batch, activation_fn_list, optimise=False):

        # Define parameters
        w1 = self.parameters['w1']
        b1 = self.parameters['b1']
        w2 = self.parameters['w2']
        b2 = self.parameters['b2']

        # Define network and prediction (batch_norm here is the module-level flag set in the
        # parameter block below)
        if batch_norm is True:
            a1, cache = self.create_layer(current_batch, w1, b1, activation_fn=activation_fn_list[0],
                                          batch_norm=True)  # First layer
            prediction = self.create_layer(a1, w2, b2, activation_fn=activation_fn_list[1],
                                           batch_norm=False)  # Second layer

        elif batch_norm is False:
            a1 = self.create_layer(current_batch, w1, b1, activation_fn=activation_fn_list[0])  # First layer
            prediction = self.create_layer(a1, w2, b2, activation_fn=activation_fn_list[1])  # Second layer

        if optimise is True and batch_norm is True:
            return prediction, a1, cache  # Gives raw values to calculate loss during training
        if optimise is True and batch_norm is False:
            return prediction, a1
        if optimise is False:
            return np.around(prediction)  # Gives values rounded to 0 or 1 to see prediction result on test set

    def optimise(self):

        timestep = 0

        for batch_start in range(0, self.x_train.shape[0], self.batch_size):

            timestep += 1

            current_batch = self.x_train[batch_start:batch_start + self.batch_size, :]
            current_labels = self.y_train[batch_start:batch_start + self.batch_size, :]

            n_examples = current_batch.shape[0]

            gradients_dict, prediction = self.calc_gradients(current_batch, current_labels, n_examples,
                                                             batch_norm=batch_norm)

            cost = self.cost_fn(prediction, current_labels, n_examples, loss_type='multi_class')

            # Update parameters
            if adam_optimizer is False:
                # Plain mini-batch gradient descent
                for variable in gradients_dict:
                    self.parameters[variable] = self.parameters[variable] - \
                                                (self.learning_rate * gradients_dict[variable])

            elif adam_optimizer is True:

                for variable in gradients_dict:

                    self.update_moment(variable, gradients_dict[variable], 1)
                    self.update_moment(variable, gradients_dict[variable], 2)

                    # Bias correction
                    bias_corr_m1 = self.bias_correction(variable, timestep, 1)
                    bias_corr_m2 = self.bias_correction(variable, timestep, 2)

                    # eps guards against division by zero early in training
                    self.parameters[variable] = self.parameters[variable] - \
                                                (self.learning_rate * (bias_corr_m1 / (np.sqrt(bias_corr_m2) + eps)))

        return cost.item()


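# Reference sketch (illustrative, not used by the script): a standalone restatement of the Adam
# update that optimise() performs per parameter, to make the moment and bias-correction
# bookkeeping explicit. The defaults mirror the beta1/beta2/eps values set below
def adam_step(param, grad, m1, m2, t, lr=0.1, b1=0.9, b2=0.999, e=1e-8):
    # Exponential moving averages of the gradient and squared gradient
    m1 = b1 * m1 + (1 - b1) * grad
    m2 = b2 * m2 + (1 - b2) * grad ** 2
    # Bias correction compensates for the zero-initialised moments at small t
    m1_hat = m1 / (1 - b1 ** t)
    m2_hat = m2 / (1 - b2 ** t)
    # e keeps the division away from zero early in training
    return param - lr * m1_hat / (np.sqrt(m2_hat) + e), m1, m2

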
# Test and Train data
n_generated = 5000  # How many training examples to be generated
use_onehot = True

if use_onehot is True:
    # XNOR labelling: class [1, 0] when the two inputs are equal, class [0, 1] when they differ
    data_train = np.random.randint(2, size=(n_generated, 2))
    labels_train = np.empty((n_generated, 2))
    for row in range(data_train.shape[0]):
        if data_train[row, 0] == data_train[row, 1]:
            labels_train[row, 0] = 1
            labels_train[row, 1] = 0
        else:
            labels_train[row, 0] = 0
            labels_train[row, 1] = 1

    data_test = np.array(
        [[1, 1], [0, 1], [1, 1], [0, 0], [1, 0], [0, 1], [1, 1], [0, 0], [1, 1], [0, 1], [1, 1], [0, 0], [1, 0],
         [0, 1], [1, 1], [0, 0]])
    labels_test = np.array(
        [[1, 0], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [0, 1],
         [0, 1], [1, 0], [1, 0]])

elif use_onehot is False:
    data_train = np.random.randint(2, size=(n_generated, 2))
    labels_train = np.empty((n_generated, 1))
    for row in range(data_train.shape[0]):
        if data_train[row, 0] == data_train[row, 1]:
            labels_train[row] = 1
        else:
            labels_train[row] = 0

    data_test = np.array(
        [[1, 1], [0, 1], [1, 1], [0, 0], [1, 0], [0, 1], [1, 1], [0, 0], [1, 1], [0, 1], [1, 1], [0, 0], [1, 0],
         [0, 1], [1, 1], [0, 0]])
    labels_test = np.array([[1], [0], [1], [1], [0], [0], [1], [1], [1], [0], [1], [1], [0], [0], [1], [1]])

# Parameters
batch_size = 32
batch_norm = False
adam_optimizer = False
beta1 = 0.9
beta2 = 0.999
eps = 1e-8
learning_rate = 0.1
keep_prob = 0.5
num_layers = 3
num_epochs = 125
n_features = data_train.shape[1]
n_classes = 2
activation_fn_list = ['sigmoid', 'sigmoid']  # Specify which activation function you want for a given layer

# Architecture for network
layer_sizes = np.array([[n_features], [2], [n_classes]])

# Initialise model
model = Model(data_train, labels_train, data_test, labels_test, batch_size, learning_rate, keep_prob, num_layers,
              num_epochs, layer_sizes)

for i in range(model.num_epochs):
    cost = model.optimise()

    # Keep track of costs
    model.epoch_counter.append(i)
    model.epoch_cost.append(cost)

    # Check cost and accuracy at every quarter and last epoch
    if i % int(round(num_epochs * 0.25, 0)) == 0 or i == model.num_epochs - 1:
        accuracy = round(model.calc_train_accuracy(train=True), 0)

        print('EPOCH:', i, '\t', 'Cost:', round(cost, 3), '\t',
              'Accuracy:', '%{}'.format(accuracy))

model.plot_graph()

# Test model on validation data
test_prediction = model.predict(model.x_test, activation_fn_list)
accuracy = round(model.calc_train_accuracy(train=False), 0)

print(test_prediction, '\n', 'Test Accuracy:', '%{}'.format(accuracy))


# NOTE: the training script above runs at module level; main is a placeholder
def main():
    pass


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------