├── mnist_plot.png
├── cifar10_plot.png
├── .gitignore
├── naturalselection
│   ├── __init__.py
│   ├── callbacks.py
│   ├── nn.py
│   └── core.py
├── numbers_example.py
├── onemax_example.py
├── setup.py
├── LICENSE
├── nn_example.py
└── README.md

/mnist_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saattrupdan/naturalselection/HEAD/mnist_plot.png
--------------------------------------------------------------------------------
/cifar10_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saattrupdan/naturalselection/HEAD/cifar10_plot.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/*
2 | *.log
3 | *.swp
4 | *.egg-info
5 | dist
6 | build
7 | naturalselection/__pycache__/*
8 | 
--------------------------------------------------------------------------------
/naturalselection/__init__.py:
--------------------------------------------------------------------------------
1 | from naturalselection.core import Genus, Organism, Population, History
2 | from naturalselection.nn import NN, NNs
3 | 
--------------------------------------------------------------------------------
/numbers_example.py:
--------------------------------------------------------------------------------
1 | import naturalselection as ns
2 | 
3 | Pair = ns.Genus(x = range(1, 10000), y = range(1, 10000))
4 | 
5 | pairs = ns.Population(
6 |     genus = Pair,
7 |     size = 100,
8 |     fitness_fn = lambda n: n.x/n.y
9 | )
10 | 
11 | history = pairs.evolve(generations = 100)
12 | 
13 | print(history.fittest)
14 | 
15 | history.plot()
16 | 
--------------------------------------------------------------------------------
/onemax_example.py:
--------------------------------------------------------------------------------
1 | import naturalselection as ns
2 | 
3 | BitString = ns.Genus(**{f'x{n}' : (0,1) for n in range(100)})
4 | 
5 | def sum_bits(bitstring):
6 |     return sum(bitstring.get_genome().values())
7 | 
8 | bitstrings = ns.Population(
9 |     genus = BitString,
10 |     size = 5,
11 |     fitness_fn = sum_bits,
12 | )
13 | 
14 | history = bitstrings.evolve(generations = 5000, goal = 100)
15 | 
16 | print(f"Number of ones achieved: {history.fittest['fitness']}")
17 | 
18 | history.plot(only_show_max = True)
19 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | with open("README.md", "r") as file_in:
4 |     long_description = file_in.read()
5 | 
6 | setuptools.setup(
7 |     name = 'naturalselection',
8 |     entry_points = {'console_scripts' : [
9 |         'core = naturalselection.core:main',
10 |         'nn = naturalselection.nn:main',
11 |     ]},
12 |     install_requires = ['numpy','matplotlib','tqdm','tensorflow','scikit-learn'],
13 |     version = "0.6.0",
14 |     author = "Dan Saattrup Nielsen",
15 |     author_email = "saattrupdan@gmail.com",
16 |     description = "An all-purpose pythonic genetic algorithm",
17 |     keywords = "genetic algorithm neural network",
18 |     long_description = long_description,
19 |     long_description_content_type = "text/markdown",
20 |     url = "https://github.com/saattrupdan/naturalselection",
21 |     packages = setuptools.find_packages(),
22 |     classifiers = [
23 |         "Development Status :: 3 - Alpha",
24 |         "Programming Language :: Python :: 3",
25 |         "License :: OSI 
Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /nn_example.py: -------------------------------------------------------------------------------- 1 | import naturalselection as ns 2 | 3 | def image_preprocessing(X): 4 | ''' Basic normalisation and scaling preprocessing. ''' 5 | import numpy as np 6 | X = X.reshape((-1, np.prod(X.shape[1:]))) 7 | X = X.astype('float32') 8 | X = (X - X.min()) / (X.max() - X.min()) 9 | X -= X.mean(axis = 0) 10 | return X 11 | 12 | def train_val_sets(kind = 'mnist'): 13 | ''' Get normalised and scaled train- and val sets. 
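Supported kinds are mnist, fashion_mnist, cifar10, cifar100, boston_housing, imdb and reuters; returns an (X_train, Y_train, X_val, Y_val) tuple.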
''' 14 | 15 | from tensorflow.keras.utils import to_categorical 16 | if kind == 'mnist': 17 | import tensorflow.keras.datasets.mnist as data 18 | elif kind == 'fashion_mnist': 19 | import tensorflow.keras.datasets.fashion_mnist as data 20 | elif kind == 'cifar10': 21 | import tensorflow.keras.datasets.cifar10 as data 22 | elif kind == 'cifar100': 23 | import tensorflow.keras.datasets.cifar100 as data 24 | elif kind == 'boston_housing': 25 | import tensorflow.keras.datasets.boston_housing as data 26 | elif kind == 'imdb': 27 | import tensorflow.keras.datasets.imdb as data 28 | elif kind == 'reuters': 29 | import tensorflow.keras.datasets.reuters as data 30 | else: 31 | raise NameError('Dataset not recognised.') 32 | 33 | (X_train, Y_train), (X_val, Y_val) = data.load_data() 34 | X_train = image_preprocessing(X_train) 35 | Y_train = to_categorical(Y_train) 36 | X_val = image_preprocessing(X_val) 37 | Y_val = to_categorical(Y_val) 38 | return (X_train, Y_train, X_val, Y_val) 39 | 40 | def evolve_nn(kind = 'mnist'): 41 | 42 | if kind == 'mnist': 43 | max_training_time = 60 44 | elif kind == 'fashion_mnist': 45 | max_training_time = 240 46 | elif kind == 'cifar10': 47 | max_training_time = 120 48 | elif kind == 'cifar100': 49 | max_training_time = 240 50 | else: 51 | raise NameError('Dataset not recognised.') 52 | 53 | print(f"\n~~~ Now evolving {kind} ~~~") 54 | 55 | nns = ns.NNs( 56 | size = 30, 57 | train_val_sets = train_val_sets(kind), 58 | loss_fn = 'categorical_crossentropy', 59 | score = 'accuracy', 60 | output_activation = 'softmax', 61 | max_training_time = max_training_time, 62 | max_epochs = 1, 63 | ) 64 | 65 | history = nns.evolve(generations = 30) 66 | print("Best overall genome:", history.fittest) 67 | 68 | history.plot( 69 | title = "Validation accuracy by generation", 70 | ylabel = "Validation accuracy", 71 | show_plot = False, 72 | file_name = f'{kind}_plot.png' 73 | ) 74 | 75 | best_score = nns.train_best() 76 | print("Best score:", best_score) 77 | 78 | 79 | if __name__ == '__main__': 80 | from sys import argv 81 | if len(argv) > 1: 82 | for arg in argv[1:]: 83 | evolve_nn(arg) 84 | else: 85 | evolve_nn() 86 | -------------------------------------------------------------------------------- /naturalselection/callbacks.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.callbacks import Callback, EarlyStopping 2 | import numpy as np 3 | from tqdm import tqdm 4 | import time 5 | 6 | class TQDMCallback(Callback): 7 | ''' 8 | Callback to enable tqdm integration. 
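Shows an outer progress bar over the epochs and an inner one over the samples or batches within each epoch.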
9 | Source: https://github.com/bstriner/keras-tqdm 10 | ''' 11 | 12 | def __init__(self, outer_description = "Training", 13 | inner_description_initial = "Epoch: {epoch}", 14 | inner_description_update = "Epoch: {epoch} - {metrics}", 15 | metric_format = "{name}: {value:0.4f}", 16 | separator = ", ", 17 | leave_inner = True, 18 | leave_outer = True, 19 | show_inner = True, 20 | show_outer = True, 21 | output_file = None, 22 | outer_position = None, 23 | inner_position = None, 24 | initial = 0): 25 | 26 | self.outer_description = outer_description 27 | self.inner_description_initial = inner_description_initial 28 | self.inner_description_update = inner_description_update 29 | self.metric_format = metric_format 30 | self.separator = separator 31 | self.leave_inner = leave_inner 32 | self.leave_outer = leave_outer 33 | self.show_inner = show_inner 34 | self.show_outer = show_outer 35 | self.output_file = output_file 36 | self.tqdm_outer = None 37 | self.tqdm_inner = None 38 | self.epoch = None 39 | self.running_logs = None 40 | self.inner_count = None 41 | self.initial = initial 42 | self.outer_position = outer_position 43 | self.inner_position = inner_position 44 | 45 | def build_tqdm(self, desc, total, leave, position = None, initial = 0): 46 | """ 47 | Extension point. Override to provide custom options to tqdm 48 | initializer. 49 | """ 50 | return tqdm(desc = desc, total = total, leave = leave, 51 | file = self.output_file, initial = initial, 52 | position = position) 53 | 54 | def build_tqdm_outer(self, desc, total): 55 | """ 56 | Extension point. Override to provide custom options to outer 57 | progress bars (Epoch loop) 58 | """ 59 | return self.build_tqdm(desc = desc, total = total, 60 | leave = self.leave_outer, initial = self.initial, 61 | position = self.outer_position) 62 | 63 | def build_tqdm_inner(self, desc, total): 64 | """ 65 | Extension point. 
Override to provide custom options to inner 66 | progress bars (Batch loop) 67 | """ 68 | return self.build_tqdm(desc = desc, total = total, 69 | leave = self.leave_inner, position = self.inner_position) 70 | 71 | def on_epoch_begin(self, epoch, logs = {}): 72 | self.epoch = epoch 73 | desc = self.inner_description_initial.format( 74 | epoch = self.epoch) 75 | self.mode = 0 # samples 76 | if 'samples' in self.params: 77 | self.inner_total = self.params['samples'] 78 | elif 'nb_sample' in self.params: 79 | self.inner_total = self.params['nb_sample'] 80 | else: 81 | self.mode = 1 # steps 82 | self.inner_total = self.params['steps'] 83 | if self.show_inner: 84 | self.tqdm_inner = self.build_tqdm_inner(desc = desc, 85 | total = self.inner_total) 86 | self.inner_count = 0 87 | self.running_logs = {} 88 | 89 | def on_epoch_end(self, epoch, logs = {}): 90 | metrics = self.format_metrics(logs) 91 | desc = self.inner_description_update.format(epoch = epoch, 92 | metrics = metrics) 93 | if self.show_inner: 94 | self.tqdm_inner.desc = desc 95 | # set miniters and mininterval to 0 so last update shows 96 | self.tqdm_inner.miniters = 0 97 | self.tqdm_inner.mininterval = 0 98 | self.tqdm_inner.update(self.inner_total - self.tqdm_inner.n) 99 | self.tqdm_inner.close() 100 | if self.show_outer: 101 | self.tqdm_outer.update(1) 102 | 103 | def on_batch_begin(self, batch, logs = {}): 104 | pass 105 | 106 | def on_batch_end(self, batch, logs = {}): 107 | if self.mode == 0: 108 | update = logs['size'] 109 | else: 110 | update = 1 111 | self.inner_count += update 112 | if self.inner_count < self.inner_total: 113 | self.append_logs(logs) 114 | metrics = self.format_metrics(self.running_logs) 115 | desc = self.inner_description_update.format( 116 | epoch = self.epoch, metrics = metrics) 117 | if self.show_inner: 118 | self.tqdm_inner.desc = desc 119 | self.tqdm_inner.update(update) 120 | 121 | def on_train_begin(self, logs = {}): 122 | if self.show_outer: 123 | epochs = (self.params['epochs'] if 'epochs' in self.params 124 | else self.params['nb_epoch']) 125 | self.tqdm_outer = self.build_tqdm_outer( 126 | desc = self.outer_description, total = epochs) 127 | 128 | def on_train_end(self, logs = {}): 129 | if self.show_outer: 130 | self.tqdm_outer.close() 131 | 132 | def append_logs(self, logs): 133 | metrics = self.params['metrics'] 134 | for metric, value in logs.items(): 135 | if metric in metrics: 136 | if metric in self.running_logs: 137 | self.running_logs[metric].append(value[()]) 138 | else: 139 | self.running_logs[metric] = [value[()]] 140 | 141 | def format_metrics(self, logs): 142 | metrics = self.params['metrics'] 143 | strings = [self.metric_format.format(name = metric, 144 | value = np.mean(logs[metric], axis = None)) 145 | for metric in metrics if metric in logs] 146 | return self.separator.join(strings) 147 | 148 | 149 | class EarlierStopping(EarlyStopping): 150 | ''' 151 | Callback to stop training when enough time has passed. 152 | Source: https://github.com/keras-team/keras-contrib/issues/87 153 | 154 | INPUT 155 | (int) seconds: maximum time before stopping. 156 | (int) verbose: verbosity mode. 
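All remaining keyword arguments are passed on to keras' EarlyStopping, so monitor, patience, min_delta and restore_best_weights work as usual.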
157 |     '''
158 |     def __init__(self, seconds = None, **kwargs):
159 |         super().__init__(**kwargs)
160 |         self.start_time = 0
161 |         self.seconds = seconds
162 | 
163 |     def on_train_begin(self, logs = {}):
164 |         self.start_time = time.time()
165 |         super().on_train_begin(logs)
166 | 
167 |     def on_batch_end(self, batch, logs = {}):
168 |         if self.seconds and time.time()-self.start_time > self.seconds:
169 |             self.model.stop_training = True
170 |             if self.verbose:
171 |                 print('Stopping after {} seconds.'\
172 |                     .format(self.seconds))
173 |         super().on_batch_end(batch, logs)
174 | 
175 |     def on_epoch_end(self, epoch, logs = {}):
176 |         if self.seconds and time.time()-self.start_time > self.seconds:
177 |             self.model.stop_training = True
178 |             if self.restore_best_weights and self.best_weights:
179 |                 self.model.set_weights(self.best_weights)
180 |             if self.verbose:
181 |                 print('Stopping after {} seconds.'.\
182 |                     format(self.seconds))
183 | 
184 |         # Call early stopping if we're beyond the first epoch
185 |         if logs.get(self.monitor):
186 |             super().on_epoch_end(epoch, logs)
187 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NaturalSelection
2 | 
3 | [![PyPI version](https://badge.fury.io/py/naturalselection.svg)](https://badge.fury.io/py/naturalselection)
4 | 
5 | An all-purpose pythonic genetic algorithm, which includes built-in hyperparameter tuning support for neural networks.
6 | 
7 | 
8 | ## Installation
9 | 
10 | ```
11 | $ pip install naturalselection
12 | ```
13 | 
14 | 
15 | ## Usage
16 | 
17 | Here is a toy example optimising a pair of numbers with respect to division.
18 | 
19 | ```python
20 | >>> import naturalselection as ns
21 | >>>
22 | >>> Pair = ns.Genus(x = range(1, 10000), y = range(1, 10000))
23 | >>>
24 | >>> pairs = ns.Population(
25 | ...     genus = Pair,
26 | ...     size = 100,
27 | ...     fitness_fn = lambda n: n.x/n.y
28 | ... )
29 | ...
30 | >>> history = pairs.evolve(generations = 100)
31 | Evolving population: 100%|██████████████████| 100/100 [00:05<00:00, 19.59it/s]
32 | >>>
33 | >>> history.fittest
34 | {'genome': {'x': 9922, 'y': 10}, 'fitness': 992.2}
35 | >>>
36 | >>> history.plot()
37 | ```
38 | 
39 | ![Plot showing fitness value over 100 generations.](https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/naturalselection_data/numbers_example.png)
40 | 
41 | 
42 | We can also easily solve the classical [OneMax problem](http://tracer.lcc.uma.es/problems/onemax/onemax.html), which is about finding the bit-string of a given length consisting of all 1's. Here we set `goal = 100` in the `evolve` function to allow for early stopping if we reach our goal before the maximum number of generations, which we set to 5,000. Note that the run below only takes about nine seconds, as it stops as soon as the goal is reached.
43 | 
44 | ```python3
45 | >>> import naturalselection as ns
46 | >>>
47 | >>> BitString = ns.Genus(**{f'x{n}' : (0,1) for n in range(100)})
48 | >>>
49 | >>> def sum_bits(bitstring):
50 | ...     return sum(bitstring.get_genome().values())
51 | ...
52 | >>> bitstrings = ns.Population(
53 | ...     genus = BitString,
54 | ...     size = 5,
55 | ...     fitness_fn = sum_bits
56 | ... )
57 | ...
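>>> # Stop evolving early as soon as the goal fitness of 100 is reached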
58 | >>> history = bitstrings.evolve(generations = 5000, goal = 100)
59 | Evolving population: 36%|██████ | 1805/5000 [00:09<00:16, 194.43it/s]
60 | >>>
61 | >>> history.plot(only_show_max = True)
62 | ```
63 | 
64 | ![Plot showing fitness value over 4500 generations, converging steadily to the optimal filled out sequence of ones.](https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/naturalselection_data/onemax_example.png)
65 | 
66 | 
67 | Lastly, here is an example of finding a fully connected feedforward neural network to model [MNIST](https://en.wikipedia.org/wiki/MNIST_database).
68 | 
69 | Note that the models are trained in parallel, so a copy of the MNIST data set is loaded in for every CPU core in your computer, each copy taking up ~750MB of memory. If this causes you to run into memory trouble then you can set the `workers` parameter to something small like 2, or set `multiprocessing = False` to turn parallelism off completely. I've marked these in the code below.
70 | 
71 | ```python3
72 | >>> import naturalselection as ns
73 | >>>
74 | >>> def preprocessing(X):
75 | ...     ''' Basic normalisation and scaling preprocessing. '''
76 | ...     import numpy as np
77 | ...     X = X.reshape((-1, np.prod(X.shape[1:])))
78 | ...     X = (X - X.min()) / (X.max() - X.min())
79 | ...     X -= X.mean(axis = 0)
80 | ...     return X
81 | ...
82 | >>> def mnist_train_val_sets():
83 | ...     ''' Get normalised and scaled MNIST train- and val sets. '''
84 | ...     from tensorflow.keras.utils import to_categorical
85 | ...     import mnist
86 | ...     X_train = preprocessing(mnist.train_images())
87 | ...     Y_train = to_categorical(mnist.train_labels())
88 | ...     X_val = preprocessing(mnist.test_images())
89 | ...     Y_val = to_categorical(mnist.test_labels())
90 | ...     return (X_train, Y_train, X_val, Y_val)
91 | ...
92 | >>> nns = ns.NNs(
93 | ...     size = 30,
94 | ...     train_val_sets = mnist_train_val_sets(),
95 | ...     loss_fn = 'categorical_crossentropy',
96 | ...     score = 'accuracy',
97 | ...     output_activation = 'softmax',
98 | ...     max_epochs = 1,
99 | ...     max_training_time = 60,
100 | ...     # workers = 2, # If you want to reduce parallelism
101 | ...     # multiprocessing = False # If you want to disable parallelism
102 | ... )
103 | ...
104 | >>> history = nns.evolve(generations = 20)
105 | Evolving population: 100%|█████████████████████| 20/20 [57:18<00:00, 73.22s/it]
106 | Computing fitness: 100%|█████████████████████████| 7/7 [01:20<00:00, 10.13s/it]
107 | >>>
108 | >>> history.fittest
109 | {'genome': {'optimizer': 'adam', 'hidden_activation': 'relu',
110 | 'batch_size': 32, 'initializer': 'glorot_normal', 'input_dropout': 0.2,
111 | 'neurons0': 256, 'dropout0': 0.0, 'neurons1': 128, 'dropout1': 0.1,
112 | 'neurons2': 256, 'dropout2': 0.1, 'neurons3': 256, 'dropout3': 0.2,
113 | 'neurons4': 128, 'dropout4': 0.4}, 'fitness': 0.9659}
114 | >>>
115 | >>> history.plot(
116 | ...     title = "Validation accuracy by generation",
117 | ...     ylabel = "Validation accuracy"
118 | ... )
119 | ```
120 | 
121 | ![Plot showing fitness value (which is accuracy in this case) over 20 generations, converging to roughly 96.50%.](https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/naturalselection_data/mnist_example.png)
122 | 
123 | We can then train the best performing model and save it locally:
124 | 
125 | ```python3
126 | >>> # Training the best model and saving it to mnist_model.h5
127 | >>> best_score = nns.train_best(file_name = 'mnist_model')
128 | Epoch 0, val_acc: 0.966: 100%|█████████| 60000/60000 [00:12<00:00, 1388.45it/s]
129 | (...)
130 | Epoch 19, val_acc: 0.982: 100%|████████| 60000/60000 [00:11<00:00, 1846.24it/s]
131 | >>>
132 | >>> best_score
133 | 0.982
134 | ```
135 | 
136 | ## Algorithmic details
137 | 
138 | The algorithm follows the standard blueprint for a genetic algorithm as e.g. described on this [Wikipedia page](https://en.wikipedia.org/wiki/Genetic_algorithm), which roughly goes like this:
139 | 
140 | 1. An initial population is constructed
141 | 2. Fitness values for all organisms in the population are computed
142 | 3. A subset of the population (the *elite pool*) is selected
143 | 4. A subset of the population (the *breeding pool*) is selected
144 | 5. Pairs from the breeding pool are chosen, who will breed to create new "child" organisms whose genomes are combinations of the "parent" genomes. Breeding continues until the children and the elites together constitute a population of the same size as the original
145 | 6. A subset of the children (the *mutation pool*) is selected
146 | 7. Every child in the mutation pool is mutated, meaning that its genome is altered in some way
147 | 8. Go back to step 2
148 | 
149 | We now describe the individual steps in this particular implementation in more detail. Note that step 3 is sometimes left out completely, but since that just corresponds to an empty elite pool I decided to keep it in, for generality.
150 | 
151 | ### Step 1: Constructing the initial population
152 | 
153 | The population starts out as a uniformly random sample of the possible genome values dictated by the genus; this sampling happens when a new `Population` object is created. Alternatively, you may set `initial_genome` to whatever genome you would like, which will create a population consisting of organisms similar to this genome (the result of starting with a population of identical copies of the organism and then mutating 80% of them).
154 | 
155 | ```python3
156 | >>> pairs = ns.Population(
157 | ...     genus = Pair,
158 | ...     size = 100,
159 | ...     fitness_fn = lambda n: n.x/n.y,
160 | ...     initial_genome = {'x': 9750, 'y': 15}
161 | ... )
162 | ...
163 | >>> history = pairs.evolve(generations = 100)
164 | Evolving population: 100%|██████████████████| 100/100 [00:05<00:00, 19.47it/s]
165 | >>>
166 | >>> history.fittest
167 | {'genome' : {'x' : 9989, 'y' : 3}, 'fitness' : 3329.6666666666665}
168 | ```
169 | 
170 | ### Step 2: Compute fitness values
171 | 
172 | This happens in the `update_fitness` function, which is called by the `evolve` function. These computations are by default performed in parallel when dealing with neural networks and serially otherwise, as the benefits of parallelism are only reaped when the fitness computations take up a significant part of the algorithm (in the examples above not concerning neural networks, we would actually slow down the algorithm non-trivially by introducing parallelism).
173 | 
174 | ### Steps 3 & 4: Selecting elite pool and breeding pool
175 | 
176 | These two pools are selected in exactly the same way, using the `sample` function. They only differ in the number of organisms sampled, where the default `elitism_rate` is 5% and the default `breeding_rate` is 80%. Organisms are sampled with probability proportional to their fitness, i.e. an organism's fitness divided by the sum of all fitness values in the population, so the higher an organism's fitness, the more likely it is to be chosen for a pool; see the sketch below. The precise implementation is based on the algorithm specified on this [Wikipedia page](https://en.wikipedia.org/wiki/Selection_(genetic_algorithm)).
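As a minimal sketch of how such fitness-proportionate sampling works (an illustration only, not the library's actual `sample` implementation, which walks through the cumulative probabilities by hand), numpy's built-in `choice` can do the whole job:

```python3
>>> import numpy as np
>>>
>>> def sample_indices(fitnesses, amount):
...     ''' Sample organism indices proportionally to their fitnesses. '''
...     probs = fitnesses / fitnesses.sum()
...     return np.random.choice(fitnesses.size, size = amount, p = probs)
...
>>> # An organism with fitness 7.0 is drawn seven times as often as one
>>> # with fitness 1.0; the draw below is just one possible outcome
>>> sample_indices(np.array([1., 2., 7.]), amount = 5)
array([2, 2, 0, 2, 1])
```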
177 | 
178 | ### Step 5: Breeding
179 | 
180 | In this implementation the parent organisms are chosen uniformly at random from the breeding pool. When determining the value of the child's genome we apply the "single-point crossover" method, where we choose an index uniformly at random among the attributes, and the child will then inherit all attributes to the left of this index from one parent and the attributes to the right of this index from the other parent. See more on [this Wikipedia page](https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm)).
181 | 
182 | ### Step 6: Selection of mutation pool
183 | 
184 | The mutation pool is chosen uniformly at random, in contrast to the other two pools, as otherwise we would be more likely to "mutate away" many of the good genes of our fittest organisms. The default `mutation_rate` is 20%.
185 | 
186 | ### Step 7: Mutation
187 | 
188 | This implementation is roughly the [bit string mutation](https://en.wikipedia.org/wiki/Mutation_(genetic_algorithm)), where every gene of the organism has a 1/n chance of being uniformly randomly replaced by another gene, with n being the number of genes in the organism's genome. This means that, on average, mutation causes one gene to be altered. The number of genes altered in a mutation can be modified by changing the `mutation_factor` parameter, which by default is the above 1/n.
189 | 
190 | 
191 | ## Possible future extensions
192 | 
193 | These are the ideas that I have thought of implementing in the future. Check the ongoing progress on the `dev` branch.
194 | 
195 | * Enable support for CNNs
196 | * Enable support for RNNs and in particular LSTMs
197 | * Include an option to have dependency relations between genes. In a neural network setting this could include the topology as a gene on which all the layer-specific genes depend, which would be similar to the approach taken in [this paper](https://arxiv.org/pdf/1703.00548/).
198 | 
199 | 
200 | ## License
201 | 
202 | This project is licensed under the [MIT License](https://github.com/saattrupdan/naturalselection/blob/master/LICENSE).
203 | 
--------------------------------------------------------------------------------
/naturalselection/nn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from functools import partial
4 | import logging
5 | 
6 | # Used to set default value for workers
7 | from multiprocessing import cpu_count
8 | 
9 | import naturalselection as ns
10 | 
11 | class NN(ns.Genus):
12 |     ''' Feedforward fully connected neural network genus.
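    When uniform_layers is False, each potential hidden layer gets its own neuronsN and dropoutN genes, where the value 0 for neuronsN means that this layer is left out.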
13 | 
14 |     INPUT:
15 |         (int) max_nm_hidden_layers: maximum number of hidden layers
16 |         (bool) uniform_layers: whether all hidden layers should
17 |             have the same number of neurons and dropout
18 |         (iterable) input_dropout: values for input dropout
19 |         (iterable) hidden_dropout: values for dropout after hidden layers
20 |         (iterable) neurons: values for neurons in hidden layers
21 |         (iterable) optimizer: keras optimizers
22 |         (iterable) hidden_activation: keras activation functions
23 |         (iterable) batch_size: batch sizes
24 |         (iterable) initializer: keras initializers
25 |     '''
26 |     def __init__(self, max_nm_hidden_layers = 5, uniform_layers = False,
27 |         input_dropout = np.arange(0, 0.6, 0.1),
28 |         hidden_dropout = np.arange(0, 0.6, 0.1),
29 |         neurons = np.array([2 ** n for n in range(4, 11)]),
30 |         optimizer = np.array(['adamax', 'adam', 'nadam']),
31 |         hidden_activation = np.array(['relu', 'elu']),
32 |         batch_size = np.array([2 ** n for n in range(4, 7)]),
33 |         initializer = np.array(['lecun_uniform', 'lecun_normal',
34 |                                 'glorot_uniform', 'glorot_normal',
35 |                                 'he_uniform', 'he_normal'])):
36 | 
37 |         self.optimizer = np.unique(np.asarray(optimizer))
38 |         self.hidden_activation = np.unique(np.asarray(hidden_activation))
39 |         self.batch_size = np.unique(np.asarray(batch_size))
40 |         self.initializer = np.unique(np.asarray(initializer))
41 |         self.input_dropout = np.unique(np.asarray(input_dropout))
42 | 
43 |         if uniform_layers:
44 |             self.neurons = np.unique(np.asarray(neurons))
45 |             self.dropout = np.unique(np.asarray(hidden_dropout))
46 |             self.nm_hidden_layers = \
47 |                 np.arange(1, max_nm_hidden_layers + 1)
48 |         else:
49 |             neurons = np.unique(np.append(neurons, 0))
50 |             dropout = np.around(np.unique(np.append(hidden_dropout, 0)), 2)
51 |             layer_info = {}
52 |             for layer_idx in range(max_nm_hidden_layers):
53 |                 layer_info["neurons{}".format(layer_idx)] = neurons
54 |                 layer_info["dropout{}".format(layer_idx)] = dropout
55 |             self.__dict__.update(layer_info)
56 | 
57 | class NNs(ns.Population):
58 |     def __init__(self,
59 |         train_val_sets,
60 |         size = 50,
61 |         initial_genome = None,
62 |         breeding_rate = 0.8,
63 |         mutation_rate = 0.2,
64 |         mutation_factor = 'default',
65 |         elitism_rate = 0.05,
66 |         multiprocessing = True,
67 |         workers = cpu_count(),
68 |         progress_bars = 3,
69 |         loss_fn = 'binary_crossentropy',
70 |         nm_features = 'infer',
71 |         nm_labels = 'infer',
72 |         score = 'accuracy',
73 |         output_activation = 'sigmoid',
74 |         max_epochs = 1000000,
75 |         patience = 3,
76 |         min_change = 1e-4,
77 |         max_training_time = None,
78 |         max_nm_hidden_layers = 5,
79 |         uniform_layers = False,
80 |         input_dropout = np.arange(0, 0.6, 0.1),
81 |         hidden_dropout = np.arange(0, 0.6, 0.1),
82 |         neurons = np.array([2 ** n for n in range(4, 11)]),
83 |         optimizer = np.array(['adamax', 'adam', 'nadam']),
84 |         hidden_activation = np.array(['relu', 'elu']),
85 |         batch_size = np.array([2 ** n for n in range(4, 7)]),
86 |         initializer = np.array(['lecun_uniform', 'lecun_normal',
87 |                                 'glorot_uniform', 'glorot_normal',
88 |                                 'he_uniform', 'he_normal']),
89 |         verbose = 0):
90 | 
91 |         self.train_val_sets = train_val_sets
92 |         self.size = size
93 |         self.initial_genome = initial_genome
94 |         self.breeding_rate = breeding_rate
95 |         self.mutation_rate = mutation_rate
96 |         self.mutation_factor = mutation_factor
97 |         self.elitism_rate = elitism_rate
98 |         self.multiprocessing = multiprocessing
99 |         self.workers = workers
100 |         self.progress_bars = progress_bars
101 |         self.loss_fn = loss_fn
102 |         self.nm_features = nm_features
103 |         self.nm_labels = nm_labels
104 |         self.score = 
score 105 | self.output_activation = output_activation 106 | self.max_epochs = max_epochs 107 | self.patience = patience 108 | self.min_change = min_change 109 | self.max_training_time = max_training_time 110 | self.max_nm_hidden_layers = max_nm_hidden_layers 111 | self.uniform_layers = uniform_layers 112 | self.input_dropout = input_dropout 113 | self.hidden_dropout = hidden_dropout 114 | self.neurons = neurons 115 | self.optimizer = optimizer 116 | self.hidden_activation = hidden_activation 117 | self.batch_size = batch_size 118 | self.initializer = initializer 119 | self.verbose = verbose 120 | 121 | logging.basicConfig(format = '%(levelname)s: %(message)s') 122 | self.logger = logging.getLogger() 123 | 124 | if not verbose: 125 | self.logger.setLevel(logging.WARNING) 126 | elif verbose == 1: 127 | self.logger.setLevel(logging.INFO) 128 | elif verbose == 2: 129 | self.logger.setLevel(logging.DEBUG) 130 | 131 | self.logger.info("Creating population...") 132 | 133 | # Hard coded values for neural networks 134 | self.allow_repeats = False 135 | self.memory = 'inf' 136 | 137 | self.genus = NN( 138 | max_nm_hidden_layers = self.max_nm_hidden_layers, 139 | uniform_layers = self.uniform_layers, 140 | input_dropout = self.input_dropout, 141 | hidden_dropout = self.hidden_dropout, 142 | neurons = self.neurons, 143 | optimizer = self.optimizer, 144 | hidden_activation = self.hidden_activation, 145 | batch_size = self.batch_size, 146 | initializer = self.initializer 147 | ) 148 | 149 | self.fitness_fn = partial( 150 | self.train_nn, 151 | max_epochs = self.max_epochs, 152 | patience = self.patience, 153 | min_change = self.min_change, 154 | max_training_time = self.max_training_time, 155 | file_name = None, 156 | ) 157 | 158 | # If user has supplied an initial genome then construct a population 159 | # which is very similar to that 160 | if initial_genome: 161 | 162 | # Create a population of organisms all with the initial genome 163 | self.population = np.array([ 164 | ns.Organism(self.genus, **initial_genome) 165 | for _ in range(size) 166 | ]) 167 | 168 | # Mutate 80% of the population 169 | rnd = np.random.random(self.population.shape) 170 | for (i, org) in enumerate(self.population): 171 | if rnd[i] > 0.2: 172 | org.mutate() 173 | else: 174 | self.population = self.genus.create_organisms(size) 175 | 176 | # We do not have access to fitness values yet, so choose the 'fittest 177 | # organism' to just be a random one 178 | self.fittest = np.random.choice(self.population) 179 | 180 | def train_best(self, max_epochs = 1000000, min_change = 1e-4, 181 | patience = 10, max_training_time = None, file_name = None): 182 | 183 | best_nn = self.fittest 184 | fitness = self.train_nn( 185 | nn = best_nn, 186 | max_epochs = max_epochs, 187 | patience = patience, 188 | min_change = min_change, 189 | max_training_time = max_training_time, 190 | file_name = file_name 191 | ) 192 | return fitness 193 | 194 | def train_nn(self, nn, max_epochs = 1000000, patience = 3, 195 | min_change = 1e-4, max_training_time = None, file_name = None, 196 | worker_idx = None): 197 | ''' Train a feedforward neural network and output the score. 
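        Builds a keras model from the genome of the given organism, trains it with early stopping and returns the validation score specified by self.score.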
198 | 
199 |         INPUT
200 |             (Organism) nn: an organism of the NN genus
201 |             (int) max_epochs = 1000000: maximum number of epochs to train for
202 |             (int) patience = 3: number of epochs allowed with no progress
203 |                 above min_change
204 |             (float) min_change = 1e-4: everything below this number will
205 |                 not count as a change in the score
206 |             (int) max_training_time = None: maximum number of seconds to
207 |                 train for, also training the final epoch after the time
208 |                 has run out
209 |             (int) worker_idx = None: what worker is currently training this
210 |                 network, with enumeration starting from 1
211 | 
212 |         OUTPUT
213 |             (float) the score of the neural network
214 |         '''
215 | 
216 |         from tensorflow.keras.models import Model
217 |         from tensorflow.keras.layers import Input, Dense, Dropout
218 |         from tensorflow.keras import backend as K
219 |         from tensorflow.python.util import deprecation
220 |         from tensorflow import set_random_seed
221 |         from sklearn.metrics import f1_score, precision_score, recall_score
222 | 
223 |         # Custom callbacks
224 |         from naturalselection.callbacks import TQDMCallback, EarlierStopping
225 | 
226 |         # Used when building network
227 |         from itertools import count
228 | 
229 |         # Suppress tensorflow warnings
230 |         deprecation._PRINT_DEPRECATION_WARNINGS = False
231 |         os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
232 | 
233 |         # Set random seeds to enable better comparison of scores
234 |         np.random.seed(0)
235 |         set_random_seed(0)
236 | 
237 |         X_train, Y_train, X_val, Y_val = self.train_val_sets
238 | 
239 |         if self.nm_features == 'infer':
240 |             self.nm_features = X_val.shape[1]
241 |         if self.nm_labels == 'infer':
242 |             self.nm_labels = Y_val.shape[1]
243 | 
244 |         inputs = Input(shape = (self.nm_features,))
245 |         x = Dropout(nn.input_dropout)(inputs)
246 | 
247 |         if self.uniform_layers:
248 |             for _ in range(nn.nm_hidden_layers):
249 |                 x = Dense(nn.neurons, activation = nn.hidden_activation,
250 |                     kernel_initializer = nn.initializer)(x)
251 |                 x = Dropout(nn.dropout)(x)
252 |         else:
253 |             for i in count():
254 |                 try:
255 |                     neurons = nn.__dict__["neurons{}".format(i)]
256 |                     if neurons:
257 |                         x = Dense(neurons, activation = nn.hidden_activation,
258 |                             kernel_initializer = nn.initializer)(x)
259 |                     dropout = nn.__dict__["dropout{}".format(i)]
260 |                     if dropout:
261 |                         x = Dropout(dropout)(x)
262 |                 except KeyError:
263 |                     break
264 | 
265 |         outputs = Dense(self.nm_labels,
266 |             activation = self.output_activation,
267 |             kernel_initializer = nn.initializer)(x)
268 | 
269 |         model = Model(inputs = inputs, outputs = outputs)
270 | 
271 |         if self.score == 'accuracy':
272 |             metrics = ['accuracy']
273 |         elif self.score == 'categorical_accuracy':
274 |             metrics = ['categorical_accuracy']
275 |         else:
276 |             metrics = []
277 | 
278 |         model.compile(
279 |             loss = self.loss_fn,
280 |             optimizer = nn.optimizer,
281 |             metrics = metrics
282 |         )
283 | 
284 |         if self.score == 'accuracy' or self.score == 'categorical_accuracy':
285 |             monitor = 'val_acc'
286 |         else:
287 |             monitor = 'val_loss'
288 | 
289 |         earlier_stopping = EarlierStopping(
290 |             monitor = monitor,
291 |             patience = patience,
292 |             min_delta = min_change,
293 |             restore_best_weights = True,
294 |             seconds = max_training_time
295 |         )
296 | 
297 |         callbacks = [earlier_stopping]
298 |         if self.progress_bars >= 3:
299 |             if worker_idx:
300 |                 desc = f'Worker {worker_idx % self.workers}, '
301 |                 tqdm_callback = TQDMCallback(
302 |                     show_outer = False,
303 |                     inner_position = (worker_idx % self.workers) + 2,
304 |                     leave_inner = False,
305 |                     inner_description_update = desc + 'Epoch {epoch}',
306 |                     inner_description_initial = desc + 'Epoch {epoch}'
307 |                 )
308 |             else:
309 |                 tqdm_callback = TQDMCallback(
310 |                     show_outer = False,
311 |                     inner_position = 0
312 |                 )
313 |             callbacks.append(tqdm_callback)
314 | 
315 |         model.fit(
316 |             X_train,
317 |             Y_train,
318 |             batch_size = nn.batch_size,
319 |             validation_data = (X_val, Y_val),
320 |             epochs = max_epochs,
321 |             callbacks = callbacks,
322 |             verbose = 0
323 |         )
324 | 
325 |         if file_name:
326 |             model.save("{}.h5".format(file_name))
327 | 
328 |         if self.nm_labels > 1:
329 |             average = 'micro'
330 |         else:
331 |             average = None
332 | 
333 |         Y_hat = model.predict(X_val, batch_size = 128)
334 |         if self.score == 'accuracy' or self.score == 'categorical_accuracy':
335 |             fitness = model.evaluate(X_val, Y_val, verbose = 0)[1]
336 |         elif self.score == 'f1':
337 |             Y_hat = np.greater(Y_hat, 0.5)
338 |             fitness = f1_score(Y_val, Y_hat, average = average)
339 |         elif self.score == 'precision':
340 |             Y_hat = np.greater(Y_hat, 0.5)
341 |             fitness = precision_score(Y_val, Y_hat, average = average)
342 |         elif self.score == 'recall':
343 |             Y_hat = np.greater(Y_hat, 0.5)
344 |             fitness = recall_score(Y_val, Y_hat, average = average)
345 |         elif self.score == 'loss':
346 |             fitness = np.divide(1, model.evaluate(X_val, Y_val, verbose = 0))
347 |         else:
348 |             # Custom scoring function
349 |             fitness = self.score(Y_val, Y_hat)
350 | 
351 |         # Clear tensorflow session to avoid memory leak
352 |         K.clear_session()
353 | 
354 |         return fitness
355 | 
356 | 
357 | # Entry point for the 'nn' console script defined in setup.py
358 | def main():
359 |     pass
--------------------------------------------------------------------------------
/naturalselection/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | import os
4 | from functools import reduce, partial
5 | import logging
6 | 
7 | # Suppressing warnings
8 | import warnings
9 | 
10 | # Plots
11 | import matplotlib.pyplot as plt
12 | 
13 | # Progress bars
14 | from tqdm import tqdm, trange
15 | 
16 | # Parallelising fitness
17 | from inspect import getfullargspec
18 | import multiprocessing as mp
19 | 
20 | class Genus():
21 |     ''' Storing information about all the possible gene combinations.
22 | 
23 |     INPUT
24 |         (kwargs) genomes
25 |     '''
26 | 
27 |     def __init__(self, **genomes):
28 |         self.__dict__.update(
29 |             {key: np.asarray(val) for (key, val) in genomes.items()}
30 |         )
31 | 
32 |     def create_organism(self):
33 |         rnd_genes = {key: val[np.random.choice(range(val.shape[0]))]
34 |             for (key, val) in self.__dict__.items()}
35 |         return Organism(genus = self, **rnd_genes)
36 | 
37 |     def create_organisms(self, amount = 1):
38 |         ''' Create organisms of this genus.
39 | 
40 |         INPUT
41 |             (int) amount = 1
42 |         '''
43 |         return np.array([self.create_organism() for _ in range(amount)])
44 | 
45 |     def alter_genomes(self, **genomes):
46 |         ''' Add genomes to the genus, or change existing ones.
47 | 
48 |         INPUT
49 |             (kwargs) genomes
50 |         '''
51 |         self.__dict__.update(genomes)
52 |         return self
53 | 
54 |     def remove_genomes(self, *keys):
55 |         ''' Remove genomes from the genus. '''
56 |         for key in keys:
57 |             self.__dict__.pop(key, None)
58 |         return self
59 | 
60 | class Organism():
61 |     ''' Organism of a particular genus.
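    Genes that are missing, or whose values do not occur in the genus, are filled in with uniformly random values from the genus.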
62 | 
63 |     INPUT
64 |         (Genus) genus
65 |         (kwargs) genome: genome information
66 |     '''
67 | 
68 |     def __init__(self, genus, **genome):
69 | 
70 |         # Check that the input parameters match with the genus type,
71 |         # and if any parameters are missing then add random values
72 |         genome = {key: val for (key, val) in genome.items() if key in
73 |             genus.__dict__.keys() and val in genus.__dict__[key]}
74 |         for key in genus.__dict__.keys() - genome.keys():
75 |             val_idx = np.random.choice(range(genus.__dict__[key].shape[0]))
76 |             genome[key] = genus.__dict__[key][val_idx]
77 | 
78 |         self.__dict__.update(genome)
79 |         self.genus = genus
80 |         self.fitness = 0
81 | 
82 |     def get_genome(self):
83 |         return {key: val for (key, val) in self.__dict__.items()
84 |             if key not in {'genus', 'fitness'}}
85 | 
86 |     def breed(self, other):
87 |         ''' Breed organism with another organism, returning a new
88 |             organism of the same genus.
89 | 
90 |         INPUT
91 |             (Organism) other: the organism to breed with
92 |         '''
93 | 
94 |         if self.genus != other.genus:
95 |             raise Exception("Only organisms of the same genus can breed.")
96 | 
97 |         self_genome = list(self.get_genome().items())
98 |         other_genome = list(other.get_genome().items())
99 | 
100 |         rnd = np.random.choice(len(self_genome))
101 |         child_genome = dict(self_genome[:rnd] + other_genome[rnd:])
102 |         child = Organism(self.genus, **child_genome)
103 | 
104 |         return child
105 | 
106 |     def mutate(self, mutation_factor = 'default'):
107 |         ''' Mutate the organism in place and return it.
108 | 
109 |         INPUT
110 |             (float or string) mutation_factor = 'default': given that an
111 |                 organism is being mutated, the probability that
112 |                 a given gene is changed. Defaults to 1/k, where
113 |                 k is the number of genes in the genome
114 |         '''
115 |         keys = np.asarray(list(self.get_genome().keys()))
116 |         if mutation_factor == 'default':
117 |             mutation_factor = np.divide(1, keys.size)
118 |         mut_idx = np.less(np.random.random(keys.size), mutation_factor)
119 |         mut_vals = {key: self.genus.__dict__[key]\
120 |             [np.random.choice(range(self.genus.__dict__[key].shape[0]))]
121 |             for key in keys[mut_idx]}
122 |         self.__dict__.update(mut_vals)
123 |         return self
124 | 
125 | class Population():
126 |     ''' Population of organisms, all of the same genus.
127 | 
128 |     INPUT
129 |         (Genus) genus
130 |         (int) size: size of the population
131 |         (function) fitness_fn: fitness function
132 |         (dict) initial_genome = None: start with a population similar to
133 |             the genome, for a warm start
134 |         (float) breeding_rate = 0.8: percentage of population to breed
135 |         (float) mutation_rate = 0.2: percentage of population to mutate
136 |             each generation
137 |         (float or string) mutation_factor = 'default': given that an
138 |             organism is being mutated, the probability that
139 |             a given gene is changed. Defaults to 1/k, where
140 |             k is the number of genes in the genome
141 |         (float) elitism_rate = 0.05: percentage of population to keep
142 |             across generations
143 |         (bool) multiprocessing = False: whether fitnesses should be
144 |             computed in parallel
145 |         (int) workers = mp.cpu_count(): how many workers to use if
146 |             multiprocessing is True
147 |         (int) progress_bars = 1: number of progress bars to show, where 1
148 |             only shows the main evolution progress, and 2 shows both
149 |             the evolution and the fitness computation per generation
150 |         (int or string) memory = 'inf': how many generations the
151 |             population can look back to avoid redundant
152 |             fitness computations, where 'inf' means unlimited
153 |             memory.
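            Smaller values reduce memory usage, at the cost of recomputing fitnesses of genomes that reappear later.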
154 | (bool) allow_repeats = True: allow computing duplicate fitness vals 155 | (int) verbose = 0: verbosity mode 156 | ''' 157 | 158 | def __init__(self, genus, size, fitness_fn, initial_genome = None, 159 | breeding_rate = 0.8, mutation_rate = 0.2, mutation_factor = 'default', 160 | elitism_rate = 0.05, multiprocessing = False, workers = mp.cpu_count(), 161 | progress_bars = 1, memory = 'inf', allow_repeats = True, 162 | verbose = 0): 163 | 164 | self.genus = genus 165 | self.size = size 166 | self.initial_genome = initial_genome 167 | self.breeding_rate = breeding_rate 168 | self.mutation_rate = mutation_rate 169 | self.mutation_factor = mutation_factor 170 | self.elitism_rate = elitism_rate 171 | self.multiprocessing = multiprocessing 172 | self.workers = workers 173 | self.progress_bars = progress_bars 174 | self.memory = memory 175 | self.allow_repeats = allow_repeats 176 | self.verbose = verbose 177 | 178 | if 'worker_idx' not in getfullargspec(fitness_fn).args: 179 | def new_fitness_fn(*args, worker_idx = None, **kwargs): 180 | return fitness_fn(*args, **kwargs) 181 | self.fitness_fn = new_fitness_fn 182 | else: 183 | self.fitness_fn = fitness_fn 184 | 185 | logging.basicConfig(format = '%(levelname)s: %(message)s') 186 | self.logger = logging.getLogger() 187 | 188 | if not verbose: 189 | self.logger.setLevel(logging.WARNING) 190 | elif verbose == 1: 191 | self.logger.setLevel(logging.INFO) 192 | elif verbose == 2: 193 | self.logger.setLevel(logging.DEBUG) 194 | 195 | self.logger.info("Creating population...") 196 | 197 | if initial_genome: 198 | 199 | # Create a population of identical organisms 200 | self.population = np.array( 201 | [Organism(genus, **initial_genome) for _ in range(size)]) 202 | 203 | # Mutate 80% of the population 204 | rnd = np.random.random(self.population.shape) 205 | for (i, org) in enumerate(self.population): 206 | if rnd[i] > 0.2: 207 | org.mutate() 208 | else: 209 | self.population = genus.create_organisms(size) 210 | 211 | self.fittest = np.random.choice(self.population) 212 | 213 | def get_genomes(self): 214 | return np.asarray([org.get_genome() for org in self.population]) 215 | 216 | def get_fitnesses(self): 217 | return np.asarray([org.fitness for org in self.population]) 218 | 219 | def update_fitness(self, history = None): 220 | ''' Compute and update fitness values of the population. 
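        Fitness values are only computed once per unique genome in the population, and when allow_repeats is False, genomes remembered in the history are reused rather than recomputed.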
221 | 222 | INPUT 223 | (History) history = None: previous population history 224 | ''' 225 | 226 | # Duck typing function to make things immutable 227 | def make_immutable(x): 228 | try: 229 | if not isinstance(x, str): 230 | x = tuple(x) 231 | except TypeError: 232 | pass 233 | return x 234 | 235 | def immute_dict(d): 236 | return {key: make_immutable(val) for (key, val) in d.items()} 237 | 238 | unique_genomes = np.array( 239 | [dict(gene) for gene in set(frozenset(immute_dict(genome).items()) 240 | for genome in self.get_genomes())] 241 | ) 242 | 243 | # If history is loaded then get the genomes from the current 244 | # population that are unique across all generations 245 | past_indices = np.array([]) 246 | if history and not self.allow_repeats: 247 | g_prev = history.genome_history 248 | f_prev = history.fitness_history 249 | 250 | indices = np.array([((np.where(g_prev == org.get_genome())[0][0], 251 | np.where(g_prev == org.get_genome())[1][0]), idx) 252 | for (idx, org) in enumerate(self.population) 253 | if org.get_genome() in g_prev 254 | ]) 255 | past_indices = np.array([idx for (_, idx) in indices]) 256 | 257 | # Load previous fitnesses of genomes that are occuring now 258 | for (past_idx, idx) in indices: 259 | self.population[idx].fitness = f_prev[past_idx[0], past_idx[1]] 260 | 261 | # Remove genomes that have occured previously 262 | unique_genomes = np.array([genome for genome in unique_genomes 263 | if genome not in g_prev]) 264 | 265 | # Pull out the organisms with the unique genomes 266 | imm_genomes = np.array(list( 267 | map(immute_dict, self.get_genomes()))) 268 | imm_unique_genomes = np.array(list( 269 | map(immute_dict, unique_genomes))) 270 | unique_indices = np.array([np.argmin(imm_genomes != genome) 271 | for genome in imm_unique_genomes]) 272 | 273 | # Compute fitness values if there are any that needs to be computed 274 | if unique_indices.size: 275 | with warnings.catch_warnings(): 276 | 277 | # Ignore warning related to F1-scores 278 | f1_warn = 'F-score is ill-defined and being set to ' \ 279 | '0.0 due to no predicted samples.' 280 | warnings.filterwarnings('ignore', message = f1_warn) 281 | 282 | if self.multiprocessing: 283 | 284 | # Define queues to organise the parallelising 285 | todo = mp.Queue(unique_indices.size + self.workers) 286 | done = mp.Queue(unique_indices.size) 287 | for idx in unique_indices: 288 | todo.put(idx) 289 | for _ in range(self.workers): 290 | todo.put(-1) 291 | 292 | def worker(todo, done): 293 | ''' Fitness computing worker. 
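                        Pops organism indices off the todo queue, computes their fitnesses and pushes (idx, fitness) pairs onto the done queue; the sentinel value -1 shuts the worker down.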
''' 294 | from queue import Empty 295 | while True: 296 | try: 297 | idx = todo.get(timeout = 1) 298 | except Empty: 299 | continue 300 | if idx == -1: 301 | break 302 | else: 303 | org = self.population[idx] 304 | worker_idx = mp.current_process()._identity[0] 305 | fitness = self.fitness_fn(org, 306 | worker_idx = worker_idx) 307 | done.put((idx, fitness)) 308 | 309 | # Define our processes 310 | processes = [mp.Process(target = worker, 311 | args = (todo, done)) for _ in range(self.workers)] 312 | 313 | # Daemonise the processes, meaning they close when they 314 | # they finish, and start them 315 | for p in processes: 316 | p.daemon = True 317 | p.start() 318 | 319 | # This is the iterable with (idx, fitness) values 320 | idx_fits = (done.get() for _ in unique_indices) 321 | 322 | else: 323 | # This is the iterable with (idx, fitness) values, 324 | # obtained without any parallelising 325 | idx_fits = self.population[unique_indices] 326 | idx_fits = map(self.fitness_fn, idx_fits) 327 | idx_fits = zip(unique_indices, idx_fits) 328 | 329 | # Set up a progress bar 330 | if self.progress_bars >= 2: 331 | idx_fits = tqdm(idx_fits, total = unique_indices.size) 332 | idx_fits.set_description("Computing fitness") 333 | 334 | # Compute the fitness values 335 | for (idx, new_fitness) in idx_fits: 336 | self.population[idx].fitness = new_fitness 337 | 338 | # Join up the processes 339 | if self.multiprocessing: 340 | for p in processes: 341 | p.join() 342 | 343 | # Close the progress bar 344 | if self.progress_bars >= 2: 345 | idx_fits.close() 346 | 347 | 348 | # Copy out the fitness values to the other organisms with same genome 349 | for (i, org) in enumerate(self.population): 350 | if i not in unique_indices and i not in past_indices: 351 | prev_unique_idx = np.min(np.array( 352 | [idx for idx in unique_indices 353 | if immute_dict(org.get_genome()) == \ 354 | immute_dict(self.population[idx].get_genome())] 355 | )) 356 | self.population[i].fitness = \ 357 | self.population[prev_unique_idx].fitness 358 | 359 | def sample(self, amount = 1): 360 | ''' Sample a fixed amount of organisms from the population, 361 | where the fitter an organism is, the more it's likely 362 | to be chosen. 363 | 364 | INPUT 365 | (int) amount = 1: number of organisms to sample 366 | 367 | OUTPUT 368 | (ndarray) sample of population 369 | ''' 370 | 371 | # Convert fitness values into probabilities 372 | fitnesses = self.get_fitnesses() 373 | probs = np.divide(fitnesses, sum(fitnesses)) 374 | 375 | # Copy the population to a new variable 376 | pop = self.population 377 | 378 | # Sort the probabilities in descending order and sort pop (not 379 | # the actual population) in the same way 380 | sorted_idx = np.argsort(probs)[::-1] 381 | probs = probs[sorted_idx] 382 | pop = pop[sorted_idx] 383 | 384 | # Get random numbers between 0 and 1 385 | indices = np.random.random(amount) 386 | 387 | for i in range(amount): 388 | # Find the index of the fitness value whose accumulated 389 | # sum exceeds the value of the i'th random number. 390 | fn = lambda x, y: (x[0], x[1] + y[1]) \ 391 | if x[1] + y[1] > indices[i] \ 392 | else (x[0] + y[0], x[1] + y[1]) 393 | (idx, _) = reduce(fn, map(lambda x: (1, x), probs)) 394 | indices[i] = idx - 1 395 | 396 | # Return the organisms indexed at the indices found above 397 | return pop[indices.astype(int)] 398 | 399 | def evolve(self, generations = 1, goal = None): 400 | ''' Evolve the population. 
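        Runs the select/breed/mutate loop for the given number of generations, updating self.fittest along the way, and returns a History object that can be used for inspection and plotting.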
401 | 402 | INPUT 403 | (int) generations = 1: number of generations to evolve 404 | (float) goal = None: stop when fitness is not below this value 405 | ''' 406 | 407 | history = History( 408 | population = self, 409 | generations = generations, 410 | memory = self.memory 411 | ) 412 | 413 | if self.progress_bars: 414 | gen_iter = trange(generations) 415 | gen_iter.set_description("Evolving population") 416 | else: 417 | gen_iter = range(generations) 418 | 419 | for gen in gen_iter: 420 | 421 | if goal and self.fittest.fitness >= goal: 422 | # Close tqdm iterator 423 | if self.progress_bars: 424 | gen_iter.close() 425 | 426 | # Truncate history for plotting 427 | history.generations = gen 428 | history.fitness_history = history.fitness_history[:gen, :] 429 | history.genome_history = history.genome_history[:gen, :] 430 | if history.memory == 'inf' or history.memory > gen: 431 | history.memory = gen 432 | 433 | self.logger.info('Reached goal, stopping evolution...') 434 | break 435 | 436 | # Compute and update fitness values 437 | self.update_fitness(history = history) 438 | fitnesses = self.get_fitnesses() 439 | 440 | self.logger.debug('Updating fitness values...') 441 | 442 | # Update the fittest organism 443 | if max(fitnesses) > self.fittest.fitness: 444 | self.fittest = self.population[np.argmax(fitnesses)] 445 | 446 | # Store current population in history 447 | history.add_entry(self, generation = gen) 448 | 449 | self.logger.debug("Fitness values: {}"\ 450 | .format(np.around(fitnesses, 2))) 451 | 452 | # Select elites 453 | elites_amt = np.ceil(self.size * self.elitism_rate).astype(int) 454 | if self.elitism_rate: 455 | elites = self.sample(amount = elites_amt) 456 | 457 | self.logger.debug("Elite pool, of size {}:"\ 458 | .format(elites_amt)) 459 | self.logger.debug(np.array([org.get_genome() 460 | for org in elites])) 461 | 462 | # Select breeders 463 | breeders_amt = max(2, np.ceil(self.size * self.breeding_rate)\ 464 | .astype(int)) 465 | breeders = self.sample(amount = breeders_amt) 466 | 467 | self.logger.debug("Breeding pool, of size {}:"\ 468 | .format(breeders_amt)) 469 | self.logger.debug(np.array([org.get_genome() for org in breeders])) 470 | self.logger.debug("Breeding...") 471 | 472 | # Breed until we reach the same size 473 | children_amt = self.size - elites_amt 474 | parents = np.random.choice(breeders, (self.size, 2)) 475 | children = np.array([parents[i, 0].breed(parents[i, 1]) 476 | for i in range(children_amt)]) 477 | 478 | # Select mutators 479 | mutators = np.less(np.random.random(children_amt), 480 | self.mutation_rate) 481 | 482 | self.logger.debug("Mutation pool, of size {}:"\ 483 | .format(children[mutators].size)) 484 | self.logger.debug(np.array([child.get_genome() for child in 485 | children[mutators]])) 486 | self.logger.debug("Mutating...") 487 | 488 | # Mutate the children 489 | for mutator in children[mutators]: 490 | mutator.mutate(mutation_factor = self.mutation_factor) 491 | 492 | # The children constitutes our new generation 493 | if self.elitism_rate: 494 | self.population = np.append(children, elites) 495 | else: 496 | self.population = children 497 | 498 | self.logger.debug("New population, of size {}:"\ 499 | .format(self.population.size)) 500 | self.logger.debug(self.get_genomes()) 501 | self.logger.debug("Mean fitness: {}".format(np.mean(fitnesses))) 502 | self.logger.debug("Std fitness: {}".format(np.std(fitnesses))) 503 | 504 | self.logger.info("Fittest so far, with fitness {}:"\ 505 | .format(self.fittest.fitness)) 506 | 
self.logger.info(self.fittest.get_genome()) 507 | 508 | gen_iter.close() 509 | 510 | if self.progress_bars >= 2: 511 | print("") 512 | 513 | return history 514 | 515 | class History(): 516 | ''' History of a population's evolution. 517 | 518 | INPUT 519 | (Population) population 520 | (int) generations 521 | (int or string) memory = 'inf': how many generations the 522 | population can look back to avoid redundant 523 | fitness computations, where 'inf' means unlimited 524 | memory. 525 | ''' 526 | 527 | def __init__(self, population, generations, memory = 'inf'): 528 | 529 | if memory == 'inf' or memory > generations: 530 | self.memory = min(int(1e5), generations) 531 | else: 532 | self.memory = memory 533 | 534 | pop_size = population.size 535 | self.generations = generations 536 | self.genome_history = np.empty((self.memory, pop_size), dict) 537 | self.fitness_history = np.empty((self.memory, pop_size), float) 538 | self.population = population 539 | self.fittest = {'genome': None, 'fitness': 0} 540 | 541 | def add_entry(self, population, generation): 542 | ''' Add population to the history. 543 | 544 | INPUT 545 | (Population) population 546 | (int) generation 547 | ''' 548 | 549 | genomes = population.get_genomes() 550 | fitnesses = population.get_fitnesses() 551 | 552 | if max(fitnesses) > self.fittest['fitness']: 553 | self.fittest['genome'] = genomes[np.argmax(fitnesses)] 554 | self.fittest['fitness'] = max(fitnesses) 555 | 556 | self.genome_history = np.roll(self.genome_history, 1, axis = 0) 557 | self.genome_history[0, :] = genomes 558 | 559 | self.fitness_history = np.roll(self.fitness_history, 1, axis = 0) 560 | self.fitness_history[0, :] = fitnesses 561 | 562 | return self 563 | 564 | def plot(self, title = 'Fitness by generation', xlabel = 'Generation', 565 | ylabel = 'Fitness', file_name = None, show_plot = True, 566 | show_max = True, only_show_max = False, discrete = False, 567 | legend = True, legend_location = 'lower right'): 568 | ''' Plot the fitness values. 569 | 570 | INPUT 571 | (string) title = 'Fitness by generation' 572 | (string) xlabel = 'Generations': label on the x-axis 573 | (string) ylabel = 'Fitness': label on the y-axis 574 | (string) file_name = None: file name to save the plot to 575 | (bool) show_plot = True: show plot as a pop-up 576 | (bool) show_max = True: show max value line on plot 577 | (bool) only_show_max = False: Hide the plot with means and stds 578 | (bool) discrete = False: make the error plot discrete 579 | (bool) legend = True: show legend 580 | (string or int) legend_location = 'lower right': legend location, 581 | either as e.g. 
'lower right' or as an integer
582 |             between 0 and 10
583 |         '''
584 | 
585 |         fits = self.fitness_history[::-1]
586 |         gens = self.generations
587 |         mem = self.memory
588 |         means = np.mean(fits, axis = 1)
589 |         stds = np.std(fits, axis = 1)
590 |         xs = np.arange(mem)
591 | 
592 |         if gens == 1:
593 |             discrete = True
594 | 
595 |         if show_max or only_show_max:
596 |             maxs = np.array([np.max(fits[x, :]) for x in xs])
597 | 
598 |         plt.style.use("ggplot")
599 |         plt.figure()
600 |         plt.xlim(gens - mem - 1, gens)
601 |         plt.title(title)
602 |         plt.xlabel(xlabel)
603 |         plt.ylabel(ylabel)
604 | 
605 |         xs_shift = xs + (gens - mem)
606 | 
607 |         if show_max:
608 |             plt.plot(xs_shift, maxs[xs], '--', color = 'blue', label = 'max')
609 | 
610 |         if discrete and not only_show_max:
611 |             plt.errorbar(xs_shift, means[xs], stds[xs], fmt = 'ok',
612 |                 label = 'mean and std')
613 |         elif not only_show_max:
614 |             plt.plot(xs_shift, means[xs], '-', color = 'black', label = 'mean')
615 |             plt.fill_between(
616 |                 xs_shift,
617 |                 means[xs] - stds[xs],
618 |                 means[xs] + stds[xs],
619 |                 alpha = 0.2,
620 |                 color = 'gray',
621 |                 label = 'std'
622 |             )
623 | 
624 |         if legend:
625 |             plt.legend(loc = legend_location)
626 | 
627 |         if file_name:
628 |             plt.savefig(file_name)
629 | 
630 |         if show_plot:
631 |             plt.show()
632 | 
633 | 
634 | # Entry point for the 'core' console script defined in setup.py
635 | def main():
636 |     pass
--------------------------------------------------------------------------------