├── mnist_plot.png
├── cifar10_plot.png
├── .gitignore
├── naturalselection
│   ├── __init__.py
│   ├── callbacks.py
│   ├── nn.py
│   └── core.py
├── numbers_example.py
├── onemax_example.py
├── setup.py
├── LICENSE
├── nn_example.py
└── README.md

/mnist_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saattrupdan/naturalselection/HEAD/mnist_plot.png
--------------------------------------------------------------------------------
/cifar10_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saattrupdan/naturalselection/HEAD/cifar10_plot.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/*
2 | *.log
3 | *.swp
4 | *.egg-info
5 | dist
6 | build
7 | naturalselection/__pycache__/*
8 | 
--------------------------------------------------------------------------------
/naturalselection/__init__.py:
--------------------------------------------------------------------------------
1 | from naturalselection.core import Genus, Organism, Population, History
2 | from naturalselection.nn import NN, NNs
3 | 
--------------------------------------------------------------------------------
/numbers_example.py:
--------------------------------------------------------------------------------
1 | import naturalselection as ns
2 | 
3 | Pair = ns.Genus(x = range(1, 10000), y = range(1, 10000))
4 | 
5 | pairs = ns.Population(
6 |     genus = Pair,
7 |     size = 100,
8 |     fitness_fn = lambda n: n.x/n.y
9 | )
10 | 
11 | history = pairs.evolve(generations = 100)
12 | 
13 | print(history.fittest)
14 | 
15 | history.plot()
16 | 
--------------------------------------------------------------------------------
/onemax_example.py:
--------------------------------------------------------------------------------
1 | import naturalselection as ns
2 | 
3 | BitString = ns.Genus(**{f'x{n}' : (0,1) for n in range(100)})
4 | 
5 | def sum_bits(bitstring):
6 |     return sum(bitstring.get_genome().values())
7 | 
8 | bitstrings = ns.Population(
9 |     genus = BitString,
10 |     size = 5,
11 |     fitness_fn = sum_bits,
12 | )
13 | 
14 | history = bitstrings.evolve(generations = 5000, goal = 100)
15 | 
16 | print(f"Number of ones achieved: {history.fittest['fitness']}")
17 | 
18 | history.plot(only_show_max = True)
19 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | with open("README.md", "r") as file_in:
4 |     long_description = file_in.read()
5 | 
6 | setuptools.setup(
7 |     name = 'naturalselection',
8 |     entry_points = {'console_scripts' : [
9 |         'core = naturalselection.core:main',
10 |         'nn = naturalselection.nn:main',
11 |     ]},
12 |     install_requires = ['numpy','matplotlib','tqdm','tensorflow','scikit-learn'],
13 |     version = "0.6.0",
14 |     author = "Dan Saattrup Nielsen",
15 |     author_email = "saattrupdan@gmail.com",
16 |     description = "An all-purpose pythonic genetic algorithm",
17 |     keywords = "genetic algorithm neural network",
18 |     long_description = long_description,
19 |     long_description_content_type = "text/markdown",
20 |     url = "https://github.com/saattrupdan/naturalselection",
21 |     packages = setuptools.find_packages(),
22 |     classifiers = [
23 |         "Development Status :: 3 - Alpha",
24 |         "Programming Language :: Python :: 3",
25 |         "License :: OSI 
Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /nn_example.py: -------------------------------------------------------------------------------- 1 | import naturalselection as ns 2 | 3 | def image_preprocessing(X): 4 | ''' Basic normalisation and scaling preprocessing. ''' 5 | import numpy as np 6 | X = X.reshape((-1, np.prod(X.shape[1:]))) 7 | X = X.astype('float32') 8 | X = (X - X.min()) / (X.max() - X.min()) 9 | X -= X.mean(axis = 0) 10 | return X 11 | 12 | def train_val_sets(kind = 'mnist'): 13 | ''' Get normalised and scaled train- and val sets. 
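Supported kinds are mnist, fashion_mnist, cifar10, cifar100, boston_housing, imdb and reuters; returns an (X_train, Y_train, X_val, Y_val) tuple.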
''' 14 | 15 | from tensorflow.keras.utils import to_categorical 16 | if kind == 'mnist': 17 | import tensorflow.keras.datasets.mnist as data 18 | elif kind == 'fashion_mnist': 19 | import tensorflow.keras.datasets.fashion_mnist as data 20 | elif kind == 'cifar10': 21 | import tensorflow.keras.datasets.cifar10 as data 22 | elif kind == 'cifar100': 23 | import tensorflow.keras.datasets.cifar100 as data 24 | elif kind == 'boston_housing': 25 | import tensorflow.keras.datasets.boston_housing as data 26 | elif kind == 'imdb': 27 | import tensorflow.keras.datasets.imdb as data 28 | elif kind == 'reuters': 29 | import tensorflow.keras.datasets.reuters as data 30 | else: 31 | raise NameError('Dataset not recognised.') 32 | 33 | (X_train, Y_train), (X_val, Y_val) = data.load_data() 34 | X_train = image_preprocessing(X_train) 35 | Y_train = to_categorical(Y_train) 36 | X_val = image_preprocessing(X_val) 37 | Y_val = to_categorical(Y_val) 38 | return (X_train, Y_train, X_val, Y_val) 39 | 40 | def evolve_nn(kind = 'mnist'): 41 | 42 | if kind == 'mnist': 43 | max_training_time = 60 44 | elif kind == 'fashion_mnist': 45 | max_training_time = 240 46 | elif kind == 'cifar10': 47 | max_training_time = 120 48 | elif kind == 'cifar100': 49 | max_training_time = 240 50 | else: 51 | raise NameError('Dataset not recognised.') 52 | 53 | print(f"\n~~~ Now evolving {kind} ~~~") 54 | 55 | nns = ns.NNs( 56 | size = 30, 57 | train_val_sets = train_val_sets(kind), 58 | loss_fn = 'categorical_crossentropy', 59 | score = 'accuracy', 60 | output_activation = 'softmax', 61 | max_training_time = max_training_time, 62 | max_epochs = 1, 63 | ) 64 | 65 | history = nns.evolve(generations = 30) 66 | print("Best overall genome:", history.fittest) 67 | 68 | history.plot( 69 | title = "Validation accuracy by generation", 70 | ylabel = "Validation accuracy", 71 | show_plot = False, 72 | file_name = f'{kind}_plot.png' 73 | ) 74 | 75 | best_score = nns.train_best() 76 | print("Best score:", best_score) 77 | 78 | 79 | if __name__ == '__main__': 80 | from sys import argv 81 | if len(argv) > 1: 82 | for arg in argv[1:]: 83 | evolve_nn(arg) 84 | else: 85 | evolve_nn() 86 | -------------------------------------------------------------------------------- /naturalselection/callbacks.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.callbacks import Callback, EarlyStopping 2 | import numpy as np 3 | from tqdm import tqdm 4 | import time 5 | 6 | class TQDMCallback(Callback): 7 | ''' 8 | Callback to enable tqdm integration. 
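Shows an outer progress bar over the epochs and an inner one over the samples or batches within each epoch.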
9 | Source: https://github.com/bstriner/keras-tqdm 10 | ''' 11 | 12 | def __init__(self, outer_description = "Training", 13 | inner_description_initial = "Epoch: {epoch}", 14 | inner_description_update = "Epoch: {epoch} - {metrics}", 15 | metric_format = "{name}: {value:0.4f}", 16 | separator = ", ", 17 | leave_inner = True, 18 | leave_outer = True, 19 | show_inner = True, 20 | show_outer = True, 21 | output_file = None, 22 | outer_position = None, 23 | inner_position = None, 24 | initial = 0): 25 | 26 | self.outer_description = outer_description 27 | self.inner_description_initial = inner_description_initial 28 | self.inner_description_update = inner_description_update 29 | self.metric_format = metric_format 30 | self.separator = separator 31 | self.leave_inner = leave_inner 32 | self.leave_outer = leave_outer 33 | self.show_inner = show_inner 34 | self.show_outer = show_outer 35 | self.output_file = output_file 36 | self.tqdm_outer = None 37 | self.tqdm_inner = None 38 | self.epoch = None 39 | self.running_logs = None 40 | self.inner_count = None 41 | self.initial = initial 42 | self.outer_position = outer_position 43 | self.inner_position = inner_position 44 | 45 | def build_tqdm(self, desc, total, leave, position = None, initial = 0): 46 | """ 47 | Extension point. Override to provide custom options to tqdm 48 | initializer. 49 | """ 50 | return tqdm(desc = desc, total = total, leave = leave, 51 | file = self.output_file, initial = initial, 52 | position = position) 53 | 54 | def build_tqdm_outer(self, desc, total): 55 | """ 56 | Extension point. Override to provide custom options to outer 57 | progress bars (Epoch loop) 58 | """ 59 | return self.build_tqdm(desc = desc, total = total, 60 | leave = self.leave_outer, initial = self.initial, 61 | position = self.outer_position) 62 | 63 | def build_tqdm_inner(self, desc, total): 64 | """ 65 | Extension point. 
Override to provide custom options to inner 66 | progress bars (Batch loop) 67 | """ 68 | return self.build_tqdm(desc = desc, total = total, 69 | leave = self.leave_inner, position = self.inner_position) 70 | 71 | def on_epoch_begin(self, epoch, logs = {}): 72 | self.epoch = epoch 73 | desc = self.inner_description_initial.format( 74 | epoch = self.epoch) 75 | self.mode = 0 # samples 76 | if 'samples' in self.params: 77 | self.inner_total = self.params['samples'] 78 | elif 'nb_sample' in self.params: 79 | self.inner_total = self.params['nb_sample'] 80 | else: 81 | self.mode = 1 # steps 82 | self.inner_total = self.params['steps'] 83 | if self.show_inner: 84 | self.tqdm_inner = self.build_tqdm_inner(desc = desc, 85 | total = self.inner_total) 86 | self.inner_count = 0 87 | self.running_logs = {} 88 | 89 | def on_epoch_end(self, epoch, logs = {}): 90 | metrics = self.format_metrics(logs) 91 | desc = self.inner_description_update.format(epoch = epoch, 92 | metrics = metrics) 93 | if self.show_inner: 94 | self.tqdm_inner.desc = desc 95 | # set miniters and mininterval to 0 so last update shows 96 | self.tqdm_inner.miniters = 0 97 | self.tqdm_inner.mininterval = 0 98 | self.tqdm_inner.update(self.inner_total - self.tqdm_inner.n) 99 | self.tqdm_inner.close() 100 | if self.show_outer: 101 | self.tqdm_outer.update(1) 102 | 103 | def on_batch_begin(self, batch, logs = {}): 104 | pass 105 | 106 | def on_batch_end(self, batch, logs = {}): 107 | if self.mode == 0: 108 | update = logs['size'] 109 | else: 110 | update = 1 111 | self.inner_count += update 112 | if self.inner_count < self.inner_total: 113 | self.append_logs(logs) 114 | metrics = self.format_metrics(self.running_logs) 115 | desc = self.inner_description_update.format( 116 | epoch = self.epoch, metrics = metrics) 117 | if self.show_inner: 118 | self.tqdm_inner.desc = desc 119 | self.tqdm_inner.update(update) 120 | 121 | def on_train_begin(self, logs = {}): 122 | if self.show_outer: 123 | epochs = (self.params['epochs'] if 'epochs' in self.params 124 | else self.params['nb_epoch']) 125 | self.tqdm_outer = self.build_tqdm_outer( 126 | desc = self.outer_description, total = epochs) 127 | 128 | def on_train_end(self, logs = {}): 129 | if self.show_outer: 130 | self.tqdm_outer.close() 131 | 132 | def append_logs(self, logs): 133 | metrics = self.params['metrics'] 134 | for metric, value in logs.items(): 135 | if metric in metrics: 136 | if metric in self.running_logs: 137 | self.running_logs[metric].append(value[()]) 138 | else: 139 | self.running_logs[metric] = [value[()]] 140 | 141 | def format_metrics(self, logs): 142 | metrics = self.params['metrics'] 143 | strings = [self.metric_format.format(name = metric, 144 | value = np.mean(logs[metric], axis = None)) 145 | for metric in metrics if metric in logs] 146 | return self.separator.join(strings) 147 | 148 | 149 | class EarlierStopping(EarlyStopping): 150 | ''' 151 | Callback to stop training when enough time has passed. 152 | Source: https://github.com/keras-team/keras-contrib/issues/87 153 | 154 | INPUT 155 | (int) seconds: maximum time before stopping. 156 | (int) verbose: verbosity mode. 
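All remaining keyword arguments are passed on to keras' EarlyStopping, so monitor, patience, min_delta and restore_best_weights work as usual.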
157 |     '''
158 |     def __init__(self, seconds = None, **kwargs):
159 |         super().__init__(**kwargs)
160 |         self.start_time = 0
161 |         self.seconds = seconds
162 | 
163 |     def on_train_begin(self, logs = {}):
164 |         self.start_time = time.time()
165 |         super().on_train_begin(logs)
166 | 
167 |     def on_batch_end(self, batch, logs = {}):
168 |         if self.seconds and time.time()-self.start_time > self.seconds:
169 |             self.model.stop_training = True
170 |             if self.verbose:
171 |                 print('Stopping after {} seconds.'\
172 |                     .format(self.seconds))
173 |         super().on_batch_end(batch, logs)
174 | 
175 |     def on_epoch_end(self, epoch, logs = {}):
176 |         if self.seconds and time.time()-self.start_time > self.seconds:
177 |             self.model.stop_training = True
178 |             if self.restore_best_weights and self.best_weights:
179 |                 self.model.set_weights(self.best_weights)
180 |             if self.verbose:
181 |                 print('Stopping after {} seconds.'.\
182 |                     format(self.seconds))
183 | 
184 |         # Call early stopping if we're beyond the first epoch
185 |         if logs.get(self.monitor):
186 |             super().on_epoch_end(epoch, logs)
187 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NaturalSelection
2 | 
3 | [![PyPI version](https://badge.fury.io/py/naturalselection.svg)](https://badge.fury.io/py/naturalselection)
4 | 
5 | An all-purpose pythonic genetic algorithm, which includes built-in hyperparameter tuning support for neural networks.
6 | 
7 | 
8 | ## Installation
9 | 
10 | ```
11 | $ pip install naturalselection
12 | ```
13 | 
14 | 
15 | ## Usage
16 | 
17 | Here is a toy example optimising a pair of numbers with respect to division.
18 | 
19 | ```python
20 | >>> import naturalselection as ns
21 | >>>
22 | >>> Pair = ns.Genus(x = range(1, 10000), y = range(1, 10000))
23 | >>>
24 | >>> pairs = ns.Population(
25 | ...     genus = Pair,
26 | ...     size = 100,
27 | ...     fitness_fn = lambda n: n.x/n.y
28 | ... )
29 | ...
30 | >>> history = pairs.evolve(generations = 100)
31 | Evolving population: 100%|██████████████████| 100/100 [00:05<00:00, 19.59it/s]
32 | >>>
33 | >>> history.fittest
34 | {'genome': {'x': 9922, 'y': 10}, 'fitness': 992.2}
35 | >>>
36 | >>> history.plot()
37 | ```
38 | 
39 | ![Plot showing fitness value over 100 generations.](https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/naturalselection_data/numbers_example.png)
40 | 
41 | 
42 | We can also easily solve the classical [OneMax problem](http://tracer.lcc.uma.es/problems/onemax/onemax.html), which is about finding the bit-string of a given length consisting of all 1's. Here we set `goal = 100` in the `evolve` function to allow for early stopping if we reach our goal before the maximum number of generations, which we set to 5,000. Note that the run below only takes about nine seconds, as it stops as soon as the goal is reached.
43 | 
44 | ```python3
45 | >>> import naturalselection as ns
46 | >>>
47 | >>> BitString = ns.Genus(**{f'x{n}' : (0,1) for n in range(100)})
48 | >>>
49 | >>> def sum_bits(bitstring):
50 | ...     return sum(bitstring.get_genome().values())
51 | ...
52 | >>> bitstrings = ns.Population(
53 | ...     genus = BitString,
54 | ...     size = 5,
55 | ...     fitness_fn = sum_bits
56 | ... )
57 | ...
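>>> # Stop evolving early as soon as the goal fitness of 100 is reached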
58 | >>> history = bitstrings.evolve(generations = 5000, goal = 100)
59 | Evolving population: 36%|██████ | 1805/5000 [00:09<00:16, 194.43it/s]
60 | >>>
61 | >>> history.plot(only_show_max = True)
62 | ```
63 | 
64 | ![Plot showing fitness value over 4500 generations, converging steadily to the optimal filled out sequence of ones.](https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/naturalselection_data/onemax_example.png)
65 | 
66 | 
67 | Lastly, here is an example of finding a fully connected feedforward neural network to model [MNIST](https://en.wikipedia.org/wiki/MNIST_database).
68 | 
69 | Note that the models are trained in parallel, so a copy of the MNIST data set is loaded in for every CPU core in your computer, each copy taking up ~750MB of memory. If this causes you to run into memory trouble then you can set the `workers` parameter to something small like 2, or set `multiprocessing = False` to turn parallelism off completely. I've marked these in the code below.
70 | 
71 | ```python3
72 | >>> import naturalselection as ns
73 | >>>
74 | >>> def preprocessing(X):
75 | ...     ''' Basic normalisation and scaling preprocessing. '''
76 | ...     import numpy as np
77 | ...     X = X.reshape((-1, np.prod(X.shape[1:])))
78 | ...     X = (X - X.min()) / (X.max() - X.min())
79 | ...     X -= X.mean(axis = 0)
80 | ...     return X
81 | ...
82 | >>> def mnist_train_val_sets():
83 | ...     ''' Get normalised and scaled MNIST train- and val sets. '''
84 | ...     from tensorflow.keras.utils import to_categorical
85 | ...     import mnist
86 | ...     X_train = preprocessing(mnist.train_images())
87 | ...     Y_train = to_categorical(mnist.train_labels())
88 | ...     X_val = preprocessing(mnist.test_images())
89 | ...     Y_val = to_categorical(mnist.test_labels())
90 | ...     return (X_train, Y_train, X_val, Y_val)
91 | ...
92 | >>> nns = ns.NNs(
93 | ...     size = 30,
94 | ...     train_val_sets = mnist_train_val_sets(),
95 | ...     loss_fn = 'categorical_crossentropy',
96 | ...     score = 'accuracy',
97 | ...     output_activation = 'softmax',
98 | ...     max_epochs = 1,
99 | ...     max_training_time = 60,
100 | ...     # workers = 2, # If you want to reduce parallelism
101 | ...     # multiprocessing = False # If you want to disable parallelism
102 | ... )
103 | ...
104 | >>> history = nns.evolve(generations = 20)
105 | Evolving population: 100%|█████████████████████| 20/20 [57:18<00:00, 73.22s/it]
106 | Computing fitness: 100%|█████████████████████████| 7/7 [01:20<00:00, 10.13s/it]
107 | >>>
108 | >>> history.fittest
109 | {'genome': {'optimizer': 'adam', 'hidden_activation': 'relu',
110 | 'batch_size': 32, 'initializer': 'glorot_normal', 'input_dropout': 0.2,
111 | 'neurons0': 256, 'dropout0': 0.0, 'neurons1': 128, 'dropout1': 0.1,
112 | 'neurons2': 256, 'dropout2': 0.1, 'neurons3': 256, 'dropout3': 0.2,
113 | 'neurons4': 128, 'dropout4': 0.4}, 'fitness': 0.9659}
114 | >>>
115 | >>> history.plot(
116 | ...     title = "Validation accuracy by generation",
117 | ...     ylabel = "Validation accuracy"
118 | ... )
119 | ```
120 | 
121 | ![Plot showing fitness value (which is accuracy in this case) over 20 generations, converging to roughly 96.50%.](https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/naturalselection_data/mnist_example.png)
122 | 
123 | We can then train the best performing model and save it locally:
124 | 
125 | ```python3
126 | >>> # Training the best model and saving it to mnist_model.h5
127 | >>> best_score = nns.train_best(file_name = 'mnist_model')
128 | Epoch 0, val_acc: 0.966: 100%|█████████| 60000/60000 [00:12<00:00, 1388.45it/s]
129 | (...)
130 | Epoch 19, val_acc: 0.982: 100%|████████| 60000/60000 [00:11<00:00, 1846.24it/s]
131 | >>>
132 | >>> best_score
133 | 0.982
134 | ```
135 | 
136 | ## Algorithmic details
137 | 
138 | The algorithm follows the standard blueprint for a genetic algorithm as e.g. described on this [Wikipedia page](https://en.wikipedia.org/wiki/Genetic_algorithm), which roughly goes like this:
139 | 
140 | 1. An initial population is constructed
141 | 2. Fitness values for all organisms in the population are computed
142 | 3. A subset of the population (the *elite pool*) is selected
143 | 4. A subset of the population (the *breeding pool*) is selected
144 | 5. Pairs from the breeding pool are chosen, who will breed to create new "child" organisms whose genomes are combinations of the "parent" genomes. Breeding continues until the children and the elites together constitute a population of the same size as the original
145 | 6. A subset of the children (the *mutation pool*) is selected
146 | 7. Every child in the mutation pool is mutated, meaning that its genome is altered in some way
147 | 8. Go back to step 2
148 | 
149 | We now describe the individual steps in this particular implementation in more detail. Note that step 3 is sometimes left out completely, but since that just corresponds to an empty elite pool I decided to keep it in, for generality.
150 | 
151 | ### Step 1: Constructing the initial population
152 | 
153 | The population starts out as a uniformly random sample of the possible genome values dictated by the genus; this sampling happens when a new `Population` object is created. Alternatively, you may set `initial_genome` to whatever genome you would like, which will create a population consisting of organisms similar to this genome (the result of starting with a population of identical copies of the organism and then mutating 80% of them).
154 | 
155 | ```python3
156 | >>> pairs = ns.Population(
157 | ...     genus = Pair,
158 | ...     size = 100,
159 | ...     fitness_fn = lambda n: n.x/n.y,
160 | ...     initial_genome = {'x': 9750, 'y': 15}
161 | ... )
162 | ...
163 | >>> history = pairs.evolve(generations = 100)
164 | Evolving population: 100%|██████████████████| 100/100 [00:05<00:00, 19.47it/s]
165 | >>>
166 | >>> history.fittest
167 | {'genome' : {'x' : 9989, 'y' : 3}, 'fitness' : 3329.6666666666665}
168 | ```
169 | 
170 | ### Step 2: Compute fitness values
171 | 
172 | This happens in the `update_fitness` function, which is called by the `evolve` function. These computations are by default performed in parallel when dealing with neural networks and serially otherwise, as the benefits of parallelism are only reaped when the fitness computations take up a significant part of the algorithm (in the examples above not concerning neural networks, we would actually slow down the algorithm non-trivially by introducing parallelism).
173 | 
174 | ### Steps 3 & 4: Selecting elite pool and breeding pool
175 | 
176 | These two pools are selected in exactly the same way, using the `sample` function. They only differ in the number of organisms sampled, where the default `elitism_rate` is 5% and the default `breeding_rate` is 80%. Organisms are sampled with probability proportional to their fitness, i.e. an organism's fitness divided by the sum of all fitness values in the population, so the higher an organism's fitness, the more likely it is to be chosen for a pool; see the sketch below. The precise implementation is based on the algorithm specified on this [Wikipedia page](https://en.wikipedia.org/wiki/Selection_(genetic_algorithm)).
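As a minimal sketch of how such fitness-proportionate sampling works (an illustration only, not the library's actual `sample` implementation, which walks through the cumulative probabilities by hand), numpy's built-in `choice` can do the whole job:

```python3
>>> import numpy as np
>>>
>>> def sample_indices(fitnesses, amount):
...     ''' Sample organism indices proportionally to their fitnesses. '''
...     probs = fitnesses / fitnesses.sum()
...     return np.random.choice(fitnesses.size, size = amount, p = probs)
...
>>> # An organism with fitness 7.0 is drawn seven times as often as one
>>> # with fitness 1.0; the draw below is just one possible outcome
>>> sample_indices(np.array([1., 2., 7.]), amount = 5)
array([2, 2, 0, 2, 1])
```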
177 | 
178 | ### Step 5: Breeding
179 | 
180 | In this implementation the parent organisms are chosen uniformly at random from the breeding pool. When determining the value of the child's genome we apply the "single-point crossover" method, where we choose an index uniformly at random among the attributes, and the child will then inherit all attributes to the left of this index from one parent and the attributes to the right of this index from the other parent. See more on [this Wikipedia page](https://en.wikipedia.org/wiki/Crossover_(genetic_algorithm)).
181 | 
182 | ### Step 6: Selection of mutation pool
183 | 
184 | The mutation pool is chosen uniformly at random, in contrast to the other two pools, as otherwise we would be more likely to "mutate away" many of the good genes of our fittest organisms. The default `mutation_rate` is 20%.
185 | 
186 | ### Step 7: Mutation
187 | 
188 | This implementation is roughly the [bit string mutation](https://en.wikipedia.org/wiki/Mutation_(genetic_algorithm)), where every gene of the organism has a 1/n chance of being uniformly randomly replaced by another gene, with n being the number of genes in the organism's genome. This means that, on average, mutation causes one gene to be altered. The number of genes altered in a mutation can be modified by changing the `mutation_factor` parameter, which by default is the above 1/n.
189 | 
190 | 
191 | ## Possible future extensions
192 | 
193 | These are the ideas that I have thought of implementing in the future. Check the ongoing progress on the `dev` branch.
194 | 
195 | * Enable support for CNNs
196 | * Enable support for RNNs and in particular LSTMs
197 | * Include an option to have dependency relations between genes. In a neural network setting this could include the topology as a gene on which all the layer-specific genes depend, which would be similar to the approach taken in [this paper](https://arxiv.org/pdf/1703.00548/).
198 | 
199 | 
200 | ## License
201 | 
202 | This project is licensed under the [MIT License](https://github.com/saattrupdan/naturalselection/blob/master/LICENSE).
203 | 
--------------------------------------------------------------------------------
/naturalselection/nn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from functools import partial
4 | import logging
5 | 
6 | # Used to set default value for workers
7 | from multiprocessing import cpu_count
8 | 
9 | import naturalselection as ns
10 | 
11 | class NN(ns.Genus):
12 |     ''' Feedforward fully connected neural network genus.
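    When uniform_layers is False, each potential hidden layer gets its own neuronsN and dropoutN genes, where the value 0 for neuronsN means that this layer is left out.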
13 | 
14 |     INPUT:
15 |         (int) max_nm_hidden_layers: maximum number of hidden layers
16 |         (bool) uniform_layers: whether all hidden layers should
17 |             have the same number of neurons and dropout
18 |         (iterable) input_dropout: values for input dropout
19 |         (iterable) hidden_dropout: values for dropout after hidden layers
20 |         (iterable) neurons: values for neurons in hidden layers
21 |         (iterable) optimizer: keras optimizers
22 |         (iterable) hidden_activation: keras activation functions
23 |         (iterable) batch_size: batch sizes
24 |         (iterable) initializer: keras initializers
25 |     '''
26 |     def __init__(self, max_nm_hidden_layers = 5, uniform_layers = False,
27 |         input_dropout = np.arange(0, 0.6, 0.1),
28 |         hidden_dropout = np.arange(0, 0.6, 0.1),
29 |         neurons = np.array([2 ** n for n in range(4, 11)]),
30 |         optimizer = np.array(['adamax', 'adam', 'nadam']),
31 |         hidden_activation = np.array(['relu', 'elu']),
32 |         batch_size = np.array([2 ** n for n in range(4, 7)]),
33 |         initializer = np.array(['lecun_uniform', 'lecun_normal',
34 |                                 'glorot_uniform', 'glorot_normal',
35 |                                 'he_uniform', 'he_normal'])):
36 | 
37 |         self.optimizer = np.unique(np.asarray(optimizer))
38 |         self.hidden_activation = np.unique(np.asarray(hidden_activation))
39 |         self.batch_size = np.unique(np.asarray(batch_size))
40 |         self.initializer = np.unique(np.asarray(initializer))
41 |         self.input_dropout = np.unique(np.asarray(input_dropout))
42 | 
43 |         if uniform_layers:
44 |             self.neurons = np.unique(np.asarray(neurons))
45 |             self.dropout = np.unique(np.asarray(hidden_dropout))
46 |             self.nm_hidden_layers = \
47 |                 np.arange(1, max_nm_hidden_layers + 1)
48 |         else:
49 |             neurons = np.unique(np.append(neurons, 0))
50 |             dropout = np.around(np.unique(np.append(hidden_dropout, 0)), 2)
51 |             layer_info = {}
52 |             for layer_idx in range(max_nm_hidden_layers):
53 |                 layer_info["neurons{}".format(layer_idx)] = neurons
54 |                 layer_info["dropout{}".format(layer_idx)] = dropout
55 |             self.__dict__.update(layer_info)
56 | 
57 | class NNs(ns.Population):
58 |     def __init__(self,
59 |         train_val_sets,
60 |         size = 50,
61 |         initial_genome = None,
62 |         breeding_rate = 0.8,
63 |         mutation_rate = 0.2,
64 |         mutation_factor = 'default',
65 |         elitism_rate = 0.05,
66 |         multiprocessing = True,
67 |         workers = cpu_count(),
68 |         progress_bars = 3,
69 |         loss_fn = 'binary_crossentropy',
70 |         nm_features = 'infer',
71 |         nm_labels = 'infer',
72 |         score = 'accuracy',
73 |         output_activation = 'sigmoid',
74 |         max_epochs = 1000000,
75 |         patience = 3,
76 |         min_change = 1e-4,
77 |         max_training_time = None,
78 |         max_nm_hidden_layers = 5,
79 |         uniform_layers = False,
80 |         input_dropout = np.arange(0, 0.6, 0.1),
81 |         hidden_dropout = np.arange(0, 0.6, 0.1),
82 |         neurons = np.array([2 ** n for n in range(4, 11)]),
83 |         optimizer = np.array(['adamax', 'adam', 'nadam']),
84 |         hidden_activation = np.array(['relu', 'elu']),
85 |         batch_size = np.array([2 ** n for n in range(4, 7)]),
86 |         initializer = np.array(['lecun_uniform', 'lecun_normal',
87 |                                 'glorot_uniform', 'glorot_normal',
88 |                                 'he_uniform', 'he_normal']),
89 |         verbose = 0):
90 | 
91 |         self.train_val_sets = train_val_sets
92 |         self.size = size
93 |         self.initial_genome = initial_genome
94 |         self.breeding_rate = breeding_rate
95 |         self.mutation_rate = mutation_rate
96 |         self.mutation_factor = mutation_factor
97 |         self.elitism_rate = elitism_rate
98 |         self.multiprocessing = multiprocessing
99 |         self.workers = workers
100 |         self.progress_bars = progress_bars
101 |         self.loss_fn = loss_fn
102 |         self.nm_features = nm_features
103 |         self.nm_labels = nm_labels
104 |         self.score = 
score 105 | self.output_activation = output_activation 106 | self.max_epochs = max_epochs 107 | self.patience = patience 108 | self.min_change = min_change 109 | self.max_training_time = max_training_time 110 | self.max_nm_hidden_layers = max_nm_hidden_layers 111 | self.uniform_layers = uniform_layers 112 | self.input_dropout = input_dropout 113 | self.hidden_dropout = hidden_dropout 114 | self.neurons = neurons 115 | self.optimizer = optimizer 116 | self.hidden_activation = hidden_activation 117 | self.batch_size = batch_size 118 | self.initializer = initializer 119 | self.verbose = verbose 120 | 121 | logging.basicConfig(format = '%(levelname)s: %(message)s') 122 | self.logger = logging.getLogger() 123 | 124 | if not verbose: 125 | self.logger.setLevel(logging.WARNING) 126 | elif verbose == 1: 127 | self.logger.setLevel(logging.INFO) 128 | elif verbose == 2: 129 | self.logger.setLevel(logging.DEBUG) 130 | 131 | self.logger.info("Creating population...") 132 | 133 | # Hard coded values for neural networks 134 | self.allow_repeats = False 135 | self.memory = 'inf' 136 | 137 | self.genus = NN( 138 | max_nm_hidden_layers = self.max_nm_hidden_layers, 139 | uniform_layers = self.uniform_layers, 140 | input_dropout = self.input_dropout, 141 | hidden_dropout = self.hidden_dropout, 142 | neurons = self.neurons, 143 | optimizer = self.optimizer, 144 | hidden_activation = self.hidden_activation, 145 | batch_size = self.batch_size, 146 | initializer = self.initializer 147 | ) 148 | 149 | self.fitness_fn = partial( 150 | self.train_nn, 151 | max_epochs = self.max_epochs, 152 | patience = self.patience, 153 | min_change = self.min_change, 154 | max_training_time = self.max_training_time, 155 | file_name = None, 156 | ) 157 | 158 | # If user has supplied an initial genome then construct a population 159 | # which is very similar to that 160 | if initial_genome: 161 | 162 | # Create a population of organisms all with the initial genome 163 | self.population = np.array([ 164 | ns.Organism(self.genus, **initial_genome) 165 | for _ in range(size) 166 | ]) 167 | 168 | # Mutate 80% of the population 169 | rnd = np.random.random(self.population.shape) 170 | for (i, org) in enumerate(self.population): 171 | if rnd[i] > 0.2: 172 | org.mutate() 173 | else: 174 | self.population = self.genus.create_organisms(size) 175 | 176 | # We do not have access to fitness values yet, so choose the 'fittest 177 | # organism' to just be a random one 178 | self.fittest = np.random.choice(self.population) 179 | 180 | def train_best(self, max_epochs = 1000000, min_change = 1e-4, 181 | patience = 10, max_training_time = None, file_name = None): 182 | 183 | best_nn = self.fittest 184 | fitness = self.train_nn( 185 | nn = best_nn, 186 | max_epochs = max_epochs, 187 | patience = patience, 188 | min_change = min_change, 189 | max_training_time = max_training_time, 190 | file_name = file_name 191 | ) 192 | return fitness 193 | 194 | def train_nn(self, nn, max_epochs = 1000000, patience = 3, 195 | min_change = 1e-4, max_training_time = None, file_name = None, 196 | worker_idx = None): 197 | ''' Train a feedforward neural network and output the score. 
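        Builds a keras model from the genome of the given organism, trains it with early stopping and returns the validation score specified by self.score.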
198 | 
199 |         INPUT
200 |             (Organism) nn: an organism of the NN genus
201 |             (int) max_epochs = 1000000: maximum number of epochs to train for
202 |             (int) patience = 3: number of epochs allowed with no progress
203 |                 above min_change
204 |             (float) min_change = 1e-4: everything below this number will
205 |                 not count as a change in the score
206 |             (int) max_training_time = None: maximum number of seconds to
207 |                 train for, also training the final epoch after the time
208 |                 has run out
209 |             (int) worker_idx = None: what worker is currently training this
210 |                 network, with enumeration starting from 1
211 | 
212 |         OUTPUT
213 |             (float) the score of the neural network
214 |         '''
215 | 
216 |         from tensorflow.keras.models import Model
217 |         from tensorflow.keras.layers import Input, Dense, Dropout
218 |         from tensorflow.keras import backend as K
219 |         from tensorflow.python.util import deprecation
220 |         from tensorflow import set_random_seed
221 |         from sklearn.metrics import f1_score, precision_score, recall_score
222 | 
223 |         # Custom callbacks
224 |         from naturalselection.callbacks import TQDMCallback, EarlierStopping
225 | 
226 |         # Used when building network
227 |         from itertools import count
228 | 
229 |         # Suppress tensorflow warnings
230 |         deprecation._PRINT_DEPRECATION_WARNINGS = False
231 |         os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
232 | 
233 |         # Set random seeds to enable better comparison of scores
234 |         np.random.seed(0)
235 |         set_random_seed(0)
236 | 
237 |         X_train, Y_train, X_val, Y_val = self.train_val_sets
238 | 
239 |         if self.nm_features == 'infer':
240 |             self.nm_features = X_val.shape[1]
241 |         if self.nm_labels == 'infer':
242 |             self.nm_labels = Y_val.shape[1]
243 | 
244 |         inputs = Input(shape = (self.nm_features,))
245 |         x = Dropout(nn.input_dropout)(inputs)
246 | 
247 |         if self.uniform_layers:
248 |             for _ in range(nn.nm_hidden_layers):
249 |                 x = Dense(nn.neurons, activation = nn.hidden_activation,
250 |                     kernel_initializer = nn.initializer)(x)
251 |                 x = Dropout(nn.dropout)(x)
252 |         else:
253 |             for i in count():
254 |                 try:
255 |                     neurons = nn.__dict__["neurons{}".format(i)]
256 |                     if neurons:
257 |                         x = Dense(neurons, activation = nn.hidden_activation,
258 |                             kernel_initializer = nn.initializer)(x)
259 |                     dropout = nn.__dict__["dropout{}".format(i)]
260 |                     if dropout:
261 |                         x = Dropout(dropout)(x)
262 |                 except KeyError:
263 |                     break
264 | 
265 |         outputs = Dense(self.nm_labels,
266 |             activation = self.output_activation,
267 |             kernel_initializer = nn.initializer)(x)
268 | 
269 |         model = Model(inputs = inputs, outputs = outputs)
270 | 
271 |         if self.score == 'accuracy':
272 |             metrics = ['accuracy']
273 |         elif self.score == 'categorical_accuracy':
274 |             metrics = ['categorical_accuracy']
275 |         else:
276 |             metrics = []
277 | 
278 |         model.compile(
279 |             loss = self.loss_fn,
280 |             optimizer = nn.optimizer,
281 |             metrics = metrics
282 |         )
283 | 
284 |         if self.score == 'accuracy' or self.score == 'categorical_accuracy':
285 |             monitor = 'val_acc'
286 |         else:
287 |             monitor = 'val_loss'
288 | 
289 |         earlier_stopping = EarlierStopping(
290 |             monitor = monitor,
291 |             patience = patience,
292 |             min_delta = min_change,
293 |             restore_best_weights = True,
294 |             seconds = max_training_time
295 |         )
296 | 
297 |         callbacks = [earlier_stopping]
298 |         if self.progress_bars >= 3:
299 |             if worker_idx:
300 |                 desc = f'Worker {worker_idx % self.workers}, '
301 |                 tqdm_callback = TQDMCallback(
302 |                     show_outer = False,
303 |                     inner_position = (worker_idx % self.workers) + 2,
304 |                     leave_inner = False,
305 |                     inner_description_update = desc + 'Epoch {epoch}',
306 |                     inner_description_initial = desc + 'Epoch {epoch}'
307 |                 )
308 |             else:
309 |                 tqdm_callback = TQDMCallback(
310 |                     show_outer = False,
311 |                     inner_position = 0
312 |                 )
313 |             callbacks.append(tqdm_callback)
314 | 
315 |         model.fit(
316 |             X_train,
317 |             Y_train,
318 |             batch_size = nn.batch_size,
319 |             validation_data = (X_val, Y_val),
320 |             epochs = max_epochs,
321 |             callbacks = callbacks,
322 |             verbose = 0
323 |         )
324 | 
325 |         if file_name:
326 |             model.save("{}.h5".format(file_name))
327 | 
328 |         if self.nm_labels > 1:
329 |             average = 'micro'
330 |         else:
331 |             average = None
332 | 
333 |         Y_hat = model.predict(X_val, batch_size = 128)
334 |         if self.score == 'accuracy' or self.score == 'categorical_accuracy':
335 |             fitness = model.evaluate(X_val, Y_val, verbose = 0)[1]
336 |         elif self.score == 'f1':
337 |             Y_hat = np.greater(Y_hat, 0.5)
338 |             fitness = f1_score(Y_val, Y_hat, average = average)
339 |         elif self.score == 'precision':
340 |             Y_hat = np.greater(Y_hat, 0.5)
341 |             fitness = precision_score(Y_val, Y_hat, average = average)
342 |         elif self.score == 'recall':
343 |             Y_hat = np.greater(Y_hat, 0.5)
344 |             fitness = recall_score(Y_val, Y_hat, average = average)
345 |         elif self.score == 'loss':
346 |             fitness = np.divide(1, model.evaluate(X_val, Y_val, verbose = 0))
347 |         else:
348 |             # Custom scoring function
349 |             fitness = self.score(Y_val, Y_hat)
350 | 
351 |         # Clear tensorflow session to avoid memory leak
352 |         K.clear_session()
353 | 
354 |         return fitness
355 | 
356 | 
357 | # Entry point for the 'nn' console script defined in setup.py
358 | def main():
359 |     pass
--------------------------------------------------------------------------------
/naturalselection/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | import os
4 | from functools import reduce, partial
5 | import logging
6 | 
7 | # Suppressing warnings
8 | import warnings
9 | 
10 | # Plots
11 | import matplotlib.pyplot as plt
12 | 
13 | # Progress bars
14 | from tqdm import tqdm, trange
15 | 
16 | # Parallelising fitness
17 | from inspect import getfullargspec
18 | import multiprocessing as mp
19 | 
20 | class Genus():
21 |     ''' Storing information about all the possible gene combinations.
22 | 
23 |     INPUT
24 |         (kwargs) genomes
25 |     '''
26 | 
27 |     def __init__(self, **genomes):
28 |         self.__dict__.update(
29 |             {key: np.asarray(val) for (key, val) in genomes.items()}
30 |         )
31 | 
32 |     def create_organism(self):
33 |         rnd_genes = {key: val[np.random.choice(range(val.shape[0]))]
34 |             for (key, val) in self.__dict__.items()}
35 |         return Organism(genus = self, **rnd_genes)
36 | 
37 |     def create_organisms(self, amount = 1):
38 |         ''' Create organisms of this genus.
39 | 
40 |         INPUT
41 |             (int) amount = 1
42 |         '''
43 |         return np.array([self.create_organism() for _ in range(amount)])
44 | 
45 |     def alter_genomes(self, **genomes):
46 |         ''' Add genomes to the genus, or change existing ones.
47 | 
48 |         INPUT
49 |             (kwargs) genomes
50 |         '''
51 |         self.__dict__.update(genomes)
52 |         return self
53 | 
54 |     def remove_genomes(self, *keys):
55 |         ''' Remove genomes from the genus. '''
56 |         for key in keys:
57 |             self.__dict__.pop(key, None)
58 |         return self
59 | 
60 | class Organism():
61 |     ''' Organism of a particular genus.
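    Genes that are missing, or whose values do not occur in the genus, are filled in with uniformly random values from the genus.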
62 | 
63 |     INPUT
64 |         (Genus) genus
65 |         (kwargs) genome: genome information
66 |     '''
67 | 
68 |     def __init__(self, genus, **genome):
69 | 
70 |         # Check that the input parameters match with the genus type,
71 |         # and if any parameters are missing then add random values
72 |         genome = {key: val for (key, val) in genome.items() if key in
73 |             genus.__dict__.keys() and val in genus.__dict__[key]}
74 |         for key in genus.__dict__.keys() - genome.keys():
75 |             val_idx = np.random.choice(range(genus.__dict__[key].shape[0]))
76 |             genome[key] = genus.__dict__[key][val_idx]
77 | 
78 |         self.__dict__.update(genome)
79 |         self.genus = genus
80 |         self.fitness = 0
81 | 
82 |     def get_genome(self):
83 |         return {key: val for (key, val) in self.__dict__.items()
84 |             if key not in {'genus', 'fitness'}}
85 | 
86 |     def breed(self, other):
87 |         ''' Breed organism with another organism, returning a new
88 |             organism of the same genus.
89 | 
90 |         INPUT
91 |             (Organism) other: the organism to breed with
92 |         '''
93 | 
94 |         if self.genus != other.genus:
95 |             raise Exception("Only organisms of the same genus can breed.")
96 | 
97 |         self_genome = list(self.get_genome().items())
98 |         other_genome = list(other.get_genome().items())
99 | 
100 |         rnd = np.random.choice(len(self_genome))
101 |         child_genome = dict(self_genome[:rnd] + other_genome[rnd:])
102 |         child = Organism(self.genus, **child_genome)
103 | 
104 |         return child
105 | 
106 |     def mutate(self, mutation_factor = 'default'):
107 |         ''' Mutate the organism in place and return it.
108 | 
109 |         INPUT
110 |             (float or string) mutation_factor = 'default': given that an
111 |                 organism is being mutated, the probability that
112 |                 a given gene is changed. Defaults to 1/k, where
113 |                 k is the number of genes in the genome
114 |         '''
115 |         keys = np.asarray(list(self.get_genome().keys()))
116 |         if mutation_factor == 'default':
117 |             mutation_factor = np.divide(1, keys.size)
118 |         mut_idx = np.less(np.random.random(keys.size), mutation_factor)
119 |         mut_vals = {key: self.genus.__dict__[key]\
120 |             [np.random.choice(range(self.genus.__dict__[key].shape[0]))]
121 |             for key in keys[mut_idx]}
122 |         self.__dict__.update(mut_vals)
123 |         return self
124 | 
125 | class Population():
126 |     ''' Population of organisms, all of the same genus.
127 | 
128 |     INPUT
129 |         (Genus) genus
130 |         (int) size: size of the population
131 |         (function) fitness_fn: fitness function
132 |         (dict) initial_genome = None: start with a population similar to
133 |             the genome, for a warm start
134 |         (float) breeding_rate = 0.8: percentage of population to breed
135 |         (float) mutation_rate = 0.2: percentage of population to mutate
136 |             each generation
137 |         (float or string) mutation_factor = 'default': given that an
138 |             organism is being mutated, the probability that
139 |             a given gene is changed. Defaults to 1/k, where
140 |             k is the number of genes in the genome
141 |         (float) elitism_rate = 0.05: percentage of population to keep
142 |             across generations
143 |         (bool) multiprocessing = False: whether fitnesses should be
144 |             computed in parallel
145 |         (int) workers = mp.cpu_count(): how many workers to use if
146 |             multiprocessing is True
147 |         (int) progress_bars = 1: number of progress bars to show, where 1
148 |             only shows the main evolution progress, and 2 shows both
149 |             the evolution and the fitness computation per generation
150 |         (int or string) memory = 'inf': how many generations the
151 |             population can look back to avoid redundant
152 |             fitness computations, where 'inf' means unlimited
153 |             memory.
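            Smaller values reduce memory usage, at the cost of recomputing fitnesses of genomes that reappear later.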
154 | (bool) allow_repeats = True: allow computing duplicate fitness vals 155 | (int) verbose = 0: verbosity mode 156 | ''' 157 | 158 | def __init__(self, genus, size, fitness_fn, initial_genome = None, 159 | breeding_rate = 0.8, mutation_rate = 0.2, mutation_factor = 'default', 160 | elitism_rate = 0.05, multiprocessing = False, workers = mp.cpu_count(), 161 | progress_bars = 1, memory = 'inf', allow_repeats = True, 162 | verbose = 0): 163 | 164 | self.genus = genus 165 | self.size = size 166 | self.initial_genome = initial_genome 167 | self.breeding_rate = breeding_rate 168 | self.mutation_rate = mutation_rate 169 | self.mutation_factor = mutation_factor 170 | self.elitism_rate = elitism_rate 171 | self.multiprocessing = multiprocessing 172 | self.workers = workers 173 | self.progress_bars = progress_bars 174 | self.memory = memory 175 | self.allow_repeats = allow_repeats 176 | self.verbose = verbose 177 | 178 | if 'worker_idx' not in getfullargspec(fitness_fn).args: 179 | def new_fitness_fn(*args, worker_idx = None, **kwargs): 180 | return fitness_fn(*args, **kwargs) 181 | self.fitness_fn = new_fitness_fn 182 | else: 183 | self.fitness_fn = fitness_fn 184 | 185 | logging.basicConfig(format = '%(levelname)s: %(message)s') 186 | self.logger = logging.getLogger() 187 | 188 | if not verbose: 189 | self.logger.setLevel(logging.WARNING) 190 | elif verbose == 1: 191 | self.logger.setLevel(logging.INFO) 192 | elif verbose == 2: 193 | self.logger.setLevel(logging.DEBUG) 194 | 195 | self.logger.info("Creating population...") 196 | 197 | if initial_genome: 198 | 199 | # Create a population of identical organisms 200 | self.population = np.array( 201 | [Organism(genus, **initial_genome) for _ in range(size)]) 202 | 203 | # Mutate 80% of the population 204 | rnd = np.random.random(self.population.shape) 205 | for (i, org) in enumerate(self.population): 206 | if rnd[i] > 0.2: 207 | org.mutate() 208 | else: 209 | self.population = genus.create_organisms(size) 210 | 211 | self.fittest = np.random.choice(self.population) 212 | 213 | def get_genomes(self): 214 | return np.asarray([org.get_genome() for org in self.population]) 215 | 216 | def get_fitnesses(self): 217 | return np.asarray([org.fitness for org in self.population]) 218 | 219 | def update_fitness(self, history = None): 220 | ''' Compute and update fitness values of the population. 
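        Fitness values are only computed once per unique genome in the population, and when allow_repeats is False, genomes remembered in the history are reused rather than recomputed.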
221 | 222 | INPUT 223 | (History) history = None: previous population history 224 | ''' 225 | 226 | # Duck typing function to make things immutable 227 | def make_immutable(x): 228 | try: 229 | if not isinstance(x, str): 230 | x = tuple(x) 231 | except TypeError: 232 | pass 233 | return x 234 | 235 | def immute_dict(d): 236 | return {key: make_immutable(val) for (key, val) in d.items()} 237 | 238 | unique_genomes = np.array( 239 | [dict(gene) for gene in set(frozenset(immute_dict(genome).items()) 240 | for genome in self.get_genomes())] 241 | ) 242 | 243 | # If history is loaded then get the genomes from the current 244 | # population that are unique across all generations 245 | past_indices = np.array([]) 246 | if history and not self.allow_repeats: 247 | g_prev = history.genome_history 248 | f_prev = history.fitness_history 249 | 250 | indices = np.array([((np.where(g_prev == org.get_genome())[0][0], 251 | np.where(g_prev == org.get_genome())[1][0]), idx) 252 | for (idx, org) in enumerate(self.population) 253 | if org.get_genome() in g_prev 254 | ]) 255 | past_indices = np.array([idx for (_, idx) in indices]) 256 | 257 | # Load previous fitnesses of genomes that are occuring now 258 | for (past_idx, idx) in indices: 259 | self.population[idx].fitness = f_prev[past_idx[0], past_idx[1]] 260 | 261 | # Remove genomes that have occured previously 262 | unique_genomes = np.array([genome for genome in unique_genomes 263 | if genome not in g_prev]) 264 | 265 | # Pull out the organisms with the unique genomes 266 | imm_genomes = np.array(list( 267 | map(immute_dict, self.get_genomes()))) 268 | imm_unique_genomes = np.array(list( 269 | map(immute_dict, unique_genomes))) 270 | unique_indices = np.array([np.argmin(imm_genomes != genome) 271 | for genome in imm_unique_genomes]) 272 | 273 | # Compute fitness values if there are any that needs to be computed 274 | if unique_indices.size: 275 | with warnings.catch_warnings(): 276 | 277 | # Ignore warning related to F1-scores 278 | f1_warn = 'F-score is ill-defined and being set to ' \ 279 | '0.0 due to no predicted samples.' 280 | warnings.filterwarnings('ignore', message = f1_warn) 281 | 282 | if self.multiprocessing: 283 | 284 | # Define queues to organise the parallelising 285 | todo = mp.Queue(unique_indices.size + self.workers) 286 | done = mp.Queue(unique_indices.size) 287 | for idx in unique_indices: 288 | todo.put(idx) 289 | for _ in range(self.workers): 290 | todo.put(-1) 291 | 292 | def worker(todo, done): 293 | ''' Fitness computing worker. 
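                        Pops organism indices off the todo queue, computes their fitnesses and pushes (idx, fitness) pairs onto the done queue; the sentinel value -1 shuts the worker down.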
''' 294 | from queue import Empty 295 | while True: 296 | try: 297 | idx = todo.get(timeout = 1) 298 | except Empty: 299 | continue 300 | if idx == -1: 301 | break 302 | else: 303 | org = self.population[idx] 304 | worker_idx = mp.current_process()._identity[0] 305 | fitness = self.fitness_fn(org, 306 | worker_idx = worker_idx) 307 | done.put((idx, fitness)) 308 | 309 | # Define our processes 310 | processes = [mp.Process(target = worker, 311 | args = (todo, done)) for _ in range(self.workers)] 312 | 313 | # Daemonise the processes, meaning they close when they 314 | # they finish, and start them 315 | for p in processes: 316 | p.daemon = True 317 | p.start() 318 | 319 | # This is the iterable with (idx, fitness) values 320 | idx_fits = (done.get() for _ in unique_indices) 321 | 322 | else: 323 | # This is the iterable with (idx, fitness) values, 324 | # obtained without any parallelising 325 | idx_fits = self.population[unique_indices] 326 | idx_fits = map(self.fitness_fn, idx_fits) 327 | idx_fits = zip(unique_indices, idx_fits) 328 | 329 | # Set up a progress bar 330 | if self.progress_bars >= 2: 331 | idx_fits = tqdm(idx_fits, total = unique_indices.size) 332 | idx_fits.set_description("Computing fitness") 333 | 334 | # Compute the fitness values 335 | for (idx, new_fitness) in idx_fits: 336 | self.population[idx].fitness = new_fitness 337 | 338 | # Join up the processes 339 | if self.multiprocessing: 340 | for p in processes: 341 | p.join() 342 | 343 | # Close the progress bar 344 | if self.progress_bars >= 2: 345 | idx_fits.close() 346 | 347 | 348 | # Copy out the fitness values to the other organisms with same genome 349 | for (i, org) in enumerate(self.population): 350 | if i not in unique_indices and i not in past_indices: 351 | prev_unique_idx = np.min(np.array( 352 | [idx for idx in unique_indices 353 | if immute_dict(org.get_genome()) == \ 354 | immute_dict(self.population[idx].get_genome())] 355 | )) 356 | self.population[i].fitness = \ 357 | self.population[prev_unique_idx].fitness 358 | 359 | def sample(self, amount = 1): 360 | ''' Sample a fixed amount of organisms from the population, 361 | where the fitter an organism is, the more it's likely 362 | to be chosen. 363 | 364 | INPUT 365 | (int) amount = 1: number of organisms to sample 366 | 367 | OUTPUT 368 | (ndarray) sample of population 369 | ''' 370 | 371 | # Convert fitness values into probabilities 372 | fitnesses = self.get_fitnesses() 373 | probs = np.divide(fitnesses, sum(fitnesses)) 374 | 375 | # Copy the population to a new variable 376 | pop = self.population 377 | 378 | # Sort the probabilities in descending order and sort pop (not 379 | # the actual population) in the same way 380 | sorted_idx = np.argsort(probs)[::-1] 381 | probs = probs[sorted_idx] 382 | pop = pop[sorted_idx] 383 | 384 | # Get random numbers between 0 and 1 385 | indices = np.random.random(amount) 386 | 387 | for i in range(amount): 388 | # Find the index of the fitness value whose accumulated 389 | # sum exceeds the value of the i'th random number. 390 | fn = lambda x, y: (x[0], x[1] + y[1]) \ 391 | if x[1] + y[1] > indices[i] \ 392 | else (x[0] + y[0], x[1] + y[1]) 393 | (idx, _) = reduce(fn, map(lambda x: (1, x), probs)) 394 | indices[i] = idx - 1 395 | 396 | # Return the organisms indexed at the indices found above 397 | return pop[indices.astype(int)] 398 | 399 | def evolve(self, generations = 1, goal = None): 400 | ''' Evolve the population. 
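        Runs the select/breed/mutate loop for the given number of generations, updating self.fittest along the way, and returns a History object that can be used for inspection and plotting.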
401 | 402 | INPUT 403 | (int) generations = 1: number of generations to evolve 404 | (float) goal = None: stop when fitness is not below this value 405 | ''' 406 | 407 | history = History( 408 | population = self, 409 | generations = generations, 410 | memory = self.memory 411 | ) 412 | 413 | if self.progress_bars: 414 | gen_iter = trange(generations) 415 | gen_iter.set_description("Evolving population") 416 | else: 417 | gen_iter = range(generations) 418 | 419 | for gen in gen_iter: 420 | 421 | if goal and self.fittest.fitness >= goal: 422 | # Close tqdm iterator 423 | if self.progress_bars: 424 | gen_iter.close() 425 | 426 | # Truncate history for plotting 427 | history.generations = gen 428 | history.fitness_history = history.fitness_history[:gen, :] 429 | history.genome_history = history.genome_history[:gen, :] 430 | if history.memory == 'inf' or history.memory > gen: 431 | history.memory = gen 432 | 433 | self.logger.info('Reached goal, stopping evolution...') 434 | break 435 | 436 | # Compute and update fitness values 437 | self.update_fitness(history = history) 438 | fitnesses = self.get_fitnesses() 439 | 440 | self.logger.debug('Updating fitness values...') 441 | 442 | # Update the fittest organism 443 | if max(fitnesses) > self.fittest.fitness: 444 | self.fittest = self.population[np.argmax(fitnesses)] 445 | 446 | # Store current population in history 447 | history.add_entry(self, generation = gen) 448 | 449 | self.logger.debug("Fitness values: {}"\ 450 | .format(np.around(fitnesses, 2))) 451 | 452 | # Select elites 453 | elites_amt = np.ceil(self.size * self.elitism_rate).astype(int) 454 | if self.elitism_rate: 455 | elites = self.sample(amount = elites_amt) 456 | 457 | self.logger.debug("Elite pool, of size {}:"\ 458 | .format(elites_amt)) 459 | self.logger.debug(np.array([org.get_genome() 460 | for org in elites])) 461 | 462 | # Select breeders 463 | breeders_amt = max(2, np.ceil(self.size * self.breeding_rate)\ 464 | .astype(int)) 465 | breeders = self.sample(amount = breeders_amt) 466 | 467 | self.logger.debug("Breeding pool, of size {}:"\ 468 | .format(breeders_amt)) 469 | self.logger.debug(np.array([org.get_genome() for org in breeders])) 470 | self.logger.debug("Breeding...") 471 | 472 | # Breed until we reach the same size 473 | children_amt = self.size - elites_amt 474 | parents = np.random.choice(breeders, (self.size, 2)) 475 | children = np.array([parents[i, 0].breed(parents[i, 1]) 476 | for i in range(children_amt)]) 477 | 478 | # Select mutators 479 | mutators = np.less(np.random.random(children_amt), 480 | self.mutation_rate) 481 | 482 | self.logger.debug("Mutation pool, of size {}:"\ 483 | .format(children[mutators].size)) 484 | self.logger.debug(np.array([child.get_genome() for child in 485 | children[mutators]])) 486 | self.logger.debug("Mutating...") 487 | 488 | # Mutate the children 489 | for mutator in children[mutators]: 490 | mutator.mutate(mutation_factor = self.mutation_factor) 491 | 492 | # The children constitutes our new generation 493 | if self.elitism_rate: 494 | self.population = np.append(children, elites) 495 | else: 496 | self.population = children 497 | 498 | self.logger.debug("New population, of size {}:"\ 499 | .format(self.population.size)) 500 | self.logger.debug(self.get_genomes()) 501 | self.logger.debug("Mean fitness: {}".format(np.mean(fitnesses))) 502 | self.logger.debug("Std fitness: {}".format(np.std(fitnesses))) 503 | 504 | self.logger.info("Fittest so far, with fitness {}:"\ 505 | .format(self.fittest.fitness)) 506 | 
self.logger.info(self.fittest.get_genome()) 507 | 508 | gen_iter.close() 509 | 510 | if self.progress_bars >= 2: 511 | print("") 512 | 513 | return history 514 | 515 | class History(): 516 | ''' History of a population's evolution. 517 | 518 | INPUT 519 | (Population) population 520 | (int) generations 521 | (int or string) memory = 'inf': how many generations the 522 | population can look back to avoid redundant 523 | fitness computations, where 'inf' means unlimited 524 | memory. 525 | ''' 526 | 527 | def __init__(self, population, generations, memory = 'inf'): 528 | 529 | if memory == 'inf' or memory > generations: 530 | self.memory = min(int(1e5), generations) 531 | else: 532 | self.memory = memory 533 | 534 | pop_size = population.size 535 | self.generations = generations 536 | self.genome_history = np.empty((self.memory, pop_size), dict) 537 | self.fitness_history = np.empty((self.memory, pop_size), float) 538 | self.population = population 539 | self.fittest = {'genome': None, 'fitness': 0} 540 | 541 | def add_entry(self, population, generation): 542 | ''' Add population to the history. 543 | 544 | INPUT 545 | (Population) population 546 | (int) generation 547 | ''' 548 | 549 | genomes = population.get_genomes() 550 | fitnesses = population.get_fitnesses() 551 | 552 | if max(fitnesses) > self.fittest['fitness']: 553 | self.fittest['genome'] = genomes[np.argmax(fitnesses)] 554 | self.fittest['fitness'] = max(fitnesses) 555 | 556 | self.genome_history = np.roll(self.genome_history, 1, axis = 0) 557 | self.genome_history[0, :] = genomes 558 | 559 | self.fitness_history = np.roll(self.fitness_history, 1, axis = 0) 560 | self.fitness_history[0, :] = fitnesses 561 | 562 | return self 563 | 564 | def plot(self, title = 'Fitness by generation', xlabel = 'Generation', 565 | ylabel = 'Fitness', file_name = None, show_plot = True, 566 | show_max = True, only_show_max = False, discrete = False, 567 | legend = True, legend_location = 'lower right'): 568 | ''' Plot the fitness values. 569 | 570 | INPUT 571 | (string) title = 'Fitness by generation' 572 | (string) xlabel = 'Generations': label on the x-axis 573 | (string) ylabel = 'Fitness': label on the y-axis 574 | (string) file_name = None: file name to save the plot to 575 | (bool) show_plot = True: show plot as a pop-up 576 | (bool) show_max = True: show max value line on plot 577 | (bool) only_show_max = False: Hide the plot with means and stds 578 | (bool) discrete = False: make the error plot discrete 579 | (bool) legend = True: show legend 580 | (string or int) legend_location = 'lower right': legend location, 581 | either as e.g. 
'lower right' or as an integer
582 |             between 0 and 10
583 |         '''
584 | 
585 |         fits = self.fitness_history[::-1]
586 |         gens = self.generations
587 |         mem = self.memory
588 |         means = np.mean(fits, axis = 1)
589 |         stds = np.std(fits, axis = 1)
590 |         xs = np.arange(mem)
591 | 
592 |         if gens == 1:
593 |             discrete = True
594 | 
595 |         if show_max or only_show_max:
596 |             maxs = np.array([np.max(fits[x, :]) for x in xs])
597 | 
598 |         plt.style.use("ggplot")
599 |         plt.figure()
600 |         plt.xlim(gens - mem - 1, gens)
601 |         plt.title(title)
602 |         plt.xlabel(xlabel)
603 |         plt.ylabel(ylabel)
604 | 
605 |         xs_shift = xs + (gens - mem)
606 | 
607 |         if show_max:
608 |             plt.plot(xs_shift, maxs[xs], '--', color = 'blue', label = 'max')
609 | 
610 |         if discrete and not only_show_max:
611 |             plt.errorbar(xs_shift, means[xs], stds[xs], fmt = 'ok',
612 |                 label = 'mean and std')
613 |         elif not only_show_max:
614 |             plt.plot(xs_shift, means[xs], '-', color = 'black', label = 'mean')
615 |             plt.fill_between(
616 |                 xs_shift,
617 |                 means[xs] - stds[xs],
618 |                 means[xs] + stds[xs],
619 |                 alpha = 0.2,
620 |                 color = 'gray',
621 |                 label = 'std'
622 |             )
623 | 
624 |         if legend:
625 |             plt.legend(loc = legend_location)
626 | 
627 |         if file_name:
628 |             plt.savefig(file_name)
629 | 
630 |         if show_plot:
631 |             plt.show()
632 | 
633 | 
634 | # Entry point for the 'core' console script defined in setup.py
635 | def main():
636 |     pass
--------------------------------------------------------------------------------