├── .gitignore ├── Chapter10 ├── LICENSE ├── README.md ├── configurations │ ├── es_atari_config.json │ ├── es_gym_config.json │ ├── ga_atari_config.json │ └── rs_atari_config.json ├── display.py ├── es.py ├── ga.py ├── gym_tensorflow │ ├── Makefile │ ├── README.md │ ├── __init__.py │ ├── atari │ │ ├── README.md │ │ ├── __init__.py │ │ ├── tf_atari.cpp │ │ └── tf_atari.py │ ├── maze │ │ ├── __init__.py │ │ ├── hard_maze.txt │ │ ├── hard_maze.txt.npy │ │ ├── maze.h │ │ ├── tf_maze.cpp │ │ └── tf_maze.py │ ├── ops │ │ ├── __init__.py │ │ └── indexedmatmul.cpp │ ├── tf_env.cpp │ ├── tf_env.h │ ├── tf_env.py │ └── wrappers │ │ ├── __init__.py │ │ └── stack_frames.py ├── neuroevolution │ ├── __init__.py │ ├── concurrent_worker.py │ ├── distributed_helpers.py │ ├── helper.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batchnorm.py │ │ ├── dqn.py │ │ ├── dqn_xavier.py │ │ └── simple.py │ ├── optimizers.py │ └── tf_util.py ├── tabular_logger.py └── utils.py ├── Chapter3 ├── visualize.py ├── xor_config.ini └── xor_experiment.py ├── Chapter4 ├── cart_pole.py ├── cart_two_pole.py ├── single_pole_config.ini ├── single_pole_experiment.py ├── two_pole_markov_config.ini ├── two_pole_markov_experiment.py ├── utils.py └── visualize.py ├── Chapter5 ├── agent.py ├── geometry.py ├── hard_maze.txt ├── maze_config.ini ├── maze_environment.py ├── maze_experiment.py ├── medium_maze.txt ├── utils.py └── visualize.py ├── Chapter6 ├── agent.py ├── geometry.py ├── hard_maze.txt ├── maze_config.ini ├── maze_environment.py ├── maze_experiment.py ├── maze_experiment_multineat.py ├── medium_maze.txt ├── novelty_archive.py ├── utils.py └── visualize.py ├── Chapter7 ├── utils.py ├── vd_environment.py ├── vd_experiment_multineat.py └── visualize.py ├── Chapter8 ├── retina_environment.py ├── retina_experiment.py ├── utils.py └── visualize.py ├── Chapter9 ├── agent.py ├── geometry.py ├── hard_maze.txt ├── maze_environment.py ├── maze_experiment_safe.py ├── medium_maze.txt ├── 
novelty_archive.py ├── utils.py └── visualize.py ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # The data directory 2 | data/ 3 | 4 | # The output directory 5 | out*/ 6 | 7 | # MacOS specific 8 | .DS_Store 9 | 10 | # VS Code specific 11 | .vscode 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # SageMath parsed files 94 | *.sage.py 95 | 96 | # Environments 97 | .env 98 | .venv 99 | env/ 100 | venv/ 101 | ENV/ 102 | env.bak/ 103 | venv.bak/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 
113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | -------------------------------------------------------------------------------- /Chapter10/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Iaroslav Omelianenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | deep-ne-gpu includes parts of: 24 | 25 | deep-neuroevolution 26 | Copyright (c) 2018 Uber Technologies, Inc. 
27 | 28 | Permission is hereby granted, free of charge, to any person obtaining a copy 29 | of this software and associated documentation files (the "Software"), to deal 30 | in the Software without restriction, including without limitation the rights 31 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 32 | copies of the Software, and to permit persons to whom the Software is 33 | furnished to do so, subject to the following conditions: 34 | 35 | The above copyright notice and this permission notice shall be included in 36 | all copies or substantial portions of the Software. 37 | 38 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 39 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 40 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 41 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 42 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 43 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 44 | THE SOFTWARE. 45 | 46 | deep-neuroevolution includes: 47 | 48 | evolution-strategies-starter 49 | Copyright (c) 2016 OpenAI (http://openai.com) 50 | 51 | Permission is hereby granted, free of charge, to any person obtaining a copy 52 | of this software and associated documentation files (the "Software"), to deal 53 | in the Software without restriction, including without limitation the rights 54 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 55 | copies of the Software, and to permit persons to whom the Software is 56 | furnished to do so, subject to the following conditions: 57 | 58 | The above copyright notice and this permission notice shall be included in 59 | all copies or substantial portions of the Software. 
60 | 61 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 62 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 63 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 64 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 65 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 66 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 67 | THE SOFTWARE. -------------------------------------------------------------------------------- /Chapter10/README.md: -------------------------------------------------------------------------------- 1 | # AI Labs - GPU Neuroevolution 2 | This folder contains preliminary work done to implement GPU-based deep neuroevolution. 3 | For problems like Atari where the policy evaluation takes a considerable amount of time it is advantageous to make use of GPUs to evaluate the Neural Networks. This code shows how it is possible to run Atari simulations in parallel using the GPU in a way where we can evaluate neural networks in batches and have both CPU and GPU operating at the same time. 4 | 5 | This folder has code in prototype stage and still requires a lot of changes to optimize performance, maintainability, and testing. We welcome pull requests to this repo and have plans to improve it in the future. Although it can run on CPU-only, it is slower than our original implementation due to overhead. Once this implementation has matured we plan on distributing it as a package for easy installation. We included an implementation of the HardMaze, but the GA-NS implementation will be added later on. 
6 | 7 | ## Installation 8 | 9 | Clone the repository: 10 | 11 | ``` 12 | git clone https://github.com/yaricom/deep-neuroevolution.git 13 | ``` 14 | 15 | Create a python3 virtual environment using Anaconda: 16 | 17 | ``` 18 | conda create -n deep_ne python=3.5 19 | conda activate deep_ne 20 | ``` 21 | 22 | Install the current version of tensorflow-gpu: 23 | ``` 24 | conda install -c anaconda tensorflow-gpu 25 | ``` 26 | Follow instructions under ./gym_tensorflow/README on how to compile the optimized interfaces. 27 | 28 | To train GA on Atari just run: 29 | ``` 30 | python ga.py -c configurations/ga_atari_config.json -o out 31 | ``` 32 | Random search (It's a special case of GA where 0 individuals become parents): 33 | ``` 34 | python ga.py -c configurations/rs_atari_config.json -o out 35 | ``` 36 | 37 | Evolution Strategies: 38 | ``` 39 | python es.py -c configurations/es_atari_config.json -o out 40 | ``` 41 | 42 | Visualizing policies is possible if you install gym with `pip install gym` and run: 43 | ``` 44 | python -m neuroevolution.display 45 | ``` 46 | We currently have one example policy but more will be added in the future. 
47 | 48 | # Credentials 49 | 50 | Initial implementation created by the Uber AI lab and available at https://github.com/uber-research/deep-neuroevolution 51 | -------------------------------------------------------------------------------- /Chapter10/configurations/es_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "ModelVirtualBN", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 5000, 7 | "timesteps": 250e6, 8 | "episode_cutoff_mode": 5000, 9 | "return_proc_mode": "centered_rank", 10 | "l2coeff": 0.005, 11 | "mutation_power": 0.02, 12 | "optimizer": { 13 | "args": { 14 | "stepsize": 0.01 15 | }, 16 | "type": "adam" 17 | } 18 | } -------------------------------------------------------------------------------- /Chapter10/configurations/es_gym_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "gym.CartPole-v1", 3 | "model": "SimpleClassifier", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 5000, 7 | "timesteps": 250e6, 8 | "episode_cutoff_mode": 5000, 9 | "return_proc_mode": "centered_rank", 10 | "l2coeff": 0.005, 11 | "mutation_power": 0.02, 12 | "optimizer": { 13 | "args": { 14 | "stepsize": 0.01 15 | }, 16 | "type": "adam" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /Chapter10/configurations/ga_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "LargeModel", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 1000, 7 | "episode_cutoff_mode": 5000, 8 | "timesteps": 1.5e9, 9 | "validation_threshold": 10, 10 | "mutation_power": 0.002, 11 | "selection_threshold": 20 12 | } -------------------------------------------------------------------------------- 
/Chapter10/configurations/rs_atari_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "game": "frostbite", 3 | "model": "Model", 4 | "num_validation_episodes": 30, 5 | "num_test_episodes": 200, 6 | "population_size": 1000, 7 | "episode_cutoff_mode": 5000, 8 | "timesteps": 1.5e9, 9 | "validation_threshold": 10, 10 | "mutation_power": 0.002, 11 | "selection_threshold": 0 12 | } -------------------------------------------------------------------------------- /Chapter10/display.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
18 | """ 19 | 20 | import time 21 | import os 22 | import tensorflow as tf 23 | import numpy as np 24 | import gym 25 | import tabular_logger as tlogger 26 | import gym_tensorflow 27 | from neuroevolution.helper import SharedNoiseTable, make_schedule 28 | from PIL import Image 29 | from neuroevolution.models import LargeModel as Model 30 | game = 'frostbite' 31 | seeds = [157822315, [94987453, 0.002], [61990409, 0.002], [132377995, 0.002], [126312029, 0.002], [93915238, 0.002], [204022435, 0.002], [171168059, 0.002], [95856784, 0.002], [205934773, 0.002], [213365167, 0.002], [56944619, 0.002], [130129199, 0.002], [97653261, 0.002], [218695493, 0.002], [28585353, 0.002], [88260057, 0.002], [64456571, 0.002], [98751337, 0.002], [87617692, 0.002], [125110843, 0.002], [152209542, 0.002], [23777454, 0.002], [118715026, 0.002], [99788230, 0.002], [75625082, 0.002], [159513938, 0.002], [49484131, 0.002], [212507985, 0.002], [67766136, 0.002], [105190117, 0.002], [33338001, 0.002], [91160896, 0.002], [95386222, 0.002], [45411355, 0.002], [35330570, 0.002], [52225337, 0.002], [165107533, 0.002], [168561753, 0.002], [227083606, 0.002], [214214551, 0.002], [149424426, 0.002], [227684991, 0.002], [35940913, 0.002], [37453011, 0.002], [47170722, 0.002], [92046206, 0.002], [133306577, 0.002], [241955088, 0.002], [41258860, 0.002], [124242631, 0.002], [238064391, 0.002], [46235460, 0.002], [202890570, 0.002], [162416334, 0.002], [78853643, 0.002], [46547745, 0.002], [42268049, 0.002], [202162794, 0.002], [7635563, 0.002], [157757570, 0.002], [237930574, 0.002], [136918954, 0.002], [74723244, 0.002], [2358695, 0.002], [186515303, 0.002], [123109724, 0.002], [109957783, 0.002], [139233438, 0.002], [149436411, 0.002], [8346966, 0.002], [50835889, 0.002], [88695187, 0.002], [211719991, 0.002], [7283371, 0.002], [187750894, 0.002], [154620515, 0.002], [1567632, 0.002], [152631412, 0.002], [38971002, 0.002], [210627707, 0.002], [13311476, 0.002], [157351125, 0.002], [141462178, 0.002], 
[77606659, 0.002], [22653392, 0.002], [126720849, 0.002], [103503555, 0.002], [138904418, 0.002], [35877598, 0.002], [144448095, 0.002], [143072590, 0.002], [22256859, 0.002], [136674067, 0.002], [54962461, 0.002], [204771663, 0.002], [126594400, 0.002], [143362648, 0.002], [160053218, 0.002], [36505, 0.002], [234586339, 0.002], [8689386, 0.002], [65244214, 0.002], [39252740, 0.002], [64390487, 0.002], [191138142, 0.002], [114738239, 0.002], [184992944, 0.002], [178848289, 0.002], [685758, 0.002], [3946484, 0.002], [9120869, 0.002], [77891561, 0.002], [21685013, 0.002], [38580333, 0.002], [116730475, 0.002], [235053809, 0.002], [227204700, 0.002], [3795447, 0.002], [81764102, 0.002], [166797679, 0.002], [243641394, 0.002], [100513946, 0.002], [99241225, 0.002], [52990995, 0.002], [184304246, 0.002], [46027535, 0.002], [231862778, 0.002], [213237946, 0.002], [227474205, 0.002], [158534897, 0.002], [121346355, 0.002], [63714427, 0.002], [243338063, 0.002], [77546631, 0.002], [178281288, 0.002], [220770449, 0.002], [145968980, 0.002], [29894061, 0.002], [127519509, 0.002], [77760912, 0.002], [61219600, 0.002], [161595533, 0.002], [221480691, 0.002], [206642829, 0.002], [215721178, 0.002], [229794882, 0.002], [31325752, 0.002], [224755578, 0.002], [21220559, 0.002], [171553173, 0.002], [145243964, 0.002], [210190857, 0.002], [150615695, 0.002], [86169422, 0.002], [68813648, 0.002], [107799990, 0.002], [55892198, 0.002], [2389691, 0.002], [181991246, 0.002], [226957512, 0.002], [17909594, 0.002], [54447626, 0.002], [43646598, 0.002], [235297721, 0.002], [193625953, 0.002], [102087733, 0.002], [90041055, 0.002], [76368893, 0.002], [142359520, 0.002], [46114189, 0.002], [80413082, 0.002], [215509948, 0.002], [224115155, 0.002], [85931155, 0.002], [178125002, 0.002], [212925031, 0.002], [18694268, 0.002], [46238885, 0.002], [84948476, 0.002], [8914603, 0.002], [167599874, 0.002], [187802420, 0.002], [170522346, 0.002], [219794607, 0.002], [138665107, 0.002], [157723712, 
0.002], [198373356, 0.002], [17916877, 0.002], [149620586, 0.002], [171324275, 0.002], [33574148, 0.002], [438145, 0.002], [30578731, 0.002], [111771703, 0.002], [215725985, 0.002], [226048734, 0.002], [159650006, 0.002], [94154665, 0.002], [33938839, 0.002], [147816297, 0.002], [55752950, 0.002], [217323253, 0.002], [5963619, 0.002], [236473711, 0.002], [133530026, 0.002], [31605617, 0.002], [176598781, 0.002], [117344669, 0.002], [236439401, 0.002], [232750544, 0.002], [126125063, 0.002], [20500196, 0.002], [156839548, 0.002], [17602010, 0.002], [92471651, 0.002], [92360499, 0.002], [7769454, 0.002], [136213779, 0.002], [118114719, 0.002], [105398561, 0.002], [131436589, 0.002], [202193758, 0.002], [60385109, 0.002], [179870277, 0.002], [239557330, 0.002], [187854329, 0.002], [45710618, 0.002], [186771058, 0.002], [189689540, 0.002], [212594973, 0.002], [13689343, 0.002], [20117487, 0.002], [141338221, 0.002], [174004389, 0.002], [49948893, 0.002], [121246710, 0.002], [80925692, 0.002], [39571786, 0.002], [181570823, 0.002], [181260602, 0.002], [179666712, 0.002], [157724327, 0.002], [142152925, 0.002], [72763175, 0.002], [124426367, 0.002], [95423105, 0.002], [142795024, 0.002], [149481164, 0.002], [156867918, 0.002], [193305436, 0.002], [225062969, 0.002], [51384529, 0.002], [153485310, 0.002], [186021802, 0.002], [126854908, 0.002], [57495392, 0.002], [93191535, 0.002], [123655689, 0.002], [204221002, 0.002], [147627388, 0.002], [100922671, 0.002], [43042488, 0.002], [109793369, 0.002], [86175815, 0.002], [103521806, 0.002]] 32 | def main(): 33 | print('Number of mutations:', len(seeds)) 34 | 35 | env = gym_tensorflow.make(game, 1) 36 | 37 | model = Model() 38 | obs_op = env.observation() 39 | reset_op = env.reset() 40 | 41 | action_op = model.make_net(tf.expand_dims(obs_op, axis=1), env.action_space, batch_size=1) 42 | if env.discrete_action: 43 | action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32) 44 | rew_op, done_op = env.step(action_op) 45 | 
46 | from gym.envs.classic_control import rendering 47 | viewer = rendering.SimpleImageViewer() 48 | if hasattr(env.unwrapped, 'render'): 49 | obs_op = env.unwrapped.render() 50 | def display_obs(im): 51 | im = im[0, 0, ...] 52 | 53 | viewer.imshow(im) 54 | else: 55 | def display_obs(im): 56 | im = im[0, :, :, -1] 57 | im = np.stack([im] * 3, axis=-1) 58 | im = (im * 255).astype(np.uint8) 59 | 60 | im = np.array(Image.fromarray(im).resize((256, 256), resample=Image.BILINEAR), dtype=np.uint8) 61 | viewer.imshow(im) 62 | 63 | with tf.Session() as sess: 64 | sess.run(tf.global_variables_initializer()) 65 | model.initialize() 66 | tlogger.info(model.description) 67 | 68 | noise = SharedNoiseTable() 69 | 70 | weights = model.compute_weights_from_seeds(noise, seeds) 71 | model.load(sess, 0, weights, seeds) 72 | 73 | sess.run(reset_op) 74 | display_obs(sess.run(obs_op)) 75 | 76 | total_rew = 0 77 | num_frames = 0 78 | while True: 79 | rew, done = sess.run([rew_op, done_op]) 80 | num_frames += 1 81 | total_rew += rew[0] 82 | display_obs(sess.run(obs_op)) 83 | time.sleep(4/60) 84 | if done[0]: 85 | print('Final reward: ', total_rew, 'after', num_frames, 'steps') 86 | break 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/Makefile: -------------------------------------------------------------------------------- 1 | USE_SDL := 0 2 | USE_ALE := 1 3 | USE_GPU := 1 4 | 5 | DIR := ./ 6 | 7 | TF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 8 | TF_LIB := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 9 | 10 | TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) 11 | TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) 12 | FLAGS := -std=c++11 -shared -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -I$(TF_INC) 
-I$(TF_INC)/external/nsync/public -L$(TF_LIB) -O2 13 | 14 | #TF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 15 | #TF_LIB := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 16 | #FLAGS := -std=c++11 -shared -fPIC -I$(TF_INC) -I$(TF_INC)/external/nsync/public -L$(TF_LIB) -D_GLIBCXX_USE_CXX11_ABI=0 -O2 17 | CXX := g++ 18 | LDFLAGS := -ltensorflow_framework 19 | 20 | SOURCES := $(DIR)/*.cpp $(DIR)/ops/*.cpp 21 | 22 | ifeq ($(USE_GPU), 1) 23 | FLAGS += -DGOOGLE_CUDA=1 24 | endif 25 | 26 | # This will likely need to be changed to suit your installation. 27 | ifeq ($(USE_ALE), 1) 28 | ALE := $(shell pwd)/atari/atari-py/atari_py/ale_interface 29 | FLAGS += -I$(ALE)/src -I$(ALE)/src/controllers -I$(ALE)/src/os_dependent -I$(ALE)/src/environment -I$(ALE)/src/external -L$(ALE)/build 30 | LDFLAGS += -lale 31 | SOURCES += $(DIR)/atari/*.cpp 32 | endif 33 | 34 | UNAME_S := $(shell uname -s) 35 | ifeq ($(UNAME_S),Linux) 36 | ifeq ($(USE_ALE),1) 37 | FLAGS+= -shared -Wl,-export-dynamic,-rpath,$(ALE)/build 38 | endif 39 | endif 40 | 41 | ifeq ($(UNAME_S),Darwin) 42 | FLAGS += -framework Cocoa 43 | endif 44 | 45 | ifeq ($(strip $(USE_SDL)), 1) 46 | DEFINES += -D__USE_SDL -DSOUND_SUPPORT 47 | FLAGS += $(shell sdl-config --cflags) 48 | LDFLAGS += $(shell sdl-config --libs) 49 | endif 50 | 51 | 52 | all: gym_tensorflow.so 53 | 54 | gym_tensorflow.so: 55 | $(CXX) $(FLAGS) $(SOURCES) $(LDFLAGS) -o gym_tensorflow.so 56 | 57 | clean: 58 | rm -rf gym_tensorflow.so 59 | 60 | remake: clean all 61 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/README.md: -------------------------------------------------------------------------------- 1 | # Instructions 2 | 3 | This module provides C++/TensorFlow interfaces that operate similarly to OpenAI's gym library. 
Since it was built to remove python from the critical portion of the code (simulations) it provides a significant speed up when operating in a multithreading environment. 4 | We currently provide 2 environments utilizing the interface, Atari and Hard Maze. 5 | 6 | ## Building Module with Atari Environment 7 | 8 | Our Atari support is licensed under GPLv2 and instructions on how to use it can be found under the `./atari` folder. 9 | 10 | Please follow these instructions to build the Atari environment first, and after that build this module by running `make` 11 | 12 | ## Building Module without Atari Environment 13 | 14 | To compile this module simply open the `Makefile` to adjust settings (USE_GPU, USE_ALE, etc), once configured run `make` to build from source. 15 | 16 | To disable Atari support, set the `USE_ALE` parameter in the `Makefile` to zero as follows: 17 | 18 | `USE_ALE := 0` -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from .tf_env import GymEnv 4 | from.import atari, maze 5 | from .wrappers import StackFramesWrapper 6 | 7 | def make(game, batch_size, *args, **kwargs): 8 | if game == 'maze': 9 | return maze.MazeEnv(batch_size) 10 | if game in atari.games: 11 | return StackFramesWrapper(atari.AtariEnv(game, batch_size, *args, **kwargs)) 12 | if game.startswith('gym.'): 13 | return GymEnv(game[4:], batch_size, *args, **kwargs) 14 | raise NotImplementedError(game) 15 | 16 | 17 | def get_ref_batch(make_env_f, sess, batch_size): 18 | env = make_env_f(1) 19 | assert env.discrete_action 20 | actions = tf.random_uniform((1,), minval=0, maxval=env.action_space, dtype=tf.int32) 21 | 22 | print("GTF: Actions", actions) 23 | 24 | reset_op = env.reset() 25 | obs_op = env.observation() 26 | rew_op, done_op=env.step(actions) 27 | 28 | 
sess.run(tf.global_variables_initializer()) 29 | 30 | print("GTF: Global variables initialized") 31 | 32 | sess.run(reset_op) 33 | 34 | print("GTF: Atari environment reset") 35 | 36 | ref_batch = [] 37 | while len(ref_batch) < batch_size: 38 | obs, done = sess.run([obs_op, done_op]) 39 | ref_batch.append(obs) 40 | if done.any(): 41 | sess.run(reset_op) 42 | 43 | return np.concatenate(ref_batch) 44 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/atari/README.md: -------------------------------------------------------------------------------- 1 | Notice 2 | ----------------- 3 | The ALE/atari-py is not part of deep-neuroevolution. 4 | This folder provides the instructions and sample code if you are interested in running the ALE. 5 | It depends on atari-py. atari-py is licensed under GPLv2. 6 | 7 | Instructions 8 | ----------------- 9 | 10 | The first thing to do is clone the atari-py repository into the `gym_tensorflow` folder using 11 | ``` 12 | git clone https://github.com/yaricom/atari-py.git 13 | ``` 14 | 15 | Now you can build the library with `cd ./atari-py && make`. 16 | 17 | ## Building GYM Tensorflow 18 | 19 | Building `cd ../..gym_tensorflow && make` should give you access to the Atari games as a set of TensorFlow ops. 
-------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/atari/__init__.py: -------------------------------------------------------------------------------- 1 | from..import tf_env 2 | 3 | from .tf_atari import * 4 | 5 | if not hasattr(tf_env.gym_tensorflow_module, 'atari_make'): 6 | class AtariEnv(TensorFlowEnv): 7 | def __init__(self, * args, ** kwargs): 8 | raise NotImplementedError("gym_tensorflow was not compiled with ALE support.") 9 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/atari/tf_atari.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tensorflow/core/framework/op_kernel.h" 5 | #include "tensorflow/core/framework/op.h" 6 | #include "tensorflow/core/framework/shape_inference.h" 7 | #include "tensorflow/core/framework/resource_mgr.h" 8 | #include "tensorflow/core/framework/resource_op_kernel.h" 9 | #include "tensorflow/core/lib/core/blocking_counter.h" 10 | #include "tensorflow/core/lib/core/threadpool.h" 11 | #include "tensorflow/core/platform/mutex.h" 12 | #include "../tf_env.h" 13 | 14 | #ifdef __USE_SDL 15 | #include 16 | #endif 17 | 18 | using namespace tensorflow; 19 | using namespace std; 20 | using namespace ale; 21 | 22 | #define RAM_SIZE (128) 23 | 24 | class AtariEnvironment : public Environment, public StepInterface 25 | { 26 | public: 27 | AtariEnvironment(int batch_size) 28 | { 29 | m_numNoops.resize(batch_size, 0); 30 | m_maxFrames.resize(batch_size, 100000); 31 | m_pInterfaces = new ALEInterface[batch_size]; 32 | } 33 | void load_rom(string game, int i) 34 | { 35 | assert(m_numNoops[i] == 0); 36 | m_numNoops[i] = 1; 37 | m_pInterfaces[i].setFloat("repeat_action_probability", 0.0f); 38 | m_pInterfaces[i].setInt("random_seed", 0); 39 | m_pInterfaces[i].loadROM(game); 40 | } 41 | virtual ~AtariEnvironment() { 42 | delete[] m_pInterfaces; 43 | 
} 44 | 45 | TensorShape get_action_shape() override 46 | { 47 | return TensorShape(); 48 | } 49 | 50 | TensorShape get_observation_shape() override 51 | { 52 | return TensorShape({2, 53 | static_cast(m_pInterfaces[0].getScreen().height()), 54 | static_cast(m_pInterfaces[0].getScreen().width())}); 55 | } 56 | 57 | void get_observation(uint8 *data, int idx) override 58 | { 59 | const auto ssize = m_pInterfaces[idx].getScreen().height() * m_pInterfaces[idx].getScreen().width(); 60 | memcpy(data, m_pInterfaces[idx].theOSystem->console().mediaSource().previousFrameBuffer(), ssize); 61 | memcpy(data + ssize, m_pInterfaces[idx].theOSystem->console().mediaSource().currentFrameBuffer(), ssize); 62 | } 63 | 64 | float step(int idx, const int* action) override 65 | { 66 | int rewards = 0; 67 | for (int i = 0; i < m_repeat; ++i) 68 | { 69 | assert(m_pInterfaces[idx].getMinimalActionSet().size() > (*action)); 70 | rewards += m_pInterfaces[idx].act(m_pInterfaces[idx].getMinimalActionSet()[*action]); 71 | if (is_done(idx)) 72 | break; 73 | } 74 | return rewards; 75 | } 76 | 77 | bool is_done(int idx) override 78 | { 79 | return m_pInterfaces[idx].game_over() || 80 | m_pInterfaces[idx].getEpisodeFrameNumber() - m_numNoops[idx] >= m_maxFrames[idx]; 81 | } 82 | 83 | void reset(int i, int numNoops=0, int maxFrames=100000) override 84 | { 85 | m_pInterfaces[i].reset_game(); 86 | if(numNoops > 0) 87 | { 88 | assert(m_pInterfaces[i].getMinimalActionSet()[0] == Action::PLAYER_A_NOOP); 89 | for (int s = 0; s < numNoops;++s) 90 | { 91 | m_pInterfaces[i].act(Action::PLAYER_A_NOOP); 92 | if (m_pInterfaces[i].game_over()) 93 | m_pInterfaces[i].reset_game(); 94 | } 95 | } 96 | // Check if FIRE is part of the minimal action set 97 | if (m_pInterfaces[i].getMinimalActionSet()[1] == Action::PLAYER_A_FIRE) 98 | { 99 | assert(m_pInterfaces[i].getMinimalActionSet().size() >= 3); 100 | int action = 1; 101 | step(i, &action); 102 | if (m_pInterfaces[i].game_over()) 103 | m_pInterfaces[i].reset_game(); 
104 | 105 | action = 2; 106 | step(i, &action); 107 | if (m_pInterfaces[i].game_over()) 108 | m_pInterfaces[i].reset_game(); 109 | } 110 | m_numNoops[i] = m_pInterfaces[i].getEpisodeFrameNumber(); 111 | m_maxFrames[i] = maxFrames; 112 | } 113 | 114 | void get_final_state(float *data, int idx) 115 | { 116 | auto ram = m_pInterfaces[idx].getRAM(); 117 | for (auto i = 0; i < RAM_SIZE; ++ i) 118 | data[i] = ram.get(i); 119 | } 120 | 121 | string DebugString() override { return "AtariEnvironment"; } 122 | private: 123 | ALEInterface* m_pInterfaces; 124 | bool m_initialized; 125 | int m_repeat = 4; 126 | std::vector m_numNoops; 127 | std::vector m_maxFrames; 128 | }; 129 | 130 | class AtariMakeOp : public EnvironmentMakeOp { 131 | public: 132 | explicit AtariMakeOp(OpKernelConstruction* context) : EnvironmentMakeOp(context) { 133 | OP_REQUIRES_OK(context, context->GetAttr("game", &m_game)); 134 | ale::Logger::setMode(ale::Logger::mode(2)); 135 | } 136 | 137 | private: 138 | virtual Status CreateResource(OpKernelContext* context, BaseEnvironment** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { 139 | AtariEnvironment* env = new AtariEnvironment(batch_size); 140 | if (env == nullptr) 141 | return errors::ResourceExhausted("Failed to allocate"); 142 | *ret = env; 143 | 144 | const auto thread_pool = context->device()->tensorflow_cpu_worker_threads(); 145 | const int num_threads = std::min(thread_pool->num_threads, batch_size); 146 | auto f = [&](int thread_id) { 147 | for(int b =thread_id; b < batch_size;b+=num_threads) 148 | { 149 | env->load_rom(m_game, b); 150 | } 151 | }; 152 | 153 | BlockingCounter counter(num_threads-1); 154 | for (int i = 1; i < num_threads; ++i) { 155 | thread_pool->workers->Schedule([&, i]() { 156 | f(i); 157 | counter.DecrementCount(); 158 | }); 159 | } 160 | f(0); 161 | counter.Wait(); 162 | return Status::OK(); 163 | } 164 | std::string m_game; 165 | }; 166 | 167 | REGISTER_OP("AtariMake") 168 | .Attr("batch_size: int") 169 | .Attr("game: string") 170 | 
.Attr("container: string = ''") 171 | .Attr("shared_name: string = ''") 172 | .Output("handle: resource") 173 | .SetIsStateful() 174 | .SetShapeFn(shape_inference::ScalarShape); 175 | 176 | REGISTER_KERNEL_BUILDER(Name("AtariMake").Device(DEVICE_CPU), AtariMakeOp); 177 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/maze/__init__.py: -------------------------------------------------------------------------------- 1 | from .tf_maze import * -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/maze/hard_maze.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 400 3 | 13 4 | 36 184 5 | 0 6 | 31 20 7 | 31 20 8 | 41 5 3 8 9 | 3 8 4 49 10 | 4 49 57 53 11 | 4 49 7 202 12 | 7 202 195 198 13 | 195 198 186 8 14 | 186 8 39 5 15 | 56 54 56 157 16 | 57 106 158 162 17 | 77 201 108 164 18 | 6 80 33 121 19 | 192 146 87 91 20 | 56 55 133 30 21 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/maze/hard_maze.txt.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Hands-on-Neuroevolution-with-Python/c85cf36e20f72dc586049c845428dee0397b8d79/Chapter10/gym_tensorflow/maze/hard_maze.txt.npy -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/maze/tf_maze.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2018 Uber Technologies, Inc. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
18 | */ 19 | 20 | #include 21 | #include 22 | #include "tensorflow/core/framework/op_kernel.h" 23 | #include "tensorflow/core/framework/op.h" 24 | #include "tensorflow/core/framework/shape_inference.h" 25 | #include "tensorflow/core/framework/resource_mgr.h" 26 | #include "tensorflow/core/framework/resource_op_kernel.h" 27 | #include "tensorflow/core/lib/core/blocking_counter.h" 28 | #include "tensorflow/core/lib/core/threadpool.h" 29 | #include "tensorflow/core/platform/mutex.h" 30 | #include "tf_env.h" 31 | #include "maze.h" 32 | 33 | #ifdef __USE_SDL 34 | #include 35 | #endif 36 | 37 | using namespace tensorflow; 38 | using namespace std; 39 | 40 | class MazeEnvironment : public Environment, public StepInterface 41 | { 42 | public: 43 | MazeEnvironment(int batch_size) 44 | { 45 | m_pInterfaces = new maze::Environment[batch_size]; 46 | m_numSteps.resize(batch_size, 0); 47 | } 48 | void load(std::string filename, int i) 49 | { 50 | m_pInterfaces[i].load_from(filename.c_str()); 51 | } 52 | virtual ~MazeEnvironment() { 53 | delete[] m_pInterfaces; 54 | } 55 | 56 | TensorShape get_observation_shape() override 57 | { 58 | return TensorShape({m_pInterfaces[0].get_sensor_size()}); 59 | } 60 | 61 | void get_observation(float *data, int idx) override 62 | { 63 | assert(idx < m_numSteps.size()); 64 | m_pInterfaces[idx].generate_neural_inputs(data); 65 | } 66 | 67 | void get_final_state(float *data, int idx) 68 | { 69 | assert(idx < m_numSteps.size()); 70 | data[0] = m_pInterfaces[idx].hero.location.x; 71 | data[1] = m_pInterfaces[idx].hero.location.y; 72 | } 73 | 74 | TensorShape get_action_shape() override { 75 | return TensorShape({2}); 76 | } 77 | 78 | float step(int idx, const float* action) override { 79 | assert(idx < m_numSteps.size()); 80 | m_pInterfaces[idx].interpret_outputs(float(action[0]) + 0.5, 0.5 + float(action[1])); 81 | m_pInterfaces[idx].Update(); 82 | m_numSteps[idx] += 1; 83 | if (is_done(idx)) 84 | { 85 | return 
-m_pInterfaces[idx].distance_to_target(); 86 | } 87 | return 0.0f; 88 | } 89 | 90 | bool is_done(int idx) override { 91 | assert(idx < m_numSteps.size()); 92 | return m_numSteps[idx] >= 400; 93 | } 94 | 95 | void reset(int i, int numNoops=0, int maxFrames=100000) override { 96 | m_pInterfaces[i].reset(); 97 | m_numSteps[i] = 0; 98 | } 99 | 100 | string DebugString() override { return "MazeEnvironment"; } 101 | public: 102 | maze::Environment* m_pInterfaces; 103 | std::vector m_numSteps; 104 | }; 105 | 106 | class MazeMakeOp : public EnvironmentMakeOp { 107 | public: 108 | explicit MazeMakeOp(OpKernelConstruction* context) : EnvironmentMakeOp(context) { 109 | OP_REQUIRES_OK(context, context->GetAttr("filename", &m_filename)); 110 | } 111 | 112 | private: 113 | virtual Status CreateResource(OpKernelContext* context, BaseEnvironment** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { 114 | MazeEnvironment* env = new MazeEnvironment(batch_size); 115 | if (env == nullptr) 116 | return errors::ResourceExhausted("Failed to allocate"); 117 | *ret = env; 118 | 119 | const auto thread_pool = context->device()->tensorflow_cpu_worker_threads(); 120 | const int num_threads = std::min(thread_pool->num_threads, batch_size); 121 | auto f = [&](int thread_id) { 122 | for(int b =thread_id; b < batch_size;b+=num_threads) 123 | { 124 | env->load(m_filename, b); 125 | } 126 | }; 127 | 128 | BlockingCounter counter(num_threads-1); 129 | for (int i = 1; i < num_threads; ++i) { 130 | thread_pool->workers->Schedule([&, i]() { 131 | f(i); 132 | counter.DecrementCount(); 133 | }); 134 | } 135 | f(0); 136 | counter.Wait(); 137 | return Status::OK(); 138 | } 139 | std::string m_filename; 140 | }; 141 | 142 | REGISTER_OP("MazeMake") 143 | .Attr("batch_size: int") 144 | .Attr("filename: string") 145 | .Attr("container: string = ''") 146 | .Attr("shared_name: string = ''") 147 | .Output("handle: resource") 148 | .SetIsStateful() 149 | .SetShapeFn(shape_inference::ScalarShape); 150 | 151 | 
REGISTER_KERNEL_BUILDER(Name("MazeMake").Device(DEVICE_CPU), MazeMakeOp); 152 | 153 | 154 | class MazeFinalStateOp : public OpKernel { 155 | public: 156 | explicit MazeFinalStateOp(OpKernelConstruction* context) : OpKernel(context) { 157 | } 158 | void Compute(OpKernelContext* context) override { 159 | const Tensor &indices_tensor = context->input(1); 160 | auto indices_flat = indices_tensor.flat(); 161 | 162 | const int m_numInterfaces = indices_tensor.NumElements(); 163 | 164 | Tensor* position_tensor = NULL; 165 | OP_REQUIRES_OK(context, context->allocate_output(0, 166 | TensorShape({static_cast(m_numInterfaces), 2}), 167 | &position_tensor)); 168 | auto position_flat = position_tensor->flat(); 169 | if(m_numInterfaces > 0) 170 | { 171 | BaseEnvironment *tmp_env; 172 | OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), &tmp_env)); 173 | 174 | auto env = dynamic_cast(tmp_env); 175 | OP_REQUIRES(context, env != nullptr, errors::Internal("BaseEnvironment is not of MazeEnvironment type.")); 176 | core::ScopedUnref s(tmp_env); 177 | 178 | const auto thread_pool = context->device()->tensorflow_cpu_worker_threads(); 179 | const int num_threads = std::min(thread_pool->num_threads, int(m_numInterfaces)); 180 | 181 | auto f = [&](int thread_id) { 182 | // Set all but the first element of the output tensor to 0. 
183 | for(int b =thread_id; b < m_numInterfaces;b+=num_threads) 184 | { 185 | env->get_final_state(&position_flat(b * 2), indices_flat(b)); 186 | } 187 | }; 188 | 189 | BlockingCounter counter(num_threads-1); 190 | for (int i = 1; i < num_threads; ++i) { 191 | thread_pool->workers->Schedule([&, i]() { 192 | f(i); 193 | counter.DecrementCount(); 194 | }); 195 | } 196 | f(0); 197 | counter.Wait(); 198 | } 199 | } 200 | }; 201 | 202 | REGISTER_OP("MazeFinalState") 203 | .Input("handle: resource") 204 | .Input("indices: int32") 205 | .Output("position: float") 206 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext *c) { 207 | ::tensorflow::shape_inference::ShapeHandle input; 208 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &input)); 209 | ::tensorflow::shape_inference::ShapeHandle output; 210 | TF_RETURN_IF_ERROR(c->Concatenate(input, c->MakeShape({2}), &output)); 211 | c->set_output(0, output); 212 | return Status::OK(); 213 | }); 214 | 215 | REGISTER_KERNEL_BUILDER(Name("MazeFinalState").Device(DEVICE_CPU), MazeFinalStateOp); 216 | 217 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/maze/tf_maze.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 
class MazeEnv(TensorFlowEnv):
    """Batch of maze simulations driven through the native gym_tensorflow ops.

    Args:
        batch_size: number of simulations created by the ``maze_make`` op.
        name: optional variable-scope name for the creation op.
        filename: maze description passed to ``maze_make``. Defaults to
            ``'hard_maze.txt'``, the value that used to be hard-coded.
    """

    def __init__(self, batch_size, name=None, filename='hard_maze.txt'):
        self.batch_size = batch_size
        self.obs_variable = None
        with tf.variable_scope(name, default_name='MazeInstance'):
            self.instances = gym_tensorflow_module.maze_make(batch_size=batch_size, filename=filename)

    def _all_indices_if_none(self, indices):
        """Return *indices*, or every batch index when *indices* is None."""
        if indices is None:
            return tf.range(self.batch_size)
        return indices

    @property
    def env_default_timestep_cutoff(self):
        """Default ``max_frames`` value used by ``reset``."""
        return 400

    @property
    def action_space(self):
        """Number of action components."""
        return 2

    @property
    def discrete_action(self):
        """Actions are continuous values, not discrete choices."""
        return False

    def step(self, action, indices=None, name=None):
        """Return the op that applies *action* to the selected simulations.

        ``indices=None`` selects the whole batch.
        """
        indices = self._all_indices_if_none(indices)
        with tf.variable_scope(name, default_name='MazeStep'):
            return gym_tensorflow_module.environment_step(self.instances, indices=indices, action=action)

    def reset(self, indices=None, max_frames=None, name=None):
        """Return the op that resets the selected simulations.

        A random value in [1, 30] per index is passed as ``noops`` and
        *max_frames* (default ``env_default_timestep_cutoff``) is forwarded to
        the generic ``environment_reset`` op.
        NOTE(review): how the native maze environment uses these two values is
        not visible from this module; verify against tf_maze.cpp.
        ``indices=None`` selects the whole batch.
        """
        indices = self._all_indices_if_none(indices)
        with tf.variable_scope(name, default_name='MazeReset'):
            noops = tf.random_uniform(tf.shape(indices), minval=1, maxval=31, dtype=tf.int32)
            if max_frames is None:
                max_frames = self.env_default_timestep_cutoff
            return gym_tensorflow_module.environment_reset(self.instances, indices, noops=noops, max_frames=max_frames)

    def observation(self, indices=None, name=None):
        """Return the current observations as a float32 tensor.

        The op output is declared as ``(None, 11)`` and a singleton axis is
        inserted at position 1 before returning.
        ``indices=None`` selects the whole batch.
        """
        indices = self._all_indices_if_none(indices)
        with tf.variable_scope(name, default_name='MazeObservation'):
            with tf.device('/cpu:0'):
                obs = gym_tensorflow_module.environment_observation(self.instances, indices, T=tf.float32)
            # 11 is the per-step observation width expected from the op.
            obs.set_shape((None,) + (11,))
            return tf.expand_dims(obs, axis=1)

    def final_state(self, indices, name=None):
        """Return the op producing the final state of the selected simulations."""
        with tf.variable_scope(name, default_name='MazeFinalState'):
            return gym_tensorflow_module.maze_final_state(self.instances, indices)

    def close(self):
        """Nothing to release on the Python side."""
        pass
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
18 | */ 19 | 20 | #ifndef TF_ENV_H_ 21 | #define TF_ENV_H_ 22 | #include 23 | #include "tensorflow/core/framework/resource_mgr.h" 24 | #include "tensorflow/core/framework/op_kernel.h" 25 | 26 | using namespace tensorflow; 27 | class BaseEnvironment : public ResourceBase 28 | { 29 | public: 30 | virtual bool is_done(int idx) = 0; 31 | virtual void reset(int i, int numNoops = 0, int maxFrames = 100000) = 0; 32 | }; 33 | 34 | template 35 | class StepInterface 36 | { 37 | public: 38 | virtual TensorShape get_action_shape() = 0; 39 | virtual float step(int idx, const T* action) = 0; 40 | }; 41 | 42 | template 43 | class Environment : public BaseEnvironment 44 | { 45 | public: 46 | virtual void get_observation(T* data, int idx) = 0; 47 | virtual TensorShape get_observation_shape() = 0; 48 | }; 49 | 50 | class EnvironmentMakeOp : public OpKernel { 51 | public: 52 | explicit EnvironmentMakeOp(OpKernelConstruction *context); 53 | 54 | // The resource is deleted from the resource manager only when it is private 55 | // to kernel. Ideally the resource should be deleted when it is no longer held 56 | // by anyone, but it would break backward compatibility. 57 | virtual ~EnvironmentMakeOp() override; 58 | 59 | void Compute(OpKernelContext *context) override LOCKS_EXCLUDED(mu_); 60 | 61 | protected: 62 | // Variables accessible from subclasses. 63 | tensorflow::mutex mu_; 64 | ContainerInfo cinfo_ GUARDED_BY(mu_); 65 | BaseEnvironment* resource_ GUARDED_BY(mu_) = nullptr; 66 | int batch_size; 67 | 68 | private: 69 | // During the first Compute(), resource is either created or looked up using 70 | // shared_name. In the latter case, the resource found should be verified if 71 | // it is compatible with this op's configuration. The verification may fail in 72 | // cases such as two graphs asking queues of the same shared name to have 73 | // inconsistent capacities. 
74 | virtual Status VerifyResource(BaseEnvironment *resource); 75 | 76 | PersistentTensor handle_ GUARDED_BY(mu_); 77 | 78 | virtual Status CreateResource(OpKernelContext *context, BaseEnvironment **ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) = 0; 79 | 80 | TF_DISALLOW_COPY_AND_ASSIGN(EnvironmentMakeOp); 81 | }; 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /Chapter10/gym_tensorflow/tf_env.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
class TensorFlowEnv(object):
    """Marker base class for environments whose methods return TensorFlow ops."""
    pass


class PythonEnv(TensorFlowEnv):
    """Adapter that exposes Python-side environments through ``tf.py_func``.

    Subclasses implement ``_step``, ``_reset`` and ``_obs`` and provide the
    ``batch_size`` and ``observation_space`` attributes read below.
    """

    def step(self, action, indices=None, name=None):
        """Return ``(reward, done)`` tensors for stepping the selected envs.

        ``indices=None`` selects the whole batch.
        """
        if indices is None:
            indices = np.arange(self.batch_size)
        with tf.variable_scope(name, default_name='PythonStep'):
            with tf.device('/cpu:0'):
                reward, done = tf.py_func(self._step, [action, indices], [tf.float32, tf.bool])
            # py_func outputs carry no static shape; one value per index.
            reward.set_shape(indices.shape)
            done.set_shape(indices.shape)
            return reward, done

    def _reset(self, indices):
        raise NotImplementedError()

    def reset(self, indices=None, max_frames=None, name=None):
        """Return the op that resets the selected envs (``max_frames`` is unused)."""
        if indices is None:
            indices = np.arange(self.batch_size)
        with tf.variable_scope(name, default_name='PythonReset'):
            return tf.py_func(self._reset, [indices], tf.int64).op

    def _step(self, action, indices):
        raise NotImplementedError()

    def _obs(self, indices):
        raise NotImplementedError()

    def observation(self, indices=None, name=None):
        """Return float32 observations with a singleton axis inserted at 1."""
        if indices is None:
            indices = np.arange(self.batch_size)
        with tf.variable_scope(name, default_name='PythonObservation'):
            with tf.device('/cpu:0'):
                obs = tf.py_func(self._obs, [indices], tf.float32)
            obs.set_shape(tuple(indices.shape) + self.observation_space)
            return tf.expand_dims(obs, axis=1)

    def final_state(self, indices, name=None):
        """Return a zero ``[len(indices), 2]`` float32 placeholder state."""
        with tf.variable_scope(name, default_name='PythonFinalState'):
            return tf.zeros([tf.shape(indices)[0], 2], dtype=tf.float32)

    @property
    def unwrapped(self):
        return self

    def close(self):
        pass


class GymEnv(PythonEnv):
    """Batch of independent OpenAI Gym environments created by name.

    Args:
        name: Gym environment id passed to ``gym.make``.
        batch_size: number of environment copies to create.
    """

    def __init__(self, name, batch_size):
        import gym
        self.env = [gym.make(name) for _ in range(batch_size)]
        self.obs = [None] * batch_size  # last observation per env, float32
        self.is_discrete_action = isinstance(self.env[0].action_space, gym.spaces.Discrete)
        self.batch_size = batch_size

    @property
    def action_space(self):
        """Number of actions (discrete) or flattened action size (otherwise)."""
        space = self.env[0].action_space
        if self.is_discrete_action:
            return space.n
        return int(np.prod(space.shape, dtype=np.int32))

    @property
    def observation_space(self):
        return self.env[0].observation_space.shape

    @property
    def discrete_action(self):
        return self.is_discrete_action

    @property
    def env_default_timestep_cutoff(self):
        return 1000

    def _step(self, action, indices):
        """Step env ``indices[k]`` with ``action[k]`` for every k.

        Stores each new observation (as float32) and returns
        ``(rewards, dones)`` as float32 / bool arrays with one entry per
        index. An empty *indices* yields two empty arrays.
        """
        assert self.discrete_action
        rewards = np.empty(len(indices), dtype=np.float32)
        # np.bool_ rather than the removed np.bool alias.
        dones = np.empty(len(indices), dtype=np.bool_)
        for slot, env_idx in enumerate(indices):
            obs, reward, done, _ = self.env[env_idx].step(action[slot])
            self.obs[env_idx] = obs.astype(np.float32)
            rewards[slot] = reward
            dones[slot] = done
        return rewards, dones

    def _reset(self, indices):
        """Reset the selected envs, caching their first observation; returns 0."""
        for i in indices:
            self.obs[i] = self.env[i].reset().astype(np.float32)
        return 0

    def _obs(self, indices):
        """Return the cached observations of the selected envs as float32."""
        return np.array([self.obs[i] for i in indices]).astype(np.float32)
class StackFramesWrapper(TensorFlowEnv):
    """Environment wrapper that keeps the last N observations stacked.

    The stack lives in a non-trainable ``tf.Variable`` and is concatenated
    along the last axis; it is refreshed by ``reset`` and ``step``.

    Args:
        env: wrapped environment providing ``observation_space``,
            ``observation``, ``step``, ``reset`` and friends.
        num_stacked_frames: number of observations kept in the stack.
    """

    def __init__(self, env, num_stacked_frames=4):
        self.env = env
        self.num_stacked_frames = num_stacked_frames
        self.obs_variable = tf.Variable(tf.zeros(shape=self.observation_space, dtype=tf.float32), trainable=False)

    @property
    def batch_size(self):
        return self.env.batch_size

    @property
    def env_default_timestep_cutoff(self):
        return self.env.env_default_timestep_cutoff

    @property
    def action_space(self):
        return self.env.action_space

    @property
    def observation_space(self):
        """Wrapped shape with the last axis multiplied by the stack depth."""
        inner = self.env.observation_space
        return inner[:-1] + (inner[-1] * self.num_stacked_frames, )

    @property
    def discrete_action(self):
        return self.env.discrete_action

    def stack_observation(self, indices, reset=False):
        """Append the newest observation to the stack of the selected envs.

        With ``reset=True`` the older slots are zero-filled; otherwise the
        oldest frame is dropped from the stored stack. Returns the
        ``scatter_update`` op that writes the new stack back.
        """
        obs = self.env.observation(indices)
        inner = self.env.observation_space
        channels = inner[-1]  # size of one frame along the stacking axis

        if reset:
            # Zero history for the (N - 1) older frames. Sized per frame so
            # the result also lines up when a frame has more than one channel.
            history_shape = (tf.shape(indices)[0],) + inner[1:-1] + (channels * (self.num_stacked_frames - 1), )
            history = tf.zeros(history_shape, dtype=tf.float32)
        else:
            history = tf.gather(self.obs_variable, indices)
            # Drop the oldest frame, i.e. its `channels` leading entries.
            history = tf.slice(history, (0, 0, 0, channels), (-1, -1, -1, -1))
        obs_batch = tf.concat([history, obs], axis=-1)
        return tf.scatter_update(self.obs_variable, indices, obs_batch)

    def step(self, action, indices=None, name=None):
        """Step the wrapped env, then refresh the stack before returning.

        Returns ``(reward, done)`` tensors that depend on the stack update.
        """
        if indices is None:
            indices = np.arange(self.batch_size)
        rew, done = self.env.step(action=action, indices=indices, name=name)
        with tf.control_dependencies([rew, done]):
            with tf.control_dependencies([self.stack_observation(indices)]):
                return tf.identity(rew), tf.identity(done)

    def reset(self, indices=None, max_frames=None, name=None):
        """Reset the wrapped env, then re-initialise the selected stacks.

        Returns the op of the stack update, which runs after the inner reset.
        """
        if indices is None:
            indices = np.arange(self.batch_size)
        reset_op = self.env.reset(indices=indices, max_frames=max_frames, name=name)
        with tf.control_dependencies([reset_op]):
            return self.stack_observation(indices, reset=True).op

    def observation(self, indices=None, name=None):
        """Return the stored stacks of the selected envs."""
        if indices is None:
            indices = np.arange(self.batch_size)
        return tf.gather(self.obs_variable, indices)

    def final_state(self, indices, name=None):
        return self.env.final_state(indices, name)

    @property
    def unwrapped(self):
        return self.env

    def close(self):
        return self.env.close()
class AsyncWorker(object):
    """Interface of a worker that can run several tasks concurrently."""

    @property
    def concurrent_tasks(self):
        """Iterable of sub-worker ids that can each hold one task."""
        raise NotImplementedError()

    def run_async(self, task_id, task, callback):
        """Start *task* on sub-worker *task_id*; report through *callback*."""
        raise NotImplementedError()


class WorkerHub(object):
    """Dispatches queued tasks to free (worker, sub-worker) slots.

    One thread pulls ``(task_id, task)`` pairs from *input_queue* and hands
    them to free slots; another forwards ``(task_id, result)`` pairs to
    *done_queue*. Call ``initialize()`` to start both threads and ``close()``
    to stop them.
    """

    def __init__(self, workers, input_queue, done_queue):
        self.done_buffer = Queue()
        self.workers = workers
        self.available_workers = Queue()
        self.done_queue = done_queue
        self._cache = {}  # (worker, subworker) -> id of the task it is running
        self.input_queue = input_queue

        for w in workers:
            for t in w.concurrent_tasks:
                self.available_workers.put((w, t))

        self.__initialize_handlers()

    def __initialize_handlers(self):
        """Create (but do not start) the input and output handler threads."""
        self._input_handler = threading.Thread(
            target=WorkerHub._handle_input,
            args=(self,)
        )
        self._input_handler._state = 0
        tlogger.info('WorkerHub: _input_handler initialized')

        self._output_handler = threading.Thread(
            target=WorkerHub._handle_output,
            args=(self,)
        )
        self._output_handler._state = 0
        tlogger.info('WorkerHub: _output_handler initialized')

    def worker_callback(self, worker, subworker, result):
        """Record *result* for the slot's task and mark the slot free again."""
        worker_task = (worker, subworker)
        if worker_task in self._cache:
            task_id = self._cache.pop(worker_task)
            self.done_buffer.put((task_id, result))
        else:
            tlogger.warn('WorkerHub: Worker task not found in cache', worker_task)
            tlogger.warn('WorkerHub: Subworker', subworker)
            tlogger.warn('WorkerHub: Unable to process result', result)

        # Return worker back
        self.available_workers.put(worker_task)

    @staticmethod
    def _handle_input(self):
        """Thread body: pair free slots with incoming tasks until a None arrives."""
        try:
            while True:
                worker_task = self.available_workers.get()
                if worker_task is None:
                    tlogger.info('WorkerHub._handle_input NO MORE WORKERS AWAILABLE')
                    break
                worker, subworker = worker_task

                task = self.input_queue.get()
                if task is None:
                    tlogger.info('WorkerHub._handle_input NO MORE INPUTS AWAILABLE')
                    break
                task_id, task = task
                # Remember which task the slot runs before starting it, so the
                # callback can always resolve the id.
                self._cache[worker_task] = task_id

                worker.run_async(subworker, task, callback=self.worker_callback)
        except Exception:
            tlogger.exception('WorkerHub._handle_input exception thrown')
            raise

    @staticmethod
    def _handle_output(self):
        """Thread body: forward finished results until a None arrives."""
        try:
            while True:
                result = self.done_buffer.get()
                if result is None:
                    tlogger.info('WorkerHub._handle_output done')
                    break
                self.done_queue.put(result)
        except Exception:
            tlogger.exception('WorkerHub._handle_output exception thrown')
            raise

    def initialize(self):
        """Start both handler threads."""
        self._input_handler.start()
        self._output_handler.start()

    def close(self):
        """Ask both handler threads to stop by sending None sentinels."""
        self.available_workers.put(None)
        self.input_queue.put(None)
        self.done_buffer.put(None)


class _JobCache(dict):
    """Job table accepted by ``ApplyResult`` on every Python version.

    Newer versions read the table from the ``_cache`` attribute of their first
    argument; older ones use the argument itself as the table. Exposing the
    dict as its own ``_cache`` satisfies both.
    """

    @property
    def _cache(self):
        return self


class AsyncTaskHub(object):
    """Producer side of a task queue that returns ``ApplyResult`` handles.

    Args:
        input_queue: queue receiving ``(job_id, task)`` pairs; a bounded
            ``Queue(64)`` is created when omitted.
        results_queue: optional queue of ``(job_id, result)`` pairs. When
            given, a daemon thread drains it into ``put`` until it reads None.
    """

    def __init__(self, input_queue=None, results_queue=None):
        if input_queue is None:
            input_queue = Queue(64)
        self.input_queue = input_queue
        self._cache = _JobCache()  # job id -> pending ApplyResult
        self.results_queue = None
        if results_queue is not None:
            self.results_queue = results_queue

            self._output_handler = threading.Thread(
                target=AsyncTaskHub._handle_output,
                args=(self,)
            )
            self._output_handler.daemon = True
            self._output_handler._state = 0
            self._output_handler.start()

    @staticmethod
    def _handle_output(self):
        """Thread body: complete pending results until a None arrives."""
        try:
            while True:
                result = self.results_queue.get()
                if result is None:
                    tlogger.info('AsyncTaskHub._handle_output done')
                    break
                self.put(result)
        except Exception:
            tlogger.exception('AsyncTaskHub._handle_output exception thrown')
            raise

    def run_async(self, task, callback=None, error_callback=None):
        """Queue *task* and return the ``ApplyResult`` that will hold its result."""
        result = ApplyResult(self._cache, callback, error_callback)
        self.input_queue.put((result._job, task))
        return result

    def put(self, result):
        """Complete the pending job named by a ``(job_id, value)`` pair."""
        job, result = result
        self._cache[job]._set(0, (True, result))
class SharedNoiseTable(object):
    """Fixed table of Gaussian noise stored in shared memory.

    Args:
        seed: seed of the ``RandomState`` used to fill the table.
        count: number of float32 values in the table. The default of
            250,000,000 values is one gigabyte of 32-bit numbers; sampling
            them as 64-bit values first temporarily needs twice that.
    """

    def __init__(self, seed=123, count=250000000):
        import ctypes, multiprocessing
        print('Sampling {} random numbers with seed {}'.format(count, seed))
        self._shared_mem = multiprocessing.Array(ctypes.c_float, count)
        # View the shared buffer as a NumPy array without copying it.
        self.noise = np.ctypeslib.as_array(self._shared_mem.get_obj())
        assert self.noise.dtype == np.float32
        self.noise[:] = np.random.RandomState(seed).randn(count)  # 64-bit to 32-bit conversion here
        print('Sampled {} bytes'.format(self.noise.size * 4))

    def get(self, i, dim):
        """Return the *dim* noise values starting at offset *i* (a view)."""
        return self.noise[i:i + dim]

    def sample_index(self, stream, dim):
        """Draw from *stream* a start offset that leaves room for *dim* values."""
        return stream.randint(0, len(self.noise) - dim + 1)


class ConstantSchedule(object):
    """Schedule that always yields the same value."""

    def __init__(self, value):
        self._value = value

    def value(self, **kwargs):
        """Return the constant, ignoring any keyword arguments."""
        return self._value


class LinearSchedule(object):
    """Linear interpolation from *initial_p* to *final_p*.

    Args:
        schedule: value of the progress field at which *final_p* is reached.
        final_p: value returned once progress reaches *schedule*.
        initial_p: value returned at progress 0.
        field: name of the keyword argument that carries the progress.
    """

    def __init__(self, schedule, final_p, initial_p, field):
        self.schedule = schedule
        self.field = field
        self.final_p = final_p
        self.initial_p = initial_p

    def value(self, **kwargs):
        """Return the interpolated value for ``kwargs[field]``.

        Progress beyond *schedule* is clamped, so the result stays at
        *final_p*.
        """
        assert self.field in kwargs, "Argument {} not provided to scheduler Available: {}".format(self.field, kwargs)
        fraction = min(float(kwargs[self.field]) / self.schedule, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)


class ExponentialSchedule(object):
    """Geometric interpolation from *initial_p* to *final_p*.

    Implemented as a ``LinearSchedule`` over the logarithms of the two end
    points, so both must be positive.
    """

    def __init__(self, initial_p, final_p, schedule, field):
        self.initial_p = initial_p
        self.final_p = final_p
        self.schedule = schedule
        self.field = field

        self.linear = LinearSchedule(
            initial_p=math.log(self.initial_p),
            final_p=math.log(self.final_p),
            schedule=self.schedule,
            field=self.field)

    def value(self, **kwargs):
        """Return the value for ``kwargs[field]``.

        The inner schedule is a plain object, not a callable, so its
        ``value`` method has to be used.
        """
        return math.exp(self.linear.value(**kwargs))


def make_schedule(args):
    """Build a schedule from a number or a configuration mapping.

    A number becomes a ``ConstantSchedule``. A mapping must contain a
    ``'type'`` entry naming a class defined in this module; every other entry
    is passed to that class as a keyword argument.
    """
    if isinstance(args, numbers.Number):
        return ConstantSchedule(args)
    # NOTE(review): 'type' is resolved against all module globals, so it must
    # come from trusted configuration.
    schedule_cls = globals()[args['type']]
    options = {key: value for key, value in args.items() if key != 'type'}
    return schedule_cls(**options)
-------------------------------------------------------------------------------- /Chapter10/neuroevolution/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .dqn_xavier import SmallDQN, LargeDQN 2 | from .dqn import Model, LargeModel 3 | from .batchnorm import ModelBN, ModelVirtualBN 4 | from .simple import LinearClassifier, SimpleClassifier 5 | -------------------------------------------------------------------------------- /Chapter10/neuroevolution/models/base.py: -------------------------------------------------------------------------------- 1 | __copyright__ = """ 2 | Copyright (c) 2018 Uber Technologies, Inc. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
# Chapter10/neuroevolution/models/base.py

import tensorflow as tf
import numpy as np
import math
import tabular_logger as tlogger
from gym_tensorflow.ops import indexed_matmul

class BaseModel(object):
    # Base class for networks whose variables carry a leading dimension of
    # size ``batch_size`` (see create_variable), with each slot along that
    # dimension loaded from a flat parameter vector (see make_weights / load).
    def __init__(self):
        self.nonlin = tf.nn.relu
        self.scope = None
        self.load_op = None
        self.indices = None
        self.variables = []
        self.description = ""

    @property
    def requires_ref_batch(self):
        # Subclasses return True when make_net must be given a ref_batch.
        return False

    def create_variable(self, name, shape, scale_by):
        # Create (or fetch, under variable reuse) a non-trainable variable of
        # shape (batch_size,) + shape. A variable that already carries a
        # ``scale_by`` attribute has been registered before and is not
        # registered again.
        var = tf.get_variable(name, (self.batch_size, ) + shape, trainable=False)
        if not hasattr(var, 'scale_by'):
            var.scale_by = scale_by
            self.variables.append(var)
        return var

    def create_weight_variable(self, name, shape, std):
        # scale_by = std * sqrt((shape[-2] + shape[-1]) * prod(shape[:-2]) / 2);
        # the value is later multiplied into the initial noise in
        # compute_weights_from_seeds.
        factor = (shape[-2] + shape[-1]) * np.prod(shape[:-2]) / 2
        scale_by = std * np.sqrt(factor)
        return self.create_variable(name, shape, scale_by)

    def create_bias_variable(self, name, shape):
        # Biases get scale_by 0.0, so their initial value from a seed is zero.
        return self.create_variable(name, shape, 0.0)

    def conv(self, x, kernel_size, num_outputs, name, stride=1, padding="SAME", bias=True, std=1.0):
        # Convolution expressed as image-patch extraction followed by a matmul
        # against the (batch_size, k*k*in_features, num_outputs) weight tensor.
        assert len(x.get_shape()) == 5 # Policies x Batch x Height x Width x Feature
        with tf.variable_scope(name):
            w = self.create_weight_variable('w', std=std,
                                            shape=(kernel_size, kernel_size, int(x.get_shape()[-1].value), num_outputs))
            w = tf.reshape(w, [-1, kernel_size *kernel_size * int(x.get_shape()[-1].value), num_outputs])

            # Merge the two leading dimensions so patch extraction sees a 4-D image batch.
            x_reshape = tf.reshape(x, (-1, x.get_shape()[2], x.get_shape()[3], x.get_shape()[4]))
            patches = tf.extract_image_patches(x_reshape, [1, kernel_size, kernel_size, 1], [1, stride, stride, 1], rates=[1, 1, 1, 1], padding=padding)
            final_shape = (tf.shape(x)[0], tf.shape(x)[1], patches.get_shape()[1].value, patches.get_shape()[2].value, num_outputs)
            patches = tf.reshape(patches, [tf.shape(x)[0],
                                           -1,
                                           kernel_size * kernel_size * x.get_shape()[-1].value])

            # NOTE(review): indexed_matmul is an external op; presumably it picks
            # the weight slice named by self.indices for each row - confirm.
            if self.indices is None:
                ret = tf.matmul(patches, w)
            else:
                ret = indexed_matmul(patches, w, self.indices)
            ret = tf.reshape(ret, final_shape)
            self.description += "Convolution layer {} with input shape {} and output shape {}\n".format(name, x.get_shape(), ret.get_shape())


            if bias:
                b = self.create_bias_variable('b', (1, 1, 1, num_outputs))
                if self.indices is not None:
                    b = tf.gather(b, self.indices)

                ret = ret + b
            return ret

    def dense(self, x, size, name, bias=True, std=1.0):
        # Fully connected layer; weights have shape (batch_size, in_features, size).
        with tf.variable_scope(name):
            w = self.create_weight_variable('w', std=std, shape=(x.get_shape()[-1].value, size))
            if self.indices is None:
                ret = tf.matmul(x, w)
            else:
                ret = indexed_matmul(x, w, self.indices)
            self.description += "Dense layer {} with input shape {} and output shape {}\n".format(name, x.get_shape(), ret.get_shape())
            if bias:
                b = self.create_bias_variable('b', (1, size, ))
                if self.indices is not None:
                    b = tf.gather(b, self.indices)

                return ret + b
            else:
                return ret

    def flattenallbut0(self, x):
        # Keep the first two dimensions and collapse every dimension from index 2 on.
        return tf.reshape(x, [-1, tf.shape(x)[1], np.prod(x.get_shape()[2:])])

    def make_net(self, x, num_actions, indices=None, batch_size=1, ref_batch=None):
        # Build the network under the 'Model' variable scope, record the build
        # settings on self, and return the output reshaped to (-1, num_actions).
        with tf.variable_scope('Model') as scope:
            self.description = "Input shape: {}. Number of actions: {}\n".format(x.get_shape(), num_actions)
            self.scope = scope
            self.num_actions = num_actions
            self.ref_batch = ref_batch
            assert self.requires_ref_batch == False or self.ref_batch is not None
            self.batch_size = batch_size
            self.indices = indices
            self.graph = tf.get_default_graph()
            a = self._make_net(x, num_actions)
        return tf.reshape(a, (-1, num_actions))

    def _make_net(self, x, num_actions):
        # Subclasses define the actual architecture here.
        raise NotImplementedError()

    def initialize(self):
        self.make_weights()

    def randomize(self, rs, noise):
        # Draw one noise-table offset and return (theta, seeds) for it.
        seeds = (noise.sample_index(rs, self.num_params), )
        return self.compute_weights_from_seeds(noise, seeds), seeds

    def compute_weights_from_seeds(self, noise, seeds, cache=None):
        # ``seeds`` is a tuple: an initial noise offset followed by
        # (offset, power) mutation pairs. ``cache`` entries are indexed as
        # (theta, seeds).
        if cache:
            cache_seeds = [o[1] for o in cache]
            if seeds in cache_seeds:
                return cache[cache_seeds.index(seeds)][0]
            elif seeds[:-1] in cache_seeds:
                # The parent is cached: apply only the last mutation.
                theta = cache[cache_seeds.index(seeds[:-1])][0]
                return self.compute_mutation(noise, theta, *seeds[-1])
            elif len(seeds) == 1:
                return self.compute_weights_from_seeds(noise, seeds)
            else:
                raise NotImplementedError()
        else:
            # No cache: rebuild from the initial offset and replay every mutation.
            idx = seeds[0]
            theta = noise.get(idx, self.num_params).copy() * self.scale_by

            for mutation in seeds[1:]:
                idx, power = mutation
                theta = self.compute_mutation(noise, theta, idx, power)
            return theta

    def mutate(self, parent, rs, noise, mutation_power):
        # ``parent`` is (theta, seeds); returns the child's (theta, seeds) with
        # one more (offset, power) pair appended.
        parent_theta, parent_seeds = parent
        idx = noise.sample_index(rs, self.num_params)
        seeds = parent_seeds + ((idx, mutation_power), )
        theta = self.compute_mutation(noise, parent_theta, idx, mutation_power)
        return theta, seeds

    def compute_mutation(self, noise, parent_theta, idx, mutation_power):
        return parent_theta + mutation_power * noise.get(idx, self.num_params)

    def load(self, sess, i, theta, seeds):
        # Write ``theta`` into slot ``i`` unless that slot already holds the
        # same seeds. Returns True when a load was actually run.
        if self.seeds[i] == seeds:
            return False
        sess.run(self.load_op, {self.theta: theta, self.theta_idx: i})
        self.seeds[i] = seeds
        return True

    def make_weights(self):
        # Compute the flat parameter layout from the registered variables and
        # build ``load_op``, which scatters a flat theta into one slot of each.
        self.num_params = 0
        self.batch_size = 0
        self.scale_by = []
        shapes = []
        for var in self.variables:
            shape = [v.value for v in var.get_shape()]
            shapes.append(shape)
            # shape[0] is the slot dimension; the rest belongs to one parameter set.
            self.num_params += np.prod(shape[1:])
            self.scale_by.append(var.scale_by * np.ones(np.prod(shape[1:]), dtype=np.float32))
            self.batch_size = shape[0]
        self.seeds = [None] * self.batch_size
        self.scale_by = np.concatenate(self.scale_by)
        assert self.scale_by.size == self.num_params

        # Make commit op
        assigns = []

        self.theta = tf.placeholder(tf.float32, [self.num_params])
        self.theta_idx = tf.placeholder(tf.int32, ())
        offset = 0
        assigns = []
        for (shape,v) in zip(shapes, self.variables):
            size = np.prod(shape[1:])
            # Consecutive slices of theta map to the variables in registration order.
            assigns.append(tf.scatter_update(v, self.theta_idx, tf.reshape(self.theta[offset:offset+size], shape[1:])))
            offset += size
        self.load_op = tf.group( * assigns)
        self.description += "Number of parameteres: {}".format(self.num_params)
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
# Chapter10/neuroevolution/models/batchnorm.py

from .dqn import Model
import tensorflow as tf


class ModelBN(Model):
    # Variant of Model whose nonlinearity is ReLU applied after batch
    # normalization plus a per-feature bias created in batchnorm().
    def __init__(self):
        super(ModelBN, self).__init__()
        self.nonlin = lambda x: tf.nn.relu(self.batchnorm(x))
    def batchnorm(self, x):
        # default_name lets TensorFlow uniquify the scope for each call.
        with tf.variable_scope(None, default_name='BatchNorm'):
            ret = tf.layers.batch_normalization(x, center=False, scale=False, training=True)

            # NOTE(review): this tests for rank 4, while ModelVirtualBN tests for
            # rank 5 and the conv() used by _make_net asserts rank-5 inputs -
            # confirm which bias shape is intended here.
            if len(x.get_shape()) == 4:
                b = self.create_bias_variable('b', (1, 1, ret.get_shape()[-1].value))
            else:
                b = self.create_bias_variable('b', (1, ret.get_shape()[-1].value))
            if self.indices is not None:
                b = tf.gather(b, self.indices)

            ret = ret + b
            return ret

    def _make_net(self, x, num_actions):
        # Layers are built with bias=False; the bias is added inside batchnorm().
        x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4, bias=False))
        x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2, bias=False))
        x = self.flattenallbut0(x)
        x = self.nonlin(self.dense(x, 256, 'fc', bias=False))

        ret = self.dense(x, num_actions, 'out', std=0.1)
        return ret


class ModelVirtualBN(Model):
    # Variant of Model that normalizes with statistics computed from a
    # reference batch and stored in per-slot 'mean' / 'var' variables.
    def __init__(self):
        super(ModelVirtualBN, self).__init__()
        self.is_ref_batch = False
        self.nonlin = lambda x: tf.nn.relu(self.batchnorm(x))
        self.device = None

    @property
    def requires_ref_batch(self):
        return True

    # This got a little out of hand, but it maintains a set of mean/var variables that are updated on load and used during inference.
    def batchnorm(self, x):
        with tf.variable_scope('BatchNorm'):
            # Both branches below create identically shaped variables.
            if len(x.get_shape()) == 5:
                vbn_mean = tf.get_variable('mean', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False)
                vbn_var = tf.get_variable('var', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False)
            else:
                vbn_mean = tf.get_variable('mean', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False)
                vbn_var = tf.get_variable('var', shape=(self.batch_size, x.get_shape()[-1].value), trainable=False)

            if self.is_ref_batch:
                # Reference pass: compute moments over every axis except the
                # first and last, then store them in the rows named by self.indices.
                mean, var = tf.nn.moments(x, list(range(1, len(x.get_shape())-1)))
                # ``var`` holds the reciprocal standard deviation from here on.
                var = 1 / tf.sqrt(var + 1e-3)
                mean, var = tf.scatter_update(vbn_mean, self.indices, mean), tf.scatter_update(vbn_var, self.indices, var)
            else:
                mean, var = vbn_mean, vbn_var
            # Insert singleton axes until the statistics have the same rank as x.
            while len(mean.get_shape()) < len(x.get_shape()):
                mean, var = tf.expand_dims(mean, 1), tf.expand_dims(var, 1)

            if self.indices is not None:
                mean, var = tf.gather(mean, self.indices), tf.gather(var, self.indices)

            ret = (x-mean) * var

            if len(x.get_shape()) == 5:
                b = self.create_bias_variable('b', (1, 1, 1, ret.get_shape()[-1].value))
            else:
                b = self.create_bias_variable('b', (1, ret.get_shape()[-1].value))
            if self.indices is not None:
                b = tf.gather(b, self.indices)
            return ret + b

    def _make_net(self, x, num_actions, ):
        # Each layer gets its own outer scope so the fixed-name 'BatchNorm'
        # scopes inside batchnorm() do not collide.
        with tf.variable_scope('layer1'):
            x = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4, bias=False))
        with tf.variable_scope('layer2'):
            x = self.nonlin(self.conv(x, name='conv2', num_outputs=32, kernel_size=4, stride=2, bias=False))
        x = self.flattenallbut0(x)
        with tf.variable_scope('layer3'):
            x = self.nonlin(self.dense(x, 256, 'fc', bias=False))

        with tf.variable_scope('layer4'):
            return self.dense(x, num_actions, 'out')

    def make_weights(self):
        # After the base setup, build a second copy of the network over the
        # reference batch (reusing the same variables) whose evaluation writes
        # the statistics for slot ``ref_batch_idx``.
        super(ModelVirtualBN, self).make_weights()
        self.ref_batch_idx = tf.placeholder(tf.int32, ())
        # Temporarily point self.indices at the placeholder, restored below.
        tmp = self.indices
        self.indices = [self.ref_batch_idx]
        with tf.device(self.device):
            with tf.variable_scope(self.scope, reuse=True):
                ref_batch = tf.stack([self.ref_batch])
                self.is_ref_batch = True
                self.ref_batch_assign = self._make_net(ref_batch, self.num_actions)
                self.is_ref_batch = False
        self.indices = tmp

    def load(self, sess, i, *args, **kwargs):
        # Load the weights for slot ``i``, then run the reference-batch pass for
        # that slot (it runs even when the base load was skipped).
        ret = super(ModelVirtualBN, self).load(sess, i, *args, **kwargs)
        sess.run(self.ref_batch_assign, {self.ref_batch_idx: i})
        return ret
# Chapter10/neuroevolution/models/dqn.py

import numpy as np
import tensorflow as tf
from .base import BaseModel


class Model(BaseModel):
    """Two-convolution network with weights scaled by 1/sqrt(inputs per unit)."""

    def create_weight_variable(self, name, shape, std):
        """Create a weight variable scaled by ``std / sqrt(prod(shape[:-1]))``."""
        inputs_per_unit = np.prod(shape[:-1])
        return self.create_variable(name, shape, std / np.sqrt(inputs_per_unit))

    def _make_net(self, x, num_actions):
        """Build conv1 -> conv2 -> fc(256) -> out(num_actions)."""
        conv1 = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4))
        conv2 = self.nonlin(self.conv(conv1, name='conv2', num_outputs=32, kernel_size=4, stride=2))
        flat = self.flattenallbut0(conv2)
        hidden = self.nonlin(self.dense(flat, 256, 'fc'))
        # The output layer uses a smaller std than the hidden layers.
        return self.dense(hidden, num_actions, 'out', std=0.1)


class LargeModel(Model):
    """Three-convolution variant of ``Model`` with a 512-unit dense layer."""

    def _make_net(self, x, num_actions):
        """Build conv1 -> conv2 -> conv3 -> fc(512) -> out(num_actions)."""
        conv1 = self.nonlin(self.conv(x, name='conv1', num_outputs=32, kernel_size=8, stride=4, std=1.0))
        conv2 = self.nonlin(self.conv(conv1, name='conv2', num_outputs=64, kernel_size=4, stride=2, std=1.0))
        conv3 = self.nonlin(self.conv(conv2, name='conv3', num_outputs=64, kernel_size=3, stride=1, std=1.0))
        flat = self.flattenallbut0(conv3)
        hidden = self.nonlin(self.dense(flat, 512, 'fc'))
        return self.dense(hidden, num_actions, 'out', std=0.1)
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
# Chapter10/neuroevolution/models/dqn_xavier.py

import tensorflow as tf
from .base import BaseModel


class SmallDQN(BaseModel):
    """Two-convolution network using ``BaseModel``'s weight scaling."""

    def _make_net(self, x, num_actions):
        """Build conv1 -> conv2 -> fc(256) -> out(num_actions)."""
        conv1 = self.nonlin(self.conv(x, name='conv1', num_outputs=16, kernel_size=8, stride=4))
        conv2 = self.nonlin(self.conv(conv1, name='conv2', num_outputs=32, kernel_size=4, stride=2))
        flat = self.flattenallbut0(conv2)
        hidden = self.nonlin(self.dense(flat, 256, 'fc'))
        return self.dense(hidden, num_actions, 'out', std=0.1)


class LargeDQN(BaseModel):
    """Three-convolution network using ``BaseModel``'s weight scaling."""

    def _make_net(self, x, num_actions):
        """Build conv1 -> conv2 -> conv3 -> fc(512) -> out(num_actions)."""
        conv1 = self.nonlin(self.conv(x, name='conv1', num_outputs=32, kernel_size=8, stride=4, std=1.0))
        conv2 = self.nonlin(self.conv(conv1, name='conv2', num_outputs=64, kernel_size=4, stride=2, std=1.0))
        conv3 = self.nonlin(self.conv(conv2, name='conv3', num_outputs=64, kernel_size=3, stride=1, std=1.0))
        flat = self.flattenallbut0(conv3)
        hidden = self.nonlin(self.dense(flat, 512, 'fc'))
        return self.dense(hidden, num_actions, 'out', std=0.1)
# Chapter10/neuroevolution/models/simple.py

from .dqn import Model


class LinearClassifier(Model):
    """Single dense layer applied to the flattened input."""

    def _make_net(self, x, num_actions):
        """Build flatten -> out(num_actions)."""
        flat = self.flattenallbut0(x)
        return self.dense(flat, num_actions, 'out')

class SimpleClassifier(Model):
    """Two 16-unit hidden layers followed by the output layer."""

    def _make_net(self, x, num_actions):
        """Build flatten -> fc1(16) -> fc2(16) -> out(num_actions)."""
        flat = self.flattenallbut0(x)
        hidden1 = self.nonlin(self.dense(flat, 16, 'fc1'))
        hidden2 = self.nonlin(self.dense(hidden1, 16, 'fc2'))
        return self.dense(hidden2, num_actions, 'out', std=0.1)
# Chapter10/neuroevolution/optimizers.py

import numpy as np


class Optimizer(object):
    """Base class holding a parameter vector and an update counter.

    Subclasses implement ``_compute_step`` to turn a gradient into a step.
    """

    def __init__(self, theta):
        self.theta = theta
        self.dim = len(self.theta)
        self.t = 0

    def update(self, globalg):
        """Apply one step and return ``(ratio, new_theta)``.

        ``ratio`` is the norm of the step divided by the norm of the
        parameters before the step.
        """
        self.t += 1
        delta = self._compute_step(globalg)
        before = self.theta
        ratio = np.linalg.norm(delta) / np.linalg.norm(before)
        self.theta = self.theta + delta
        return ratio, self.theta

    def _compute_step(self, globalg):
        raise NotImplementedError


class SGD(Optimizer):
    """Gradient descent with a momentum buffer."""

    def __init__(self, theta, stepsize, momentum=0.9):
        Optimizer.__init__(self, theta)
        self.v = np.zeros(self.dim, dtype=np.float32)
        self.stepsize = stepsize
        self.momentum = momentum

    def _compute_step(self, globalg):
        # NOTE: different from Open AI to match more common momentum implementations (e.g. Tensorflow)
        # original from OpenAI: self.v = self.momentum * self.v + (1 - self.momentum) * globalg
        self.v = self.momentum * self.v + globalg
        return -self.stepsize * self.v


class Adam(Optimizer):
    """Adam update with the bias correction folded into the step size."""

    def __init__(self, theta, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08):
        Optimizer.__init__(self, theta)
        self.stepsize = stepsize
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = np.zeros(self.dim, dtype=np.float32)
        self.v = np.zeros(self.dim, dtype=np.float32)

    def _compute_step(self, globalg):
        # self.t has already been incremented by update(), so it is >= 1 here.
        corrected_stepsize = self.stepsize * np.sqrt(1 - self.beta2 ** self.t) / (1 - self.beta1 ** self.t)
        self.m = self.beta1 * self.m + (1 - self.beta1) * globalg
        self.v = self.beta2 * self.v + (1 - self.beta2) * (globalg * globalg)
        return -corrected_stepsize * self.m / (np.sqrt(self.v) + self.epsilon)
# Chapter10/neuroevolution/tf_util.py

import tensorflow as tf
import numpy as np

import tabular_logger as tlogger

def get_available_gpus():
    """Return the names of the local devices whose ``device_type`` is 'GPU'."""
    from tensorflow.python.client import device_lib
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names


class WorkerSession(object):
    """Context manager that opens a ``tf.Session`` around a worker.

    Entering initializes variables and the worker and starts queue runners;
    leaving closes the worker, stops the coordinator and closes the session.
    """

    def __init__(self, worker):
        self._worker = worker

    def __enter__(self, *args, **kwargs):
        session = tf.Session(*args, **kwargs)
        self._sess = session
        session.run(tf.global_variables_initializer())
        self._worker.initialize(session)

        tlogger.info(self._worker.model.description)

        self.coord = tf.train.Coordinator()
        self.threads = tf.train.start_queue_runners(session, self.coord, start=True)

        return session

    def __exit__(self, exception_type, exception_value, traceback):
        # OutOfRangeError and StopIteration are treated like a clean exit.
        benign = [tf.errors.OutOfRangeError, StopIteration]
        suppress = exception_type in benign or exception_type is None
        try:
            self._worker.close()
            self.coord.request_stop()
            self.coord.join(self.threads)
            if self._sess is None:
                raise RuntimeError('Session is already closed.')
            self._sess.close()
        finally:
            self._sess = None
        # A true return value tells Python to swallow the exception.
        return suppress
# Chapter10/tabular_logger.py

import os
import sys
import traceback as traceback_module
import time
from collections import OrderedDict
import json
import numpy as np
import tempfile


DEBUG = 10
INFO = 20
WARN = 30
ERROR = 40

DISABLED = 50


def record_tabular(key, val):
    """
    Log a value of some diagnostic
    Call this once for each diagnostic quantity, each iteration
    """
    CURRENT().record_tabular(key, val)


def dump_tabular():
    """
    Write all of the diagnostics from the current iteration
    to every text output of the current logger, then clear them.
    """
    CURRENT().dump_tabular()


def __log(level, *args):
    """
    Write the sequence of args, converted to strings and joined by single
    spaces, to the console and output files (if you've configured an output file).
    """
    CURRENT().log(level, *[str(a) for a in args])


def debug(*args):
    __log(DEBUG, *args)


def info(*args):
    __log(INFO, *args)


log = info


def warn(*args):
    __log(WARN, *args)


def error(*args):
    __log(ERROR, *args)


def set_level(level):
    """
    Set logging threshold on current logger.
    """
    CURRENT().set_level(level)


def get_dir():
    """
    Get directory that log files are being written to.
    """
    return CURRENT().get_dir()


def get_expt_dir():
    sys.stderr.write("get_expt_dir() is Deprecated. Switch to get_dir()\n")
    return get_dir()

# ================================================================
# Backend
# ================================================================


class NumPyArangeEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy arrays to lists and NumPy scalars to Python numbers."""

    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.number):
            return obj.item()
        return json.JSONEncoder.default(self, obj)


__DEFAULT = None
def DEFAULT():
    """Return the default logger, creating a ``TabularLogger`` on first use."""
    global __DEFAULT
    if __DEFAULT is None:
        set_default(TabularLogger())
    return __DEFAULT

def set_default(logger, replace=True):
    """Install ``logger`` as the default when none is set or ``replace`` is true."""
    global __DEFAULT
    if __DEFAULT is None or replace:
        __DEFAULT = logger
    return __DEFAULT

__CURRENT = None
def CURRENT():
    """Return the current logger, falling back to the default logger."""
    global __CURRENT
    if __CURRENT is None:
        __CURRENT = DEFAULT()
    return __CURRENT

class TabularLogger(object):
    """Logger that writes free-form messages and key/value tables to text outputs.

    Output always goes to ``sys.stdout``; a ``log.txt`` file in the log
    directory is added when it can be opened.
    """

    def __init__(self, dir=None, format='{asctime} {message}\n', datefmt='%m/%d/%Y %I:%M:%S %p'):
        self.format = format
        self.datefmt = datefmt
        self.name2val = OrderedDict()  # values this iteration
        self.level = INFO
        self.cassandra_level = WARN
        self.text_outputs = [sys.stdout]
        self.tbwriter = None
        self.experiment_name = None
        self.dir = dir
        if dir is None:
            # log_dir() creates a temporary directory when none was given.
            dir = self.log_dir()
        if dir is not None:
            try:
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.text_outputs.append(open(os.path.join(dir, "log.txt"), "a+"))
            except Exception:
                # Best effort: keep logging to stdout when the file cannot be opened.
                self.exception("Unable to save to {}".format(dir))

        # Becomes the default logger only if no default exists yet.
        set_default(self, False)

    def log_dir(self):
        """Return the log directory, creating a temporary one if unset."""
        if self.dir:
            return self.dir
        self.dir = tempfile.mkdtemp()
        return self.dir

    # Logging API, forwarded
    # ----------------------------------------
    def record_tabular(self, key, val):
        self.name2val[key] = val

    def flush_tabular(self):
        """Discard the recorded values without writing them."""
        self.name2val.clear()

    def dump_tabular(self):
        """Write the recorded key/value pairs as a table and clear them."""
        # Create strings for printing
        key2str = OrderedDict()
        for (key, val) in self.name2val.items():
            try:
                if hasattr(val, "__float__"):
                    valstr = "%-8.3g" % val
                else:
                    valstr = val
                assert self._truncate(key) not in key2str, 'Truncated tabular key has already been used!'
                key2str[self._truncate(key)] = self._truncate(str(valstr))
            except Exception:
                # A single bad entry is reported and skipped, not fatal.
                self.log(INFO, 'Cannot dump_tabular: {}:{}'.format(key, val))
        # default=0 keeps an empty table from raising ValueError in max().
        keywidth = max(map(len, key2str.keys()), default=0)
        valwidth = max(map(len, key2str.values()), default=0)
        # Write to all text outputs
        self._write_text("-" * (keywidth + valwidth + 13), "\n")
        for (key, val) in key2str.items():
            self._write_text("| ", key, " " * (keywidth - len(key)),
                             " | ", val, " " * (valwidth - len(val)), " |\n")
        self._write_text("-" * (keywidth + valwidth + 13), "\n")
        for f in self.text_outputs:
            try:
                f.flush()
            except OSError:
                sys.stderr.write('Warning! OSError when flushing.\n')
        # Write to tensorboard
        if self.tbwriter is not None:
            self.tbwriter.write_values(self.name2val)
        self.name2val.clear()

    def log(self, level, *args):
        """Write ``args`` when ``level`` is at or above the logger's threshold."""
        if self.level <= level:
            self._do_log(*args)

    def exception(self, *args):
        """Log the exception currently being handled, followed by ``args``."""
        result = "".join(traceback_module.format_exception(*sys.exc_info()))
        self.log(ERROR, result, *args)

    # Configuration
    # ----------------------------------------
    def set_level(self, level):
        self.level = level

    def get_dir(self):
        return self.dir

    def close(self):
        """Close every text output except the first, and the tbwriter if set."""
        for f in self.text_outputs[1:]:
            f.close()
        if self.tbwriter:
            self.tbwriter.close()

    # Misc
    # ----------------------------------------
    def _do_log(self, *args):
        self._write_text(*args)
        for f in self.text_outputs:
            try:
                f.flush()
            except OSError:
                print('Warning! OSError when flushing.')

    def _write_text(self, *strings):
        # str() each piece so non-string arguments (e.g. an exception object
        # passed to exception()) do not make join() raise TypeError.
        message = ' '.join(str(piece) for piece in strings)
        s = self.format.format(asctime=time.strftime(self.datefmt), message=message)
        for f in self.text_outputs:
            f.write(s)

    def _truncate(self, s):
        """Shorten strings longer than 33 characters to '...' plus their last 30."""
        if len(s) > 33:
            return "..." + s[-30:]
        else:
            return s

def log_dir():
    return CURRENT().log_dir()

def flush_tabular():
    return CURRENT().flush_tabular()

def set_log_dir(dir):
    # Only updates the attribute; already opened log files are not moved.
    CURRENT().dir = dir


def exception(exception, *args):
    """Log the exception being handled, followed by ``exception`` and ``args``."""
    CURRENT().exception(exception, *args)
10 | Arguments: 11 | out_dir: The directory to be cleared 12 | """ 13 | if os.path.isdir(out_dir): 14 | # remove files from previous run 15 | shutil.rmtree(out_dir) 16 | 17 | # create the output directory 18 | os.makedirs(out_dir, exist_ok=False) -------------------------------------------------------------------------------- /Chapter3/visualize.py: -------------------------------------------------------------------------------- 1 | #Copyright (c) 2007-2011, cesar.gomes and mirrorballu2 2 | #Copyright (c) 2015-2017, CodeReclaimers, LLC 3 | # 4 | #Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 5 | #following conditions are met: 6 | # 7 | #1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 8 | #disclaimer. 9 | # 10 | #2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 11 | #disclaimer in the documentation and/or other materials provided with the distribution. 12 | # 13 | #3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products 14 | #derived from this software without specific prior written permission. 15 | # 16 | #THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 17 | #INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | #DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import print_function

import copy
import warnings

import numpy as np

# graphviz and matplotlib are optional: the functions below warn and return
# when the one they need is missing, so their absence must not break the import.
try:
    import graphviz
except ImportError:
    graphviz = None

try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None


def plot_stats(statistics, ylog=False, view=False, filename='avg_fitness.svg'):
    """
    Plots the population's average and best fitness.
    Arguments:
        statistics: The statistics reporter providing most_fit_genomes,
                    get_fitness_mean() and get_fitness_stdev().
        ylog:       If True the Y axis uses the 'symlog' scale.
        view:       If True the figure is shown after it was saved.
        filename:   The path of the file to save the figure into.
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    generation = range(len(statistics.most_fit_genomes))
    best_fitness = [c.fitness for c in statistics.most_fit_genomes]
    avg_fitness = np.array(statistics.get_fitness_mean())
    stdev_fitness = np.array(statistics.get_fitness_stdev())

    plt.plot(generation, avg_fitness, 'b-', label="average")
    plt.plot(generation, avg_fitness - stdev_fitness, 'g-.', label="-1 sd")
    plt.plot(generation, avg_fitness + stdev_fitness, 'g-.', label="+1 sd")
    plt.plot(generation, best_fitness, 'r-', label="best")

    plt.title("Population's average and best fitness")
    plt.xlabel("Generations")
    plt.ylabel("Fitness")
    plt.grid()
    plt.legend(loc="best")
    if ylog:
        plt.gca().set_yscale('symlog')

    plt.savefig(filename)
    if view:
        plt.show()

    plt.close()


def plot_species(statistics, view=False, filename='speciation.svg'):
    """
    Visualizes speciation throughout evolution.
    Arguments:
        statistics: The statistics reporter providing get_species_sizes().
        view:       If True the figure is shown after it was saved.
        filename:   The path of the file to save the figure into.
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    species_sizes = statistics.get_species_sizes()
    num_generations = len(species_sizes)
    # One row per species, one column per generation
    curves = np.array(species_sizes).T

    fig, ax = plt.subplots()
    ax.stackplot(range(num_generations), *curves)

    plt.title("Speciation")
    plt.ylabel("Size per Species")
    plt.xlabel("Generations")

    plt.savefig(filename)

    if view:
        plt.show()

    plt.close()


def _nodes_feeding_outputs(connections, outputs):
    """Return the keys of all nodes from which some output node is reachable."""
    used_nodes = copy.copy(outputs)
    pending = copy.copy(outputs)
    while pending:
        new_pending = set()
        for src, dst in connections:
            if dst in pending and src not in used_nodes:
                new_pending.add(src)
                used_nodes.add(src)
        pending = new_pending
    return used_nodes


def draw_net(config, genome, view=False, filename=None, directory=None, node_names=None, show_disabled=True, prune_unused=False,
             node_colors=None, fmt='svg'):
    """
    Receives a genome and draws a neural network with arbitrary topology.
    Arguments:
        config:        The configuration; its genome_config provides
                       input_keys and output_keys.
        genome:        The genome providing the nodes and connections.
        view:          Passed to the render call to open the result.
        filename:      The file name passed to the render call.
        directory:     The output directory passed to the render call.
        node_names:    The optional dict mapping node keys to labels.
        show_disabled: If True the disabled connections are drawn dotted,
                       otherwise they are omitted.
        prune_unused:  If True only the nodes and connections that can
                       reach an output node are drawn.
        node_colors:   The optional dict mapping node keys to fill colors.
        fmt:           The output format of the rendered graph.
    Returns:
        The created graphviz.Digraph, or None if graphviz is missing.
    """
    if graphviz is None:
        warnings.warn("This display is not available due to a missing optional dependency (graphviz)")
        return

    if node_names is None:
        node_names = {}
    if not isinstance(node_names, dict):
        raise TypeError("node_names must be a dict")

    if node_colors is None:
        node_colors = {}
    if not isinstance(node_colors, dict):
        raise TypeError("node_colors must be a dict")

    # Attributes for network nodes.
    node_attrs = {
        'shape': 'circle',
        'fontsize': '9',
        'height': '0.2',
        'width': '0.2'}

    dot = graphviz.Digraph(format=fmt, node_attr=node_attrs)

    inputs = set()
    for k in config.genome_config.input_keys:
        inputs.add(k)
        name = node_names.get(k, str(k))
        input_attrs = {'style': 'filled', 'shape': 'box', 'fillcolor': node_colors.get(k, 'lightgray')}
        dot.node(name, _attributes=input_attrs)

    outputs = set()
    for k in config.genome_config.output_keys:
        outputs.add(k)
        name = node_names.get(k, str(k))
        output_attrs = {'style': 'filled', 'fillcolor': node_colors.get(k, 'lightblue')}
        dot.node(name, _attributes=output_attrs)

    if prune_unused:
        # The connection key is the (input node, output node) pair, the
        # same pair that is unpacked when the edges are drawn below.
        connections = {cg.key for cg in genome.connections.values()
                       if cg.enabled or show_disabled}
        used_nodes = _nodes_feeding_outputs(connections, outputs)
    else:
        used_nodes = set(genome.nodes.keys())

    for n in used_nodes:
        if n in inputs or n in outputs:
            continue

        attrs = {'style': 'filled',
                 'fillcolor': node_colors.get(n, 'white')}
        # Use the same label as the edges do, otherwise a named hidden
        # node would show up twice.
        dot.node(node_names.get(n, str(n)), _attributes=attrs)

    for cg in genome.connections.values():
        if not (cg.enabled or show_disabled):
            continue
        src, dst = cg.key
        # When pruning, drop the connections that cannot influence any output.
        if prune_unused and dst not in used_nodes:
            continue
        a = node_names.get(src, str(src))
        b = node_names.get(dst, str(dst))
        style = 'solid' if cg.enabled else 'dotted'
        color = 'green' if cg.weight > 0 else 'red'
        width = str(0.1 + abs(cg.weight / 5.0))
        dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width})

    dot.render(filename, directory, view=view)

    return dot


# --------------------------------------------------------------------------------
# /Chapter3/xor_config.ini:
# --------------------------------------------------------------------------------
# #--- Hyper-parameters for the XOR experiment ---#
#
# [NEAT]
# fitness_criterion = max
# fitness_threshold = 15.5
# pop_size = 150
# reset_on_extinction = False
#
# [DefaultGenome]
# # node activation options
# activation_default = sigmoid
# activation_mutate_rate = 0.0
# activation_options = sigmoid
#
# # node aggregation options
# aggregation_default = sum
# aggregation_mutate_rate = 0.0
# aggregation_options = sum
#
# # node bias options
# bias_init_mean = 0.0
| bias_init_stdev = 1.0 23 | bias_max_value = 30.0 24 | bias_min_value = -30.0 25 | bias_mutate_power = 0.5 26 | bias_mutate_rate = 0.7 27 | bias_replace_rate = 0.1 28 | 29 | # genome compatibility options 30 | compatibility_disjoint_coefficient = 1.0 31 | compatibility_weight_coefficient = 0.5 32 | 33 | # connection add/remove rates 34 | conn_add_prob = 0.5 35 | conn_delete_prob = 0.5 36 | 37 | # connection enable options 38 | enabled_default = True 39 | enabled_mutate_rate = 0.01 40 | 41 | feed_forward = True 42 | initial_connection = full_direct 43 | 44 | # node add/remove rates 45 | node_add_prob = 0.2 46 | node_delete_prob = 0.2 47 | 48 | # network parameters 49 | num_hidden = 0 50 | num_inputs = 2 51 | num_outputs = 1 52 | 53 | # node response options 54 | response_init_mean = 1.0 55 | response_init_stdev = 0.0 56 | response_max_value = 30.0 57 | response_min_value = -30.0 58 | response_mutate_power = 0.0 59 | response_mutate_rate = 0.0 60 | response_replace_rate = 0.0 61 | 62 | # connection weight options 63 | weight_init_mean = 0.0 64 | weight_init_stdev = 1.0 65 | weight_max_value = 30 66 | weight_min_value = -30 67 | weight_mutate_power = 0.5 68 | weight_mutate_rate = 0.8 69 | weight_replace_rate = 0.1 70 | 71 | [DefaultSpeciesSet] 72 | compatibility_threshold = 3.0 73 | 74 | [DefaultStagnation] 75 | species_fitness_func = max 76 | max_stagnation = 20 77 | species_elitism = 2 78 | 79 | [DefaultReproduction] 80 | elitism = 2 81 | survival_threshold = 0.2 82 | min_species_size = 2 -------------------------------------------------------------------------------- /Chapter3/xor_experiment.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file provides the source code of the XOR experiment using the NEAT-Python library 3 | # 4 | 5 | # The Python standard library imports 6 | import os 7 | import shutil 8 | # The NEAT-Python library imports 9 | import neat 10 | # The helper used to visualize experiment results 11 | import 
# The current working directory
local_dir = os.path.dirname(__file__)
# The directory to store outputs
out_dir = os.path.join(local_dir, 'out')

# The XOR inputs and expected corresponding outputs for fitness evaluation
xor_inputs = [(0.0, 0.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0)]
xor_outputs = [(0.0,), (1.0,), (1.0,), (0.0,)]


def eval_fitness(net):
    """
    Evaluates fitness of the genome that was used to generate
    provided net
    Arguments:
        net: The feed-forward neural network generated from genome
    Returns:
        The fitness score - the higher the score, the better the
        organism fits. Maximal score: 16.0
    """
    error_sum = 0.0
    for xi, xo in zip(xor_inputs, xor_outputs):
        output = net.activate(xi)
        error_sum += abs(output[0] - xo[0])
    # Calculate amplified fitness: four samples with zero error give 4 ** 2
    fitness = (4 - error_sum) ** 2
    return fitness


def eval_genomes(genomes, config):
    """
    The function to evaluate the fitness of each genome in
    the genomes list.
    The provided configuration is used to create a feed-forward
    neural network from each genome, and the created network is
    then evaluated in its ability to solve the XOR problem. As a
    result the fitness score of each genome is updated to the
    newly evaluated one.
    Arguments:
        genomes: The list of genomes from population in the
                 current generation
        config:  The configuration settings with algorithm
                 hyper-parameters
    """
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        genome.fitness = eval_fitness(net)


def run_experiment(config_file):
    """
    The function to run XOR experiment against hyper-parameters
    defined in the provided configuration file.
    The winner genome will be rendered as a graph as well as the
    important statistics of neuroevolution process execution.
    Arguments:
        config_file: the path to the file with experiment
                     configuration
    """
    # The helper used to visualize experiment results; imported here so
    # that the fitness functions above stay usable without it.
    import visualize

    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Add a stdout reporter to show progress in the terminal.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    # Keep the checkpoints in the output directory, so that they do not
    # depend on the current working directory.
    checkpoint_prefix = os.path.join(out_dir, 'neat-checkpoint-')
    p.add_reporter(neat.Checkpointer(5, filename_prefix=checkpoint_prefix))

    # Run for up to 300 generations.
    best_genome = p.run(eval_genomes, 300)

    # Display the best genome among generations.
    print('\nBest genome:\n{!s}'.format(best_genome))

    # Show output of the most fit genome against training data.
    print('\nOutput:')
    net = neat.nn.FeedForwardNetwork.create(best_genome, config)
    for xi, xo in zip(xor_inputs, xor_outputs):
        output = net.activate(xi)
        print("input {!r}, expected output {!r}, got {!r}".format(xi, xo, output))

    # Check if the best genome is an adequate XOR solver
    best_genome_fitness = eval_fitness(net)
    if best_genome_fitness > config.fitness_threshold:
        print("\n\nSUCCESS: The XOR problem solver found!!!")
    else:
        print("\n\nFAILURE: Failed to find XOR problem solver!!!")

    # Visualize the experiment results
    node_names = {-1:'A', -2: 'B', 0:'A XOR B'}
    visualize.draw_net(config, best_genome, True, node_names=node_names, directory=out_dir)
    visualize.plot_stats(stats, ylog=False, view=True, filename=os.path.join(out_dir, 'avg_fitness.svg'))
    visualize.plot_species(stats, view=True, filename=os.path.join(out_dir, 'speciation.svg'))


def clean_output():
    """Remove the output directory of a previous run, if any, and create an empty one."""
    if os.path.isdir(out_dir):
        # remove files from previous run
        shutil.rmtree(out_dir)

    # create the output directory
    os.makedirs(out_dir, exist_ok=False)


if __name__ == '__main__':
    # Determine path to configuration file. This path manipulation is
    # here so that the script will run successfully regardless of the
    # current working directory.
    config_path = os.path.join(local_dir, 'xor_config.ini')

    # Clean results of previous run if any or init the output directory
    clean_output()

    # Run the experiment
    run_experiment(config_path)


# --------------------------------------------------------------------------------
# /Chapter4/cart_pole.py:
# --------------------------------------------------------------------------------
#
# This is implementation of cart-pole apparatus simulation based on the Newton laws
# which use Euler's method for numerical approximation the equations on motion.
#
import math
import random

#
# The constants defining physics of cart-pole apparatus
#
GRAVITY = 9.8  # m/s^2
MASSCART = 1.0  # kg
MASSPOLE = 0.5  # kg
TOTAL_MASS = (MASSPOLE + MASSCART)
# The distance from the center of mass of the pole to the pivot
# (actually half the pole's length)
LENGTH = 0.5  # m
POLEMASS_LENGTH = (MASSPOLE * LENGTH)  # m
FORCE_MAG = 10.0  # N
FOURTHIRDS = 4.0/3.0
# the number of seconds between state updates
TAU = 0.02  # sec

# The failure limits: a run ends as soon as the absolute cart position
# or the absolute pole angle exceeds its limit
X_LIMIT = 2.4
THETA_LIMIT = 0.21

# set random seed
random.seed(42)


def do_step(action, x, x_dot, theta, theta_dot):
    """
    The function to perform the one step of simulation over
    provided state variables.
    Arguments:
        action:     The binary action defining direction of
                    force to be applied (values <= 0 push in the
                    negative direction, all others in the positive).
        x:          The current cart X position
        x_dot:      The velocity of the cart
        theta:      The current angle of the pole from vertical
        theta_dot:  The angular velocity of the pole.
    Returns:
        The numerically approximated values of state variables
        after current time step (TAU)
    """
    # Find the force direction
    force = -FORCE_MAG if action <= 0 else FORCE_MAG
    # Pre-calculate cosine and sine to optimize performance
    cos_theta = math.cos(theta)
    sin_theta = math.sin(theta)

    temp = (force + POLEMASS_LENGTH * theta_dot * theta_dot * sin_theta) / TOTAL_MASS
    # The angular acceleration of the pole
    theta_acc = (GRAVITY * sin_theta - cos_theta * temp) / (LENGTH * (FOURTHIRDS - MASSPOLE * cos_theta * cos_theta / TOTAL_MASS))
    # The linear acceleration of the cart
    x_acc = temp - POLEMASS_LENGTH * theta_acc * cos_theta / TOTAL_MASS

    # Update the four state variables, using Euler's method.
    x_ret = x + TAU * x_dot
    x_dot_ret = x_dot + TAU * x_acc
    theta_ret = theta + TAU * theta_dot
    theta_dot_ret = theta_dot + TAU * theta_acc

    return x_ret, x_dot_ret, theta_ret, theta_dot_ret


def run_cart_pole_simulation(net, max_bal_steps, random_start=True):
    """
    The function to run cart-pole apparatus simulation for a
    certain number of time steps as maximum.
    Arguments:
        net:            The ANN of the phenotype to be evaluated.
        max_bal_steps:  The maximum number of time steps to
                        execute simulation.
        random_start:   If evaluates to True then cart-pole simulation
                        starts from random initial positions.
    Returns:
        the number of steps that the control ANN was able to
        maintain the single-pole balancer in stable state.
    """
    # Set random initial state if appropriate
    x, x_dot, theta, theta_dot = 0.0, 0.0, 0.0, 0.0
    if random_start:
        x = (random.random() * 4.8 - 2.4) / 2.0          # -1.2 <= x < 1.2
        x_dot = (random.random() * 3 - 1.5) / 4.0        # -0.375 <= x_dot < 0.375
        theta = (random.random() * 0.42 - 0.21) / 2.0    # -0.105 <= theta < 0.105
        theta_dot = (random.random() * 4 - 2) / 4.0      # -0.5 <= theta_dot < 0.5

    # Run simulation for specified number of steps while
    # cart-pole system stays within constraints
    for steps in range(max_bal_steps):
        # Load scaled inputs
        inputs = [
            (x + X_LIMIT) / (2 * X_LIMIT),
            (x_dot + 1.5) / 3,
            (theta + THETA_LIMIT) / (2 * THETA_LIMIT),
            (theta_dot + 2.0) / 4.0,
        ]

        # Activate the NET
        output = net.activate(inputs)
        # Make action values discrete
        action = 0 if output[0] < 0.5 else 1

        # Apply action to the simulated cart-pole
        x, x_dot, theta, theta_dot = do_step(action=action,
                                             x=x,
                                             x_dot=x_dot,
                                             theta=theta,
                                             theta_dot=theta_dot)

        # Check for failure due constraints violation. If so, return number of steps.
        if x < -X_LIMIT or x > X_LIMIT or theta < -THETA_LIMIT or theta > THETA_LIMIT:
            return steps

    return max_bal_steps


def eval_fitness(net, max_bal_steps=500000):
    """
    The function to evaluate fitness score of phenotype produced
    provided ANN
    Arguments:
        net:            The ANN of the phenotype to be evaluated.
        max_bal_steps:  The maximum number of time steps to
                        execute simulation.
    Returns:
        The phenotype fitness score in range [0, 1]
    """
    # First we run simulation loop returning number of successful
    # simulation steps
    steps = run_cart_pole_simulation(net, max_bal_steps)

    if steps == max_bal_steps:
        # the maximal fitness
        return 1.0
    elif steps == 0:  # needed to avoid math error when taking log(0)
        # the minimal fitness
        return 0.0
    else:
        # we use logarithmic scale because most cart-pole runs fail
        # too early - within ~100 steps, but we are testing against
        # 500'000 balancing steps
        log_steps = math.log(steps)
        log_max_steps = math.log(max_bal_steps)
        # The loss value is in range [0, 1]
        error = (log_max_steps - log_steps) / log_max_steps
        # The fitness value is a complement of the loss value
        return 1.0 - error


# --------------------------------------------------------------------------------
# /Chapter4/single_pole_config.ini:
# --------------------------------------------------------------------------------
# #--- Hyper-parameters for the Single-Pole balancing experiment ---#
#
# [NEAT]
# fitness_criterion = max
# fitness_threshold = 1.0
# pop_size = 150
# reset_on_extinction = False
#
# [DefaultGenome]
# # node activation options
# activation_default = sigmoid
# activation_mutate_rate = 0.0
# activation_options = sigmoid
#
# # node aggregation options
# aggregation_default = sum
# aggregation_mutate_rate = 0.0
aggregation_options = sum 19 | 20 | # node bias options 21 | bias_init_mean = 0.0 22 | bias_init_stdev = 1.0 23 | bias_max_value = 30.0 24 | bias_min_value = -30.0 25 | bias_mutate_power = 0.5 26 | bias_mutate_rate = 0.7 27 | bias_replace_rate = 0.1 28 | 29 | # genome compatibility options 30 | compatibility_disjoint_coefficient = 1.0 31 | compatibility_weight_coefficient = 0.5 32 | 33 | # connection add/remove rates 34 | conn_add_prob = 0.3 35 | conn_delete_prob = 0.3 36 | 37 | # connection enable options 38 | enabled_default = True 39 | enabled_mutate_rate = 0.01 40 | 41 | feed_forward = True 42 | initial_connection = full_direct 43 | 44 | # node add/remove rates 45 | node_add_prob = 0.02 46 | node_delete_prob = 0.02 47 | 48 | # network parameters 49 | num_hidden = 0 50 | num_inputs = 4 51 | num_outputs = 1 52 | 53 | # node response options 54 | response_init_mean = 1.0 55 | response_init_stdev = 0.0 56 | response_max_value = 30.0 57 | response_min_value = -30.0 58 | response_mutate_power = 0.0 59 | response_mutate_rate = 0.0 60 | response_replace_rate = 0.0 61 | 62 | # connection weight options 63 | weight_init_mean = 0.0 64 | weight_init_stdev = 1.0 65 | weight_max_value = 30 66 | weight_min_value = -30 67 | weight_mutate_power = 0.5 68 | weight_mutate_rate = 0.8 69 | weight_replace_rate = 0.1 70 | 71 | [DefaultSpeciesSet] 72 | compatibility_threshold = 4.0 73 | 74 | [DefaultStagnation] 75 | species_fitness_func = max 76 | max_stagnation = 15 77 | species_elitism = 2 78 | 79 | [DefaultReproduction] 80 | elitism = 2 81 | survival_threshold = 0.2 82 | min_species_size = 8 -------------------------------------------------------------------------------- /Chapter4/single_pole_experiment.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file provides the source code of the Single-Pole balancing experiment using the NEAT-Python library 3 | # 4 | 5 | # The Python standard library imports 6 | import os 7 | 8 | # The 
# NEAT-Python library imports
import neat
# The helper used to visualize experiment results
import visualize
# The cart-pole simulator
import cart_pole as cart

import utils

# The current working directory
local_dir = os.path.dirname(__file__)
# The directory to store outputs
out_dir = os.path.join(local_dir, 'out')
out_dir = os.path.join(out_dir, 'single_pole')

# The number of additional simulation runs for the winner genome
additional_num_runs = 100
# The number of steps in additional simulation runs
additional_steps = 200


def eval_genomes(genomes, config):
    """
    The function to evaluate the fitness of each genome in
    the genomes list.
    Arguments:
        genomes: The list of genomes from population in the
                 current generation
        config:  The configuration settings with algorithm
                 hyper-parameters
    """
    for genome_id, genome in genomes:
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        fitness = cart.eval_fitness(net)
        if fitness >= config.fitness_threshold:
            # do additional steps of evaluation with random initial states
            # to make sure that we found stable control strategy rather than
            # special case for particular initial state
            success_runs = evaluate_best_net(net, config, additional_num_runs)
            # adjust fitness
            fitness = 1.0 - (additional_num_runs - success_runs) / \
                      additional_num_runs

        genome.fitness = fitness


def run_experiment(config_file, n_generations=100):
    """
    The function to run the experiment against hyper-parameters
    defined in the provided configuration file.
    The winner genome will be rendered as a graph as well as the
    important statistics of neuroevolution process execution.
    Arguments:
        config_file:   the path to the file with experiment
                       configuration
        n_generations: the maximal number of generations to run
    """
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Add a stdout reporter to show progress in the terminal.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    # Keep the checkpoints in the output directory, so that they do not
    # depend on the current working directory.
    checkpoint_prefix = os.path.join(out_dir, 'spb-neat-checkpoint-')
    p.add_reporter(neat.Checkpointer(5, filename_prefix=checkpoint_prefix))

    # Run for up to N generations.
    best_genome = p.run(eval_genomes, n=n_generations)

    # Display the best genome among generations.
    print('\nBest genome:\n{!s}'.format(best_genome))

    # Check if the best genome is a winning Single-Pole balancing controller
    net = neat.nn.FeedForwardNetwork.create(best_genome, config)
    print("\n\nEvaluating the best genome in random runs")
    success_runs = evaluate_best_net(net, config, additional_num_runs)
    print("Runs successful/expected: %d/%d" % (success_runs, additional_num_runs))
    if success_runs == additional_num_runs:
        print("SUCCESS: The stable Single-Pole balancing controller found!!!")
    else:
        print("FAILURE: Failed to find the stable Single-Pole balancing controller!!!")

    # Visualize the experiment results
    node_names = {-1:'x', -2:'dot_x', -3:'θ', -4:'dot_θ', 0:'action'}
    visualize.draw_net(config, best_genome, True, node_names=node_names, directory=out_dir, fmt='svg')
    visualize.plot_stats(stats, ylog=False, view=True, filename=os.path.join(out_dir, 'avg_fitness.svg'))
    visualize.plot_species(stats, view=True, filename=os.path.join(out_dir, 'speciation.svg'))


def evaluate_best_net(net, config, num_runs):
    """
    The function to evaluate the ANN of the best genome in
    specified number of sequential runs. It is aimed to test it
    against various random initial states that checks if it is
    implementing stable control strategy or just a special case
    for particular initial state.
    Arguments:
        net:        The ANN to evaluate
        config:     The hyper-parameters configuration
        num_runs:   The number of sequential runs
    Returns:
        The number of successful runs completed before the first
        failed one (num_runs if none failed)
    """
    for run in range(num_runs):
        fitness = cart.eval_fitness(net, max_bal_steps=additional_steps)
        if fitness < config.fitness_threshold:
            # stop at the first failure
            return run
    return num_runs


if __name__ == '__main__':
    # Determine path to configuration file. This path manipulation is
    # here so that the script will run successfully regardless of the
    # current working directory.
    config_path = os.path.join(local_dir, 'single_pole_config.ini')

    # Clean results of previous run if any or init the output directory
    utils.clear_output(out_dir)

    # Run the experiment
    run_experiment(config_path)


# --------------------------------------------------------------------------------
# /Chapter4/two_pole_markov_config.ini:
# --------------------------------------------------------------------------------
# #--- Hyper-parameters for the Double-Pole balancing Markov experiment ---#
#
# [NEAT]
# fitness_criterion = max
# fitness_threshold = 1.0
# pop_size = 1000
# reset_on_extinction = True
#
# [DefaultGenome]
# # node activation options sigmoid
# activation_default = sigmoid
# activation_mutate_rate = 0.0
# activation_options = sigmoid
#
# # node aggregation options
# aggregation_default = sum
# aggregation_mutate_rate = 0.0
# aggregation_options = sum
#
# # node bias options
# bias_init_mean = 0.0
# bias_init_stdev = 1.0
# bias_max_value = 30.0
# bias_min_value = -30.0
# bias_mutate_power = 0.5
# bias_mutate_rate = 0.7
# bias_replace_rate = 0.1
#
# # genome
compatibility options 30 | compatibility_disjoint_coefficient = 1.0 31 | compatibility_weight_coefficient = 0.6 32 | 33 | # connection add/remove rates 34 | conn_add_prob = 0.5 35 | conn_delete_prob = 0.2 36 | 37 | # connection enable options 38 | enabled_default = True 39 | enabled_mutate_rate = 0.01 40 | 41 | feed_forward = True 42 | initial_connection = partial_direct 0.5 43 | 44 | # node add/remove rates 45 | node_add_prob = 0.2 46 | node_delete_prob = 0.2 47 | 48 | # network parameters 49 | num_hidden = 0 50 | num_inputs = 6 51 | num_outputs = 1 52 | 53 | # node response options 54 | response_init_mean = 1.0 55 | response_init_stdev = 0.0 56 | response_max_value = 30.0 57 | response_min_value = -30.0 58 | response_mutate_power = 0.0 59 | response_mutate_rate = 0.0 60 | response_replace_rate = 0.0 61 | 62 | # connection weight options 63 | weight_init_mean = 0.0 64 | weight_init_stdev = 1.0 65 | weight_max_value = 30.0 66 | weight_min_value = -30.0 67 | weight_mutate_power = 0.5 68 | weight_mutate_rate = 0.8 69 | weight_replace_rate = 0.1 70 | 71 | [DefaultSpeciesSet] 72 | compatibility_threshold = 3.0 73 | 74 | [DefaultStagnation] 75 | species_fitness_func = mean 76 | max_stagnation = 15 77 | species_elitism = 1 78 | 79 | [DefaultReproduction] 80 | elitism = 2 81 | survival_threshold = 0.1 82 | min_species_size = 2 -------------------------------------------------------------------------------- /Chapter4/two_pole_markov_experiment.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file provides source code of double-pole balancing experiment in 3 | # its Markovian version, i.e., when velocity information is available to 4 | # the solver. 
5 | # 6 | 7 | # The Python standard library imports 8 | import os 9 | import shutil 10 | import math 11 | import random 12 | import time 13 | # The NEAT-Python library imports 14 | import neat 15 | # The helper used to visualize experiment results 16 | import visualize 17 | # The cart-2-pole simulator 18 | import cart_two_pole as cart 19 | 20 | import utils 21 | 22 | # The directory containing this script 23 | local_dir = os.path.dirname(__file__) 24 | # The directory to store outputs 25 | out_dir = os.path.join(local_dir, 'out') 26 | out_dir = os.path.join(out_dir, 'two_pole_markov') 27 | 28 | # The number of additional simulation runs for the winner genome 29 | additional_num_runs = 1 30 | # The number of steps in additional simulation runs 31 | additional_steps = 100000 32 | 33 | def eval_fitness(net, max_bal_steps=100000): 34 | """ 35 | Evaluates fitness of the genome that was used to generate 36 | provided net 37 | Arguments: 38 | net: The feed-forward neural network generated from genome 39 | max_bal_steps: The maximum nubmer of time steps to 40 | execute simulation.
def run_experiment(config_file, n_generations=100, silent=False):
    """
    The function to run the experiment against hyper-parameters
    defined in the provided configuration file.
    The winner genome will be rendered as a graph as well as the
    important statistics of neuroevolution process execution.
    Arguments:
        config_file:    The path to the file with experiment
                        configuration
        n_generations:  The maximal number of generations to run
                        the evolution for
        silent:         If True, the results are visualized only when
                        the best genome passed all additional runs
    Returns:
        True if experiment finished with successful solver found.
    """
    # Use a fixed seed to keep runs reproducible. Only the standard
    # `random` module is seeded here.
    seed = 1559231616
    random.seed(seed)

    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Add a stdout reporter to show progress in the terminal.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    # Keep checkpoints inside the experiment output directory. A prefix
    # relative to the current working directory ('out/...') fails when the
    # script is started from a directory that has no 'out' sub-directory.
    checkpoint_prefix = os.path.join(out_dir, 'tpbm-neat-checkpoint-')
    p.add_reporter(neat.Checkpointer(5, filename_prefix=checkpoint_prefix))

    # Run for up to N generations.
    best_genome = p.run(eval_genomes, n=n_generations)

    # Display the best genome among generations.
    print('\nBest genome:\n{!s}'.format(best_genome))

    # Check if the best genome is a winning Double-Pole-Markov balancing controller
    net = neat.nn.FeedForwardNetwork.create(best_genome, config)
    print("\n\nEvaluating the best genome in random runs")
    success_runs = evaluate_best_net(net, config, additional_num_runs)
    solved = success_runs == additional_num_runs
    print("Runs successful/expected: %d/%d" % (success_runs, additional_num_runs))
    if solved:
        print("SUCCESS: The stable Double-Pole-Markov balancing controller found!!!")
    else:
        print("FAILURE: Failed to find the stable Double-Pole-Markov balancing controller!!!")

    print("Random seed:", seed)

    # Visualize the experiment results
    if not silent or solved:
        node_names = {-1:'x', -2:'dot_x', -3:'θ_1', -4:'dot_θ_1', -5:'θ_2', -6:'dot_θ_2', 0:'action'}
        visualize.draw_net(config, best_genome, True, node_names=node_names, directory=out_dir, fmt='svg')
        visualize.plot_stats(stats, ylog=False, view=True, filename=os.path.join(out_dir, 'avg_fitness.svg'))
        visualize.plot_species(stats, view=True, filename=os.path.join(out_dir, 'speciation.svg'))

    return solved
def clear_output(out_dir):
    """
    Prepares a fresh, empty output directory.
    A directory already present at the given path is removed together
    with all of its content, after that the directory is created anew
    (including missing parent directories).
    Arguments:
        out_dir: The path of the output directory to be (re)created
    """
    has_previous_results = os.path.isdir(out_dir)
    if has_previous_results:
        # drop everything left from the previous run
        shutil.rmtree(out_dir)

    # exist_ok=False: raise if something still occupies the path
    # (e.g. a regular file with the same name)
    os.makedirs(out_dir, exist_ok=False)
def plot_stats(statistics, ylog=False, view=False, filename='avg_fitness.svg'):
    """
    Plots the population's average and best fitness.
    Arguments:
        statistics: The statistics collector; must provide
                    `most_fit_genomes`, `get_fitness_mean()` and
                    `get_fitness_stdev()`
        ylog:       If True, the Y axis uses the 'symlog' scale
        view:       If True, the plot is shown after it was saved
        filename:   The path of the file to save the plot to
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    generation = range(len(statistics.most_fit_genomes))
    best_fitness = [c.fitness for c in statistics.most_fit_genomes]
    avg_fitness = np.array(statistics.get_fitness_mean())
    stdev_fitness = np.array(statistics.get_fitness_stdev())

    plt.plot(generation, avg_fitness, 'b-', label="average")
    plt.plot(generation, avg_fitness - stdev_fitness, 'g-.', label="-1 sd")
    plt.plot(generation, avg_fitness + stdev_fitness, 'g-.', label="+1 sd")
    plt.plot(generation, best_fitness, 'r-', label="best")

    plt.title("Population's average and best fitness")
    plt.xlabel("Generations")
    plt.ylabel("Fitness")
    plt.grid()
    plt.legend(loc="best")
    if ylog:
        plt.gca().set_yscale('symlog')

    plt.savefig(filename)
    if view:
        plt.show()

    # release the figure so the next plot starts from a clean state
    plt.close()


def plot_species(statistics, view=False, filename='speciation.svg'):
    """
    Visualizes speciation throughout evolution as a stacked plot of
    species sizes per generation.
    Arguments:
        statistics: The statistics collector; must provide
                    `get_species_sizes()`
        view:       If True, the plot is shown after it was saved
        filename:   The path of the file to save the plot to
    """
    if plt is None:
        warnings.warn("This display is not available due to a missing optional dependency (matplotlib)")
        return

    species_sizes = statistics.get_species_sizes()
    num_generations = len(species_sizes)
    # transpose: one curve per species instead of one row per generation
    curves = np.array(species_sizes).T

    fig, ax = plt.subplots()
    ax.stackplot(range(num_generations), *curves)

    plt.title("Speciation")
    plt.ylabel("Size per Species")
    plt.xlabel("Generations")

    plt.savefig(filename)

    if view:
        plt.show()

    plt.close()


def draw_net(config, genome, view=False, filename=None, directory=None, node_names=None, show_disabled=True, prune_unused=False,
             node_colors=None, fmt='svg'):
    """
    Receives a genome and draws a neural network with arbitrary topology.
    Arguments:
        config:        The configuration; `config.genome_config` must
                       provide `input_keys` and `output_keys`
        genome:        The genome to draw; must provide `nodes` and
                       `connections`
        view:          Passed to the graph `render()` call
        filename:      The output file name passed to `render()`
        directory:     The output directory passed to `render()`
        node_names:    The optional dict mapping node keys to labels
        show_disabled: If True, disabled connections are drawn (dotted)
        prune_unused:  If True, only the hidden nodes from which an output
                       node can be reached are drawn
        node_colors:   The optional dict mapping node keys to fill colors
        fmt:           The output format of the rendered graph
    Returns:
        The created graph object, or None when graphviz is unavailable.
    """
    if graphviz is None:
        warnings.warn("This display is not available due to a missing optional dependency (graphviz)")
        return

    if node_names is None:
        node_names = {}

    assert type(node_names) is dict

    if node_colors is None:
        node_colors = {}

    assert type(node_colors) is dict

    # Default attributes for network nodes.
    node_attrs = {
        'shape': 'circle',
        'fontsize': '9',
        'height': '0.2',
        'width': '0.2'}

    dot = graphviz.Digraph(format=fmt, node_attr=node_attrs)

    inputs = set()
    for k in config.genome_config.input_keys:
        inputs.add(k)
        name = node_names.get(k, str(k))
        input_attrs = {'style': 'filled', 'shape': 'box', 'fillcolor': node_colors.get(k, 'lightgray')}
        dot.node(name, _attributes=input_attrs)

    outputs = set()
    for k in config.genome_config.output_keys:
        outputs.add(k)
        name = node_names.get(k, str(k))
        output_attrs = {'style': 'filled', 'fillcolor': node_colors.get(k, 'lightblue')}

        dot.node(name, _attributes=output_attrs)

    if prune_unused:
        # Connection genes are addressed through their `key`
        # (source, target) tuple, exactly as in the edge loop below.
        connections = set()
        for cg in genome.connections.values():
            if cg.enabled or show_disabled:
                connections.add(cg.key)

        # Walk backwards from the outputs collecting every node that
        # can reach an output through the considered connections.
        used_nodes = copy.copy(outputs)
        pending = copy.copy(outputs)
        while pending:
            new_pending = set()
            for a, b in connections:
                if b in pending and a not in used_nodes:
                    new_pending.add(a)
                    used_nodes.add(a)
            pending = new_pending
    else:
        used_nodes = set(genome.nodes.keys())

    for n in used_nodes:
        if n in inputs or n in outputs:
            continue

        attrs = {'style': 'filled',
                 'fillcolor': node_colors.get(n, 'white')}
        # Use the same label lookup as the edges do, otherwise a named
        # hidden node would be drawn twice (once as str(n), once by name).
        dot.node(node_names.get(n, str(n)), _attributes=attrs)

    for cg in genome.connections.values():
        if cg.enabled or show_disabled:
            src, dst = cg.key
            if prune_unused and dst not in used_nodes:
                # an edge into a pruned node would silently re-create it
                continue
            a = node_names.get(src, str(src))
            b = node_names.get(dst, str(dst))
            style = 'solid' if cg.enabled else 'dotted'
            color = 'green' if cg.weight > 0 else 'red'
            width = str(0.1 + abs(cg.weight / 5.0))
            dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width})

    dot.render(filename, directory, view=view)

    return dot
class AgentRecordStore:
    """
    The container of agent records which can be saved to and
    restored from a file.
    """
    def __init__(self):
        """
        Creates new store without records.
        """
        self.records = []

    def add_record(self, record):
        """
        Appends the given record to the end of the records list.
        Arguments:
            record: The record to be stored.
        """
        self.records.append(record)

    def load(self, file):
        """
        Replaces the records of this store with the list read
        from the given file.
        Arguments:
            file: The path to the file with pickled records.
        """
        # NOTE(review): unpickling executes arbitrary code stored in the
        # file - load only the dumps produced by a trusted experiment run.
        with open(file, 'rb') as source:
            self.records = pickle.load(source)

    def dump(self, file):
        """
        Writes the records of this store into the given file.
        Arguments:
            file: The path to the file to write pickled records to.
        """
        with open(file, 'wb') as target:
            pickle.dump(self.records, target)
class Point:
    """
    The basic class describing point in the two dimensional Cartesian coordinate
    system.
    """
    def __init__(self, x, y):
        """
        Creates new point at specified coordinates
        Arguments:
            x, y: The coordinates of the point
        """
        self.x = x
        self.y = y

    def angle(self):
        """
        The function to determine angle in degrees of vector drawn from the
        center of coordinates to this point. The angle values is in range
        from 0 to 360 degrees in anticlockwise direction.
        Returns:
            The angle in degrees.
        """
        ang = math.atan2(self.y, self.x) / math.pi * 180.0
        if ang < 0.0:
            # the lower quadrants (3 or 4): atan2 is negative there
            return ang + 360
        return ang

    def rotate(self, angle, point):
        """
        The function to rotate this point around another point with given
        angle in degrees. The point is modified in place; positive angles
        rotate anticlockwise.
        Arguments:
            angle: The rotation angle (degrees)
            point: The point - center of rotation
        """
        rad = math.radians(angle)
        cos_a, sin_a = math.cos(rad), math.sin(rad)
        # translate to have another point at the center of coordinates
        ox = self.x - point.x
        oy = self.y - point.y
        # apply the 2D rotation matrix [[cos, -sin], [sin, cos]];
        # the second row needs '+ cos * oy' - with a minus sign the
        # transformation is a reflection, not a rotation
        rx = cos_a * ox - sin_a * oy
        ry = sin_a * ox + cos_a * oy
        # translate back
        self.x = rx + point.x
        self.y = ry + point.y

    def distance(self, point):
        """
        The function to calculate Euclidean distance between this and given point.
        Arguments:
            point: The another point
        Returns:
            The Euclidean distance between this and given point.
        """
        dx = self.x - point.x
        dy = self.y - point.y

        return math.sqrt(dx*dx + dy*dy)

    def __str__(self):
        """
        Returns the nicely formatted string representation of this point.
        """
        return "Point (%.1f, %.1f)" % (self.x, self.y)
def distance(self, p):
    """
    The function to estimate distance from this line segment to the
    given point.
    The point is projected onto the line going through the segment end
    points. When the projection falls outside of the segment, the distance
    to the nearest end point is returned. For a zero-length segment the
    result is 0.0.
    Arguments:
        p: The point to find distance to.
    Returns:
        The distance between given point and this line segment.
    """
    start, end = self.a, self.b
    span_x = end.x - start.x
    span_y = end.y - start.y

    # dot product of (p - start) with the segment direction
    projection = (p.x - start.x) * span_x + (p.y - start.y) * span_y
    seg_length = start.distance(end)
    squared_length = seg_length * seg_length
    if squared_length == 0.0:
        # degenerate segment - both end points coincide
        return 0.0

    # relative position of the projection along the segment
    u = projection / squared_length
    if u < 0 or u > 1:
        # the projection is outside of the segment - take the closest end
        to_start = start.distance(p)
        to_end = end.distance(p)
        return to_start if to_start < to_end else to_end

    foot = Point(start.x + u * span_x, start.y + u * span_y)
    return foot.distance(p)
203 | """ 204 | return "Line (%.1f, %.1f) -> (%.1f, %.1f)" % (self.a.x, self.a.y, self.b.x, self.b.y) -------------------------------------------------------------------------------- /Chapter5/hard_maze.txt: -------------------------------------------------------------------------------- 1 | 11 2 | 36 184 3 | 0 4 | 31 20 5 | 6 | 5 5 5 200 7 | 5 200 200 200 8 | 200 200 200 5 9 | 200 5 5 5 10 | 11 | 5 49 57 53 12 | 56 54 56 157 13 | 57 106 158 162 14 | 77 200 108 164 15 | 5 80 33 121 16 | 200 146 87 91 17 | 56 55 133 30 18 | -------------------------------------------------------------------------------- /Chapter5/maze_config.ini: -------------------------------------------------------------------------------- 1 | #--- Hyper-parameters for the Single-Pole balancing experiment ---# 2 | 3 | [NEAT] 4 | fitness_criterion = max 5 | fitness_threshold = 1.0 6 | pop_size = 250 7 | reset_on_extinction = False 8 | 9 | [DefaultGenome] 10 | # node activation options 11 | activation_default = sigmoid 12 | activation_mutate_rate = 0.0 13 | activation_options = sigmoid 14 | 15 | # node aggregation options 16 | aggregation_default = sum 17 | aggregation_mutate_rate = 0.0 18 | aggregation_options = sum 19 | 20 | # node bias options 21 | bias_init_mean = 0.0 22 | bias_init_stdev = 1.0 23 | bias_max_value = 30.0 24 | bias_min_value = -30.0 25 | bias_mutate_power = 0.5 26 | bias_mutate_rate = 0.7 27 | bias_replace_rate = 0.1 28 | 29 | # genome compatibility options 30 | compatibility_disjoint_coefficient = 1.1 31 | compatibility_weight_coefficient = 0.5 32 | 33 | # connection add/remove rates 34 | conn_add_prob = 0.5 35 | conn_delete_prob = 0.5 36 | 37 | # connection enable options 38 | enabled_default = True 39 | enabled_mutate_rate = 0.01 40 | 41 | feed_forward = False 42 | initial_connection = partial_direct 0.5 43 | 44 | # node add/remove rates 45 | node_add_prob = 0.1 46 | node_delete_prob = 0.1 47 | 48 | # network parameters 49 | num_hidden = 1 50 | num_inputs = 10 51 | 
num_outputs = 2 52 | 53 | # node response options 54 | response_init_mean = 1.0 55 | response_init_stdev = 0.0 56 | response_max_value = 30.0 57 | response_min_value = -30.0 58 | response_mutate_power = 0.0 59 | response_mutate_rate = 0.0 60 | response_replace_rate = 0.0 61 | 62 | # connection weight options 63 | weight_init_mean = 0.0 64 | weight_init_stdev = 1.0 65 | weight_max_value = 30 66 | weight_min_value = -30 67 | weight_mutate_power = 0.5 68 | weight_mutate_rate = 0.8 69 | weight_replace_rate = 0.1 70 | 71 | [DefaultSpeciesSet] 72 | compatibility_threshold = 3.0 73 | 74 | [DefaultStagnation] 75 | species_fitness_func = max 76 | max_stagnation = 20 77 | species_elitism = 1 78 | 79 | [DefaultReproduction] 80 | elitism = 2 81 | survival_threshold = 0.1 82 | min_species_size = 2 -------------------------------------------------------------------------------- /Chapter5/medium_maze.txt: -------------------------------------------------------------------------------- 1 | 11 2 | 30 22 3 | 0 4 | 270 100 5 | 5 5 295 5 6 | 295 5 295 135 7 | 295 135 5 135 8 | 5 135 5 5 9 | 241 135 58 65 10 | 114 5 73 42 11 | 130 91 107 46 12 | 196 5 139 51 13 | 219 125 182 63 14 | 267 5 214 63 15 | 271 135 237 88 16 | -------------------------------------------------------------------------------- /Chapter5/utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # The collection of utilities 3 | # 4 | import os 5 | import shutil 6 | 7 | def clear_output(out_dir): 8 | """ 9 | Function to clear output directory. 
class AgentRecord:
    """
    The snapshot of results of maze navigation simulation for specific
    solver agent taken at the end of navigation run.
    """
    def __init__(self, generation, agent_id):
        """
        Creates new record for specific agent at the specific generation
        of the evolutionary process.
        Arguments:
            generation: The generation this record belongs to
            agent_id:   The ID of the agent this record describes
        """
        self.generation = generation
        self.agent_id = agent_id
        # the values below are placeholders (-1 / False) to be filled in
        # by the code creating the record
        # the agent's position
        self.x = self.y = -1
        # the agent's scores
        self.fitness = self.novelty = -1
        # The flag to indicate whether this agent was able to find maze exit
        self.hit_exit = False
        # The ID of species this agent belongs to and the age of that
        # species at the time of recording
        self.species_id = self.species_age = -1
class Point:
    """
    The basic class describing point in the two dimensional Cartesian coordinate
    system.
    """
    def __init__(self, x, y):
        """
        Creates new point at specified coordinates
        Arguments:
            x, y: The coordinates of the point
        """
        self.x = x
        self.y = y

    def angle(self):
        """
        The function to determine angle in degrees of vector drawn from the
        center of coordinates to this point. The angle values is in range
        from 0 to 360 degrees in anticlockwise direction.
        Returns:
            The angle in degrees.
        """
        ang = math.atan2(self.y, self.x) / math.pi * 180.0
        if ang < 0.0:
            # the lower quadrants (3 or 4): atan2 is negative there
            return ang + 360
        return ang

    def rotate(self, angle, point):
        """
        The function to rotate this point around another point with given
        angle in degrees. The point is modified in place; positive angles
        rotate anticlockwise.
        Arguments:
            angle: The rotation angle (degrees)
            point: The point - center of rotation
        """
        rad = math.radians(angle)
        cos_a, sin_a = math.cos(rad), math.sin(rad)
        # translate to have another point at the center of coordinates
        ox = self.x - point.x
        oy = self.y - point.y
        # apply the 2D rotation matrix [[cos, -sin], [sin, cos]];
        # the second row needs '+ cos * oy' - with a minus sign the
        # transformation is a reflection, not a rotation
        rx = cos_a * ox - sin_a * oy
        ry = sin_a * ox + cos_a * oy
        # translate back
        self.x = rx + point.x
        self.y = ry + point.y

    def distance(self, point):
        """
        The function to calculate Euclidean distance between this and given point.
        Arguments:
            point: The another point
        Returns:
            The Euclidean distance between this and given point.
        """
        dx = self.x - point.x
        dy = self.y - point.y

        return math.sqrt(dx*dx + dy*dy)

    def __str__(self):
        """
        Returns the nicely formatted string representation of this point.
        """
        return "Point (%.1f, %.1f)" % (self.x, self.y)
118 | Arguments: 119 | a, b: The end points of the line 120 | """ 121 | self.a = a 122 | self.b = b 123 | 124 | def midpoint(self): 125 | """ 126 | The function to find midpoint of this line segment. 127 | Returns: 128 | The midpoint of this line segment. 129 | """ 130 | x = (self.a.x + self.b.x) / 2.0 131 | y = (self.a.y + self.b.y) / 2.0 132 | 133 | return Point(x, y) 134 | 135 | def intersection(self, line): 136 | """ 137 | The function to find intersection between this line and the given one. 138 | Arguments: 139 | line: The line to test intersection against. 140 | Returns: 141 | The tuple with the first value indicating if intersection was found (True/False) 142 | and the second value holding the intersection Point or None 143 | """ 144 | A, B, C, D = self.a, self.b, line.a, line.b 145 | 146 | rTop = (A.y - C.y) * (D.x - C.x) - (A.x - C.x) * (D.y - C.y) 147 | rBot = (B.x - A.x) * (D.y - C.y) - (B.y - A.y) * (D.x - C.x) 148 | 149 | sTop = (A.y - C.y) * (B.x - A.x) - (A.x - C.x) * (B.y - A.y) 150 | sBot = (B.x - A.x) * (D.y - C.y) - (B.y - A.y) * (D.x - C.x) 151 | 152 | if rBot == 0 or sBot == 0: 153 | # lines are parallel 154 | return False, None 155 | 156 | r = rTop / rBot 157 | s = sTop / sBot 158 | if r > 0 and r < 1 and s > 0 and s < 1: 159 | x = A.x + r * (B.x - A.x) 160 | y = A.y + r * (B.y - A.y) 161 | return True, Point(x, y) 162 | 163 | return False, None 164 | 165 | def distance(self, p): 166 | """ 167 | The function to estimate distance to the given point from this line. 168 | Arguments: 169 | p: The point to find distance to. 170 | Returns: 171 | The distance between given point and this line. 
172 | """ 173 | utop = (p.x - self.a.x) * (self.b.x - self.a.x) + (p.y - self.a.y) * (self.b.y - self.a.y) 174 | ubot = self.a.distance(self.b) 175 | ubot *= ubot 176 | if ubot == 0.0: 177 | return 0.0 178 | 179 | u = utop / ubot 180 | if u < 0 or u > 1: 181 | d1 = self.a.distance(p) 182 | d2 = self.b.distance(p) 183 | if d1 < d2: 184 | return d1 185 | return d2 186 | 187 | x = self.a.x + u * (self.b.x - self.a.x) 188 | y = self.a.y + u * (self.b.y - self.a.y) 189 | point = Point(x, y) 190 | return point.distance(p) 191 | 192 | def length(self): 193 | """ 194 | The function to calculate the length of this line segment. 195 | Returns: 196 | The length of this line segment as distance between its endpoints. 197 | """ 198 | return self.a.distance(self.b) 199 | 200 | def __str__(self): 201 | """ 202 | Returns the nicely formatted string representation of this line. 203 | """ 204 | return "Line (%.1f, %.1f) -> (%.1f, %.1f)" % (self.a.x, self.a.y, self.b.x, self.b.y) -------------------------------------------------------------------------------- /Chapter6/hard_maze.txt: -------------------------------------------------------------------------------- 1 | 11 2 | 36 184 3 | 0 4 | 31 20 5 | 6 | 5 5 5 200 7 | 5 200 200 200 8 | 200 200 200 5 9 | 200 5 5 5 10 | 11 | 5 49 57 53 12 | 56 54 56 157 13 | 57 106 158 162 14 | 77 200 108 164 15 | 5 80 33 121 16 | 200 146 87 91 17 | 56 55 133 30 18 | -------------------------------------------------------------------------------- /Chapter6/maze_config.ini: -------------------------------------------------------------------------------- 1 | #--- Hyper-parameters for the Single-Pole balancing experiment ---# 2 | 3 | [NEAT] 4 | fitness_criterion = max 5 | fitness_threshold = 13.5 6 | pop_size = 500 7 | reset_on_extinction = True 8 | 9 | [DefaultGenome] 10 | # node activation options 11 | activation_default = sigmoid 12 | activation_mutate_rate = 0.0 13 | activation_options = sigmoid 14 | 15 | # node aggregation options 16 | 
aggregation_default = sum 17 | aggregation_mutate_rate = 0.0 18 | aggregation_options = sum 19 | 20 | # node bias options 21 | bias_init_mean = 0.0 22 | bias_init_stdev = 1.0 23 | bias_max_value = 30.0 24 | bias_min_value = -30.0 25 | bias_mutate_power = 0.5 26 | bias_mutate_rate = 0.7 27 | bias_replace_rate = 0.1 28 | 29 | # genome compatibility options 30 | compatibility_disjoint_coefficient = 1.1 31 | compatibility_weight_coefficient = 0.5 32 | 33 | # connection add/remove rates 34 | conn_add_prob = 0.5 35 | conn_delete_prob = 0.1 36 | 37 | # connection enable options 38 | enabled_default = True 39 | enabled_mutate_rate = 0.01 40 | 41 | feed_forward = False 42 | initial_connection = partial_direct 0.5 43 | 44 | # node add/remove rates 45 | node_add_prob = 0.1 46 | node_delete_prob = 0.1 47 | 48 | # network parameters 49 | num_hidden = 1 50 | num_inputs = 10 51 | num_outputs = 2 52 | 53 | # node response options 54 | response_init_mean = 1.0 55 | response_init_stdev = 0.0 56 | response_max_value = 30.0 57 | response_min_value = -30.0 58 | response_mutate_power = 0.0 59 | response_mutate_rate = 0.0 60 | response_replace_rate = 0.0 61 | 62 | # connection weight options 63 | weight_init_mean = 0.0 64 | weight_init_stdev = 1.0 65 | weight_max_value = 30 66 | weight_min_value = -30 67 | weight_mutate_power = 0.5 68 | weight_mutate_rate = 0.8 69 | weight_replace_rate = 0.1 70 | 71 | [DefaultSpeciesSet] 72 | compatibility_threshold = 3.0 73 | 74 | [DefaultStagnation] 75 | species_fitness_func = max 76 | max_stagnation = 100 77 | species_elitism = 1 78 | 79 | [DefaultReproduction] 80 | elitism = 2 81 | survival_threshold = 0.1 82 | min_species_size = 2 -------------------------------------------------------------------------------- /Chapter6/medium_maze.txt: -------------------------------------------------------------------------------- 1 | 11 2 | 30 22 3 | 0 4 | 270 100 5 | 5 5 295 5 6 | 295 5 295 135 7 | 295 135 5 135 8 | 5 135 5 5 9 | 241 135 58 65 10 | 114 5 73 42 
class Statistics:
    """
    Collects per-generation evaluation results: the raw distances reported
    for each generation and the best fitness score of that generation.
    """
    def __init__(self):
        """
        Creates new empty statistics collector.
        """
        # best fitness score, one entry per generation
        self.most_fit_scores = []
        # distances collection, one entry per generation
        self.generation_statistics = []

    def post_evaluate(self, distances, max_fitness):
        """
        Registers results of one evaluated generation.
        Arguments:
            distances:   The distances collected during the generation.
            max_fitness: The best fitness score of the generation.
        """
        self.generation_statistics.append(distances)
        self.most_fit_scores.append(max_fitness)

    def get_distance_mean(self):
        """
        Returns:
            The NumPy array with mean distance of every registered generation.
        """
        per_generation = [np.array(dists).mean() for dists in self.generation_statistics]
        return np.array(per_generation)

    def get_distance_stdev(self):
        """
        Returns:
            The NumPy array with standard deviation of distances of every
            registered generation.
        """
        per_generation = [np.array(dists).std() for dists in self.generation_statistics]
        return np.array(per_generation)
class VisualField:
    """
    Represents visual field: a square grid of `field_size` x `field_size`
    cells holding one small object (single cell) and one big object
    (3 x 3 cells centered at `big_pos`). The field wraps around at its edges.
    """
    def __init__(self, big_pos, small_pos, field_size):
        """
        Creates new visual field and draws both objects into it.
        Arguments:
            big_pos:    The (x, y) position of the big object center.
            small_pos:  The (x, y) position of the small object.
            field_size: The number of cells along each side of the field.
        """
        self.big_pos = big_pos
        self.small_pos = small_pos
        self.field_size = field_size
        self.data = np.zeros((field_size, field_size))

        # store small object position
        self._set_point(small_pos[0], small_pos[1])

        # store big object points
        offsets = (-1, 0, 1)
        for xo in offsets:
            for yo in offsets:
                self._set_point(big_pos[0] + xo, big_pos[1] + yo)

    def get_data(self):
        """
        Returns:
            The field cells as a flat list in row-major order.
        """
        return self.data.flatten().tolist()

    def _set_point(self, x, y):
        """
        Marks the cell at (x, y) as occupied, wrapping coordinates that fall
        outside of the field back into it.
        """
        px = x % self.field_size
        py = y % self.field_size
        self.data[py, px] = 1  # in Numpy index is: [row, col]


class VDEnvironment:
    """
    Represents test environment to hold data set of visual fields
    """
    def __init__(self, small_object_positions, big_object_offset, field_size):
        """
        Creates new environment and populates its data set.
        Arguments:
            small_object_positions: The coordinates used (for both x and y)
                                    to place the small object.
            big_object_offset:      The offset of the big object from the small one.
            field_size:             The number of cells along each side of the field.
        """
        self.s_object_pos = small_object_positions
        self.data_set = []
        self.b_object_offset = big_object_offset
        self.field_size = field_size

        # the largest possible distance within the field (its diagonal)
        self.max_dist = self._distance((0, 0), (field_size - 1, field_size - 1))

        # create test data set
        self._create_data_set()

    def evaluate_net(self, net):
        """
        The function to evaluate performance of the provided network
        against the dataset
        Returns:
            the fitness score and average Euclidean distance between found and target objects
        Raises:
            ZeroDivisionError: if the data set is empty.
        """
        total_dist = 0

        # evaluate predicted positions
        for ds in self.data_set:
            # evaluate and get outputs
            _, x, y = self.evaluate_net_vf(net, ds)

            # accumulate the distance to the big object
            total_dist += self._distance((x, y), ds.big_pos)

        avg_dist = total_dist / float(len(self.data_set))

        # normalized detection error
        error = avg_dist / self.max_dist
        # fitness
        fitness = 1.0 - error

        return fitness, avg_dist

    def evaluate_net_vf(self, net, vf):
        """
        The function to evaluate provided ANN against specific VisualField
        Returns:
            the tuple (outputs, x, y) with raw ANN outputs and coordinates
            of the maximal activation
        """
        depth = 1  # we just have 2 layers

        net.Flush()
        # prepare input
        inputs = vf.get_data()

        net.Input(inputs)
        # activate
        for _ in range(depth):
            net.Activate()

        # get outputs
        outputs = net.Output()
        # find coordinates of big object
        x, y = self._big_object_coordinates(outputs)

        return outputs, x, y

    def _distance(self, source, target):
        """
        Function to find Euclidean distance between source and target points
        """
        dx = source[0] - target[0]
        dy = source[1] - target[1]
        return math.sqrt(dx * dx + dy * dy)

    def _big_object_coordinates(self, outputs):
        """
        Function to find field coordinates of the output with maximal
        activation (the first one if there are several).
        """
        # -inf rather than a fixed -100.0, so that arbitrarily low
        # activations still select a valid index
        max_activation = float("-inf")
        max_index = -1
        for i, out in enumerate(outputs):
            if out > max_activation:
                max_activation = out
                max_index = i

        # estimate the maximal activation's coordinates
        x = max_index % self.field_size
        y = int(max_index / self.field_size)

        return (x, y)

    def _create_visual_field(self, sx, sy, x_off, y_off):
        """
        Creates visual field with small object at (sx, sy) and big object
        shifted from it by (x_off, y_off), wrapped around the field edges
        in both directions.
        """
        bx = (sx + x_off) % self.field_size
        by = (sy + y_off) % self.field_size

        # create visual field
        return VisualField(big_pos=(bx, by), small_pos=(sx, sy), field_size=self.field_size)

    def _create_data_set(self):
        """
        Populates data set: for every small object position creates fields
        with the big object placed diagonally, to the right, and down.
        """
        for x in self.s_object_pos:
            for y in self.s_object_pos:
                # diagonal
                vf = self._create_visual_field(x, y, x_off=self.b_object_offset, y_off=self.b_object_offset)
                self.data_set.append(vf)
                # right
                vf = self._create_visual_field(x, y, x_off=self.b_object_offset, y_off=0)
                self.data_set.append(vf)
                # down
                vf = self._create_visual_field(x, y, x_off=0, y_off=self.b_object_offset)
                self.data_set.append(vf)
def plot_stats(statistics, ylog=False, view=False, filename='avg_distance.svg'):
    """
    Plots the population's best fitness and average distances.
    Arguments:
        statistics: The object providing `most_fit_scores`,
                    `get_distance_mean()` and `get_distance_stdev()`.
        ylog:       If True, a 'symlog' scale is set on the y axis of the
                    current axes.
        view:       If True, the figure is shown after it is saved.
        filename:   The path of the file to save the figure to.
    """

    # one x value per recorded generation
    generation = range(len(statistics.most_fit_scores))
    avg_distance = statistics.get_distance_mean()
    stdev_distance = statistics.get_distance_stdev()

    fig, ax1 = plt.subplots()
    # Plot average distance with a band of one standard deviation around it
    ax1.plot(generation, avg_distance, 'b--', label="average distance")
    ax1.plot(generation, avg_distance - stdev_distance, 'g-.', label="-1 sd")
    ax1.plot(generation, avg_distance + stdev_distance, 'g-.', label="+1 sd")
    ax1.set_xlabel("Generations")
    ax1.set_ylabel("Distance")
    ax1.grid()
    ax1.legend(loc="best")

    # Plot best fitness on a second y axis sharing the x axis with ax1.
    # No legend is requested for ax2, so its "best fitness" label is unused.
    ax2 = ax1.twinx()
    ax2.plot(generation, statistics.most_fit_scores, 'r-', label="best fitness")
    ax2.set_ylabel("Fitness")

    plt.title("Population's best fitness and average distance")
    fig.tight_layout()
    if ylog:
        # NOTE(review): plt.gca() is presumably ax2 here (the axes created
        # last), i.e. the scale applies to the fitness axis - confirm
        plt.gca().set_yscale('symlog')

    plt.savefig(filename)
    if view:
        plt.show()

    plt.close()
class Side(Enum):
    """
    The side(s) of the retina at which a visual object is considered valid.
    """
    LEFT = 1
    RIGHT = 2
    BOTH = 3


class VisualObject:
    """
    The class to encode the visual object representation
    """
    def __init__(self, configuration, side, size=2):
        """
        Creates new instance with provided configuration and object size
        Arguments:
            configuration:  The configuration of the visual object in form of the text:
                            o o
                            o .
                            where rows are separated by new line, cells by a single
                            space, and 'o' marks a pixel which is ON.
            side:           The side of the retina this object must occupy
            size:           The size of the visual object
        """
        self.size = size
        self.side = side
        self.configuration = configuration
        self.data = np.zeros((size, size))

        # Parse configuration: 'o' is pixel ON (1.0), anything else is OFF (0.0)
        for r, line in enumerate(self.configuration.splitlines()):
            for c, ch in enumerate(line.split(" ")):
                self.data[r, c] = 1.0 if ch == 'o' else 0.0

    def get_data(self):
        """
        Returns:
            The object's pixels as a flat list in row-major order.
        """
        return self.data.flatten().tolist()

    def __str__(self):
        """
        Returns the nicely formatted string representation of this object.
        """
        return "%s\n%s" % (self.side.name, self.configuration)


class RetinaEnvironment:
    """
    Represents the modular retina environment holding test data set and providing
    methods to evaluate detector ANN against it.
    """
    def __init__(self):
        """
        Creates new environment and populates its data set.
        """
        self.visual_objects = []
        # populate data set
        self.create_data_set()

    def evaluate_net(self, net, depth = 3, max_fitness = 1000.0, debug=False):
        """
        The function to evaluate performance of the provided network
        against the dataset
        Arguments:
            net:         The detector ANN to be evaluated.
            depth:       The number of activations per evaluated input.
            max_fitness: The fitness score of the error free network.
            debug:       The flag to print evaluation details.
        Returns:
            the tuple (fitness, avg_error, count, detection_error_count):
            the fitness score, the average error, the number of evaluated
            object pairs, and the number of pairs with non zero error
        """
        error_sum = 0.0
        count = 0.0
        detection_error_count = 0.0
        # Evaluate the detector ANN against all combinations of the left and
        # the right visual objects (16 x 16 = 256 for the default data set)
        # at correct and incorrect sides of retina
        for left in self.visual_objects:
            for right in self.visual_objects:
                error, _ = self._evaluate(net, left, right, depth, debug=debug)
                error_sum += error
                count += 1.0
                if error > 0:
                    detection_error_count += 1.0

        # calculate the fitness score
        fitness = max_fitness / (1.0 + error_sum)
        avg_error = error_sum / count

        if debug:
            print("Average error: %f, errors sum: %f, false detections: %s" % (avg_error, error_sum, detection_error_count))

        return fitness, avg_error, count, detection_error_count

    def _evaluate(self, net, left, right, depth, debug=False):
        """
        The function to evaluate ANN against specific visual objects at LEFT and RIGHT side
        Returns:
            the tuple (error, outputs) with squared distance between binarized
            outputs and the ground truth, and the binarized outputs
        """
        net.Flush()
        # prepare input
        inputs = left.get_data() + right.get_data()
        inputs.append(0.5)  # the bias

        net.Input(inputs)
        # activate
        for _ in range(depth):
            net.Activate()

        # get outputs and binarize the first two of them in place
        outputs = net.Output()
        outputs[0] = 1.0 if outputs[0] >= 0.5 else 0.0
        outputs[1] = 1.0 if outputs[1] >= 0.5 else 0.0

        # set ground truth
        left_target = 1.0 if left.side in (Side.LEFT, Side.BOTH) else 0.0
        right_target = 1.0 if right.side in (Side.RIGHT, Side.BOTH) else 0.0
        targets = [left_target, right_target]

        # find error as a distance between outputs and ground truth
        error = (outputs[0] - targets[0]) * (outputs[0] - targets[0]) + \
                (outputs[1] - targets[1]) * (outputs[1] - targets[1])
        flag = "+" if error == 0 else "-"

        if debug:
            print("[%.2f, %.2f] -> [%.2f, %.2f] %s" % (targets[0], targets[1], outputs[0], outputs[1], flag))

        return error, outputs

    def create_data_set(self):
        """
        Appends the 16 predefined visual objects (8 for the left side followed
        by 8 for the right side) to the data set.
        """
        patterns = [
            # left side objects
            (". .\n. .", Side.BOTH),
            (". .\n. o", Side.BOTH),
            (". o\n. o", Side.LEFT),
            (". o\n. .", Side.BOTH),
            (". o\no o", Side.LEFT),
            (". .\no .", Side.BOTH),
            ("o o\n. o", Side.LEFT),
            ("o .\n. .", Side.BOTH),
            # right side objects
            (". .\n. .", Side.BOTH),
            ("o .\n. .", Side.BOTH),
            ("o .\no .", Side.RIGHT),
            (". .\no .", Side.BOTH),
            ("o o\no .", Side.RIGHT),
            (". o\n. .", Side.BOTH),
            ("o .\no o", Side.RIGHT),
            (". .\n. o", Side.BOTH),
        ]
        for configuration, side in patterns:
            self.visual_objects.append(VisualObject(configuration, side=side))

    def __str__(self):
        """
        Returns the nicely formatted string representation of this environment.
        """
        parts = ["Retina Environment"]
        parts.extend("%s" % obj for obj in self.visual_objects)
        return "\n".join(parts)
def draw_net(nn, view=False, filename=None, directory=None, node_names=None, node_colors=None, fmt='svg'):
    """
    Receives a neural network and draws it with arbitrary topology.
    Arguments:
        nn:          The network to draw; its `neurons` and `connections`
                     collections are rendered as graph nodes and edges.
        view:        If True, the rendered file is opened with the default viewer.
        filename:    The name of the file to render into.
        directory:   The directory to save the rendered file to.
        node_names:  The optional dict mapping neuron index to the node name.
        node_colors: The optional dict mapping neuron index to the fill color.
        fmt:         The output format of rendering.
    Returns:
        The graphviz.Digraph with network graph, or None if graphviz
        is not available.
    """
    if graphviz is None:
        warnings.warn("This display is not available due to a missing optional dependency (graphviz)")
        return

    if node_names is None:
        node_names = {}

    assert type(node_names) is dict

    if node_colors is None:
        node_colors = {}

    assert type(node_colors) is dict

    # Attributes shared by all network nodes.
    default_node_attrs = {
        'shape': 'circle',
        'fontsize': '9',
        'height': '0.2',
        'width': '0.2'}

    dot = graphviz.Digraph(format=fmt, node_attr=default_node_attrs)

    # neurons
    for index, n in enumerate(nn.neurons):
        if n.type == NEAT.NeuronType.INPUT:
            attrs = {'style': 'filled', 'shape': 'box', 'fillcolor': node_colors.get(index, 'lightgray')}
        elif n.type == NEAT.NeuronType.BIAS:
            attrs = {'style': 'filled', 'shape': 'diamond', 'fillcolor': node_colors.get(index, 'yellow')}
        elif n.type == NEAT.NeuronType.HIDDEN:
            attrs = {'style': 'filled', 'fillcolor': node_colors.get(index, 'white')}
        elif n.type == NEAT.NeuronType.OUTPUT:
            attrs = {'style': 'filled', 'fillcolor': node_colors.get(index, 'lightblue')}
        else:
            # Any other neuron type is drawn with the shared defaults only.
            # The previous fallback was the tuple `(None, None)`, which is
            # not a valid attributes mapping.
            attrs = {}

        # add node with name and attributes
        name = node_names.get(index, str(index))
        dot.node(name, _attributes=attrs)

    # connections
    for cg in nn.connections:
        a = node_names.get(cg.source_neuron_idx, str(cg.source_neuron_idx))
        b = node_names.get(cg.target_neuron_idx, str(cg.target_neuron_idx))
        style = 'solid'
        # color encodes the sign of the weight, pen width its magnitude
        color = 'green' if cg.weight > 0 else 'red'
        width = str(0.1 + abs(cg.weight / 5.0))
        dot.edge(a, b, _attributes={'style': style, 'color': color, 'penwidth': width})

    dot.render(filename, directory, view=view)
    return dot
class Agent:
    """
    The agent which navigates through the maze.
    """
    def __init__(self, location, heading=0, speed=0, angular_vel=0, radius=8.0, range_finder_range=100.0):
        """
        Initializes the agent with given parameters.
        Arguments:
            location:           The initial position of the agent within maze.
            heading:            The heading direction (degrees).
            speed:              The linear velocity.
            angular_vel:        The angular velocity.
            radius:             The radius of the agent's body.
            range_finder_range: The maximal detection range of the range finder sensors.
        """
        self.heading = heading
        self.speed = speed
        self.angular_vel = angular_vel
        self.radius = radius
        self.range_finder_range = range_finder_range
        self.location = location

        # the directions of the range finder sensors
        self.range_finder_angles = [-90.0, -45.0, 0.0, 45.0, 90.0, -180.0]

        # the angular sectors of the pie-slice radar sensors
        self.radar_angles = [(315.0, 405.0), (45.0, 135.0), (135.0, 225.0), (225.0, 315.0)]

        # one (initially empty) activation slot per range finder
        self.range_finders = [None for _ in self.range_finder_angles]
        # one (initially empty) activation slot per radar sector
        self.radar = [None for _ in self.radar_angles]


class AgenRecord:
    """
    Holds the outcome of maze navigation simulation of a particular solver
    agent, i.e., the agent's statistics at the end of navigation run.
    """
    def __init__(self, generation, agent_id):
        """
        Initializes the record of the given agent at the given generation
        of the evolutionary process.
        """
        self.generation = generation
        self.agent_id = agent_id
        # agent's properties; -1 is the initial placeholder value
        self.x = -1
        self.y = -1
        self.fitness = -1
        self.distance = -1
        self.novelty = -1
        # whether this agent was able to find maze exit
        self.hit_exit = False
        # the ID of species this agent belongs to
        self.species_id = -1
        # the age of agent's species at the time of recording
        self.species_age = -1


class AgentRecordStore:
    """
    The store of agent records which can be saved to and restored from file.
    """
    def __init__(self):
        """
        Initializes the empty store.
        """
        self.records = []

    def add_record(self, record):
        """
        Appends given record to this store.
        Arguments:
            record: The record to be added.
        """
        self.records.append(record)

    def load(self, file):
        """
        Replaces records of this store with the list read from the given file.
        Arguments:
            file: The path to the file to read agents records from.
        """
        # NOTE: the file is unpickled as is - load only the files you trust
        with open(file, 'rb') as dump_file:
            restored = pickle.load(dump_file)
        self.records = restored

    def dump(self, file):
        """
        Writes records of this store into the given file.
        Arguments:
            file: The path to the file to hold data dump.
        """
        with open(file, 'wb') as dump_file:
            pickle.dump(self.records, dump_file)
class Point:
    """
    The basic class describing point in the two dimensional Cartesian coordinate
    system.
    """
    def __init__(self, x, y):
        """
        Creates new point at specified coordinates.
        Arguments:
            x, y: The coordinates of the point.
        """
        self.x = x
        self.y = y

    def angle(self):
        """
        The function to determine angle in degrees of vector drawn from the
        center of coordinates to this point. The angle values is in range
        from 0 to 360 degrees in anticlockwise direction.
        Returns:
            The angle in degrees.
        """
        ang = math.atan2(self.y, self.x) / math.pi * 180.0
        if ang < 0.0:
            # atan2 reports the lower quadrants (3 or 4) as negative angles
            return ang + 360
        return ang

    def rotate(self, angle, point):
        """
        The function to rotate this point in place around another point with
        given angle in degrees. The standard 2D rotation matrix is applied:
            x' = cos(a) * x - sin(a) * y
            y' = sin(a) * x + cos(a) * y
        Arguments:
            angle: The rotation angle (degrees)
            point: The point - center of rotation
        """
        rad = math.radians(angle)
        cos_a, sin_a = math.cos(rad), math.sin(rad)
        # offset of this point relative to the center of rotation; kept in
        # locals so that nothing is mutated until both coordinates are known
        dx = self.x - point.x
        dy = self.y - point.y
        # rotate the offset and translate back. The cosine term of y must be
        # ADDED: with a minus sign the transform is not a rotation at all
        # (e.g. rotating by 0 degrees would mirror the point).
        self.x = cos_a * dx - sin_a * dy + point.x
        self.y = sin_a * dx + cos_a * dy + point.y

    def distance(self, point):
        """
        The function to calculate Euclidean distance between this and given point.
        Arguments:
            point: The another point
        Returns:
            The Euclidean distance between this and given point.
        """
        dx = self.x - point.x
        dy = self.y - point.y

        return math.sqrt(dx*dx + dy*dy)

    def __str__(self):
        """
        Returns the nicely formatted string representation of this point.
        """
        return "Point (%.1f, %.1f)" % (self.x, self.y)
def distance(self, p):
    """
    The function to estimate distance to the given point from this line
    segment, i.e. the distance from the point to the closest point of
    the segment.
    Arguments:
        p: The point to find distance to.
    Returns:
        The distance between given point and this line.
    """
    ax, ay = self.a.x, self.a.y
    # direction of the segment and offset of the point, both relative to a
    seg_x = self.b.x - ax
    seg_y = self.b.y - ay
    rel_x = p.x - ax
    rel_y = p.y - ay

    # squared segment length computed directly (no sqrt-then-square round trip)
    len_sq = seg_x * seg_x + seg_y * seg_y
    if len_sq == 0.0:
        # degenerate segment (a == b): it is a single point, so the answer
        # is the distance to that point rather than a constant zero
        return math.hypot(rel_x, rel_y)

    # u is the position of the projection of p onto the line through a and b;
    # clamping it to [0, 1] selects the nearest end point when the projection
    # falls outside of the segment
    u = (rel_x * seg_x + rel_y * seg_y) / len_sq
    u = min(1.0, max(0.0, u))

    return math.hypot(rel_x - u * seg_x, rel_y - u * seg_y)
class NoveltyItem:
    """
    The class to encapsulate information about particular item that
    holds information about novelty score associated with specific
    genome along with auxiliary information. It is used in combination
    with NoveltyArchive.

    The items are ordered by their novelty score, while equality (and hashing)
    stays identity based. All four ordering operators are defined explicitly:
    deriving them with functools.total_ordering from __lt__ and the identity
    based __eq__ made `a > b` and `b > a` both True (and `a <= b` False) for
    two distinct items with equal novelty.
    """
    def __init__(self, generation=-1, genomeId=-1, novelty=-1):
        """
        Creates new item with specified parameters.
        Arguments:
            generation: The evolution generation when this item was created
            genomeId:   The ID of genome associated with it
            novelty:    The novelty score of genome
        """
        self.generation = generation
        self.genomeId = genomeId
        self.novelty = novelty
        # Indicates whether this item was already added to the archive
        self.in_archive = False
        # The list holding data points associated with this item that will be used
        # to calculate distance between this item and any other item. This distance
        # will be used to estimate the novelty score associated with the item.
        self.data = []

    def __str__(self):
        """
        The function to create string representation
        """
        return "%s: id: %d, at generation: %d, novelty: %f\tdata: %s" % \
            (self.__class__.__name__, self.genomeId, self.generation, self.novelty, self.data)

    def _is_valid_operand(self, other):
        """
        Tests whether other object can be compared with this item, i.e. whether
        it exposes the novelty attribute.
        """
        return hasattr(other, "novelty")

    def __lt__(self, other):
        """
        Compare if this item is less novel than supplied other item.
        """
        if not self._is_valid_operand(other):
            return NotImplemented
        # less novel is less
        return self.novelty < other.novelty

    def __le__(self, other):
        """
        Compare if this item is less or equally novel than supplied other item.
        """
        if not self._is_valid_operand(other):
            return NotImplemented
        return self.novelty <= other.novelty

    def __gt__(self, other):
        """
        Compare if this item is more novel than supplied other item.
        """
        if not self._is_valid_operand(other):
            return NotImplemented
        return self.novelty > other.novelty

    def __ge__(self, other):
        """
        Compare if this item is more or equally novel than supplied other item.
        """
        if not self._is_valid_operand(other):
            return NotImplemented
        return self.novelty >= other.novelty
def evaluate_novelty_score(self, item, n_items_list):
    """
    The function to evaluate novelty score of given novelty item among archive items
    and population items. The score is the average distance to the nearest
    neighbors of the item (at most KNN of them), it is stored into item.novelty
    and the item is then offered to the archive.
    Arguments:
        item:           The novelty item to evaluate
        n_items_list:   The list with novelty items for current population
    Returns:
        The novelty score assigned to the item.
    """
    # collect distances among archived novelty items
    distances = []
    for n in self.novel_items:
        if n.genomeId != item.genomeId:
            distances.append(self.novelty_metric(n, item))
        else:
            print("Novelty Item is already in archive: %d" % n.genomeId)

    # collect distances to the novelty items in the population
    # (the item itself is skipped)
    distances.extend(
        self.novelty_metric(p_item, item)
        for p_item in n_items_list
        if p_item.genomeId != item.genomeId)

    # average over the neighbors actually found: when less than KNN other
    # items exist, dividing by KNN would underestimate the novelty score
    nearest = sorted(distances)[:KNN]
    item.novelty = sum(nearest) / len(nearest) if nearest else 0.0

    # store novelty item
    self._add_novelty_item(item)

    return item.novelty
class Statistics:
    """
    The collector of per-generation statistics: the list of error values and
    the maximal fitness score reported for each evaluated generation.
    """
    def __init__(self):
        """
        Creates new collector without any recorded generations.
        """
        self.generation_statistics = []
        self.most_fit_scores = []

    def post_evaluate(self, errors, max_fitness):
        """
        The function to record the results of one more generation.
        Arguments:
            errors:      The error values collected for the generation.
            max_fitness: The maximal fitness score of the generation.
        """
        self.most_fit_scores.append(max_fitness)
        self.generation_statistics.append(errors)

    def get_error_mean(self):
        """
        Returns the NumPy array holding the mean of the error values for each
        recorded generation (in the order of recording).
        """
        means = []
        for errors in self.generation_statistics:
            means.append(np.array(errors).mean())
        return np.array(means)

    def get_error_stdev(self):
        """
        Returns the NumPy array holding the standard deviation of the error
        values for each recorded generation (in the order of recording).
        """
        deviations = []
        for errors in self.generation_statistics:
            deviations.append(np.array(errors).std())
        return np.array(deviations)
29 | Arguments: 30 | out_dir: The directory to be cleared 31 | """ 32 | if os.path.isdir(out_dir): 33 | # remove files from previous run 34 | shutil.rmtree(out_dir) 35 | 36 | # create the output directory 37 | os.makedirs(out_dir, exist_ok=False) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Hands-on-Neuroevolution-with-Python 5 | 6 | Learning DevOps 7 | 8 | This is the code repository for [Hands-On Neuroevolution with Python ](https://www.packtpub.com/data/hands-on-neuroevolution-with-python?utm_source=github&utm_medium=repository&utm_campaign=9781838822002), published by Packt. 9 | 10 | **Build high-performing artificial neural network architectures using neuroevolution-based algorithms** 11 | 12 | ## What is this book about? 13 | Neuroevolution is a form of artificial intelligence learning that uses evolutionary algorithms to simplify the process of solving complex tasks in domains such as games, robotics, and the simulation of natural processes. This book will give you comprehensive insights into essential neuroevolution concepts and equip you with the skills you need to apply neuroevolution-based algorithms to solve practical, real-world problems. 14 | You'll start with learning the key neuroevolution concepts and methods by writing code with Python. You'll also get hands-on experience with popular Python libraries and cover examples of classical reinforcement learning, path planning for autonomous agents, and developing agents to autonomously play Atari games. Next, you'll learn to solve common and not-so-common challenges in natural computing using neuroevolution-based algorithms. Later, you'll understand how to apply neuroevolution strategies to existing neural network designs to improve training and inference performance. Finally, you'll gain clear insights into the topology of neural networks and how neuroevolution allows you to develop complex networks, starting with simple ones. 
15 | 16 | This book covers the following exciting features: 17 | * Discover the most popular neuroevolution algorithms – NEAT, HyperNEAT, and ES-HyperNEAT 18 | * Explore how to implement neuroevolution-based algorithms in Python 19 | * Get up to speed with advanced visualization tools to examine evolved neural network graphs 20 | * Understand how to examine the results of experiments and analyze algorithm performance 21 | * Delve into neuroevolution techniques to improve the performance of existing methods 22 | * Apply deep neuroevolution to develop agents for playing Atari games 23 | 24 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/183882491X) today! 25 | 26 | https://www.packtpub.com/ 28 | 29 | ## Instructions and Navigations 30 | All of the code is organized into folders. For example, Chapter3. 31 | 32 | The code will look like the following: 33 | ``` 34 | for xi in xor_inputs: 35 | output = winner_ann.activate(xi) 36 | print(xi, output) # print results 37 | ``` 38 | 39 | **Following is what you need for this book:** 40 | A practical knowledge of the Python programming language is essential to work with the examples presented in this book. For better source code understanding, it is preferable to use an IDE that supports Python syntax highlighting and code reference location. If you don't have one installed, you can use Microsoft Visual Studio Code. It is free and cross-platform, and you can download it here: https://code.visualstudio.com. 41 | 42 | With the following software and hardware list you can run all code files present in the book (Chapters 3-10). 43 | ### Software and Hardware List 44 | | Chapter | Software required | OS required | 45 | | -------- | ------------------------------------ | ----------------------------------- | 46 | | 3-10 | Anaconda Distribution 2019.10 | Windows, Linux, macOS | 47 | 48 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. 
[Click here to download it](https://static.packt-cdn.com/downloads/9781838824914_ColorImages.pdf). 49 | 50 | ### Related products 51 | * Hands-On Deep Learning Algorithms with Python [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/hands-deep-learning-algorithms-python?utm_source=github&utm_medium=repository&utm_campaign=9781789344158) [[Amazon]](https://www.amazon.com/dp/1789344158) 52 | 53 | * Python Deep Learning - Second Edition [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-second-edition?utm_source=github&utm_medium=repository&utm_campaign=9781789348460) [[Amazon]](https://www.amazon.com/dp/1789348463) 54 | 55 | ## Get to Know the Author 56 | **Iaroslav Omelianenko** 57 | occupied the position of CTO and research director for more than a decade. He is an active member of the research community and has published several research papers at arXiv, ResearchGate, Preprints, and more. He started working with applied machine learning by developing autonomous agents for mobile games more than a decade ago. For the last 5 years, he has actively participated in research related to applying deep machine learning methods for authentication, personal traits recognition, cooperative robotics, synthetic intelligence, and more. He is an active software developer and creates open source neuroevolution algorithm implementations in the Go language. 58 | 59 | ### Suggestions and Feedback 60 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions. 61 | ### Download a free PDF 62 | 63 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
64 |

https://packt.link/free-ebook/9781838824914

65 | --------------------------------------------------------------------------------