├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── build.py ├── num_workers.txt ├── population.py ├── process.py ├── requirements.txt ├── run_evolution.py ├── run_unit_test.sh └── worker.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | env: 4 | - KERAS_BACKEND=tensorflow TENSORFLOW_V=2.1.0 5 | python: 6 | - "3.7" 7 | 8 | install: 9 | - python -m pip install --upgrade pip==19.3.1 setuptools wheel 10 | - pip install -q -r requirements.txt 11 | - pip list 12 | 13 | script: 14 | - ./run_unit_test.sh 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at shashankkotyan@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Shashank Kotyan
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Robust Architecture Search
2 |
3 | This GitHub repository contains the official code for the following papers:
4 |
5 | > [Towards Evolving Robust Neural Architectures to Defend from Adversarial Attacks](https://dl.acm.org/doi/abs/10.1145/3377929.3389962)\
6 | > Shashank Kotyan and Danilo Vasconcellos Vargas\
7 | > GECCO (2020).
8 | >
9 | > [Evolving Robust Neural Architectures to Defend from Adversarial Attacks](https://ceur-ws.org/Vol-2640/paper_1.pdf)\
10 | > Shashank Kotyan and Danilo Vasconcellos Vargas\
11 | > AISafety Workshop (2020).
12 | >
13 | > [Is Neural Architecture Search A Way Forward to Develop Robust Neural Networks?](https://www.jstage.jst.go.jp/article/pjsai/JSAI2020/0/JSAI2020_2K1ES203/_article/-char/ja/)\
14 | > Shashank Kotyan and Danilo Vasconcellos Vargas\
15 | > JSAI (2020).
16 | >
17 |
18 | # IMPORTANT
19 |
20 | **In the current version of the code, the robustness evaluation is not yet implemented. We are currently testing the compatibility and reproducibility of the adversarial examples and results before adding it to the repository, to ensure that the quality of the code remains intact.**
21 |
22 | **In the meantime, one can generate their own adversarial samples using the repository of [Dual Quality Assessment](https://github.com/shashankkotyan/DualQualityAssessment) and make the necessary changes to the fitness evaluation of the evolved models in the `run_model` function of the `worker.py` file. Fitness of the evolved models is calculated at `line 113`.**
23 |
24 |
25 | ## Citation
26 |
27 | If this work helps your research and/or project in any way, please cite:
28 |
29 | ```bibtex
30 | @inproceedings{kotyan2020towards,
31 | title={Towards evolving robust neural architectures to defend from adversarial attacks},
32 | author={Kotyan, Shashank and Vargas, Danilo Vasconcellos},
33 | booktitle={Proceedings of the 2020 Genetic and Evolutionary Computation Conference Companion},
34 | pages={135--136},
35 | year={2020}
36 | }
37 |
38 | @inproceedings{kotyan2020evolving,
39 | title={Evolving robust neural architectures to defend from adversarial attacks},
40 | author={Kotyan, Shashank and Vargas, Danilo Vasconcellos},
41 | maintitle = {International Joint Conference on Artificial Intelligence - Pacific Rim International Conference on Artificial Intelligence (IJCAI-PRICAI)},
42 | booktitle = {Workshop on Artificial Intelligence Safety (AISafety)},
43 | year={2020}
44 | }
45 |
46 | @inproceedings{kotyan2020neural,
47 | title={Is Neural Architecture Search A Way Forward to Develop Robust Neural Networks?},
48 | author={Kotyan, Shashank and Vargas, Danilo Vasconcellos},
49 | booktitle={人工知能学会全国大会論文集 第 34 回 (2020)},
50 | pages={2K1ES203--2K1ES203},
51 | year={2020},
52 | organization={一般社団法人 人工知能学会}
53 | }
54 |
55 | ```
56 |
57 | ## Testing Environment
58 |
59 | The code is tested on Ubuntu 18.04.3 with Python 3.7.4.
60 |
61 | ## Getting Started
62 |
63 | ### Requirements
64 |
65 | To run the code locally, it is recommended to have:
66 | - a dedicated GPU suitable for running the code, and
67 | - Anaconda installed.
68 |
69 | The following Python packages are required to run the code.
70 | - `GPUtil==1.4.0`
71 | - `matplotlib==3.1.1`
72 | - `networkx==2.3`
73 | - `numpy==1.17.2`
74 | - `scipy==1.4.1`
75 | - `tensorflow==2.1.0`
76 |
77 | ---
78 |
79 | ### Steps
80 |
81 | 1. Clone the repository.
82 |
83 | ```bash
84 | git clone https://github.com/shashankkotyan/RobustArchitectureSearch.git
85 | cd ./RobustArchitectureSearch
86 | ```
87 |
88 | 2. Create a virtual environment.
89 |
90 | ```bash
91 | conda create --name ras python=3.7.4
92 | conda activate ras
93 | ```
94 |
95 | 3. Install the Python packages in `requirements.txt` if you don't have them already.
96 |
97 | ```bash
98 | pip install -r ./requirements.txt
99 | ```
100 |
101 | 4. Run the Robust Architecture Search code with the following command.
102 |
103 | ```bash
104 | python -u run_evolution.py [ARGS] > run.txt
105 | ```
106 |
107 | 5. Calculate the statistics for the evolution.
108 |
109 | ```bash
110 | python -u run_stats.py > run_stats.txt
111 | ```
112 |
113 | ## Arguments to run run_evolution.py
114 |
115 | TBD
116 |
117 | ## Notes
118 |
119 | - It is recommended to run the code on a multi-GPU system for faster evolution. However, setting the number of workers in `num_workers.txt` to 1 allows the evolution to run on a single-GPU system.
120 | - Setting `num_workers.txt` to the number of GPUs in your system will run the code optimally, utilising the GPUs to the fullest.
121 |
122 | ## Milestones
123 |
124 | - [ ] **Include Robustness Evaluation**
125 | - [ ] Toy Example for Evolutionary Strategy
126 | - [ ] Addition of Comments in the Code
127 | - [ ] Cross Platform Compatibility
128 | - [ ] Description of Method in Readme File
129 |
130 | ## License
131 |
132 | Robust Architecture Search is licensed under the MIT license.
133 | Contributors agree to license their contributions under the MIT license.
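134 |
135 | ## Robustness Fitness (Illustrative Sketch)
136 |
137 | As a pointer for the robustness evaluation mentioned in the **IMPORTANT** note above, the sketch below shows one way a fitness value combining clean and adversarial accuracy could be computed inside the `run_model` function of `worker.py`. This is an illustrative sketch only, not the repository's implementation: `attack_fn`, `evaluate_adversarial_accuracy`, and `robust_fitness` are hypothetical names (`attack_fn` stands in for an attack generated, e.g., with the Dual Quality Assessment repository), and the equal 0.5/0.5 weighting of the two terms is an assumption.
138 |
139 | ```python
140 | # Hypothetical sketch -- not the repository's implementation.
141 | import numpy as np
142 |
143 | def evaluate_adversarial_accuracy(model, attack_fn, x_test, y_test, n_samples=100):
144 |     # Accuracy of `model` on adversarially perturbed copies of `n_samples` test points.
145 |     idx = np.random.choice(len(x_test), n_samples, replace=False)
146 |     x_adv = np.stack([attack_fn(model, x) for x in x_test[idx]])  # attack_fn: hypothetical attack wrapper
147 |     preds = model.predict(x_adv).argmax(axis=1)
148 |     return float((preds == y_test[idx].argmax(axis=1)).mean())
149 |
150 | def robust_fitness(model, attack_fn, x_test, y_test):
151 |     # Blend clean accuracy and adversarial accuracy into a single fitness value.
152 |     _, clean_accuracy = model.evaluate(x_test, y_test, verbose=0)
153 |     adversarial_accuracy = evaluate_adversarial_accuracy(model, attack_fn, x_test, y_test)
154 |     return 0.5 * clean_accuracy + 0.5 * adversarial_accuracy
155 | ```
156 |
157 | A maximising evolution loop could then use `robust_fitness` in place of plain test accuracy when comparing cluster heads.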
158 |
159 | ## Contributors and Acknowledgements
160 |
161 | TBD
162 |
163 | ## Reaching out
164 |
165 | You can reach me at shashankkotyan@gmail.com or [\@shashankkotyan](https://twitter.com/shashankkotyan).
166 | If you tweet about Robust Architecture Search, please use the tag `#RAS` and/or mention me ([\@shashankkotyan](https://twitter.com/shashankkotyan)) in the tweet.
167 | For bug reports, questions, and suggestions, use [Github issues](https://github.com/shashankkotyan/RobustArchitectureSearch/issues).
168 |
--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Author: Shashank Kotyan
5 | Email: shashankkotyan@gmail.com
6 | """
7 |
8 | import os, traceback, random, numpy as np
9 |
10 | import networkx as nx
11 | from matplotlib import pyplot as plt
12 |
13 | from process import Process
14 |
15 | x = np.zeros((1,32,32,3))
16 | y = np.zeros((1,10))
17 | y[0, 0] = 1  # dummy one-hot label; used only for the single-step sanity fit in build_block
18 |
19 |
20 | def plot_graph(individual, dna):
21 |
22 | G = dna['graph']
23 |
24 | dense_nodes, conv_nodes, mandatory_nodes = [], [], []
25 | dense_nodes_color, conv_nodes_color = [], []
26 |
27 | cmap = plt.cm.get_cmap('Blues', 4)
28 | dmap = plt.cm.get_cmap('Blues', 4)  # both maps use 'Blues'; dense and conv nodes are distinguished by node shape below
29 |
30 | for node in G.nodes:
31 |
32 | layer_graph = G.nodes[node]
33 |
34 | if layer_graph['type_layer'] == 'fully_connected':
35 |
36 | dense_nodes += [node]
37 |
38 | if layer_graph['use_bn'] and layer_graph['use_dropout'][0]: dense_nodes_color += [dmap(1)]
39 | elif layer_graph['use_bn'] and not layer_graph['use_dropout'][0]: dense_nodes_color += [dmap(2)]
40 | elif not layer_graph['use_bn'] and layer_graph['use_dropout'][0]: dense_nodes_color += [dmap(3)]
41 | else: dense_nodes_color += [dmap(4)]
42 |
43 | elif layer_graph['type_layer'] == 'convolution_2d':
44 |
45 | conv_nodes += [node]
46 |
47 | if layer_graph['use_bn'] and layer_graph['use_dropout'][0]: conv_nodes_color += [cmap(1)]
48 | elif layer_graph['use_bn'] and not layer_graph['use_dropout'][0]: conv_nodes_color += [cmap(2)]
49 | elif not layer_graph['use_bn'] and layer_graph['use_dropout'][0]: conv_nodes_color += [cmap(3)]
50 | else: conv_nodes_color += [cmap(4)]
51 |
52 | else:
53 |
54 | mandatory_nodes += [node]
55 |
56 | layout = ['random', 'circular', 'shell', 'spring', 'spectral', 'kamada_kawai', 'planar', 'spiral']
57 |
58 | for i in range(len(layout)):
59 |
60 | try:
61 |
62 | if i == 0: pos = nx.random_layout(G)
63 | elif i == 1: pos = nx.circular_layout(G)
64 | elif i == 2: pos = nx.shell_layout(G)
65 | elif i == 3: pos = nx.spring_layout(G)
66 | elif i == 4: pos = nx.spectral_layout(G)
67 | elif i == 5: pos = nx.kamada_kawai_layout(G)
68 | elif i == 6: pos = nx.planar_layout(G)
69 | elif i == 7: pos = nx.spiral_layout(G)
70 |
71 | nx.draw_networkx_nodes(G, pos, nodelist=dense_nodes, node_size=300, node_color=dense_nodes_color, node_shape='o', cmap=dmap)
72 | nx.draw_networkx_nodes(G, pos, nodelist=conv_nodes, node_size=300, node_color=conv_nodes_color, node_shape='s', cmap=cmap)
73 | nx.draw_networkx_nodes(G, pos, nodelist=mandatory_nodes, node_size=300, node_color='black', node_shape='h')
74 | nx.draw_networkx_edges(G, pos)
75 |
76 | plt.savefig(f"{individual}/network_{layout[i]}.png", bbox_inches='tight', dpi=300)
77 | plt.clf()
78 |
79 | except Exception as e:
80 |
81 | pass
82 |
83 |
84 | def build_block(G, num_classes=1, gpu_index=None):
85 |
86 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
87 |
88 | if gpu_index is not
None: os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_index) 89 | else: os.environ["CUDA_VISIBLE_DEVICES"] = "" 90 | 91 | import tensorflow as tf 92 | 93 | if gpu_index is not None: 94 | 95 | gpus = tf.config.experimental.list_physical_devices('GPU') 96 | 97 | if gpus: 98 | 99 | try: 100 | tf.config.experimental.set_visible_devices(gpus[0], 'GPU') 101 | tf.config.experimental.set_memory_growth(gpus[0], True) 102 | logical_gpus = tf.config.experimental.list_logical_devices('GPU') 103 | 104 | except RuntimeError as e: print(e) 105 | 106 | from tensorflow.keras import callbacks, datasets, utils, layers, models, optimizers, backend as K 107 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 108 | 109 | keras_layers = {} 110 | 111 | input_node, output_node = 0, 1 112 | keras_layers[input_node] = layers.Input(shape=(32,32,3)) 113 | keras_layers[output_node] = layers.GlobalAveragePooling2D() 114 | 115 | for node in G.nodes: 116 | 117 | layer_graph = G.nodes[node] 118 | 119 | if layer_graph['type_layer'] in ['fully_connected', 'convolution_2d']: 120 | 121 | layer_params = layer_graph['layer_params'] 122 | 123 | if layer_graph['type_layer'] == 'convolution_2d': layer = layers.Conv2D(filters=layer_params[0], kernel_size=(layer_params[1], layer_params[2]), strides=(layer_params[3], layer_params[4]), padding='same') 124 | elif layer_graph['type_layer'] == 'fully_connected': layer = layers.Dense(units=layer_params[0]) 125 | 126 | keras_layers[node] = layer 127 | 128 | explored, queue = [input_node], [input_node] 129 | 130 | while queue: 131 | 132 | node = queue.pop(0) 133 | 134 | for successor in G.successors(node): 135 | add = True 136 | for predecessor in G.predecessors(successor): 137 | if predecessor not in explored: add = False 138 | if add: queue.append(successor) 139 | 140 | if node not in explored: 141 | 142 | predecessors = [predecessor for predecessor in G.predecessors(node)] 143 | 144 | if len(predecessors) == 1: 145 | 146 | if predecessors[0] == 0: ilayer = layers.Lambda(lambda x: x)(keras_layers[predecessors[0]]) 147 | else: ilayer = keras_layers[predecessors[0]] 148 | 149 | elif len(predecessors) > 1: 150 | 151 | ilayer = layers.Concatenate()([layers.Flatten()(keras_layers[predecessor]) for predecessor in predecessors]) 152 | 153 | shape = ilayer.shape[1] 154 | 155 | if shape % 65536 == 0: ilayer = layers.Reshape((256, 256, shape // 65536))(ilayer) 156 | elif shape % 16384 == 0: ilayer = layers.Reshape((128, 128, shape // 16384))(ilayer) 157 | elif shape % 4096 == 0: ilayer = layers.Reshape((64, 64, shape // 4096))(ilayer) 158 | elif shape % 1024 == 0: ilayer = layers.Reshape((32, 32, shape // 1024))(ilayer) 159 | elif shape % 256 == 0: ilayer = layers.Reshape((16, 16, shape // 256))(ilayer) 160 | elif shape % 64 == 0: ilayer = layers.Reshape((8, 8, shape // 64))(ilayer) 161 | elif shape % 16 == 0: ilayer = layers.Reshape((4, 4, shape // 16))(ilayer) 162 | elif shape % 4 == 0: ilayer = layers.Reshape((2, 2, shape // 4))(ilayer) 163 | else: ilayer = layers.Reshape((1, 1, shape))(ilayer) 164 | 165 | keras_layers[node] = keras_layers[node](ilayer) 166 | 167 | layer_graph = G.nodes[node] 168 | 169 | if layer_graph['type_layer'] != 'output': 170 | 171 | keras_layers[node] = layers.BatchNormalization()(keras_layers[node]) 172 | keras_layers[node] = layers.Activation('relu')(keras_layers[node]) 173 | 174 | explored.append(node) 175 | 176 | for successor in G.successors(input_node): 177 | add = True 178 | for predecessor in G.predecessors(successor): 179 | if predecessor not in 
explored: add = False
180 | if add: queue.append(successor)
181 | # successors whose predecessors are not all explored yet are enqueued later, when those predecessors are processed
182 |
183 | assert set(explored) == set(list(G.nodes))
184 |
185 | layer = layers.Dense(units=10, activation='softmax', name='Output')(keras_layers[output_node])
186 |
187 | model = models.Model(inputs=keras_layers[input_node], outputs=layer)
188 | model.compile(optimizer=optimizers.Nadam(), loss='categorical_crossentropy', metrics=['accuracy'])
189 |
190 | try: model.fit(x, y, epochs=1, verbose=0)
191 | except Exception as e: raise Exception(f"Model cannot be trained: {e} \n{traceback.format_exc()}\n")
192 |
193 | return model
194 |
195 |
196 | def check_block(graph):
197 |
198 | p = Process(target=build_block, args=(graph, 1, None))
199 | p.start()
200 | p.join()
201 | if p.exception is not None: raise Exception(f"{p.exception[0]}, {p.exception[1]}")
202 |
203 |
204 | def build_keras_block(graph, individual, fitness_string):
205 |
206 | from tensorflow.keras.utils import plot_model
207 | plot_model(build_block(graph, 1, None), to_file=f"{individual}/keras_graph_{fitness_string}.png", dpi=300)
208 |
209 |
210 | def check_keras_block(graph, individual, fitness_string):
211 |
212 | p = Process(target=build_keras_block, args=(graph, individual, fitness_string))
213 | p.start()
214 | p.join()
215 | if p.exception is not None: raise Exception(f"{p.exception[0]}, {p.exception[1]}")
216 |
217 |
218 | def load_data(dataset):
219 |
220 | from tensorflow.keras import callbacks, datasets, utils
221 | from tensorflow.keras.preprocessing.image import ImageDataGenerator
222 |
223 | def preprocess(x):
224 |
225 | for i in range(3): x[:,:,:,i] = (x[:,:,:,i] - mean[i]) / std[i]
226 | return x
227 |
228 | if dataset == 0:
229 |
230 | num_classes = 10
231 | (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
232 | x_train, x_test = x_train.reshape(-1, 28, 28, 1).astype('float32'), x_test.reshape(-1, 28, 28, 1).astype('float32')
233 | y_train, y_test = utils.to_categorical(y_train, 10), utils.to_categorical(y_test, 10)
234 |
235 | elif dataset == 1:
236 |
237 | num_classes = 10
238 | (x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data()
239 | x_train, x_test = x_train.reshape(-1, 28, 28, 1).astype('float32'), x_test.reshape(-1, 28, 28, 1).astype('float32')
240 | y_train, y_test = utils.to_categorical(y_train, 10), utils.to_categorical(y_test, 10)
241 |
242 | elif dataset == 2:
243 |
244 | num_classes = 10
245 | (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
246 | x_train, x_test = x_train.astype('float32'), x_test.astype('float32')
247 | y_train, y_test = utils.to_categorical(y_train[:,0], 10), utils.to_categorical(y_test[:,0], 10)
248 |
249 | elif dataset == 3:
250 |
251 | num_classes = 100
252 | (x_train, y_train), (x_test, y_test) = datasets.cifar100.load_data()
253 | x_train, x_test = x_train.astype('float32'), x_test.astype('float32')
254 | mean, std = [125.307, 122.95, 113.865], [62.9932, 62.0887, 66.7048]
255 | x_train, x_test = preprocess(x_train), preprocess(x_test)
256 | y_train, y_test = utils.to_categorical(y_train[:,0], 100), utils.to_categorical(y_test[:,0], 100)
257 |
258 | cbks = []
259 | cbks += [callbacks.EarlyStopping(monitor='accuracy', min_delta=0.0001, patience=15)]
260 |
261 | datagen = ImageDataGenerator(horizontal_flip=True, width_shift_range=0.125, height_shift_range=0.125, fill_mode='constant', cval=0.)
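# The generator above applies light data augmentation: random horizontal flips
# plus width/height shifts of up to 12.5% of the image size, with the exposed
# border filled with constant zeros. The fit() call below computes dataset
# statistics, which only matter for feature-wise options (none are enabled
# here), so it is effectively a no-op kept for forward compatibility.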
262 | datagen.fit(x_train) 263 | 264 | return { 265 | 'callbacks':cbks, 'datagen': datagen, 266 | 'x_train': x_train, 'x_test': x_test, 'y_train': y_train, 'y_test': y_test, 267 | 'count_train': len(x_train), 'count_test': len(x_test), 268 | 'num_classes': num_classes 269 | } 270 | -------------------------------------------------------------------------------- /num_workers.txt: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /population.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Author: Shashank Kotyan 5 | Email: shashankkotyan@gmail.com 6 | """ 7 | 8 | import os, random, pickle, traceback, copy, numpy as np 9 | from itertools import product 10 | 11 | import networkx as nx 12 | 13 | import build 14 | 15 | 16 | class Population: 17 | 18 | 19 | def set(self, args): 20 | 21 | self.use_adaptive_mutations = args.use_adaptive_mutations 22 | self.use_cycles = args.use_cycles 23 | self.use_random_params = args.use_random_params 24 | self.use_non_squares = args.use_non_squares 25 | self.log_filepath = args.log_dir 26 | 27 | self.initial_filter, self.initial_kernel, self.initial_stride = [8, 16, 32, 64], [1, 3, 5], [1, 2] 28 | self.initial_units = [64, 128, 256, 512] 29 | 30 | self.initial_layers = list(range(1,2)) 31 | self.initial_blocks = list(range(1,2)) 32 | 33 | self.population_layers_count, self.population_blocks_count, self.population_models_count = 0, 0, 0 34 | self.population_layers, self.population_blocks, self.population_models = [], [], [] 35 | 36 | 37 | def write_log(self, string): 38 | with open(f"{self.log_filepath}/logs.log", "a") as file: file.write(string) 39 | 40 | 41 | def write_population_log(self, string): 42 | with open(f"{self.log_filepath}/population.log", "a") as file: file.write(string) 43 | 44 | 45 | def write_create_log(self, string): 46 | with open(f"{self.log_filepath}/create.log", "a") as file: file.write(string) 47 | 48 | 49 | def write_mutation_log(self, string): 50 | with open(f"{self.log_filepath}/mutation.log", "a") as file: file.write(string) 51 | 52 | 53 | def write_exceptions_log(self, string): 54 | with open(f"{self.log_filepath}/exceptions.log", "a") as file: file.write(string) 55 | 56 | 57 | def read_populations(self, population_dir): 58 | with open(f"{population_dir}/layers.pkl", 'rb') as file: self.population_layers = pickle.load(file) 59 | with open(f"{population_dir}/blocks.pkl", 'rb') as file: self.population_blocks = pickle.load(file) 60 | with open(f"{population_dir}/models.pkl", 'rb') as file: self.population_models = pickle.load(file) 61 | 62 | 63 | def save_populations(self, population_dir): 64 | with open(f"{population_dir}/layers.pkl", 'wb') as file: pickle.dump(self.population_layers, file) 65 | with open(f"{population_dir}/blocks.pkl", 'wb') as file: pickle.dump(self.population_blocks, file) 66 | with open(f"{population_dir}/models.pkl", 'wb') as file: pickle.dump(self.population_models, file) 67 | self.write_population_log(f"L:{len(self.population_layers)}, B:{len(self.population_blocks)}, M:{len(self.population_models)}, LC:{self.population_layers_count}, BC:{self.population_blocks_count}, MC:{self.population_models_count},\n") 68 | 69 | 70 | def update_populations(self, model_dna): 71 | 72 | self.population_models += [model_dna] 73 | 74 | for block_dna in model_dna['constituents']['blocks']: 75 | 76 | self.population_blocks += [block_dna] 
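# Each block's layers are registered as well, so that layer-level mutations
# (e.g. swap_layer) can later sample them from the shared population registry.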
77 | 78 | for layer_dna in block_dna['constituents']['layers']: 79 | 80 | self.population_layers += [layer_dna] 81 | 82 | 83 | def clean_populations(self): self.population_layers, self.population_blocks, self.population_models = [], [], [] 84 | 85 | 86 | def create_graph_layer(self, layer): 87 | 88 | dna = {} 89 | 90 | G = nx.DiGraph() 91 | G.add_node(f"{self.population_layers_count}") 92 | 93 | parameters = { 94 | 95 | 'type_layer': layer[0], 96 | 'layer_params': layer[1:], 97 | } 98 | 99 | for key, value in parameters.items(): 100 | 101 | nx.set_node_attributes(G, value, key) 102 | G.graph[key] = value 103 | 104 | dna['graph'] = G 105 | dna['constituents'] = {'node': layer} 106 | 107 | self.population_layers_count += 1 108 | 109 | return dna 110 | 111 | 112 | def create_graph_block(self, nodes, connections): 113 | 114 | dna = {} 115 | in_out = {} 116 | vertices = [] 117 | count = 0 118 | 119 | for i, l in enumerate(nodes): 120 | 121 | in_out[i] = {'start': [ int(n) + count for n, node in enumerate(l['graph'].nodes) if l['graph'].in_degree(node) == 0], 'end': [ int(n) + count for n, node in enumerate(l['graph'].nodes) if l['graph'].out_degree(node) == 0]} 122 | vertices += [l['graph']] 123 | count += len(list(l['graph'].nodes)) 124 | 125 | edges = [(start, end) for s, e in connections for start, end in product(in_out[s]['end'], in_out[e]['start'])] 126 | 127 | G = nx.disjoint_union_all(vertices) 128 | if len(vertices) == 1: G = nx.convert_node_labels_to_integers(G) 129 | G.add_edges_from(edges) 130 | 131 | in_out = {'start': [ node for node in G.nodes if G.in_degree(node) == 0], 'end': [ node for node in G.nodes if G.out_degree(node) == 0]} 132 | 133 | H = nx.DiGraph() 134 | H.add_node(f"input") 135 | H.add_node(f"output") 136 | H.nodes['input']['type_layer'] = 'input' 137 | H.nodes['output']['type_layer'] = 'output' 138 | 139 | U = nx.disjoint_union(H, G) 140 | 141 | for s in in_out['start']: U.add_edge(0, int(s)+2) 142 | for e in in_out['end']: U.add_edge(int(e)+2, 1) 143 | 144 | if len(list(nx.simple_cycles(U))) > 0: raise Exception(f"\nCycle found in the block graph\n") 145 | 146 | # build.check_block(U) 147 | 148 | dna['graph'] = G 149 | dna['constituents'] = {'layers': nodes, 'edges': connections} 150 | 151 | self.population_blocks_count += 1 152 | 153 | return dna 154 | 155 | 156 | def create_graph_model(self, nodes, connections): 157 | 158 | dna = {} 159 | in_out = {} 160 | vertices = [] 161 | count = 0 162 | 163 | for i, l in enumerate(nodes): 164 | 165 | in_out[i] = {'start': [ int(n) + count for n, node in enumerate(l['graph'].nodes) if l['graph'].in_degree(node) == 0], 'end': [ int(n) + count for n, node in enumerate(l['graph'].nodes) if l['graph'].out_degree(node) == 0]} 166 | vertices += [l['graph']] 167 | count += len(list(l['graph'].nodes)) 168 | 169 | edges = [(start, end) for s, e in connections for start, end in product(in_out[s]['end'], in_out[e]['start'])] 170 | 171 | G = nx.disjoint_union_all(vertices) 172 | if len(vertices) == 1: G = nx.convert_node_labels_to_integers(G) 173 | G.add_edges_from(edges) 174 | 175 | in_out = {'start': [ node for node in G.nodes if G.in_degree(node) == 0], 'end': [ node for node in G.nodes if G.out_degree(node) == 0]} 176 | 177 | H = nx.DiGraph() 178 | H.add_node(f"input") 179 | H.add_node(f"output") 180 | H.nodes['input']['type_layer'] = 'input' 181 | H.nodes['output']['type_layer'] = 'output' 182 | 183 | U = nx.disjoint_union(H,G) 184 | 185 | for s in in_out['start']: U.add_edge(0, int(s)+2) 186 | for e in in_out['end']: 
U.add_edge(int(e)+2, 1)
187 |
188 | if len(list(nx.simple_cycles(U))) > 0: raise Exception(f"\nCycle found in the model graph\n")
189 |
190 | build.check_block(U)
191 |
192 | parameters = {
193 | 'model_params': {
194 | 'weights': [1/3,1/3,1/3],
195 | },
196 | 'spectrum': self.get_spectrum(U, nodes, connections),
197 | }
198 |
199 | for key, value in parameters.items(): U.graph[key] = value
200 |
201 | dna['graph'] = U
202 | dna['constituents'] = {'blocks': nodes, 'edges': connections}
203 |
204 | self.update_populations(dna)
205 | self.population_models_count += 1
206 |
207 | return dna
208 |
209 |
210 | def create_random_layer(self):
211 |
212 | type_layer = random.choice([0,1])
213 |
214 | if type_layer == 0:
215 |
216 | f, k, s = random.choice(self.initial_filter), random.choice(self.initial_kernel), random.choice(self.initial_stride)
217 | layer = ['convolution_2d', f, k, k, s, s]
218 | self.write_create_log(f"Convolution Layer Created Randomly with filter_size = {f}, kernel_size = {k}, stride_size = {s}\n")
219 |
220 | elif type_layer == 1:
221 |
222 | u = random.choice(self.initial_units)
223 | layer = ['fully_connected', u]
224 | self.write_create_log(f"Dense Layer Created Randomly with units = {u}\n")
225 |
226 | return self.create_graph_layer(layer)
227 |
228 |
229 | def create_random_block(self):
230 |
231 | n = random.choice(self.initial_layers)
232 |
233 | while True:
234 | try:
235 |
236 | self.write_create_log(f"Block Created Randomly with number of layers = {n}\n")
237 | return self.create_graph_block([ self.create_random_layer() for _ in range(n)], list(nx.generators.trees.random_tree(n).edges))
238 |
239 | except Exception as e:
240 |
241 | self.write_exceptions_log(f"\nException occurred in creating block:\n{traceback.format_exc()}\n")
242 |
243 |
244 | def create_random_model(self):
245 |
246 | n = random.choice(self.initial_blocks)
247 |
248 | while True:
249 |
250 | try:
251 |
252 | self.write_create_log(f"Model Created Randomly with number of blocks = {n}\n")
253 | return self.create_graph_model([self.create_random_block() for _ in range(n)], list(nx.generators.trees.random_tree(n).edges))
254 |
255 | except Exception as e:
256 |
257 | self.write_exceptions_log(f"\nException occurred in creating model:\n{traceback.format_exc()}\n")
258 |
259 |
260 | def get_random_models(self, n):
261 |
262 | clean_population = self.population_models
263 |
264 | l = len(clean_population)
265 |
266 | if l > n:
267 |
268 | indices = random.sample(list(range(l)), n)
269 | models = [clean_population[index]['constituents'] for index in indices]
270 |
271 | else:
272 |
273 | indices = list(range(l))
274 | models = [clean_population[index]['constituents'] for index in indices]
275 |
276 | self.write_create_log(f"\n{n} models were retrieved and their indices are {indices}\n\n")
277 |
278 | return models
279 |
280 |
281 | def get_random_blocks(self, n):
282 | clean_population = self.population_blocks
283 |
284 | l = len(clean_population)
285 |
286 | if l > n:
287 |
288 | indices = random.sample(list(range(l)), n)
289 | blocks = [clean_population[index] for index in indices]
290 |
291 | else:
292 |
293 | indices = list(range(l))
294 | blocks = [clean_population[index] for index in indices]  # return the full dna dicts, matching the branch above
295 |
296 | self.write_create_log(f"\n{n} blocks were retrieved and their indices are {indices}\n")
297 |
298 | return blocks
299 |
300 |
301 | def get_random_layers(self, n, new=False):
302 |
303 | clean_population = self.population_layers
304 |
305 | l = len(clean_population)
306 |
307 | if l > n:
308 |
309 | indices = random.sample(list(range(l)), n)
310 | layers = [clean_population[index] for index in indices]
311 |
312 | else:
313 |
314 | indices = list(range(l))
315 | layers = [clean_population[index] for index in indices]  # return the full dna dicts, matching the branch above
316 |
317 | self.write_create_log(f"\n{n} layers were retrieved and their indices are {indices}\n")
318 |
319 | return layers
320 |
321 |
322 | def mutate(self, old_model):
323 |
324 | mutation = None
325 |
326 | while mutation is None:
327 |
328 | mutated_model = copy.deepcopy(old_model)
329 |
330 | if self.use_adaptive_mutations:
331 |
332 | mutated_model['graph'].graph['model_params']['weights'] = [ weight + random.uniform(-0.03,0.03) for weight in old_model['graph'].graph['model_params']['weights']]
333 | mutated_model['graph'].graph['model_params']['weights'] = [ float(weight)/sum(mutated_model['graph'].graph['model_params']['weights']) for weight in mutated_model['graph'].graph['model_params']['weights']]
334 |
335 | type_mutate = np.random.choice([0,1,2], p=mutated_model['graph'].graph['model_params']['weights'])  # np.random.choice supports weighted sampling; random.choice does not accept p
336 |
337 | else:
338 |
339 | type_mutate = random.choice([0,1,2])
340 |
341 | if type_mutate == 0: type_mutation, mutation = self.mutate_model(mutated_model)
342 | elif type_mutate == 1: type_mutation, mutation = self.mutate_block(mutated_model)
343 | elif type_mutate == 2: type_mutation, mutation = self.mutate_layer(mutated_model)
344 |
345 | if mutation is not None:
346 |
347 | self.write_mutation_log(f"Mutating model with mutation {type_mutation}\n")
348 | return type_mutation, mutation
349 |
350 |
351 | def mutate_layer(self, old_model):
352 |
353 | convolution_mutation = {'swap_layer': self.swap_layer, 'kernel': self.kernel, 'filter': self.filter, 'stride': self.stride}
354 | dense_mutation = {'swap_layer': self.swap_layer, 'units': self.units}
355 |
356 | if self.use_cycles:
357 |
358 | l = len(self.population_layers)
359 |
360 | if l > 200:
361 |
362 | convolution_mutation = {'swap_layer': self.swap_layer}
363 | dense_mutation = {'swap_layer': self.swap_layer}
364 |
365 | elif l < 50:
366 |
367 | del convolution_mutation['swap_layer']
368 | del dense_mutation['swap_layer']
369 |
370 | layer_mutation = {'convolution_2d': convolution_mutation, 'fully_connected': dense_mutation}
371 |
372 | try:
373 |
374 | mutated_model = copy.deepcopy(old_model)
375 |
376 | mutated_block_index = random.choice(list(range(len(mutated_model['constituents']['blocks']))))
377 | mutated_block = mutated_model['constituents']['blocks'][mutated_block_index]
378 |
379 | mutated_layer_index = random.choice(list(range(len(mutated_block['constituents']['layers']))))
380 | mutated_layer = mutated_block['constituents']['layers'][mutated_layer_index]
381 |
382 | type_mutation = layer_mutation[mutated_layer['constituents']['node'][0]]
383 | mutation = random.choice(list(type_mutation.keys()))
384 |
385 | mutated_layer = type_mutation[mutation](mutated_layer)
386 |
387 | if mutated_layer is None: return 'Failed', None
388 |
389 | mutated_block['constituents']['layers'][mutated_layer_index] = mutated_layer
390 | mutated_model['constituents']['blocks'][mutated_block_index] = mutated_block
391 |
392 | try:
393 |
394 | self.create_graph_layer(mutated_layer['constituents']['node'])
395 |
396 | except Exception as e:
397 |
398 | self.write_exceptions_log(f"\nException occurred in mutating layer (Creating layer) {mutation}:\n{traceback.format_exc()}\n")
399 | return 'Failed', None
400 |
401 | try:
402 |
403 | self.create_graph_block(mutated_block['constituents']['layers'], mutated_block['constituents']['edges'])
404 |
405 | except Exception as e:
406 |
407 | self.write_exceptions_log(f"\nException occurred in mutating layer (Creating Block) {mutation}:\n{traceback.format_exc()}\n")
408 | return 'Failed', None
409 |
410 | try:
411 |
412 | mutated = self.create_graph_model(mutated_model['constituents']['blocks'], mutated_model['constituents']['edges'])
413 | self.write_mutation_log(f"\nSuccessfully Mutated Layer with {mutation}\n")
414 | return mutation, mutated
415 |
416 | except Exception as e:
417 |
418 | self.write_exceptions_log(f"\nException occurred in mutating layer (Creating Model) {mutation}:\n{traceback.format_exc()}\n")
419 | return 'Failed', None
420 |
421 | except:
422 |
423 | self.write_exceptions_log(f"\nException occurred in mutating layer:\n{traceback.format_exc()}\n")
424 | return 'Failed', None
425 |
426 |
427 | def mutate_block(self, old_model):
428 |
429 | block_mutation = {
430 | 'add_layer': self.add_layer, 'add_layer_connection': self.add_layer_connection,
431 | 'remove_layer': self.remove_layer, 'remove_layer_connection': self.remove_layer_connection,
432 | 'swap_blocks': self.swap_blocks
433 | }
434 |
435 | if self.use_cycles:
436 |
437 | l = len(self.population_blocks)
438 | if l > 200: block_mutation = {'swap_blocks': self.swap_blocks}
439 | elif l < 50: del block_mutation['swap_blocks']
440 |
441 | try:
442 |
443 | mutated_model = copy.deepcopy(old_model)
444 |
445 | mutated_block_index = random.choice(list(range(len(mutated_model['constituents']['blocks']))))
446 | mutated_block = mutated_model['constituents']['blocks'][mutated_block_index]
447 |
448 | type_mutation = block_mutation
449 | mutation = random.choice(list(type_mutation.keys()))
450 |
451 | mutated_block = type_mutation[mutation](mutated_block)
452 |
453 | if mutated_block is None: return 'Failed', None
454 |
455 | mutated_model['constituents']['blocks'][mutated_block_index] = mutated_block
456 |
457 | try:
458 |
459 | self.create_graph_block(mutated_block['constituents']['layers'], mutated_block['constituents']['edges'])
460 |
461 | except Exception as e:
462 |
463 | self.write_exceptions_log(f"\nException occurred in mutating block (Creating block) {mutation}:\n{traceback.format_exc()}\n")
464 | return 'Failed', None
465 |
466 | try:
467 |
468 | mutated = self.create_graph_model(mutated_model['constituents']['blocks'], mutated_model['constituents']['edges'])
469 | self.write_mutation_log(f"\nSuccessfully Mutated Block with {mutation}\n")
470 | return mutation, mutated
471 |
472 | except Exception as e:
473 |
474 | self.write_exceptions_log(f"\nException occurred in mutating block (Creating Model) {mutation}:\n{traceback.format_exc()}\n")
475 | return 'Failed', None
476 |
477 | except:
478 |
479 | self.write_exceptions_log(f"\nException occurred in mutating block:\n{traceback.format_exc()}\n")
480 | return 'Failed', None
481 |
482 |
483 | def mutate_model(self, old_model):
484 |
485 | model_mutation = {
486 | 'add_block': self.add_block, 'add_block_connection': self.add_block_connection,
487 | 'remove_block': self.remove_block, 'remove_block_connection': self.remove_block_connection
488 | }
489 |
490 | try:
491 |
492 | mutated_model = copy.deepcopy(old_model)
493 |
494 | type_mutation = model_mutation
495 | mutation = random.choice(list(type_mutation.keys()))
496 |
497 | mutated_model = type_mutation[mutation](mutated_model)
498 |
499 | if mutated_model is None: return 'Failed', None
500 |
501 | try:
502 | mutated = self.create_graph_model(mutated_model['constituents']['blocks'], mutated_model['constituents']['edges'])
503 | self.write_mutation_log(f"\nSuccessfully Mutated Model with {mutation}\n")
504 |
505 | return mutation, mutated  # return the freshly built dna, as mutate_layer and mutate_block do
506 | except Exception as e:
507 | self.write_exceptions_log(f"\nException occurred in mutating model (Creating Model) {mutation}:\n{traceback.format_exc()}\n")
508 | return 'Failed', None
509 |
510 | except:
511 | self.write_exceptions_log(f"\nException occurred in mutating model:\n{traceback.format_exc()}\n")
512 | return 'Failed', None
513 |
514 |
515 | def kernel(self, old_layer):
516 |
517 | if old_layer['constituents']['node'][0] == 'convolution_2d': index = 2 + round(random.random()) if self.use_non_squares else 2
518 | elif old_layer['constituents']['node'][0] == 'convolution_1d': index = 2
519 | else: raise Exception('Layer does not support kernel mutation')
520 |
521 | mutated_layer = copy.deepcopy(old_layer)
522 | old_kernel = old_layer['constituents']['node'][index]
523 |
524 | if self.use_random_params:
525 |
526 | new_kernel = random.uniform(old_kernel / 2.0, old_kernel * 2.0)
527 | new_kernel = int(new_kernel) if int(new_kernel) % 2 != 0 else int(new_kernel) + 1
528 |
529 | else: new_kernel = random.choice([x for x in self.initial_kernel if x != old_kernel])
530 |
531 | # mirror the value to both kernel dimensions only when square kernels are enforced
532 | if self.use_non_squares or old_layer['constituents']['node'][0] == 'convolution_1d': mutated_layer['constituents']['node'][index] = new_kernel
533 | else: mutated_layer['constituents']['node'][2], mutated_layer['constituents']['node'][3] = new_kernel, new_kernel
534 |
535 | return mutated_layer
536 |
537 |
538 | def stride(self, old_layer):
539 |
540 | if old_layer['constituents']['node'][0] == 'convolution_2d': index = 4 + round(random.random()) if self.use_non_squares else 4
541 | elif old_layer['constituents']['node'][0] == 'convolution_1d': index = 3
542 | else: raise Exception('Layer does not support stride mutation')
543 |
544 | mutated_layer = copy.deepcopy(old_layer)
545 | old_stride = old_layer['constituents']['node'][index]
546 |
547 | if self.use_random_params: new_stride = round(2 ** round(random.uniform(old_stride / 2.0, old_stride * 2.0)))
548 | else: new_stride = random.choice([x for x in self.initial_stride if x != old_stride])
549 |
550 | # mirror the value to both stride dimensions only when square strides are enforced
551 | if self.use_non_squares or old_layer['constituents']['node'][0] == 'convolution_1d': mutated_layer['constituents']['node'][index] = new_stride
552 | else: mutated_layer['constituents']['node'][4], mutated_layer['constituents']['node'][5] = new_stride, new_stride
553 |
554 | return mutated_layer
555 |
556 |
557 | def filter(self, old_layer):
558 |
559 | if old_layer['constituents']['node'][0] != 'convolution_2d' and old_layer['constituents']['node'][0] != 'convolution_1d': raise Exception('Layer does not support filter mutation')
560 |
561 | index = 1
562 |
563 | mutated_layer = copy.deepcopy(old_layer)
564 | old_filter = old_layer['constituents']['node'][index]
565 |
566 | if self.use_random_params: new_filter = round(random.uniform(old_filter / 2.0, old_filter * 2.0))
567 | else: new_filter = random.choice([x for x in self.initial_filter if x != old_filter])
568 |
569 | mutated_layer['constituents']['node'][index] = new_filter
570 |
571 | return mutated_layer
572 |
573 |
574 | def units(self, old_layer):
575 |
576 | if old_layer['constituents']['node'][0] != 'fully_connected': raise Exception('Layer does not support units mutation')
577 |
578 | index = 1
579 |
580 | mutated_layer = copy.deepcopy(old_layer)
581 | old_units = old_layer['constituents']['node'][index]
582 |
583 | if self.use_random_params: new_units = round(random.uniform(old_units / 2.0, old_units * 2.0))
584 | else: new_units = random.choice([x for x in self.initial_units if x != old_units])
585 |
586 | mutated_layer['constituents']['node'][index] = new_units
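# Note: the kernel, stride, filter and units mutations all follow a common
# pattern: with use_random_params the new value is drawn from a range around
# the old one (roughly [old / 2, old * 2]); otherwise it is resampled from the
# corresponding initial pool, excluding the current value.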
587 |
588 | return mutated_layer
589 |
590 |
591 | def swap_layer(self, old_layer):
592 |
593 | mutated_layer = old_layer
594 |
595 | while set(mutated_layer['constituents']['node']) == set(old_layer['constituents']['node']): mutated_layer = self.get_random_layers(1)[0]
596 |
597 | return mutated_layer
598 |
599 |
600 | def swap_blocks(self, old_block):
601 |
602 | mutated_block = old_block
603 |
604 | while set([tuple(layer['constituents']['node']) for layer in mutated_block['constituents']['layers']]) == set([tuple(layer['constituents']['node']) for layer in old_block['constituents']['layers']]): mutated_block = self.get_random_blocks(1)[0]
605 |
606 | return mutated_block
607 |
608 |
609 | def add_layer(self, old_block):
610 |
611 | mutated_block = copy.deepcopy(old_block)
612 |
613 | try:
614 |
615 | l_n = len(old_block['constituents']['layers'])
616 |
617 | layer = self.get_random_layers(1)[0]
618 |
619 | mutated_block['constituents']['layers'] += [layer]
620 |
621 | start, end = random.sample(list(range(l_n)) + [-1, -1], 2)
622 |
623 | if start == -1 and end != -1:
624 |
625 | mutated_block['constituents']['edges'] += [(l_n, end)]
626 |
627 | elif start != -1 and end == -1:
628 |
629 | mutated_block['constituents']['edges'] += [(start, l_n)]
630 |
631 | elif start != -1 and end != -1:
632 |
633 | mutated_block['constituents']['edges'] += [(start, l_n)]
634 | mutated_block['constituents']['edges'] += [(l_n, end)]
635 |
636 | return mutated_block
637 |
638 | except Exception as e:
639 |
640 | self.write_exceptions_log(f"\nException occurred in mutating block (Add Layer):\n{traceback.format_exc()}\n")
641 | return None
642 |
643 |
644 | def add_layer_connection(self, old_block):
645 |
646 | mutated_block = copy.deepcopy(old_block)
647 |
648 | l = len(mutated_block['constituents']['layers'])
649 |
650 | if l > 2:
651 | try:
652 |
653 | new_connection = mutated_block['constituents']['edges'][0]
654 | while new_connection in mutated_block['constituents']['edges']: new_connection = tuple(random.sample(list(range(l)), 2))
655 |
656 | mutated_block['constituents']['edges'] += [new_connection]
657 | return mutated_block
658 |
659 | except Exception as e:
660 |
661 | self.write_exceptions_log(f"\nException occurred in mutating block (Add Layer Connection):\n{traceback.format_exc()}\n")
662 | return None
663 |
664 | return None
665 |
666 |
667 | def remove_layer(self, old_block):
668 |
669 | mutated_block = copy.deepcopy(old_block)
670 |
671 | l = len(mutated_block['constituents']['layers'])
672 |
673 | if l > 2:
674 |
675 | try:
676 |
677 | mutated_layer_index = random.choice(list(range(l)))
678 | mutated_layer = mutated_block['constituents']['layers'][mutated_layer_index]
679 |
680 | del mutated_block['constituents']['layers'][mutated_layer_index]
681 |
682 | mutated_connections = []
683 | for edge in mutated_block['constituents']['edges']:
684 |
685 | # skip edges touching the removed layer; deleting from the list while iterating over it would skip entries
686 | if edge[0] == mutated_layer_index or edge[1] == mutated_layer_index: continue
687 |
688 | new_edge = []
689 |
690 | for j in range(2):
691 |
692 | if edge[j] > mutated_layer_index: new_edge += [edge[j] - 1]
693 | else: new_edge += [edge[j]]
694 |
695 | mutated_connections.append(tuple(new_edge))
696 |
697 | mutated_block['constituents']['edges'] = mutated_connections
698 |
699 | return mutated_block
700 |
701 | except Exception as e:
702 |
703 | self.write_exceptions_log(f"\nException occurred in mutating block (Remove Layer):\n{traceback.format_exc()}\n")
704 | return None
705 |
706 | return None
707 |
708 |
709 | def remove_layer_connection(self, old_block):
710 |
711 | mutated_block = copy.deepcopy(old_block)
712 |
713 | l = len(mutated_block['constituents']['edges'])
714 |
715 | if l > 1:
716 |
717 | try:
718 |
719 | mutated_connection_index = random.choice(list(range(l)))
720 | mutated_connection = mutated_block['constituents']['edges'][mutated_connection_index]
721 |
722 | del mutated_block['constituents']['edges'][mutated_connection_index]
723 |
724 | return mutated_block
725 |
726 | except Exception as e:
727 |
728 | self.write_exceptions_log(f"\nException occurred in mutating block (Remove Layer Connection):\n{traceback.format_exc()}\n")
729 | return None
730 |
731 | return None
732 |
733 |
734 | def add_block(self, old_model):
735 |
736 | mutated_model = copy.deepcopy(old_model)
737 |
738 | try:
739 |
740 | l_n = len(old_model['constituents']['blocks'])
741 |
742 | block = self.get_random_blocks(1)[0]
743 |
744 | mutated_model['constituents']['blocks'] += [block]
745 |
746 | start, end = random.sample(list(range(l_n)) + [-1, -1], 2)
747 |
748 | if start == -1 and end != -1:
749 |
750 | mutated_model['constituents']['edges'] += [(l_n, end)]
751 |
752 | elif start != -1 and end == -1:
753 |
754 | mutated_model['constituents']['edges'] += [(start, l_n)]
755 |
756 | elif start != -1 and end != -1:
757 |
758 | mutated_model['constituents']['edges'] += [(start, l_n)]
759 | mutated_model['constituents']['edges'] += [(l_n, end)]
760 |
761 | return mutated_model
762 |
763 | except Exception as e:
764 |
765 | self.write_exceptions_log(f"\nException occurred in mutating model (Add Block):\n{traceback.format_exc()}\n")
766 | return None
767 |
768 |
769 | def add_block_connection(self, old_model):
770 |
771 | mutated_model = copy.deepcopy(old_model)
772 |
773 | l = len(mutated_model['constituents']['blocks'])
774 |
775 | if l > 2:
776 |
777 | try:
778 |
779 | new_connection = mutated_model['constituents']['edges'][0]
780 | while new_connection in mutated_model['constituents']['edges']: new_connection = tuple(random.sample(list(range(l)), 2))
781 |
782 | mutated_model['constituents']['edges'] += [new_connection]
783 |
784 | return mutated_model
785 |
786 | except Exception as e:
787 |
788 | self.write_exceptions_log(f"\nException occurred in mutating model (Add Block Connection):\n{traceback.format_exc()}\n")
789 | return None
790 |
791 | return None
792 |
793 |
794 | def remove_block(self, old_model):
795 |
796 | mutated_model = copy.deepcopy(old_model)
797 |
798 | l = len(old_model['constituents']['blocks'])
799 |
800 | if l > 2:
801 |
802 | try:
803 |
804 | mutated_block_index = random.choice(list(range(l)))
805 | mutated_block = mutated_model['constituents']['blocks'][mutated_block_index]
806 |
807 | del mutated_model['constituents']['blocks'][mutated_block_index]
808 |
809 | mutated_connections = []
810 | for edge in mutated_model['constituents']['edges']:
811 |
812 | # skip edges touching the removed block; deleting from the list while iterating over it would skip entries
813 | if edge[0] == mutated_block_index or edge[1] == mutated_block_index: continue
814 |
815 | new_edge = []
816 |
817 | for j in range(2):
818 |
819 | if edge[j] > mutated_block_index: new_edge += [edge[j] - 1]
820 | else: new_edge += [edge[j]]
821 |
822 | mutated_connections.append(tuple(new_edge))
823 |
824 | mutated_model['constituents']['edges'] = mutated_connections
825 |
826 | return mutated_model
827 |
828 | except Exception as e:
829 |
830 | self.write_exceptions_log(f"\nException occurred in mutating model (Remove Block):\n{traceback.format_exc()}\n")
831 | return None
832 |
833 | return None
834 |
835 |
836 | def remove_block_connection(self, old_model):
837 |
838 | mutated_model = copy.deepcopy(old_model)
839 |
840 | l = len(mutated_model['constituents']['edges'])
841 |
842 | if l > 1:
843 |
844 | try:
845 |
846 | mutated_connection_index = random.choice(list(range(l)))
847 | mutated_connection = mutated_model['constituents']['edges'][mutated_connection_index]
848 |
849 | del mutated_model['constituents']['edges'][mutated_connection_index]
850 |
851 | return mutated_model
852 |
853 | except Exception as e:
854 |
855 | self.write_exceptions_log(f"\nException occurred in mutating model (Remove Block Connection):\n{traceback.format_exc()}\n")
856 | return None
857 |
858 | return None
859 |
860 |
861 | def get_spectrum(self, graph, blocks, block_connections):
862 |
863 | layers = graph.nodes
864 | layer_connections = graph.edges
865 |
866 | num_blocks = len(blocks)
867 | num_block_connections = len(block_connections)
868 |
869 | num_total_layers = len(list(layers))
870 | num_total_connections = len(list(layer_connections))
871 |
872 | num_d_layers, num_c_layers = 0, 0
873 |
874 | for layer in layers:
875 |
876 | if layers[layer]['type_layer'] == 'fully_connected': num_d_layers += 1
877 | elif layers[layer]['type_layer'] == 'convolution_2d': num_c_layers += 1
878 |
879 | num_dd_connection, num_dc_connection, num_cd_connection, num_cc_connection = 0, 0, 0, 0
880 |
881 | for start, end in layer_connections:
882 |
883 | t_start = layers[start]['type_layer']
884 | t_end = layers[end]['type_layer']
885 |
886 | if t_start == 'fully_connected':
887 |
888 | if t_end == 'fully_connected': num_dd_connection += 1
889 | elif t_end == 'convolution_2d': num_dc_connection += 1
890 |
891 | elif t_start == 'convolution_2d':
892 |
893 | if t_end == 'fully_connected': num_cd_connection += 1
894 | elif t_end == 'convolution_2d': num_cc_connection += 1
895 |
896 | return [
897 | num_blocks, num_block_connections,
898 | num_d_layers, num_dd_connection, num_dc_connection,
899 | num_c_layers, num_cd_connection, num_cc_connection,
900 | num_total_layers, num_total_connections,
901 | ]
--------------------------------------------------------------------------------
/process.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Author: Shashank Kotyan
5 | Email: shashankkotyan@gmail.com
6 | """
7 |
8 | import traceback, multiprocessing as mp
9 |
10 |
11 | class Process(mp.Process):
12 |
13 |
14 | def __init__(self, *args, **kwargs):
15 |
16 | mp.Process.__init__(self, *args, **kwargs)
17 |
18 | self._pconn, self._cconn = mp.Pipe()
19 | self._exception = None
20 |
21 |
22 | def run(self):
23 |
24 | try:
25 |
26 | mp.Process.run(self)
27 | self._cconn.send(None)
28 |
29 | except Exception as e:
30 |
31 | tb = traceback.format_exc()
32 | self._cconn.send((e, tb))
33 |
34 |
35 | @property
36 | def exception(self):
37 |
38 | if self._pconn.poll(): self._exception = self._pconn.recv()
39 |
40 | return self._exception
41 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | GPUtil==1.4.0
2 | matplotlib==3.1.1
3 | networkx==2.3
4 | numpy==1.17.2
5 | scipy==1.4.1
6 | tensorflow==2.1.0
7 |
--------------------------------------------------------------------------------
/run_evolution.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Author: Shashank Kotyan
5 | Email: shashankkotyan@gmail.com
6 | """
7 |
8 | import os, sys, warnings
9 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
10 | warnings.filterwarnings('ignore')
11 |
12 | import argparse, glob, pickle, time, GPUtil, numpy as np
13 |
14 | from scipy.cluster.vq import whiten as normalise
15 | from multiprocessing.managers import BaseManager
16 |
17 | import build
18 | from population import Population
19 | from worker import Worker
20 | from process import Process
21 |
22 |
23 | class NeuroEvolution:
24 |
25 |
26 | def __init__(self, args):
27 |
28 | self.population_size = args.population_size
29 | self.population_dir = args.population_dir
30 | self.num_mutations = args.num_mutations
31 | self.number_of_child = args.num_mutated_child
32 |
33 | self.gen = -1
34 | self.ind_counter = 0
35 |
36 | if not os.path.exists(self.population_dir): os.makedirs(self.population_dir)
37 | if not os.path.exists(args.log_dir): os.makedirs(args.log_dir)
38 |
39 | BaseManager.register('Population', Population)
40 | manager = BaseManager()
41 | manager.start()
42 |
43 | self.population = manager.Population()
44 | self.population.set(args)
45 |
46 | self.worker = Worker(args, build.load_data(args.dataset))
47 |
48 |
49 | def parallel_run(self, type_create, args):
50 |
51 | start = False
52 |
53 | while not start:
54 |
55 | for deviceID in range(7):
56 |
57 | if deviceID in self.jobs:
58 |
59 | if not self.jobs[deviceID].is_alive():
60 |
61 | self.jobs[deviceID].close()
62 | if self.jobs[deviceID].exception is not None: raise Exception(f"{self.jobs[deviceID].exception[0]}, {self.jobs[deviceID].exception[1]}")
63 | finished_job = self.jobs.pop(deviceID, None)
64 |
65 | with open("num_workers.txt", "r") as f: num_workers = int(f.read())
66 |
67 | deviceIDs = GPUtil.getAvailable(order='memory', limit=7, maxLoad=1.1, maxMemory=0.5)
68 |
69 | alive = -1
70 |
71 | if len(deviceIDs) != 0:
72 |
73 | for deviceID in deviceIDs:
74 |
75 | if deviceID not in self.jobs:
76 |
77 | alive = deviceID
78 | break
79 |
80 | if len(self.jobs) < num_workers and alive > -1:
81 |
82 | print(f"GPU {alive} running {self.ind_counter}")
83 |
84 | if type_create == 0: target=self.worker.create_parent
85 | else: target=self.worker.create_child
86 |
87 | start = True
88 |
89 | args[0] = alive
90 | job = Process(target=target, args=tuple(args))
91 | self.jobs[alive] = job
92 | job.start()
93 |
94 | else:
95 |
96 | time.sleep(0.1)
97 |
98 |
99 | def run(self):
100 |
101 | found = False
102 |
103 | print(f"Searching for previous generations")
104 | population = sorted(glob.glob(f"{self.population_dir}/*/*/alive.txt"))
105 |
106 | if len(population) > 0:
107 |
108 | if len(population) == self.population_size or len(population) == self.population_size*(self.number_of_child+1):
109 |
110 | found = True
111 |
112 | for individual in population:
113 |
114 | self.gen = max(self.gen, int(individual.split('/')[1]))
115 | self.ind_counter = max(self.ind_counter, int(individual.split('/')[2]))
116 |
117 | if len(population) == self.population_size*(self.number_of_child+1): self.evolve_ras()
118 |
119 | print(f"Found Last Generation {self.gen} with last individual {self.ind_counter}")
120 |
121 | else: raise Exception(f"Corrupted files: please delete the files in {self.population_dir}. They may be left over from an interrupted evolution run.")
99 |     def run(self):
100 | 
101 |         found = False
102 | 
103 |         print(f"Searching for previous generations")
104 |         population = sorted(glob.glob(f"{self.population_dir}/*/*/alive.txt"))
105 | 
106 |         if len(population) > 0:
107 | 
108 |             if len(population) == self.population_size or len(population) == self.population_size*(self.number_of_child+1):
109 | 
110 |                 found = True
111 | 
112 |                 for individual in population:
113 | 
114 |                     self.gen = max(self.gen, int(individual.split('/')[1]))
115 |                     self.ind_counter = max(self.ind_counter, int(individual.split('/')[2]))
116 | 
117 |                 if len(population) == self.population_size*(self.number_of_child+1): self.evolve_ras()
118 | 
119 |                 print(f"Found Last Generation {self.gen} with last individual {self.ind_counter}")
120 | 
121 |             else: raise Exception(f"Corrupted files: unexpected number of alive individuals, probably left over from an interrupted evolution. Please delete the files in {self.population_dir}.")
122 | 
123 |         else: found = False
124 | 
125 |         if not found: self.create_initial_population()
126 | 
127 |         self.evolve()
128 | 
129 | 
130 |     def create_initial_population(self):
131 | 
132 |         print(f"Did Not Find Any Previous Generation\n")
133 | 
134 |         self.gen += 1
135 | 
136 |         generation_directory = f"{self.population_dir}/{self.gen}/"
137 |         if not os.path.exists(generation_directory): os.makedirs(generation_directory)
138 | 
139 |         self.jobs = {}
140 | 
141 |         for _ in range(self.population_size):
142 |             self.ind_counter += 1
143 | 
144 |             individual = f"{self.population_dir}/{self.gen}/{self.ind_counter}"
145 |             if not os.path.exists(individual): os.makedirs(individual)
146 | 
147 |             self.create_lineage(individual)
148 |             self.parallel_run(0, [0, individual, self.population, self.store_individual, self.gen])
149 | 
150 |         for deviceID in self.jobs:
151 |             self.jobs[deviceID].join()
152 |             if self.jobs[deviceID].exception is not None: raise Exception(f"{self.jobs[deviceID].exception[0]}, {self.jobs[deviceID].exception[1]}")
153 | 
154 |         self.population.write_log(f"\n")
155 |         self.population.save_populations(f"{self.population_dir}/{self.gen}")
156 | 
157 | 
158 |     def evolve_ras(self):
159 | 
160 |         population = sorted(glob.glob(f"{self.population_dir}/*/*/alive.txt"))
161 |         assert len(population) == self.population_size
162 | 
163 |         self.population.read_populations(f"{self.population_dir}/{self.gen}")
164 | 
165 |         self.gen += 1
166 | 
167 |         generation_directory = f"{self.population_dir}/{self.gen}/"
168 |         if not os.path.exists(generation_directory): os.makedirs(generation_directory)
169 | 
170 |         cluster_population = {'individual': [], 'dna': [], 'metrics': [], 'spectrum': []}
171 | 
172 |         for i, individual in enumerate(population):
173 | 
174 |             individual = individual.split('/alive.txt')[0]
175 |             dna, metrics = self.read_individual(individual)
176 | 
177 |             cluster_population['individual'] += [individual]
178 |             cluster_population['dna'] += [dna]
179 |             cluster_population['metrics'] += [metrics]
180 |             cluster_population['spectrum'] += [dna['graph'].graph['spectrum']]
181 | 
182 |         child_individuals = []
183 | 
184 |         self.jobs = {}
185 | 
186 |         for parent_individual in population * self.number_of_child:
187 | 
188 |             self.ind_counter += 1
189 | 
190 |             parent_individual = parent_individual.split('/alive.txt')[0]
191 |             parent_dna, parent_metrics = self.read_individual(parent_individual)
192 | 
193 |             child_individual = f"{self.population_dir}/{self.gen}/{self.ind_counter}"
194 |             if not os.path.exists(child_individual): os.makedirs(child_individual)
195 | 
196 |             child_individuals += [child_individual]
197 | 
198 |             self.store_lineage(child_individual, parent_individual)
199 |             self.parallel_run(1, [0, parent_dna, self.num_mutations, child_individual, self.population, self.store_individual, self.gen])
200 | 
201 |         for deviceID in self.jobs:
202 | 
203 |             self.jobs[deviceID].join()
204 |             if self.jobs[deviceID].exception is not None: raise Exception(f"{self.jobs[deviceID].exception[0]}, {self.jobs[deviceID].exception[1]}")
205 | 
206 |         for child_individual in child_individuals:
207 | 
208 |             child_dna, child_metrics = self.read_individual(child_individual)
209 |             child_spectrum = child_dna['graph'].graph['spectrum']
210 | 
211 |             normalisation_spectrum = normalise(cluster_population['spectrum'] + [child_spectrum])
212 |             distance = [np.linalg.norm(x - normalisation_spectrum[-1]) for x in normalisation_spectrum[:-1]]
213 |             closest_cluster_index = distance.index(min(distance))
214 | 
215 |             if cluster_population['metrics'][closest_cluster_index]['fitness'] < child_metrics['fitness']:
216 | 
217 |                 self.population.write_log(f"--> Worker changed cluster {closest_cluster_index} head {cluster_population['individual'][closest_cluster_index].split('/')[2]} of fitness {cluster_population['metrics'][closest_cluster_index]['fitness']:.2f} to {child_individual.split('/')[2]} of fitness {child_metrics['fitness']:.2f}\n")
218 | 
219 |                 dead_individual_dir = cluster_population['individual'][closest_cluster_index]
220 | 
221 |                 os.remove(f"{dead_individual_dir}/alive.txt")
222 |                 open(f"{dead_individual_dir}/dead_{self.gen}.txt", 'w').close()
223 | 
224 |                 cluster_population['individual'][closest_cluster_index] = child_individual
225 |                 cluster_population['dna'][closest_cluster_index] = child_dna
226 |                 cluster_population['metrics'][closest_cluster_index] = child_metrics
227 |                 cluster_population['spectrum'][closest_cluster_index] = child_spectrum
228 | 
229 |             else:
230 | 
231 |                 self.population.write_log(f"--> Worker retained cluster {closest_cluster_index} head {cluster_population['individual'][closest_cluster_index].split('/')[2]} of fitness {cluster_population['metrics'][closest_cluster_index]['fitness']:.2f} over {child_individual.split('/')[2]} of fitness {child_metrics['fitness']:.2f}\n")
232 | 
233 |                 dead_individual_dir = child_individual
234 | 
235 |                 os.remove(f"{dead_individual_dir}/alive.txt")
236 |                 open(f"{dead_individual_dir}/dead_{self.gen}.txt", 'w').close()
237 | 
238 |         self.population.write_log(f"\n")
239 |         self.population.clean_populations()
240 | 
241 |         for pop in sorted(glob.glob(f"{self.population_dir}/*/*/alive.txt")):
242 | 
243 |             dna, metrics = self.read_individual(pop.split('/alive.txt')[0])
244 |             self.population.update_populations(dna)
245 | 
246 |         self.population.save_populations(f"{self.population_dir}/{self.gen}")
247 | 
248 | 
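# For reference, the normalise imported above is scipy.cluster.vq.whiten,
# which divides each spectrum feature (column) by its standard deviation so
# that no single feature dominates the Euclidean distances computed in
# evolve_ras. A small self-contained sketch with made-up spectra:
#
#     import numpy as np
#     from scipy.cluster.vq import whiten
#
#     spectra = np.array([[2.0, 10.0], [4.0, 20.0], [6.0, 30.0]])
#     normalised = whiten(spectra)          # unit variance per column
#     distance = [np.linalg.norm(x - normalised[-1]) for x in normalised[:-1]]
#     print(distance.index(min(distance)))  # index of the closest cluster head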
249 |     def evolve(self):
250 |         while True:
251 |             self.evolve_ras()
252 | 
253 | 
254 |     def read_individual(self, individual):
255 |         with open(f"{individual}/dna.pkl", 'rb') as dna_file: dna = pickle.load(dna_file)
256 |         with open(f"{individual}/metrics.pkl", 'rb') as metrics_file: metrics = pickle.load(metrics_file)
257 |         return dna, metrics
258 | 
259 | 
260 |     def store_individual(self, individual, dna, metrics, mutations=None):
261 |         with open(f"{individual}/dna.pkl", 'wb') as dna_file: pickle.dump(dna, dna_file, pickle.HIGHEST_PROTOCOL)
262 |         with open(f"{individual}/metrics.pkl", 'wb') as metrics_file: pickle.dump(metrics, metrics_file, pickle.HIGHEST_PROTOCOL)
263 |         if mutations is not None:
264 |             with open(f"{individual}/mutations.pkl", 'wb') as mutations_file: pickle.dump(mutations, mutations_file, pickle.HIGHEST_PROTOCOL)
265 | 
266 | 
267 |     def store_lineage(self, child_individual, parent_individual):
268 |         with open(f"{parent_individual}/lineage.pkl", 'rb') as lineage_file: lineage = pickle.load(lineage_file)
269 |         lineage += [parent_individual.split('/')[2]]
270 |         with open(f"{child_individual}/lineage.pkl", 'wb') as lineage_file: pickle.dump(lineage, lineage_file, pickle.HIGHEST_PROTOCOL)
271 | 
272 | 
273 |     def create_lineage(self, individual):
274 |         with open(f"{individual}/lineage.pkl", 'wb') as lineage_file: pickle.dump([individual.split('/')[2]], lineage_file, pickle.HIGHEST_PROTOCOL)
275 | 
276 | 
277 | def to_bool(arg_bool):
278 | 
279 |     if arg_bool in ('True', 'true', 'T', 't', '1', 'Y', 'y'): return True
280 |     elif arg_bool in ('False', 'false', 'F', 'f', '0', 'N', 'n'): return False
281 |     else: raise argparse.ArgumentTypeError('Boolean value expected.')
282 | 
283 | if __name__ == '__main__':
284 | 
285 |     parser = argparse.ArgumentParser(description=('Robust Architecture Search'))
286 | 
287 |     # Used for creating and mutating individuals of the population
288 |     parser.add_argument("--use_cycles", "-cy", action="store_false", help="pass to disable cycles in populations (enabled by default)")
289 |     parser.add_argument("--use_adaptive_mutations", "-am", action="store_true", help="pass to use adaptive mutations")
290 |     parser.add_argument("--use_random_params", "-rp", action="store_true", help="pass to use random params")
291 |     parser.add_argument("--use_non_squares", "-sq", action="store_true", help="pass to use non-square kernels and strides")
292 | 
293 |     # Used for training an individual
294 |     parser.add_argument("--use_augmentation", "-au", action="store_true", help="pass to use augmented training")
295 |     parser.add_argument("--use_limited_data", "-ld", action="store_true", help="pass to use limited data for training")
296 |     parser.add_argument("--dataset", "-d", default=2, type=int, help="Dataset to be used for training")
297 |     parser.add_argument("--epochs", "-e", default=50, type=int, help="Number of epochs to be used for a single individual")
298 | 
299 |     # Used for the type of evolving generations
300 |     parser.add_argument("--num_mutations", "-m", default=5, type=int, help="Number of mutations an individual undergoes")
301 |     parser.add_argument("--num_mutated_child", "-n", default=2, type=int, help="Number of mutated children for a single parent")
302 |     parser.add_argument("--population_size", "-p", default=25, type=int, help="Number of individuals in the population")
303 | 
304 |     # Used for changing directories
305 |     parser.add_argument("--population_dir", "-dir", default="population", type=str, help="Directory for storing all individuals")
306 |     parser.add_argument("--log_dir", "-log", default="logs", type=str, help="Directory for logs")
307 | 
308 |     # Used for a dry run
309 |     parser.add_argument("--test", "-test", action="store_true", help="Dry run to test that the code runs. Also check ./-log_dir-/exceptions.log.")
310 | 
311 |     args = parser.parse_args()
312 | 
313 |     print(args)
314 | 
315 |     NeuroEvolution(args).run()
316 | 
317 | 
--------------------------------------------------------------------------------
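A typical full run, with the defaults spelled out explicitly (dataset index 2
appears to select CIFAR-10, judging by the normalisation constants in
worker.py):

    python run_evolution.py -d 2 -e 50 -p 25 -n 2 -m 5 -dir population -log logs

Passing --test instead performs a single-sample dry run, which is what
run_unit_test.sh below does.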
/run_unit_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | python -u run_evolution.py --test
--------------------------------------------------------------------------------
/worker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | """
4 | Author: Shashank Kotyan
5 | Email: shashankkotyan@gmail.com
6 | """
7 | 
8 | import os, random, traceback, pickle, numpy as np
9 | from time import time
10 | 
11 | import build
12 | from process import Process
13 | 
14 | 
15 | class Worker:
16 | 
17 | 
18 |     def __init__(self, args, data):
19 | 
20 |         self.test = args.test
21 | 
22 |         self.dataset = args.dataset
23 |         self.use_augmentation = args.use_augmentation
24 |         self.use_limited_data = args.use_limited_data
25 |         self.epochs = args.epochs
26 | 
27 |         self.data = data
28 |         self.batch_size = 128
29 | 
30 | 
31 |     def preprocess(self, x):
32 | 
33 |         def process(x, mean, std):
34 |             for i in range(3): x[:,:,:,i] = (x[:,:,:,i] - mean[i]) / std[i]
35 |             return x
36 | 
37 |         if self.dataset == 2: mean, std = [125.307, 122.95, 113.865], [62.9932, 62.0887, 66.7048]
38 |         else: mean, std = [0, 0, 0], [255, 255, 255]
39 | 
40 |         return process(x, mean, std)
41 | 
42 | 
43 |     def train(self, gen):
44 | 
45 |         self.x_train, self.y_train, self.x_test, self.y_test = self.preprocess(self.data['x_train']), self.data['y_train'], self.preprocess(self.data['x_test']), self.data['y_test']
46 | 
47 |         if self.test: history = self.train_test()
48 | 
49 |         else:
50 | 
51 |             if self.use_limited_data: history = self.train_limited()
52 |             else: history = self.train_all()
53 | 
54 |         return history.history
55 | 
56 | 
57 |     def train_test(self):
58 | 
59 |         return self.model.fit(
60 |             self.x_train[:1], self.y_train[:1], batch_size=self.batch_size,
61 |             epochs=1, verbose=0, validation_data=(self.x_test[:1], self.y_test[:1])
62 |         )
63 | 
64 | 
65 |     def train_all(self):
66 | 
67 |         if self.use_augmentation:
68 | 
69 |             return self.model.fit_generator(
70 |                 self.data['datagen'].flow(self.x_train, self.y_train, batch_size=self.batch_size),
71 |                 steps_per_epoch=(len(self.data['x_train'])//self.batch_size),
72 |                 epochs=self.epochs, verbose=0, callbacks=self.data['callbacks'], validation_data=(self.x_test, self.y_test)
73 |             )
74 | 
75 |         else:
76 | 
77 |             return self.model.fit(
78 |                 self.x_train, self.y_train, batch_size=self.batch_size,
79 |                 epochs=self.epochs, verbose=0, callbacks=self.data['callbacks'], validation_data=(self.x_test, self.y_test)
80 |             )
81 | 
82 | 
83 |     def train_limited(self):
84 | 
85 |         random.seed(time())
86 | 
87 |         indices = random.sample(list(range(self.data['count_x_train'])), int(0.1*self.data['count_x_train']))
88 | 
89 |         if self.use_augmentation:
90 | 
91 |             return self.model.fit_generator(
92 |                 self.data['datagen'].flow(self.x_train[indices], self.y_train[indices], batch_size=self.batch_size), steps_per_epoch=(len(indices)//self.batch_size),
93 |                 epochs=self.epochs, verbose=0, callbacks=self.data['callbacks'], validation_data=(self.x_test, self.y_test)
94 |             )
95 | 
96 |         else:
97 | 
98 |             return self.model.fit(
99 |                 self.x_train[indices], self.y_train[indices], batch_size=self.batch_size,
100 |                 epochs=self.epochs, verbose=0, callbacks=self.data['callbacks'], validation_data=(self.x_test, self.y_test)
101 |             )
102 | 
103 | 
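# NOTE: evaluate_individual below trains each candidate in a disposable child
# Process rather than in the worker's own process, presumably because
# TensorFlow does not return GPU memory to the system until the owning process
# exits; a fresh process per model keeps one architecture's allocation from
# leaking into the next evaluation.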
104 |     def run_model(self, gpu_index, individual, dna, gen):
105 | 
106 |         self.model = build.build_block(dna['graph'], num_classes=self.data['num_classes'], gpu_index=gpu_index)
107 | 
108 |         start_time = time()
109 |         history = self.train(gen)
110 |         end_time = time()
111 | 
112 |         with open(f"{individual}/training_history.pkl", 'wb') as history_file: pickle.dump(history, history_file, pickle.HIGHEST_PROTOCOL)
113 | 
114 |         fitness = history['val_accuracy'][-1]
115 |         metrics = {"fitness": fitness, "evaluation_time": end_time - start_time}
116 | 
117 |         with open(f"{individual}/metrics.pkl", 'wb') as metrics_file: pickle.dump(metrics, metrics_file, pickle.HIGHEST_PROTOCOL)
118 | 
119 |         # from tensorflow.keras import utils
120 |         # utils.plot_model(self.model, show_shapes=True, to_file=f"{individual}/model.png")
121 | 
122 | 
123 |     def evaluate_individual(self, gpu_index, individual, dna, gen, population):
124 | 
125 |         try:
126 | 
127 |             p = Process(target=self.run_model, args=(gpu_index, individual, dna, gen))
128 |             p.start()
129 |             p.join()
130 |             if p.exception is not None: raise Exception(f"{p.exception[0]}, {p.exception[1]}")
131 | 
132 |             population.update_populations(dna)
133 |             with open(f"{individual}/metrics.pkl", 'rb') as metrics_file: metrics = pickle.load(metrics_file)
134 | 
135 |             population.write_log(f"GPU {gpu_index} completed training {individual.split('/')[2]} in {metrics['evaluation_time']:.2f} seconds with fitness {metrics['fitness']:.2f}\n")
136 | 
137 |             return metrics
138 | 
139 |         except Exception as e:
140 | 
141 |             population.write_exceptions_log(f"Exception occurred while training model: {e} \n{traceback.format_exc()}\n")
142 |             return None
143 | 
144 | 
145 |     def create_child(self, gpu_index, parent_dna, num_mutations, individual, population, store_individual, gen):
146 | 
147 |         metrics = None
148 |         while metrics is None:
149 | 
150 |             dna = parent_dna
151 |             mutations = []
152 | 
153 |             for _ in range(num_mutations):
154 | 
155 |                 test_dna = None
156 |                 while test_dna is None: mutation, test_dna = population.mutate(dna)
157 | 
158 |                 dna = test_dna
159 |                 mutations += [mutation]
160 | 
161 |             metrics = self.evaluate_individual(gpu_index, individual, dna, gen, population)
162 | 
163 |         store_individual(individual, dna, metrics, mutations)
164 |         open(f"{individual}/alive.txt", 'w').close()
165 | 
166 | 
167 |     def create_parent(self, gpu_index, individual, population, store_individual, gen):
168 | 
169 |         metrics = None
170 | 
171 |         while metrics is None:
172 | 
173 |             dna = population.create_random_model()
174 |             metrics = self.evaluate_individual(gpu_index, individual, dna, gen, population)
175 | 
176 |         store_individual(individual, dna, metrics)
177 |         open(f"{individual}/alive.txt", 'w').close()
--------------------------------------------------------------------------------
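Once a run has produced at least one generation, the fittest surviving
individual can be inspected straight from the on-disk population. A minimal
sketch, assuming the default population directory:

    import glob, pickle

    best_dir, best_fitness = None, -1.0
    for alive in sorted(glob.glob("population/*/*/alive.txt")):
        individual = alive.split('/alive.txt')[0]
        with open(f"{individual}/metrics.pkl", 'rb') as f: metrics = pickle.load(f)
        if metrics['fitness'] > best_fitness:
            best_dir, best_fitness = individual, metrics['fitness']

    print(best_dir, best_fitness)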