├── .gitignore ├── LICENSE ├── README.md ├── code ├── README.md ├── arch_search.py ├── arch_search │ ├── __init__.py │ ├── arch_search_convnet_net2net.py │ └── arch_search_densenet_net2net.py ├── client.py ├── data_providers │ ├── __init__.py │ ├── base_provider.py │ ├── cifar.py │ ├── downloader.py │ ├── svhn.py │ └── utils.py ├── expdir_monitor │ ├── __init__.py │ ├── arch_manager.py │ ├── distributed.py │ └── expdir_monitor.py ├── main.py ├── meta_controller │ ├── __init__.py │ ├── base_controller.py │ └── rl_controller.py ├── models │ ├── __init__.py │ ├── basic_model.py │ ├── convnet.py │ ├── dense_net.py │ ├── layer_cascade.py │ ├── layer_multi_branch.py │ ├── layers.py │ └── utils.py ├── run_dense_net.py ├── run_simple_convnet.py └── server_config ├── figures └── result_sample.png └── start_nets └── start_net_convnet_small_C10+ ├── init └── net.config /.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | 10 | ## Intermediate documents: 11 | *.dvi 12 | *-converted-to.* 13 | # these rules might exclude image files for figures etc. 14 | # *.ps 15 | # *.eps 16 | # *.pdf 17 | /Datasets 18 | 19 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 20 | *.bbl 21 | *.bcf 22 | *.blg 23 | *-blx.aux 24 | *-blx.bib 25 | *.brf 26 | *.run.xml 27 | 28 | ## Build tool auxiliary files: 29 | *.fdb_latexmk 30 | *.synctex 31 | *.synctex.gz 32 | *.synctex.gz(busy) 33 | *.pdfsync 34 | 35 | ## Auxiliary and intermediate files from other packages: 36 | 37 | 38 | # algorithms 39 | *.alg 40 | *.loa 41 | 42 | # achemso 43 | acs-*.bib 44 | 45 | # amsthm 46 | *.thm 47 | 48 | # beamer 49 | *.nav 50 | *.snm 51 | *.vrb 52 | 53 | #(e)ledmac/(e)ledpar 54 | *.end 55 | *.[1-9] 56 | *.[1-9][0-9] 57 | *.[1-9][0-9][0-9] 58 | *.[1-9]R 59 | *.[1-9][0-9]R 60 | *.[1-9][0-9][0-9]R 61 | *.eledsec[1-9] 62 | *.eledsec[1-9]R 63 | *.eledsec[1-9][0-9] 64 | *.eledsec[1-9][0-9]R 65 | *.eledsec[1-9][0-9][0-9] 66 | *.eledsec[1-9][0-9][0-9]R 67 | 68 | # glossaries 69 | *.acn 70 | *.acr 71 | *.glg 72 | *.glo 73 | *.gls 74 | 75 | # gnuplottex 76 | *-gnuplottex-* 77 | 78 | # hyperref 79 | 80 | # knitr 81 | *-concordance.tex 82 | *.tikz 83 | *-tikzDictionary 84 | 85 | # listings 86 | *.lol 87 | 88 | # makeidx 89 | *.idx 90 | *.ilg 91 | *.ind 92 | *.ist 93 | tex/rl-meta.pdf 94 | 95 | # minitoc 96 | *.maf 97 | *.mtc 98 | *.mtc[0-9] 99 | *.mtc[1-9][0-9] 100 | 101 | # minted 102 | _minted* 103 | *.pyg 104 | 105 | # morewrites 106 | *.mw 107 | 108 | # mylatexformat 109 | *.fmt 110 | 111 | # nomencl 112 | *.nlo 113 | 114 | # sagetex 115 | *.sagetex.sage 116 | *.sagetex.py 117 | *.sagetex.scmd 118 | 119 | # sympy 120 | *.sout 121 | *.sympy 122 | sympy-plots-for-*.tex/ 123 | 124 | # TikZ & PGF 125 | *.dpth 126 | *.md5 127 | *.auxlock 128 | 129 | # todonotes 130 | *.tdo 131 | 132 | # xindy 133 | *.xdy 134 | 135 | # WinEdt 136 | *.bak 137 | *.sav 138 | *.DS_Store 139 | /data 140 | */.idea/ 141 | /output/ 142 | /exp/ 143 | /backup/ 144 | 145 | # python 146 | __pycache__ 147 | .pyc 148 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Han Cai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software 
without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient Architecture Search by Network Transformation 2 | 3 | Code for the paper [Efficient Architecture Search by Network Transformation](https://arxiv.org/abs/1707.04873) in AAAI 2018. 4 | 5 | ## Reference 6 | ```bash 7 | @inproceedings{cai2018efficient, 8 | title={Efficient Architecture Search by Network Transformation}, 9 | author={Cai, Han and Chen, Tianyao and Zhang, Weinan and Yu, Yong and Wang, Jun}, 10 | booktitle={AAAI}, 11 | year={2018} 12 | } 13 | ``` 14 | 15 | ## Related Projects 16 | - [Path-Level Network Transformation for Efficient Architecture Search](https://arxiv.org/abs/1806.02639), in ICML 2018. [Code](https://github.com/han-cai/PathLevel-EAS). 17 | 18 | ## Dependencies 19 | 20 | * Python 3.6 21 | * Tensorflow 1.3.0 22 | 23 | ## Top Nets 24 | 25 | | nets | test accuracy (%) | Dataset | 26 | | ----------------------- | ------------- | ----- | 27 | | [C10+_Conv_Depth_20](https://drive.google.com/open?id=1BaSHPXSTxKO5avmtzJGwinLUkSPbwJYf) | 95.77 | C10+ | 28 | | [C10+_DenseNet_Depth_76](https://drive.google.com/open?id=1zXTB_DmS7i9HiDAxmzrBLmwjmZmfXI2n) | 96.56 | C10+ | 29 | | [C10_DenseNet_Depth_70](https://drive.google.com/open?id=1T0UMowk6lN9GzDmWcjwMG6lmbh9rogXx) | 95.34 | C10 | 30 | | [SVHN_Conv_Depth_20](https://drive.google.com/open?id=14CoT52n6Q-dOXSHQPGNGlIh_0SjXE6q7) | 98.27 | SVHN | 31 | 32 | For checking these networks, please download the corresponding model files and run the following command under the folder of **code**: 33 | ```bash 34 | $ python3 main.py --test --path= 35 | ``` 36 | 37 | For example, by running 38 | ```bash 39 | $ python3 main.py --test --path=../final_nets/C10+_Conv_Depth_20 40 | ``` 41 | you will get 42 | ```bash 43 | Testing... 44 | mean cross_entropy: 0.210500, mean accuracy: 0.957700 45 | test performance: 0.9577 46 | ``` 47 | 48 | ## Acknowledgement 49 | The DenseNet part of this code is based on the [repository by Illarion](https://github.com/ikhlestov/vision_networks). Many thanks to [Illarion](https://github.com/ikhlestov). 50 | 51 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | ## Architecture Search and Distributed Running 2 | To run architecture search experiments, you should first set up your 3 | environment for distributed running. 
Suppose there is a server 4 | and multiple GPU clients, each of which the server can access 5 | via **ssh**. 6 | 7 | On the server side, you should have a configuration file **server_config** 8 | under the folder of **code**. An example of the **server_config** file is: 9 | ```bash 10 | [ 11 | ["<client_1 address>", <number of GPUs on client_1>, "<path to code folder on client_1>/client.py"], 12 | ["<client_2 address>", <number of GPUs on client_2>, "<path to code folder on client_2>/client.py"], 13 | ["<client_3 address>", <number of GPUs on client_3>, "<path to code folder on client_3>/client.py"] 14 | ] 15 | ``` 16 | Once the **server_config** file is ready, you can run the following command under the folder of 17 | **code** on the server side to start the experiment: 18 | ```bash 19 | python3 arch_search.py --setting=convnet 20 | ``` 21 | 22 | 23 | When a remote GPU, e.g. GPU_0 on client 1, is chosen 24 | by the server, the following command is executed: 25 | ```bash 26 | ssh <client_1 address> CUDA_VISIBLE_DEVICES=0 python3 <path to code folder on client_1>/client.py 27 | ``` 28 | Make sure that 29 | - you can reach each client via **ssh** from the server without a password. 30 | [ssh-copy-id](https://www.ssh.com/ssh/copy-id) may be helpful if you have problems with the password. 31 | - the command "CUDA_VISIBLE_DEVICES=0 python3 <path to code folder>/client.py" can be 32 | executed correctly on the client side. 33 | 34 | For further details, please refer to **code/expdir_monitor/distributed.py**. 35 | 36 | By running the code with the small network, i.e. 37 | **start_nets/start_net_convnet_small_C10+**, as the starting point, 38 | you can get results like: 39 | 40 | ![](../figures/result_sample.png) 41 | -------------------------------------------------------------------------------- /code/arch_search.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from arch_search.arch_search_densenet_net2net import arch_search_densenet 4 | from arch_search.arch_search_convnet_net2net import arch_search_convnet 5 | 6 | _SEED = 110 7 | np.random.seed(_SEED) 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | '--setting', type=str, default='convnet', choices=['convnet', 'densenet'], 13 | ) 14 | 15 | args = parser.parse_args() 16 | if args.setting == 'convnet': 17 | """ 18 | Architecture Search on Convnet 19 | """ 20 | arch_search_convnet( 21 | start_net_path='../start_nets/start_net_convnet_small_C10+', 22 | arch_search_folder='../arch_search/Convnet/C10+/Conv_C10+_rl_small', 23 | net_pool_folder='../net_pool/Convnet/C10+/Conv_C10+_rl_small', 24 | max_episodes=15, 25 | random=False, 26 | ) 27 | elif args.setting == 'densenet': 28 | """ 29 | Architecture Search on DenseNet 30 | """ 31 | arch_search_densenet( 32 | start_net_path='placeholder', 33 | arch_search_folder='placeholder', 34 | net_pool_folder='placeholder', 35 | max_episodes=15, 36 | ) 37 | else: 38 | pass 39 | -------------------------------------------------------------------------------- /code/arch_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/arch_search/__init__.py -------------------------------------------------------------------------------- /code/arch_search/arch_search_convnet_net2net.py: -------------------------------------------------------------------------------- 1 | from expdir_monitor.arch_manager import ArchManager 2 | from meta_controller.base_controller import Vocabulary, EncoderNet, WiderActorNet, DeeperActorNet 3 | from meta_controller.rl_controller import ReinforceNet2NetController 4 | from time import gmtime, strftime, time 5 | from datetime import timedelta 6 |
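# Note (illustrative): the helpers below serialize a ConvNet configuration into a flat token string,
# where each ConvLayer becomes 'conv-<filter_num>-<kernel_size>', each FCLayer becomes 'fc-<units>',
# pooling layers become 'pool', and tokens are joined by '_'. With hypothetical layer sizes, an
# encoding would look like 'conv-16-3_conv-16-3_pool_fc-64'; get_net_seq() then maps such strings to
# integer codes padded with vocabulary.pad_code up to num_steps before feeding the encoder RNN.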
from models.layers import ConvLayer, FCLayer, PoolLayer 7 | import re 8 | import numpy as np 9 | 10 | 11 | def get_net_str(net_configs): 12 | if isinstance(net_configs, list): 13 | if len(net_configs) == 1: 14 | net_config = net_configs[0] 15 | net_str = [] 16 | for layer in net_config.layer_cascade.layers[:-1]: 17 | if isinstance(layer, ConvLayer): 18 | net_str.append('conv-%d-%d' % (layer.filter_num, layer.kernel_size)) 19 | elif isinstance(layer, FCLayer): 20 | net_str.append('fc-%d' % layer.units) 21 | else: 22 | net_str.append('pool') 23 | return ['_'.join(net_str)] 24 | else: 25 | net_str_list = [] 26 | for net_config in net_configs: 27 | net_str_list += get_net_str([net_config]) 28 | return net_str_list 29 | else: 30 | return get_net_str([net_configs])[0] 31 | 32 | 33 | def get_net_seq(net_configs, vocabulary, num_steps): 34 | net_str_list = get_net_str(net_configs) 35 | net_seq = [] 36 | seq_len = [] 37 | for net_str in net_str_list: 38 | net_str = re.split('_', net_str) 39 | net_code = vocabulary.get_code(net_str) 40 | _len = len(net_code) 41 | net_code += [vocabulary.pad_code for _ in range(len(net_code), num_steps)] 42 | net_seq.append(net_code) 43 | seq_len.append(_len) 44 | return np.array(net_seq), np.array(seq_len) 45 | 46 | 47 | def get_block_layer_num(net_configs): 48 | if len(net_configs) == 1: 49 | net_config = net_configs[0] 50 | block_layer_num = [] 51 | _count = 0 52 | for layer in net_config.layer_cascade.layers[:-1]: 53 | if isinstance(layer, PoolLayer): 54 | block_layer_num.append(_count) 55 | _count = 0 56 | else: 57 | _count += 1 58 | block_layer_num.append(_count) 59 | return np.array([block_layer_num]) 60 | else: 61 | block_layer_num = [] 62 | for net_config in net_configs: 63 | block_layer_num.append(get_block_layer_num([net_config])) 64 | return np.concatenate(block_layer_num, axis=0) 65 | 66 | 67 | def apply_wider_decision(wider_decision, net_configs, filter_num_list, units_num_list, noise): 68 | if len(net_configs) == 1: 69 | decision = wider_decision[0] 70 | net_config = net_configs[0] 71 | decision_mask = [] 72 | for _i, layer in enumerate(net_config.layer_cascade.layers[:-1]): 73 | if isinstance(layer, ConvLayer): 74 | if layer.filter_num >= filter_num_list[-1]: 75 | decision_mask.append(0.0) 76 | else: 77 | decision_mask.append(1.0) 78 | if decision[_i]: 79 | new_filter_number = layer.filter_num 80 | for fn in filter_num_list: 81 | if fn > new_filter_number: 82 | new_filter_number = fn 83 | break 84 | net_config.widen( 85 | layer_idx=_i, new_width=new_filter_number, noise=noise 86 | ) 87 | elif isinstance(layer, FCLayer): 88 | if layer.units >= units_num_list[-1]: 89 | decision_mask.append(0.0) 90 | else: 91 | decision_mask.append(1.0) 92 | if decision[_i]: 93 | new_units_num = layer.units 94 | for un in units_num_list: 95 | if un > new_units_num: 96 | new_units_num = un 97 | break 98 | net_config.widen( 99 | layer_idx=_i, new_width=new_units_num, noise=noise, 100 | ) 101 | else: 102 | decision_mask.append(0.0) 103 | decision_mask += [0.0] * (len(decision) - len(decision_mask)) 104 | return np.array([decision_mask]) 105 | else: 106 | decision_mask = [] 107 | for _i, net_config in enumerate(net_configs): 108 | decision = wider_decision[_i] 109 | mask = apply_wider_decision([decision], [net_config], filter_num_list, units_num_list, noise) 110 | decision_mask.append(mask) 111 | return np.concatenate(decision_mask, axis=0) 112 | 113 | 114 | def apply_deeper_decision(deeper_decision, net_configs, kernel_size_list, noise): 115 | if len(net_configs) == 1: 116 
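# Each row of deeper_decision is a triple [block index, layer offset within that block, index into
# kernel_size_list]; pooling layers mark block boundaries, which is what `_pt` counts below.
# Illustrative (hypothetical) example: a decision of [1, 2, 0] grows the second block at its third
# layer, and the newly inserted layer uses kernel size kernel_size_list[0] when that layer is a ConvLayer.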
| decision = deeper_decision[0] 117 | net_config = net_configs[0] 118 | 119 | block_decision, layer_idx_decision, ks_decision = decision 120 | decision_mask = [1.0, 1.0] 121 | block_idx, _pt = 0, 0 122 | to_set_layers = [] 123 | for _i, layer in enumerate(net_config.layer_cascade.layers[:-1]): 124 | if _pt == block_decision: 125 | real_layer_idx = _i + layer_idx_decision 126 | prev_layer = net_config.layer_cascade.layers[real_layer_idx] 127 | if isinstance(prev_layer, ConvLayer): 128 | if 'conv' in net_config.drop_scheme['type']: 129 | keep_prob = net_config.drop_scheme.get('conv_drop', 1.0) 130 | else: 131 | keep_prob = 1.0 132 | decision_mask.append(1.0) 133 | ks = kernel_size_list[ks_decision] 134 | new_layer, prev_layer = net_config.deepen( 135 | layer_idx=real_layer_idx, 136 | new_layer_config={'name': 'conv', 'kernel_size': ks, 'pre_activation': False, 137 | 'keep_prob': keep_prob}, 138 | ) 139 | to_set_layers.append([new_layer, prev_layer]) 140 | elif isinstance(prev_layer, FCLayer): 141 | if 'fc' in net_config.drop_scheme['type']: 142 | keep_prob = net_config.drop_scheme.get('fc_drop', 1.0) 143 | else: 144 | keep_prob = 1.0 145 | decision_mask.append(0.0) 146 | new_layer, prev_layer = net_config.deepen( 147 | layer_idx=real_layer_idx, 148 | new_layer_config={'name': 'fc', 'keep_prob': keep_prob}, 149 | ) 150 | to_set_layers.append([new_layer, prev_layer]) 151 | else: 152 | raise ValueError 153 | break 154 | if isinstance(layer, PoolLayer): 155 | _pt += 1 156 | return np.array([decision_mask]), to_set_layers 157 | else: 158 | decision_mask = [] 159 | to_set_layers = [] 160 | for _i, net_config in enumerate(net_configs): 161 | decision = deeper_decision[_i] 162 | mask, to_set = apply_deeper_decision([decision], [net_config], kernel_size_list, noise) 163 | decision_mask.append(mask) 164 | to_set_layers.append(to_set) 165 | return np.concatenate(decision_mask, axis=0), to_set_layers 166 | 167 | 168 | def arch_search_convnet(start_net_path, arch_search_folder, net_pool_folder, max_episodes, random=False): 169 | filter_num_list = [_i for _i in range(4, 44, 4)] 170 | units_num_list = [_i for _i in range(8, 88, 8)] 171 | # filter_num_list = [16, 32, 64, 96, 128, 192, 256, 320, 384, 448, 512, 576, 640] 172 | # units_num_list = [64, 128, 256, 384, 512, 640, 768, 896, 1024, 1152, 1280] 173 | kernel_size_list = [1, 3, 5] 174 | 175 | # encoder config 176 | layer_token_list = ['conv-%d-%d' % (f, k) for f in filter_num_list for k in [1, 3, 5]] 177 | layer_token_list += ['fc-%d' % u for u in units_num_list] + ['pool'] 178 | encoder_config = { 179 | 'num_steps': 50, 180 | 'vocab': Vocabulary(layer_token_list), 181 | 'embedding_dim': 16, 182 | 'rnn_units': 50, 183 | 'rnn_type': 'bi_lstm', 184 | 'rnn_layers': 1, 185 | } 186 | 187 | # wider actor config 188 | wider_actor_config = { 189 | 'out_dim': 1, 190 | 'num_steps': encoder_config['num_steps'], 191 | 'net_type': 'simple', 192 | 'net_config': None, 193 | } 194 | 195 | # deeper actor config 196 | deeper_actor_config = { 197 | 'decision_num': 3, 198 | 'out_dims': [5, 10, len(kernel_size_list)], 199 | 'embedding_dim': encoder_config['embedding_dim'], 200 | 'cell_type': 'lstm', 201 | 'rnn_layers': 1, 202 | 'attention_config': None, 203 | } 204 | 205 | # meta-controller config 206 | entropy_penalty = 1e-5 207 | learning_rate = 2e-3 208 | opt_config = ['adam', {}] 209 | 210 | # net2net noise config 211 | noise_config = { 212 | 'wider': {'type': 'normal', 'ratio': 1e-2}, 213 | 'deeper': {'type': 'normal', 'ratio': 1e-3}, 214 | } 215 | 216 | # episode 
config 217 | episode_config = { 218 | 'batch_size': 10, 219 | 'wider_action_num': 4, 220 | 'deeper_action_num': 5, 221 | } 222 | 223 | # arch search run config 224 | arch_search_run_config = { 225 | 'n_epochs': 20, 226 | 'init_lr': 0.02, 227 | 'validation_size': 5000, 228 | 'other_lr_schedule': {'type': 'cosine'}, 229 | 'batch_size': 64, 230 | 'include_extra': False, 231 | } 232 | 233 | # reward config 234 | reward_config = { 235 | 'func': 'tan', 236 | 'decay': 0.95, 237 | } 238 | 239 | arch_manager = ArchManager(start_net_path, arch_search_folder, net_pool_folder) 240 | _, run_config, _ = arch_manager.get_start_net() 241 | run_config.update(arch_search_run_config) 242 | 243 | encoder = EncoderNet(**encoder_config) 244 | wider_actor = WiderActorNet(**wider_actor_config) 245 | deeper_actor = DeeperActorNet(**deeper_actor_config) 246 | meta_controller = ReinforceNet2NetController(arch_manager.meta_controller_path, entropy_penalty, 247 | encoder, wider_actor, deeper_actor, opt_config) 248 | meta_controller.load() 249 | 250 | for _i in range(arch_manager.episode + 1, max_episodes + 1): 251 | print('episode. %d start. current time: %s' % (_i, strftime("%a, %d %b %Y %H:%M:%S", gmtime()))) 252 | start_time = time() 253 | 254 | nets = [arch_manager.get_start_net(copy=True) for _ in range(episode_config['batch_size'])] 255 | net_configs = [net_config for net_config, _, _ in nets] 256 | 257 | # feed_dict for update the controller 258 | wider_decision_trajectory, wider_decision_mask = [], [] 259 | deeper_decision_trajectory, deeper_decision_mask = [], [] 260 | deeper_block_layer_num = [] 261 | encoder_input_seq, encoder_seq_len = [], [] 262 | wider_seg_deeper = 0 263 | 264 | if random: 265 | # random search 266 | remain_wider_num = episode_config['wider_action_num'] 267 | remain_deeper_num = episode_config['deeper_action_num'] 268 | while remain_wider_num > 0 or remain_deeper_num > 0: 269 | rand_idx = np.random.randint(0, remain_wider_num + remain_deeper_num) 270 | if rand_idx < remain_wider_num: 271 | wider_decision = np.random.choice(2, [episode_config['batch_size'], encoder.num_steps]) 272 | apply_wider_decision(wider_decision, net_configs, filter_num_list, units_num_list, noise_config) 273 | remain_wider_num -= 1 274 | else: 275 | block_layer_num = get_block_layer_num(net_configs) 276 | deeper_decision = np.zeros([episode_config['batch_size'], deeper_actor.decision_num], np.int) 277 | deeper_decision[:, 0] = np.random.choice(deeper_actor.out_dims[0], deeper_decision[:, 0].shape) 278 | for _k, block_decision in enumerate(deeper_decision[:, 0]): 279 | available_layer_num = block_layer_num[_k, block_decision] 280 | deeper_decision[_k, 1] = np.random.randint(0, available_layer_num) 281 | deeper_decision[:, 2] = np.random.choice(deeper_actor.out_dims[2], deeper_decision[:, 2].shape) 282 | 283 | _, to_set_layers = apply_deeper_decision(deeper_decision, net_configs, 284 | kernel_size_list, noise_config) 285 | for _k, net_config in enumerate(net_configs): 286 | net_config.set_identity4deepen(to_set_layers[_k], arch_manager.data_provider, 287 | batch_size=64, batch_num=1, noise=noise_config) 288 | remain_deeper_num -= 1 289 | else: 290 | # on-policy training 291 | for _j in range(episode_config['wider_action_num']): 292 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 293 | wider_decision, wider_probs = meta_controller.sample_wider_decision(input_seq, seq_len) 294 | # modify net config according to wider_decision 295 | wider_mask = apply_wider_decision(wider_decision, 
net_configs, filter_num_list, 296 | units_num_list, noise_config) 297 | 298 | wider_decision_trajectory.append(wider_decision) 299 | wider_decision_mask.append(wider_mask) 300 | wider_seg_deeper += len(net_configs) 301 | encoder_input_seq.append(input_seq) 302 | encoder_seq_len.append(seq_len) 303 | 304 | to_set_layers = [[] for _ in range(episode_config['batch_size'])] 305 | for _j in range(episode_config['deeper_action_num']): 306 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 307 | block_layer_num = get_block_layer_num(net_configs) 308 | deeper_decision, deeper_probs = meta_controller.sample_deeper_decision(input_seq, seq_len, 309 | block_layer_num) 310 | # modify net config according to deeper_decision 311 | deeper_mask, to_set = apply_deeper_decision(deeper_decision, net_configs, 312 | kernel_size_list, noise_config) 313 | for _k in range(episode_config['batch_size']): 314 | to_set_layers[_k] += to_set[_k] 315 | 316 | deeper_decision_trajectory.append(deeper_decision) 317 | deeper_decision_mask.append(deeper_mask) 318 | deeper_block_layer_num.append(block_layer_num) 319 | encoder_input_seq.append(input_seq) 320 | encoder_seq_len.append(seq_len) 321 | 322 | for _k, net_config in enumerate(net_configs): 323 | net_config.set_identity4deepen(to_set_layers[_k], arch_manager.data_provider, 324 | batch_size=64, batch_num=1, noise=noise_config) 325 | # prepare feed dict 326 | encoder_input_seq = np.concatenate(encoder_input_seq, axis=0) 327 | encoder_seq_len = np.concatenate(encoder_seq_len, axis=0) 328 | if episode_config['wider_action_num'] > 0: 329 | wider_decision_trajectory = np.concatenate(wider_decision_trajectory, axis=0) 330 | wider_decision_mask = np.concatenate(wider_decision_mask, axis=0) 331 | else: 332 | wider_decision_trajectory = -np.ones([1, meta_controller.encoder.num_steps]) 333 | wider_decision_mask = -np.ones([1, meta_controller.encoder.num_steps]) 334 | if episode_config['deeper_action_num'] > 0: 335 | deeper_decision_trajectory = np.concatenate(deeper_decision_trajectory, axis=0) 336 | deeper_decision_mask = np.concatenate(deeper_decision_mask, axis=0) 337 | deeper_block_layer_num = np.concatenate(deeper_block_layer_num, axis=0) 338 | else: 339 | deeper_decision_trajectory = - np.ones([1, meta_controller.deeper_actor.decision_num]) 340 | deeper_decision_mask = - np.ones([1, meta_controller.deeper_actor.decision_num]) 341 | deeper_block_layer_num = np.ones([1, meta_controller.deeper_actor.out_dims[0]]) 342 | 343 | run_configs = [run_config] * len(net_configs) 344 | net_str_list = get_net_str(net_configs) 345 | 346 | net_vals = arch_manager.get_net_vals(net_str_list, net_configs, run_configs) 347 | rewards = arch_manager.reward(net_vals, reward_config) 348 | 349 | rewards = np.concatenate([rewards for _ in range(episode_config['wider_action_num'] + 350 | episode_config['deeper_action_num'])]) 351 | rewards /= episode_config['batch_size'] 352 | 353 | # update the agent 354 | if not random: 355 | meta_controller.update_controller(learning_rate, wider_seg_deeper, wider_decision_trajectory, 356 | wider_decision_mask, deeper_decision_trajectory, deeper_decision_mask, 357 | rewards, deeper_block_layer_num, encoder_input_seq, encoder_seq_len) 358 | 359 | meta_controller.save() 360 | # episode end 361 | time_per_episode = time() - start_time 362 | seconds_left = int((max_episodes - _i) * time_per_episode) 363 | print('Time per Episode: %s, Est. 
complete in: %s' % ( 364 | str(timedelta(seconds=time_per_episode)), 365 | str(timedelta(seconds=seconds_left)))) 366 | -------------------------------------------------------------------------------- /code/arch_search/arch_search_densenet_net2net.py: -------------------------------------------------------------------------------- 1 | from expdir_monitor.arch_manager import ArchManager 2 | from meta_controller.base_controller import Vocabulary, EncoderNet, WiderActorNet, DeeperActorNet 3 | from meta_controller.rl_controller import ReinforceNet2NetController 4 | from time import gmtime, strftime, time 5 | from datetime import timedelta 6 | from models.dense_net import DenseBlock, TransitionBlock 7 | import re 8 | import numpy as np 9 | 10 | 11 | def get_net_str(net_configs): 12 | if len(net_configs) == 1: 13 | net_config = net_configs[0] 14 | net_str = [] 15 | for block in net_config.blocks: 16 | if isinstance(block, DenseBlock): 17 | block_str = [] 18 | for miniblock in block.miniblocks: 19 | block_str.append('g%d' % miniblock.out_features_dim) 20 | block_str = '-'.join(block_str) 21 | net_str.append(block_str) 22 | else: 23 | net_str.append('t') 24 | return ['_'.join(net_str)] 25 | else: 26 | net_str_list = [] 27 | for net_config in net_configs: 28 | net_str_list += get_net_str([net_config]) 29 | return net_str_list 30 | 31 | 32 | def get_net_seq(net_configs, vocabulary, num_steps): 33 | net_str_list = get_net_str(net_configs) 34 | net_seq = [] 35 | seq_len = [] 36 | for net_str in net_str_list: 37 | net_str = re.split('_|-', net_str) 38 | net_code = vocabulary.get_code(net_str) 39 | _len = len(net_code) 40 | net_code += [vocabulary.pad_code for _ in range(len(net_code), num_steps)] 41 | net_seq.append(net_code) 42 | seq_len.append(_len) 43 | return np.array(net_seq), np.array(seq_len) 44 | 45 | 46 | def get_block_layer_num(net_configs): 47 | if len(net_configs) == 1: 48 | net_config = net_configs[0] 49 | block_layer_num = [] 50 | for block in net_config.blocks: 51 | if isinstance(block, DenseBlock): 52 | block_layer_num.append(len(block.miniblocks)) 53 | return np.array([block_layer_num]) 54 | else: 55 | block_layer_num = [] 56 | for net_config in net_configs: 57 | block_layer_num.append(get_block_layer_num([net_config])) 58 | return np.concatenate(block_layer_num, axis=0) 59 | 60 | 61 | def apply_wider_decision(wider_decision, net_configs, growth_rate_list, noise): 62 | if len(net_configs) == 1: 63 | decision = wider_decision[0] 64 | net_config = net_configs[0] 65 | _pt = 0 66 | decision_mask = [] 67 | for block_idx, block in enumerate(net_config.blocks): 68 | if isinstance(block, DenseBlock): 69 | for miniblock_idx, miniblock in enumerate(block.miniblocks): 70 | growth_rate = miniblock.out_features_dim 71 | if growth_rate >= growth_rate_list[-1]: 72 | decision_mask.append(0.0) 73 | else: 74 | decision_mask.append(1.0) 75 | new_gr = growth_rate 76 | for gr in growth_rate_list: 77 | if gr > new_gr: 78 | new_gr = gr 79 | break 80 | if decision[_pt]: 81 | net_config.widen( 82 | loc={'block': block_idx, 'miniblock': miniblock_idx, 83 | 'multi-branch': 'in_bottle', 'layer': 0}, 84 | new_width=net_config.bc_ratio * new_gr, 85 | noise=noise, 86 | ) 87 | net_config.widen( 88 | loc={'block': block_idx, 'miniblock': miniblock_idx, 89 | 'multi-branch': 'branch', 'branch': 0, 'layer': 0}, 90 | new_width=new_gr, 91 | noise=noise, 92 | ) 93 | _pt += 1 94 | else: 95 | decision_mask.append(0.0) 96 | _pt += 1 97 | decision_mask += [0.0] * (len(decision) - len(decision_mask)) 98 | return 
np.array([decision_mask]) 99 | else: 100 | decision_mask = [] 101 | for _i, net_config in enumerate(net_configs): 102 | decision = wider_decision[_i] 103 | mask = apply_wider_decision([decision], [net_config], growth_rate_list, noise) 104 | decision_mask.append(mask) 105 | return np.concatenate(decision_mask, axis=0) 106 | 107 | 108 | def apply_deeper_decision(deeper_decision, net_configs, noise): 109 | if len(net_configs) == 1: 110 | decision = deeper_decision[0] 111 | net_config = net_configs[0] 112 | 113 | block_decision, layer_idx_decision = decision 114 | decision_mask = [1.0, 1.0] 115 | block_idx, _pt = 0, 0 116 | for _i, block in enumerate(net_config.blocks): 117 | if isinstance(block, DenseBlock): 118 | if _pt == block_decision: 119 | block_idx = _i 120 | break 121 | _pt += 1 122 | net_config.insert_miniblock( 123 | loc={'block': block_idx, 'miniblock': layer_idx_decision}, 124 | miniblock_config={'bc_mode': True}, 125 | noise=noise, 126 | ) 127 | return np.array([decision_mask]) 128 | else: 129 | decision_mask = [] 130 | for _i, net_config in enumerate(net_configs): 131 | decision = deeper_decision[_i] 132 | mask = apply_deeper_decision([decision], [net_config], noise) 133 | decision_mask.append(mask) 134 | return np.concatenate(decision_mask, axis=0) 135 | 136 | 137 | def widen_transition(net_configs, noise): 138 | for net_config in net_configs: 139 | new_out_dim = int(net_config.average_growth_rate * net_config.first_ratio) 140 | if new_out_dim > net_config.blocks[0].out_features_dim: 141 | net_config.widen( 142 | loc={'block': 0, 'layer': 0}, 143 | new_width=new_out_dim, 144 | noise=noise, 145 | ) 146 | out_features_dim = new_out_dim 147 | for _i, block in enumerate(net_config.blocks[2:-1], 2): 148 | if isinstance(block, TransitionBlock): 149 | new_out_dim = int(net_config.blocks[_i - 1].out_features_dim(net_config.blocks[_i - 2].out_features_dim) 150 | * net_config.reduction) 151 | if new_out_dim > block.out_features_dim: 152 | net_config.widen( 153 | loc={'block': _i, 'layer': 0}, 154 | new_width=new_out_dim, 155 | noise=noise, 156 | ) 157 | out_features_dim = block.out_features_dim 158 | else: 159 | out_features_dim = block.out_features_dim(out_features_dim) 160 | 161 | 162 | def arch_search_densenet(start_net_path, arch_search_folder, net_pool_folder, max_episodes): 163 | growth_rate_list = [_i for _i in range(4, 50, 2)] 164 | # encoder config 165 | layer_token_list = ['g%d' % growth_rate for growth_rate in growth_rate_list] 166 | encoder_config = { 167 | 'num_steps': 50, 168 | 'vocab': Vocabulary(layer_token_list + ['t']), 169 | 'embedding_dim': 16, 170 | 'rnn_units': 50, 171 | 'rnn_type': 'bi_lstm', 172 | 'rnn_layers': 1, 173 | } 174 | 175 | # wider actor config 176 | wider_actor_config = { 177 | 'out_dim': 1, 178 | 'num_steps': encoder_config['num_steps'], 179 | 'net_type': 'simple', 180 | 'net_config': None, 181 | } 182 | 183 | # deeper actor config 184 | deeper_actor_config = { 185 | 'decision_num': 2, 186 | 'out_dims': [3, 20], 187 | 'embedding_dim': encoder_config['embedding_dim'], 188 | 'cell_type': 'lstm', 189 | 'rnn_layers': 1, 190 | 'attention_config': None, 191 | } 192 | 193 | # meta-controller config 194 | entropy_penalty = 1e-5 195 | learning_rate = 2e-3 196 | opt_config = ['adam', {}] 197 | 198 | # net2net noise config 199 | noise_config = { 200 | 'wider': {'type': 'normal', 'ratio': 1e-2}, 201 | 'deeper': {'type': 'normal', 'ratio': 1e-3}, 202 | } 203 | 204 | # episode config 205 | episode_config = { 206 | 'batch_size': 10, 207 | 'wider_action_num': 10, 208 
| 'deeper_action_num': 5, 209 | } 210 | 211 | # arch search run config 212 | arch_search_run_config = { 213 | 'n_epochs': 20, 214 | 'init_lr': 0.02, 215 | 'validation_size': 5000, 216 | 'other_lr_schedule': {'type': 'cosine'}, 217 | 'batch_size': 64, 218 | 'include_extra': False, 219 | } 220 | 221 | # reward config 222 | reward_config = { 223 | 'func': 'tan', 224 | 'decay': 0.95, 225 | } 226 | 227 | arch_manager = ArchManager(start_net_path, arch_search_folder, net_pool_folder) 228 | _, run_config, _ = arch_manager.get_start_net() 229 | run_config.update(arch_search_run_config) 230 | 231 | encoder = EncoderNet(**encoder_config) 232 | wider_actor = WiderActorNet(**wider_actor_config) 233 | deeper_actor = DeeperActorNet(**deeper_actor_config) 234 | meta_controller = ReinforceNet2NetController(arch_manager.meta_controller_path, entropy_penalty, 235 | encoder, wider_actor, deeper_actor, opt_config) 236 | meta_controller.load() 237 | 238 | for _i in range(arch_manager.episode + 1, max_episodes + 1): 239 | print('episode. %d start. current time: %s' % (_i, strftime("%a, %d %b %Y %H:%M:%S", gmtime()))) 240 | start_time = time() 241 | 242 | nets = [arch_manager.get_start_net(copy=True) for _ in range(episode_config['batch_size'])] 243 | net_configs = [net_config for net_config, _, _ in nets] 244 | 245 | # feed_dict for update the controller 246 | wider_decision_trajectory, wider_decision_mask = [], [] 247 | deeper_decision_trajectory, deeper_decision_mask = [], [] 248 | deeper_block_layer_num = [] 249 | encoder_input_seq, encoder_seq_len = [], [] 250 | wider_seg_deeper = 0 251 | 252 | # on-policy training 253 | for _j in range(episode_config['wider_action_num']): 254 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 255 | wider_decision, wider_probs = meta_controller.sample_wider_decision(input_seq, seq_len) 256 | # modify net config according to wider_decision 257 | wider_mask = apply_wider_decision(wider_decision, net_configs, growth_rate_list, noise_config) 258 | 259 | wider_decision_trajectory.append(wider_decision) 260 | wider_decision_mask.append(wider_mask) 261 | wider_seg_deeper += len(net_configs) 262 | encoder_input_seq.append(input_seq) 263 | encoder_seq_len.append(seq_len) 264 | 265 | for _j in range(episode_config['deeper_action_num']): 266 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 267 | block_layer_num = get_block_layer_num(net_configs) 268 | deeper_decision, deeper_probs = meta_controller.sample_deeper_decision(input_seq, seq_len, block_layer_num) 269 | # modify net config according to deeper_decision 270 | deeper_mask = apply_deeper_decision(deeper_decision, net_configs, noise_config) 271 | 272 | deeper_decision_trajectory.append(deeper_decision) 273 | deeper_decision_mask.append(deeper_mask) 274 | deeper_block_layer_num.append(block_layer_num) 275 | encoder_input_seq.append(input_seq) 276 | encoder_seq_len.append(seq_len) 277 | 278 | widen_transition(net_configs, noise_config) 279 | 280 | run_configs = [run_config] * len(net_configs) 281 | net_str_list = get_net_str(net_configs) 282 | 283 | net_vals = arch_manager.get_net_vals(net_str_list, net_configs, run_configs) 284 | rewards = arch_manager.reward(net_vals, reward_config) 285 | 286 | # prepare feed dict 287 | encoder_input_seq = np.concatenate(encoder_input_seq, axis=0) 288 | encoder_seq_len = np.concatenate(encoder_seq_len, axis=0) 289 | if episode_config['wider_action_num'] > 0: 290 | wider_decision_trajectory = np.concatenate(wider_decision_trajectory, 
axis=0) 291 | wider_decision_mask = np.concatenate(wider_decision_mask, axis=0) 292 | else: 293 | wider_decision_trajectory = -np.ones([1, meta_controller.encoder.num_steps]) 294 | wider_decision_mask = -np.ones([1, meta_controller.encoder.num_steps]) 295 | if episode_config['deeper_action_num'] > 0: 296 | deeper_decision_trajectory = np.concatenate(deeper_decision_trajectory, axis=0) 297 | deeper_decision_mask = np.concatenate(deeper_decision_mask, axis=0) 298 | deeper_block_layer_num = np.concatenate(deeper_block_layer_num, axis=0) 299 | else: 300 | deeper_decision_trajectory = - np.ones([1, meta_controller.deeper_actor.decision_num]) 301 | deeper_decision_mask = - np.ones([1, meta_controller.deeper_actor.decision_num]) 302 | deeper_block_layer_num = np.ones([1, meta_controller.deeper_actor.out_dims[0]]) 303 | rewards = np.concatenate([rewards for _ in range(episode_config['wider_action_num'] + 304 | episode_config['deeper_action_num'])]) 305 | rewards /= episode_config['batch_size'] 306 | 307 | # update the agent 308 | meta_controller.update_controller(learning_rate, wider_seg_deeper, wider_decision_trajectory, 309 | wider_decision_mask, deeper_decision_trajectory, deeper_decision_mask, 310 | rewards, deeper_block_layer_num, encoder_input_seq, encoder_seq_len) 311 | 312 | meta_controller.save() 313 | # episode end 314 | time_per_episode = time() - start_time 315 | seconds_left = int((max_episodes - _i) * time_per_episode) 316 | print('Time per Episode: %s, Est. complete in: %s' % ( 317 | str(timedelta(seconds=time_per_episode)), 318 | str(timedelta(seconds=seconds_left)))) 319 | 320 | -------------------------------------------------------------------------------- /code/client.py: -------------------------------------------------------------------------------- 1 | """ 2 | The file to run in the client side 3 | Train the network and return the validation performance 4 | """ 5 | import os 6 | from expdir_monitor.expdir_monitor import ExpdirMonitor 7 | import time 8 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 9 | 10 | 11 | def run(expdir): 12 | start_time = time.time() 13 | expdir_monitor = ExpdirMonitor(expdir) 14 | valid_performance = expdir_monitor.run(pure=True, restore=False) 15 | end_time = time.time() 16 | print('running time: %s' % (end_time - start_time)) 17 | print('valid performance: %s' % valid_performance) 18 | 19 | 20 | def main(): 21 | expdir = input().strip('\n') 22 | run(expdir) 23 | 24 | 25 | if __name__ == "__main__": 26 | try: 27 | main() 28 | except KeyboardInterrupt: 29 | pass 30 | -------------------------------------------------------------------------------- /code/data_providers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/data_providers/__init__.py -------------------------------------------------------------------------------- /code/data_providers/base_provider.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class DataSet: 5 | """Class to represent some dataset: train, validation, test""" 6 | 7 | @property 8 | def num_examples(self): 9 | """Return qtty of examples in dataset""" 10 | raise NotImplementedError 11 | 12 | def next_batch(self, batch_size): 13 | """Return batch of required size of data, labels""" 14 | raise NotImplementedError 15 | 16 | 17 | class ImagesDataSet(DataSet): 18 | """Dataset for images that provide some often used methods""" 19 | 20 
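# Note: measure_mean_and_std() below returns per-channel means and standard deviations; the CIFAR
# provider computes them once on the training images and reuses the same statistics to normalize
# the validation and test splits.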
| @staticmethod 21 | def measure_mean_and_std(images): 22 | # for every channel in image 23 | means = [] 24 | stds = [] 25 | # for every channel in image(assume this is last dimension) 26 | for ch in range(images.shape[-1]): 27 | means.append(np.mean(images[:, :, :, ch])) 28 | stds.append(np.std(images[:, :, :, ch])) 29 | return means, stds 30 | 31 | @staticmethod 32 | def shuffle_images_and_labels(images, labels): 33 | rand_indexes = np.random.permutation(images.shape[0]) 34 | shuffled_images = images[rand_indexes] 35 | shuffled_labels = labels[rand_indexes] 36 | return shuffled_images, shuffled_labels 37 | 38 | @staticmethod 39 | def normalize_images(images, normalization_type, meanstd=None): 40 | """ 41 | Args: 42 | images: numpy 4D array 43 | normalization_type: `str`, available choices: 44 | - divide_255 45 | - divide_256 46 | - by_channels 47 | meanstd 48 | """ 49 | if normalization_type is not None: 50 | if normalization_type == 'divide_255': 51 | images = images / 255 52 | elif normalization_type == 'divide_256': 53 | images = images / 256 54 | elif normalization_type == 'by_channels': 55 | images = images.astype('float64') 56 | # for every channel in image(assume this is last dimension) 57 | means, stds = meanstd 58 | for i in range(images.shape[-1]): 59 | images[:, :, :, i] = ((images[:, :, :, i] - means[i]) / stds[i]) 60 | else: 61 | raise Exception('Unknown type of normalization') 62 | return images 63 | 64 | 65 | class DataProvider: 66 | _SEED = 88 67 | 68 | @property 69 | def data_shape(self): 70 | """Return shape as python list of one data entry""" 71 | raise NotImplementedError 72 | 73 | @property 74 | def n_classes(self): 75 | """Return `int` of num classes""" 76 | raise NotImplementedError 77 | 78 | def labels_to_one_hot(self, labels): 79 | """Convert 1D array of labels to one hot representation 80 | 81 | Args: 82 | labels: 1D numpy array 83 | """ 84 | new_labels = np.zeros((labels.shape[0], self.n_classes)) 85 | new_labels[range(labels.shape[0]), labels] = np.ones(labels.shape) 86 | return new_labels 87 | 88 | @staticmethod 89 | def labels_from_one_hot(labels): 90 | """Convert 2D array of labels to 1D class based representation 91 | 92 | Args: 93 | labels: 2D numpy array 94 | """ 95 | return np.argmax(labels, axis=1) 96 | -------------------------------------------------------------------------------- /code/data_providers/cifar.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | import pickle 4 | import random 5 | 6 | import numpy as np 7 | 8 | from data_providers.base_provider import ImagesDataSet, DataProvider 9 | from data_providers.downloader import download_data_url 10 | 11 | 12 | def augment_image(image, pad): 13 | """Perform zero padding, randomly crop image to original size, 14 | maybe mirror horizontally""" 15 | init_shape = image.shape 16 | new_shape = [init_shape[0] + pad * 2, 17 | init_shape[1] + pad * 2, 18 | init_shape[2]] 19 | zeros_padded = np.zeros(new_shape) 20 | zeros_padded[pad:init_shape[0] + pad, pad:init_shape[1] + pad, :] = image 21 | # randomly crop to original size 22 | init_x = np.random.randint(0, pad * 2) 23 | init_y = np.random.randint(0, pad * 2) 24 | cropped = zeros_padded[ 25 | init_x: init_x + init_shape[0], 26 | init_y: init_y + init_shape[1], 27 | :] 28 | flip = random.getrandbits(1) 29 | if flip: 30 | cropped = cropped[:, ::-1, :] 31 | return cropped 32 | 33 | 34 | def augment_all_images(initial_images, pad=4): 35 | new_images = np.zeros(initial_images.shape) 36 | 
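# Standard CIFAR-style augmentation: each image is zero-padded by `pad` pixels on every side,
# randomly cropped back to its original size, and mirrored horizontally with probability 0.5
# (see augment_image above).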
for i in range(initial_images.shape[0]): 37 | new_images[i] = augment_image(initial_images[i], pad) 38 | return new_images 39 | 40 | 41 | class CifarDataSet(ImagesDataSet): 42 | def __init__(self, images, labels, n_classes, shuffle, normalization, augmentation, meanstd): 43 | """ 44 | Args: 45 | images: 4D numpy array 46 | labels: 2D or 1D numpy array 47 | n_classes: `int`, number of cifar classes - 10 or 100 48 | shuffle: `str` or None 49 | None: no any shuffling 50 | once_prior_train: shuffle train data only once prior train 51 | every_epoch: shuffle train data prior every epoch 52 | normalization: `str` or None 53 | None: no any normalization 54 | divide_255: divide all pixels by 255 55 | divide_256: divide all pixels by 256 56 | by_channels: substract mean of every chanel and divide each 57 | chanel data by it's standard deviation 58 | augmentation: `bool` 59 | """ 60 | if shuffle is None: 61 | self.shuffle_every_epoch = False 62 | elif shuffle == 'once_prior_train': 63 | self.shuffle_every_epoch = False 64 | images, labels = self.shuffle_images_and_labels(images, labels) 65 | elif shuffle == 'every_epoch': 66 | self.shuffle_every_epoch = True 67 | else: 68 | raise Exception('Unknown type of shuffling') 69 | self._batch_counter, self.epoch_images, self.epoch_labels = 0, None, None 70 | 71 | self.images = images 72 | self.labels = labels 73 | self.n_classes = n_classes 74 | self.augmentation = augmentation 75 | self.normalization = normalization 76 | self.meanstd = meanstd 77 | self.images = self.normalize_images(images, self.normalization, self.meanstd) 78 | self.start_new_epoch() 79 | 80 | def start_new_epoch(self): 81 | self._batch_counter = 0 82 | if self.shuffle_every_epoch: 83 | images, labels = self.shuffle_images_and_labels( 84 | self.images, self.labels) 85 | else: 86 | images, labels = self.images, self.labels 87 | if self.augmentation: 88 | images = augment_all_images(images, pad=4) 89 | self.epoch_images = images 90 | self.epoch_labels = labels 91 | 92 | @property 93 | def num_examples(self): 94 | return self.labels.shape[0] 95 | 96 | def next_batch(self, batch_size): 97 | start = self._batch_counter * batch_size 98 | end = (self._batch_counter + 1) * batch_size 99 | self._batch_counter += 1 100 | images_slice = self.epoch_images[start: end] 101 | labels_slice = self.epoch_labels[start: end] 102 | if images_slice.shape[0] != batch_size: 103 | self.start_new_epoch() 104 | return self.next_batch(batch_size) 105 | else: 106 | return images_slice, labels_slice 107 | 108 | 109 | class CifarDataProvider(DataProvider): 110 | """Abstract class for cifar readers""" 111 | def __init__(self, save_path=None, validation_size=None, shuffle=None, normalization=None, 112 | one_hot=True, **kwargs): 113 | """ 114 | Args: 115 | save_path: `str` 116 | validation_set: `bool`. 117 | validation_split: `float` or None 118 | float: chunk of `train set` will be marked as `validation set`. 
119 | None: if 'validation set' == True, `validation set` will be 120 | copy of `test set` 121 | shuffle: `str` or None 122 | None: no any shuffling 123 | once_prior_train: shuffle train data only once prior train 124 | every_epoch: shuffle train data prior every epoch 125 | normalization: `str` or None 126 | None: no any normalization 127 | divide_255: divide all pixels by 255 128 | divide_256: divide all pixels by 256 129 | by_channels: substract mean of every chanel and divide each 130 | chanel data by it's standard deviation 131 | one_hot: `bool`, return laels one hot encoded 132 | """ 133 | self._save_path = save_path 134 | self.one_hot = one_hot 135 | download_data_url(self.data_url, self.save_path) 136 | train_fnames, test_fnames = self.get_filenames(self.save_path) 137 | 138 | # add train and validations datasets 139 | images, labels = self.read_cifar(train_fnames) 140 | train_meanstd = ImagesDataSet.measure_mean_and_std(images) 141 | if validation_size is not None: 142 | np.random.seed(DataProvider._SEED) 143 | rand_indexes = np.random.permutation(images.shape[0]) 144 | valid_indexes = rand_indexes[:validation_size] 145 | train_indexes = rand_indexes[validation_size:] 146 | self.train = CifarDataSet( 147 | images=images[train_indexes], labels=labels[train_indexes], 148 | n_classes=self.n_classes, shuffle=shuffle, 149 | normalization=normalization, 150 | augmentation=self.data_augmentation, meanstd=train_meanstd) 151 | self.validation = CifarDataSet( 152 | images=images[valid_indexes], labels=labels[valid_indexes], 153 | n_classes=self.n_classes, shuffle=None, 154 | normalization=normalization, 155 | augmentation=False, meanstd=train_meanstd) 156 | else: 157 | self.train = CifarDataSet( 158 | images=images, labels=labels, 159 | n_classes=self.n_classes, shuffle=shuffle, 160 | normalization=normalization, 161 | augmentation=self.data_augmentation, meanstd=train_meanstd) 162 | 163 | # add test set 164 | images, labels = self.read_cifar(test_fnames) 165 | self.test = CifarDataSet( 166 | images=images, labels=labels, 167 | shuffle=None, n_classes=self.n_classes, 168 | normalization=normalization, 169 | augmentation=False, meanstd=train_meanstd) 170 | 171 | if validation_size is None: 172 | self.validation = self.test 173 | 174 | @property 175 | def save_path(self): 176 | if self._save_path is None: 177 | self._save_path = os.path.join( 178 | tempfile.gettempdir(), 'cifar%d' % self.n_classes) 179 | return self._save_path 180 | 181 | @property 182 | def data_url(self): 183 | """Return url for downloaded data depends on cifar class""" 184 | data_url = ('http://www.cs.toronto.edu/' 185 | '~kriz/cifar-%d-python.tar.gz' % self.n_classes) 186 | return data_url 187 | 188 | @property 189 | def data_shape(self): 190 | return 32, 32, 3 191 | 192 | @property 193 | def n_classes(self): 194 | return self._n_classes 195 | 196 | def get_filenames(self, save_path): 197 | """Return two lists of train and test filenames for dataset""" 198 | raise NotImplementedError 199 | 200 | def read_cifar(self, filenames): 201 | if self.n_classes == 10: 202 | labels_key = b'labels' 203 | elif self.n_classes == 100: 204 | labels_key = b'fine_labels' 205 | 206 | images_res = [] 207 | labels_res = [] 208 | for fname in filenames: 209 | with open(fname, 'rb') as f: 210 | images_and_labels = pickle.load(f, encoding='bytes') 211 | images = images_and_labels[b'data'] 212 | images = images.reshape(-1, 3, 32, 32) 213 | images = images.swapaxes(1, 3).swapaxes(1, 2) 214 | images_res.append(images) 215 | 
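# At this point `images` has shape (N, 32, 32, 3): the raw CIFAR rows of 3072 bytes were reshaped
# to (N, 3, 32, 32) and the channel axis was moved last by the two swapaxes calls above.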
labels_res.append(images_and_labels[labels_key]) 216 | images_res = np.vstack(images_res) 217 | labels_res = np.hstack(labels_res) 218 | if self.one_hot: 219 | labels_res = self.labels_to_one_hot(labels_res) 220 | return images_res, labels_res 221 | 222 | 223 | class Cifar10DataProvider(CifarDataProvider): 224 | _n_classes = 10 225 | data_augmentation = False 226 | 227 | def get_filenames(self, save_path): 228 | sub_save_path = os.path.join(save_path, 'cifar-10-batches-py') 229 | train_filenames = [ 230 | os.path.join( 231 | sub_save_path, 232 | 'data_batch_%d' % i) for i in range(1, 6)] 233 | test_filenames = [os.path.join(sub_save_path, 'test_batch')] 234 | return train_filenames, test_filenames 235 | 236 | 237 | class Cifar100DataProvider(CifarDataProvider): 238 | _n_classes = 100 239 | data_augmentation = False 240 | 241 | def get_filenames(self, save_path): 242 | sub_save_path = os.path.join(save_path, 'cifar-100-python') 243 | train_filenames = [os.path.join(sub_save_path, 'train')] 244 | test_filenames = [os.path.join(sub_save_path, 'test')] 245 | return train_filenames, test_filenames 246 | 247 | 248 | class Cifar10AugmentedDataProvider(Cifar10DataProvider): 249 | _n_classes = 10 250 | data_augmentation = True 251 | 252 | 253 | class Cifar100AugmentedDataProvider(Cifar100DataProvider): 254 | _n_classes = 100 255 | data_augmentation = True 256 | 257 | -------------------------------------------------------------------------------- /code/data_providers/downloader.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import urllib.request 4 | import tarfile 5 | import zipfile 6 | 7 | 8 | def report_download_progress(count, block_size, total_size): 9 | pct_complete = float(count * block_size) / total_size 10 | msg = '\r {0:.1%} already downloaded'.format(pct_complete) 11 | sys.stdout.write(msg) 12 | sys.stdout.flush() 13 | 14 | 15 | def download_data_url(url, download_dir): 16 | filename = url.split('/')[-1] 17 | file_path = os.path.join(download_dir, filename) 18 | 19 | if not os.path.exists(file_path): 20 | os.makedirs(download_dir, exist_ok=True) 21 | 22 | print('Download %s to %s' % (url, file_path)) 23 | file_path, _ = urllib.request.urlretrieve( 24 | url=url, 25 | filename=file_path, 26 | reporthook=report_download_progress) 27 | 28 | print('\nExtracting files') 29 | if file_path.endswith('.zip'): 30 | zipfile.ZipFile(file=file_path, mode='r').extractall(download_dir) 31 | elif file_path.endswith(('.tar.gz', '.tgz')): 32 | tarfile.open(name=file_path, mode='r:gz').extractall(download_dir) 33 | -------------------------------------------------------------------------------- /code/data_providers/svhn.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | import scipy.io 4 | 5 | import numpy as np 6 | 7 | from data_providers.base_provider import ImagesDataSet, DataProvider 8 | from data_providers.downloader import download_data_url 9 | 10 | 11 | class SVHNDataSet(ImagesDataSet): 12 | n_classes = 10 13 | 14 | def __init__(self, images, labels, shuffle, normalization): 15 | """ 16 | Args: 17 | images: 4D numpy array 18 | labels: 2D or 1D numpy array 19 | shuffle: `bool`, should shuffle data or not 20 | normalization: `str` or None 21 | None: no any normalization 22 | divide_255: divide all pixels by 255 23 | divide_256: divide all pixels by 256 24 | by_channels: substract mean of every chanel and divide each 25 | chanel data by it's standard deviation 26 
| """ 27 | self._batch_counter, self.epoch_images, self.epoch_labels = 0, None, None 28 | 29 | self.shuffle = shuffle 30 | self.images = images 31 | self.labels = labels 32 | self.normalization = normalization 33 | self.start_new_epoch() 34 | 35 | def start_new_epoch(self): 36 | self._batch_counter = 0 37 | if self.shuffle: 38 | self.epoch_images, self.epoch_labels = self.shuffle_images_and_labels( 39 | self.images, self.labels) 40 | else: 41 | self.epoch_images, self.epoch_labels = self.images, self.labels 42 | 43 | @property 44 | def num_examples(self): 45 | return self.labels.shape[0] 46 | 47 | def next_batch(self, batch_size): 48 | start = self._batch_counter * batch_size 49 | end = (self._batch_counter + 1) * batch_size 50 | self._batch_counter += 1 51 | images_slice = self.epoch_images[start: end] 52 | labels_slice = self.epoch_labels[start: end] 53 | # due to memory error it should be done inside batch 54 | if self.normalization is not None: 55 | images_slice = self.normalize_images( 56 | images_slice, self.normalization) 57 | if images_slice.shape[0] != batch_size: 58 | self.start_new_epoch() 59 | return self.next_batch(batch_size) 60 | else: 61 | return images_slice, labels_slice 62 | 63 | 64 | class SVHNDataProvider(DataProvider): 65 | def __init__(self, save_path=None, validation_size=None, shuffle=False, normalization=None, 66 | one_hot=True, include_extra=True, **kwargs): 67 | """ 68 | Args: 69 | save_path: `str` 70 | validation_set: `bool`. 71 | validation_split: `int` or None 72 | float: chunk of `train set` will be marked as `validation set`. 73 | None: if 'validation set' == True, `validation set` will be 74 | copy of `test set` 75 | shuffle: `bool`, should shuffle data or not 76 | normalization: `str` or None 77 | None: no any normalization 78 | divide_255: divide all pixels by 255 79 | divide_256: divide all pixels by 256 80 | by_chanels: substract mean of every chanel and divide each 81 | chanel data by it's standart deviation 82 | one_hot: `bool`, return lasels one hot encoded 83 | """ 84 | self._save_path = save_path 85 | train_images = [] 86 | train_labels = [] 87 | if include_extra: 88 | train_data_src = ['train', 'extra'] 89 | else: 90 | train_data_src = ['train'] 91 | for part in train_data_src: 92 | images, labels = self.get_images_and_labels(part, one_hot) 93 | train_images.append(images) 94 | train_labels.append(labels) 95 | train_images = np.vstack(train_images) 96 | if one_hot: 97 | train_labels = np.vstack(train_labels) 98 | else: 99 | train_labels = np.hstack(train_labels) 100 | if validation_size is not None: 101 | np.random.seed(DataProvider._SEED) 102 | rand_indexes = np.random.permutation(train_images.shape[0]) 103 | valid_indexes = rand_indexes[:validation_size] 104 | train_indexes = rand_indexes[validation_size:] 105 | valid_images, valid_labels = train_images[valid_indexes], train_labels[valid_indexes] 106 | train_images, train_labels = train_images[train_indexes], train_labels[train_indexes] 107 | self.validation = SVHNDataSet( 108 | valid_images, valid_labels, False, normalization) 109 | 110 | self.train = SVHNDataSet( 111 | train_images, train_labels, shuffle, normalization) 112 | 113 | test_images, test_labels = self.get_images_and_labels('test', one_hot) 114 | self.test = SVHNDataSet(test_images, test_labels, False, normalization) 115 | 116 | if validation_size is None: 117 | self.validation = self.test 118 | 119 | def get_images_and_labels(self, name_part, one_hot=False): 120 | url = self.data_url + name_part + '_32x32.mat' 121 | 
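# The SVHN .mat files store images as a (32, 32, 3, N) array and label the digit 0 as class 10;
# the transpose(3, 0, 1, 2) and the `labels[labels == 10] = 0` remapping below convert them to the
# (N, 32, 32, 3) layout and 0-9 labels used by the rest of the code.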
download_data_url(url, self.save_path) 122 | filename = os.path.join(self.save_path, name_part + '_32x32.mat') 123 | data = scipy.io.loadmat(filename) 124 | images = data['X'].transpose(3, 0, 1, 2) 125 | labels = data['y'].reshape((-1)) 126 | labels[labels == 10] = 0 127 | if one_hot: 128 | labels = self.labels_to_one_hot(labels) 129 | return images, labels 130 | 131 | @property 132 | def n_classes(self): 133 | return 10 134 | 135 | @property 136 | def save_path(self): 137 | if self._save_path is None: 138 | self._save_path = os.path.join(tempfile.gettempdir(), 'svhn') 139 | return self._save_path 140 | 141 | @property 142 | def data_url(self): 143 | return 'http://ufldl.stanford.edu/housenumbers/' 144 | 145 | @property 146 | def data_shape(self): 147 | return 32, 32, 3 148 | -------------------------------------------------------------------------------- /code/data_providers/utils.py: -------------------------------------------------------------------------------- 1 | from data_providers.cifar import Cifar10DataProvider, Cifar100DataProvider, \ 2 | Cifar10AugmentedDataProvider, Cifar100AugmentedDataProvider 3 | from data_providers.svhn import SVHNDataProvider 4 | 5 | 6 | def get_data_provider_by_name(name, train_params): 7 | """Return required data provider class""" 8 | if name == 'C10': 9 | return Cifar10DataProvider(**train_params) 10 | if name == 'C10+': 11 | return Cifar10AugmentedDataProvider(**train_params) 12 | if name == 'C100': 13 | return Cifar100DataProvider(**train_params) 14 | if name == 'C100+': 15 | return Cifar100AugmentedDataProvider(**train_params) 16 | if name == 'SVHN': 17 | return SVHNDataProvider(**train_params) 18 | else: 19 | print('Sorry, data provider for `%s` dataset ' 20 | 'was not implemented yet' % name) 21 | exit() 22 | -------------------------------------------------------------------------------- /code/expdir_monitor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/expdir_monitor/__init__.py -------------------------------------------------------------------------------- /code/expdir_monitor/arch_manager.py: -------------------------------------------------------------------------------- 1 | """ 2 | Manage the folder for architecture search 3 | """ 4 | import os 5 | import subprocess 6 | import json 7 | import pickle 8 | import numpy as np 9 | from data_providers.utils import get_data_provider_by_name 10 | from expdir_monitor.expdir_monitor import ExpdirMonitor 11 | from expdir_monitor import distributed 12 | 13 | 14 | class NetPool: 15 | def __init__(self, path): 16 | self.path = os.path.realpath(path) 17 | os.makedirs(self.path, exist_ok=True) 18 | 19 | self.net_str2id = {} 20 | self.net_id2val = {} 21 | self.running_set = {'stone': 0} 22 | 23 | self.on_load() 24 | 25 | @property 26 | def str2id_path(self): 27 | return '%s/net.str2id' % self.path 28 | 29 | @property 30 | def id2val_path(self): 31 | return '%s/net.id2val' % self.path 32 | 33 | def on_load(self): 34 | if os.path.isfile(self.str2id_path): 35 | self.net_str2id = json.load(open(self.str2id_path, 'r')) 36 | if os.path.isfile(self.id2val_path): 37 | net_id2val = json.load(open(self.id2val_path, 'r')) 38 | for key in net_id2val: 39 | self.net_id2val[int(key)] = net_id2val[key] 40 | to_rename = [] 41 | for folder in os.listdir(self.path): 42 | if folder.startswith('#'): 43 | out_file = '%s/%s/output' % (self.path, folder) 44 | if not os.path.isfile(out_file): 
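# A '#...' folder without an 'output' file is the leftover of an interrupted run and is removed;
# finished runs are re-registered in net_str2id / net_id2val and their folders renamed to
# '#<net_id>' in the else branch below.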
45 | subprocess.run(['rm', '-rf', os.path.join(self.path, folder)]) 46 | else: 47 | net_str = json.load(open('%s/%s/net.str' % (self.path, folder), 'r'))['net_str'] 48 | if self.net_str2id.get(net_str) is None: 49 | record = json.load(open(out_file, 'r')) 50 | net_val = float(record['valid_acc']) 51 | net_id = self.add_net(net_str, net_val) 52 | folder_path = self.get_net_folder(net_id) 53 | else: 54 | net_id = self.net_str2id[net_str] 55 | folder_path = self.get_net_folder(net_id) 56 | if folder_path != folder: 57 | to_rename.append([folder, folder_path]) 58 | for src_folder, dst_folder in to_rename: 59 | src_folder = os.path.join(self.path, src_folder) 60 | dst_folder = os.path.join(self.path, dst_folder) 61 | os.rename(src_folder, dst_folder) 62 | 63 | def add_net(self, net_str, net_val): 64 | assert self.net_str2id.get(net_str) is None, '%s exists' % net_str 65 | net_id = net_str.__hash__() 66 | while net_id in self.net_id2val: 67 | net_id += 1 68 | self.net_str2id[net_str] = net_id 69 | self.net_id2val[net_id] = net_val 70 | return net_id 71 | 72 | def get_net_val(self, net_str): 73 | net_id = self.net_str2id.get(net_str) 74 | if net_id is None: 75 | if net_str in self.running_set: 76 | running_id = self.running_set[net_str] 77 | else: 78 | running_id = net_str.__hash__() 79 | while running_id in self.running_set.values(): 80 | running_id += 1 81 | self.running_set[net_str] = running_id 82 | net_folder = '%s/#Running_%s' % (self.path, running_id) 83 | return None, net_folder 84 | else: 85 | net_val = self.net_id2val[net_id] 86 | net_folder = '%s/%s' % (self.path, self.get_net_folder(net_id)) 87 | return net_val, net_folder 88 | 89 | def on_running_finished(self, net_str, net_folder, net_val): 90 | net_id = self.add_net(net_str, net_val) 91 | # folder_path = self.get_net_folder(net_id) 92 | # self.running_set.pop(net_str) 93 | # os.rename(net_folder, os.path.join(self.path, folder_path)) 94 | 95 | def save(self): 96 | json.dump(self.net_str2id, open(self.str2id_path, 'w'), indent=4) 97 | json.dump(self.net_id2val, open(self.id2val_path, 'w'), indent=4) 98 | 99 | @staticmethod 100 | def get_net_folder(net_id): 101 | return '#%s' % net_id 102 | 103 | 104 | class ArchManager: 105 | def __init__(self, start_net_path, arch_path, net_pool_path): 106 | self.start_net_monitor = ExpdirMonitor(start_net_path) 107 | self.start_net_config, self.data_provider = None, None 108 | 109 | self.net_pool = NetPool(net_pool_path) 110 | 111 | self.arch_path = os.path.realpath(arch_path) 112 | os.makedirs(self.arch_path, exist_ok=True) 113 | 114 | self.episode = 0 115 | self.net_val_wrt_episode = [] 116 | 117 | self.val_log_writer = open(self.val_logs_path, 'a') 118 | self.net_log_writer = open(self.net_logs_path, 'a') 119 | self.on_load() 120 | 121 | @property 122 | def meta_controller_path(self): 123 | return '%s/controller' % self.arch_path 124 | 125 | @property 126 | def val_logs_path(self): 127 | return '%s/val.log' % self.arch_path 128 | 129 | @property 130 | def net_logs_path(self): 131 | return '%s/net.log' % self.arch_path 132 | 133 | def on_load(self): 134 | if os.path.isfile(self.val_logs_path): 135 | with open(self.val_logs_path, 'r') as fin: 136 | for line in fin.readlines(): 137 | line = line[:-1] 138 | self.episode += 1 139 | net_val_list = line.split('\t')[4:] 140 | net_val_list = [float(net_val) for net_val in net_val_list] 141 | self.net_val_wrt_episode.append(net_val_list) 142 | 143 | def get_start_net(self, copy=False): 144 | if self.start_net_config is None: 145 | # prepare start net 146 
| print('Load start net from %s' % self.start_net_monitor.expdir) 147 | init = self.start_net_monitor.load_init() 148 | dataset = 'C10+' if init is None else init.get('dataset', 'C10+') 149 | run_config = self.start_net_monitor.load_run_config(print_info=True, dataset=dataset) 150 | run_config.renew_logs = False 151 | 152 | net_config, model_name = self.start_net_monitor.load_net_config(init, print_info=True) 153 | self.data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 154 | self.start_net_config = [net_config, run_config, model_name] 155 | if copy: 156 | net_config, run_config, model_name = self.start_net_config[:3] 157 | return [ 158 | net_config.copy(), run_config.copy(), model_name 159 | ] 160 | else: 161 | return self.start_net_config 162 | 163 | @staticmethod 164 | def prepare_folder_for_valid(net_str, net_config, run_config, exp_dir): 165 | os.makedirs(exp_dir, exist_ok=True) 166 | monitor = ExpdirMonitor(exp_dir) 167 | json.dump(net_config.get_config(), open(monitor.net_config_path, 'w'), indent=4) 168 | json.dump(run_config.get_config(), open(monitor.run_config_path, 'w'), indent=4) 169 | pickle.dump(net_config.renew_init(None), open(monitor.init, 'wb')) 170 | json.dump({'net_str': net_str}, open(os.path.join(monitor.expdir, 'net.str'), 'w'), indent=4) 171 | 172 | def get_net_vals(self, net_str_list, net_configs, run_configs): 173 | net_val_list = [-1] * len(net_str_list) 174 | 175 | to_run = {} 176 | for _i, net_str in enumerate(net_str_list): 177 | net_val, net_folder = self.net_pool.get_net_val(net_str) 178 | if net_val is None: 179 | if net_folder in to_run: to_run[net_folder] += [_i] 180 | else: 181 | to_run[net_folder] = [_i] 182 | self.prepare_folder_for_valid(net_str, net_configs[_i], run_configs[_i], net_folder) 183 | else: 184 | net_val_list[_i] = net_val 185 | 186 | task_list = [[net_folder, to_run[net_folder]] for net_folder in to_run] 187 | distributed.run(task_list) 188 | episode_total_running_time = 0 189 | for net_folder, idx, net_val in task_list: 190 | net_str = net_str_list[idx[0]] 191 | net_val, running_time = net_val 192 | episode_total_running_time += running_time 193 | self.net_pool.on_running_finished(net_str, net_folder, net_val) 194 | for _id in idx: 195 | net_val_list[_id] = net_val 196 | self.log_nets(net_str_list, episode_total_running_time) 197 | self.net_pool.save() 198 | return net_val_list 199 | 200 | def val2reward(self, net_val_list, func=None): 201 | rewards = [] 202 | for net_val in net_val_list: 203 | if func is None: 204 | rewards.append(net_val) 205 | elif func == 'tan': 206 | reward = np.tan(net_val * np.pi / 2) 207 | rewards.append(reward) 208 | else: 209 | raise NotImplementedError 210 | return rewards 211 | 212 | def reward(self, net_val_list, reward_config): 213 | rewards = self.val2reward(net_val_list, reward_config.get('func')) 214 | rewards = np.array(rewards) 215 | # baseline function 216 | decay = reward_config['decay'] 217 | if 'exp_moving_avg' not in self.__dict__: 218 | self.exp_moving_avg = 0 219 | for old_net_val_list in self.net_val_wrt_episode[:-1]: 220 | old_rewards = self.val2reward(old_net_val_list, reward_config.get('func')) 221 | self.exp_moving_avg += decay * (np.mean(old_rewards) - self.exp_moving_avg) 222 | self.exp_moving_avg += decay * (np.mean(rewards) - self.exp_moving_avg) 223 | return rewards - self.exp_moving_avg 224 | 225 | def log_nets(self, net_str_list, running_time, print_info=True): 226 | net_id_list = [self.net_pool.net_str2id[net_str] for net_str in net_str_list] 
227 | nets_num = len(net_id_list) 228 | new_nets_num = len(set(net_id_list)) 229 | 230 | net_val_list = [self.net_pool.net_id2val[net_id] for net_id in net_id_list] 231 | mean_val, max_val = np.mean(net_val_list), np.max(net_val_list) 232 | self.net_log_writer.write('%d.\t nets=%d (total=%d)\t%s\n' % (self.episode, new_nets_num, nets_num, 233 | '\t'.join([str(net_id) for net_id in net_id_list]))) 234 | log_str = '%d.\t nets=%d (total=%d)\t mean_val=%s (max_val=%s)\t using %s(min)\t%s' % \ 235 | (self.episode + 1, new_nets_num, nets_num, mean_val, max_val, running_time, 236 | '\t'.join([str(net_val) for net_val in net_val_list])) 237 | if print_info: 238 | print(log_str) 239 | self.val_log_writer.write(log_str + '\n') 240 | 241 | self.val_log_writer.flush() 242 | self.net_log_writer.flush() 243 | self.net_val_wrt_episode.append(net_val_list) 244 | self.episode += 1 245 | -------------------------------------------------------------------------------- /code/expdir_monitor/distributed.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen, PIPE 2 | from threading import Thread, Lock 3 | from queue import Queue 4 | from time import sleep 5 | from sys import stderr 6 | import re 7 | import json 8 | import shlex 9 | 10 | max_running_machine = 5 11 | 12 | _max_used_mem = 0.3 13 | _max_used_gpu = 0.3 14 | config_file = 'server_config' 15 | 16 | 17 | class GpuChecker: 18 | def __init__(self, nvidia_getter, gpuid): 19 | self.nvidia_getter = nvidia_getter 20 | self.gpuid = gpuid 21 | 22 | def state_parser(self, state_str): 23 | result = [] 24 | for line in state_str.split('\n'): 25 | # .*?(\d*)C.*\|(.*?)MiB.*?/(.*?)MiB.*?\|.*?(\d*)\% 26 | # .*?(\d*)C.*\|(.*?)MiB.*?/(.*?)MiB.*?\|.*?(\d*)% 27 | pattern = re.search('.*?(\d*)C.*\|(.*?)MiB.*?/(.*?)MiB.*?\|.*?(\d*)%', line) 28 | if pattern is not None: 29 | result.append([int(x) for x in pattern.groups()]) 30 | if self.gpuid >= len(result): 31 | return None 32 | # assert self.gpuid < len(result), 'Parsing error or not enough gpus.' 
33 | return result[self.gpuid] 34 | 35 | def instance_available(self, state_str): 36 | parse_result = self.state_parser(state_str) 37 | if parse_result is None: return False 38 | _, used_mem, total_mem, occupation = parse_result 39 | occupation /= 100 40 | return used_mem / total_mem < _max_used_mem and occupation < _max_used_gpu 41 | 42 | def check(self): 43 | _check_times = 3 44 | try: 45 | for _i in range(_check_times): 46 | assert self.instance_available(self.nvidia_getter()) 47 | if _i < _check_times - 1: 48 | sleep(0.5) 49 | except AssertionError: 50 | return False 51 | return True 52 | 53 | def is_on(self): 54 | try: 55 | parse_result = self.state_parser(self.nvidia_getter()) 56 | if parse_result is None: 57 | return False 58 | else: 59 | return True 60 | except Exception: 61 | return False 62 | 63 | 64 | class RemoteController: 65 | def __init__(self, remote, gpuid, executive): 66 | self.remote = remote 67 | self.gpuid = gpuid 68 | self.executive = executive 69 | 70 | self.gpu_checker = GpuChecker(lambda: self.run('nvidia-smi'), self.gpuid) 71 | 72 | self._lock = Lock() 73 | self._occupied = False 74 | self._on_running = None 75 | 76 | @property 77 | def occupied(self): 78 | with self._lock: 79 | return self._occupied 80 | 81 | @occupied.setter 82 | def occupied(self, val): 83 | assert isinstance(val, bool), 'Occupied must be True or False, but {} received.'.format(val) 84 | with self._lock: 85 | self._occupied = val 86 | 87 | def run(self, cmd, stdin=None): 88 | proc = Popen('ssh {} {}'.format(self.remote, shlex.quote(cmd)), shell=True, stdin=PIPE, stdout=PIPE, 89 | universal_newlines=True) 90 | return proc.communicate(input=stdin)[0] 91 | 92 | @property 93 | def gpu_state(self): 94 | return self.gpu_checker.check() 95 | 96 | @property 97 | def exe_cmd(self): 98 | return 'CUDA_VISIBLE_DEVICES={gpuid} python3 {executive}'.format( 99 | executive=self.executive, 100 | gpuid=self.gpuid 101 | ) 102 | 103 | def check_on(self, queue): 104 | if not self.gpu_checker.is_on(): 105 | if self._on_running is not None: 106 | queue.put(self._on_running) 107 | self._on_running = None 108 | print('Remote Error.') 109 | return False 110 | return True 111 | 112 | def remote_executer(self, idx, expdir, queue): 113 | self.occupied = True 114 | cmd = self.exe_cmd 115 | print('{}: {} {}'.format(self.remote, cmd, expdir), file=stderr) 116 | result = self.run(cmd, stdin=expdir) 117 | try: 118 | result = str(result).split('\n') 119 | used_time = result[-3] 120 | result = result[-2] 121 | assert result.startswith('valid performance: ') and used_time.startswith('running time: '), \ 122 | 'Invalid return: %s, %s' % (used_time, result) 123 | used_time = used_time[len('running time: '):] 124 | used_time = float(used_time) / 60 # minutes 125 | result = result[len('valid performance: '):] 126 | result = float(result) 127 | queue.put([idx, (result, used_time)]) 128 | print('{}th task: {} is successfully executed, result is {}, using {} min.'. 
129 |                   format(idx, expdir, result, used_time), file=stderr)
130 |         except Exception:
131 |             queue.put([idx, expdir])
132 |             print('{}th task: {} fails, with return: {}.'.format(idx, expdir, result), file=stderr)
133 |         self.occupied = False
134 | 
135 |     def execute(self, idx, expdir, queue):
136 |         if self.occupied or not self.gpu_state:
137 |             queue.put([idx, expdir])
138 |         else:
139 |             self._on_running = [idx, expdir]
140 |             thr = Thread(target=self.remote_executer, args=(idx, expdir, queue))
141 |             thr.start()
142 |             self._on_running = None
143 | 
144 | 
145 | class ClusterController:
146 |     def __init__(self, config_list):
147 |         self.cluster = [RemoteController(*config) for config in config_list]
148 |         self._pt = 0
149 | 
150 |     def choice(self, queue):
151 |         remotes_available, occupy_num = self.get_available(queue)
152 |         while occupy_num >= max_running_machine:
153 |             sleep(0.5)
154 |             remotes_available, occupy_num = self.get_available(queue)
155 |         while not remotes_available[self._pt]:
156 |             self._pt = (self._pt + 1) % len(self.cluster)
157 |         choose_remote = self.cluster[self._pt]
158 |         self._pt = (self._pt + 1) % len(self.cluster)
159 |         return choose_remote
160 |         # return random.choice(self.cluster)
161 | 
162 |     def get_available(self, queue):
163 |         remotes_available = [False] * len(self.cluster)
164 |         occupy_num = len(self.cluster)
165 |         for _i, remote in enumerate(self.cluster):
166 |             if not remote.check_on(queue):
167 |                 occupy_num -= 1
168 |                 continue
169 |             if not remote.occupied:
170 |                 remotes_available[_i] = True
171 |                 occupy_num -= 1
172 |         return remotes_available, occupy_num
173 | 
174 |     def execute(self, idx, expdir, queue):
175 |         self.choice(queue).execute(idx, expdir, queue)
176 | 
177 | 
178 | def run_tasks(config_list, expdir_list):
179 |     controller = ClusterController(config_list)
180 |     result_list = [None for _ in expdir_list]
181 | 
182 |     queue = Queue()
183 |     for idx, expdir in enumerate(expdir_list):
184 |         queue.put([idx, expdir])
185 | 
186 |     remained = len(result_list)
187 |     while remained > 0:
188 |         idx, val = queue.get()
189 |         if isinstance(val, str):
190 |             # expdir, need to execute
191 |             controller.execute(idx, val, queue)
192 |         elif isinstance(val, tuple):
193 |             # result, need to be put in result_list
194 |             result_list[idx] = val
195 |             remained -= 1
196 |     return result_list
197 | 
198 | 
199 | def run(task_list):
200 |     with open(config_file, 'r') as f:
201 |         config_list = json.load(f)
202 |     expdir_list = [expdir for expdir, *_ in task_list]
203 |     result_list = run_tasks(config_list, expdir_list)
204 |     for idx, _ in enumerate(task_list):
205 |         task_list[idx].append(result_list[idx])
206 | 
--------------------------------------------------------------------------------
/code/expdir_monitor/expdir_monitor.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import subprocess
4 | from models.utils import RunConfig, get_model_config_by_name, get_model_by_name
5 | from data_providers.utils import get_data_provider_by_name
6 | import pickle
7 | 
8 | 
9 | class ExpdirMonitor:
10 |     def __init__(self, expdir):
11 |         self.expdir = os.path.realpath(expdir)
12 |         os.makedirs(self.expdir, exist_ok=True)
13 | 
14 |     @property
15 |     def logs(self): return '%s/logs' % self.expdir
16 | 
17 |     @property
18 |     def checkpoint(self): return '%s/checkpoint' % self.expdir
19 | 
20 |     @property
21 |     def snapshot(self): return '%s/snapshot' % self.expdir
22 | 
23 |     @property
24 |     def output(self): return '%s/output' % self.expdir
25 | 
26 |     @property
27 |     def
init(self): return '%s/init' % self.expdir 28 | 29 | @property 30 | def run_config_path(self): return '%s/run.config' % self.expdir 31 | 32 | @property 33 | def net_config_path(self): return '%s/net.config' % self.expdir 34 | 35 | def load_run_config(self, print_info=False, dataset='C10+'): 36 | if os.path.isfile(self.run_config_path): 37 | run_config = json.load(open(self.run_config_path, 'r')) 38 | else: 39 | print('Use Default Run Config for %s' % dataset) 40 | run_config = RunConfig.get_default_run_config(dataset) 41 | if print_info: 42 | print('Run config:') 43 | for k, v in run_config.items(): 44 | print('\t%s: %s' % (k, v)) 45 | return RunConfig(**run_config) 46 | 47 | def load_init(self): 48 | init_path = '%s/init' % self.expdir 49 | if os.path.isfile(init_path): 50 | return pickle.load(open(self.init, 'rb')) 51 | else: 52 | return None 53 | 54 | def load_net_config(self, init, print_info=False): 55 | assert os.path.isfile(self.net_config_path), \ 56 | 'Net configs do not exist in the given expdir <%s>' % self.expdir 57 | net_config_json = json.load(open(self.net_config_path, 'r')) 58 | net_config = get_model_config_by_name(net_config_json['name'])() 59 | net_config.set_net_from_config(net_config_json, init=init, print_info=print_info) 60 | return net_config, net_config_json['name'] 61 | 62 | def run(self, pure=True, restore=False, test=False, valid=False, valid_size=-1): 63 | if not restore: 64 | _clear_files = ['logs', 'checkpoint', 'snapshot', 'output'] 65 | for file in _clear_files: 66 | subprocess.run(['rm', '-rf', os.path.join(self.expdir, file)]) 67 | init = self.load_init() 68 | dataset = 'C10+' if init is None else init.get('dataset', 'C10+') 69 | run_config = self.load_run_config(print_info=(not pure), dataset=dataset) 70 | run_config.renew_logs = False 71 | if valid_size > 0: 72 | run_config.validation_size = valid_size 73 | 74 | data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 75 | net_config, model_name = self.load_net_config(init, print_info=(not pure)) 76 | model = get_model_by_name(model_name)(self.expdir, data_provider, run_config, net_config, pure=pure) 77 | start_epoch = 1 78 | if restore: 79 | model.load_model() 80 | epoch_info_file = '%s/checkpoint/epoch.info' % self.expdir 81 | if os.path.isfile(epoch_info_file): 82 | start_epoch = json.load(open(epoch_info_file, 'r'))['epoch'] 83 | if not pure: 84 | print('start epoch: %d' % start_epoch) 85 | if test: 86 | print('Testing...') 87 | loss, accuracy = model.test(data_provider.test, batch_size=200) 88 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 89 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open(self.output, 'w')) 90 | elif valid: 91 | print('validating...') 92 | loss, accuracy = model.test(data_provider.validation, batch_size=200) 93 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 94 | json.dump({'valid_loss': '%s' % loss, 'valid_acc': '%s' % accuracy}, open(self.output, 'w')) 95 | elif pure: 96 | model.pure_train() 97 | loss, accuracy = model.test(data_provider.validation, batch_size=200) 98 | json.dump({'valid_loss': '%s' % loss, 'valid_acc': '%s' % accuracy}, open(self.output, 'w')) 99 | model.save_init(self.snapshot, print_info=(not pure)) 100 | model.save_config(self.expdir, print_info=(not pure)) 101 | else: 102 | # train the model 103 | print('Data provider train images: ', data_provider.train.num_examples) 104 | model.train_all_epochs(start_epoch) 105 | print('Data provider test images: ', 
data_provider.test.num_examples) 106 | print('Testing...') 107 | loss, accuracy = model.test(data_provider.test, batch_size=200) 108 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 109 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open(self.output, 'w')) 110 | model.save_init(self.snapshot, print_info=(not pure)) 111 | model.save_config(self.expdir, print_info=(not pure)) 112 | return accuracy 113 | -------------------------------------------------------------------------------- /code/main.py: -------------------------------------------------------------------------------- 1 | from expdir_monitor.expdir_monitor import ExpdirMonitor 2 | import argparse 3 | 4 | 5 | """ 6 | Given a expdir, run the exp 7 | """ 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument( 10 | '--test', action='store_true', 11 | help='Test model for required dataset if pretrained model exists.' 12 | ) 13 | parser.add_argument( 14 | '--valid', action='store_true', 15 | ) 16 | parser.add_argument( 17 | '--valid_size', type=int, default=-1, 18 | ) 19 | parser.add_argument('--path', type=str) 20 | parser.add_argument('--restore', action='store_true') 21 | args = parser.parse_args() 22 | expdir_monitor = ExpdirMonitor(args.path) 23 | test_performance = expdir_monitor.run(pure=False, restore=args.restore, test=args.test, valid=args.valid, 24 | valid_size=args.valid_size) 25 | if args.valid: 26 | print('validation performance: %s' % test_performance) 27 | else: 28 | print('test performance: %s' % test_performance) 29 | -------------------------------------------------------------------------------- /code/meta_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/meta_controller/__init__.py -------------------------------------------------------------------------------- /code/meta_controller/base_controller.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import rnn 3 | from tensorflow.python.ops import array_ops 4 | from models.basic_model import BasicModel 5 | import numpy as np 6 | import os 7 | 8 | 9 | class BaseController: 10 | def __init__(self, path): 11 | self.path = os.path.realpath(path) 12 | os.makedirs(self.path, exist_ok=True) 13 | 14 | def load(self): 15 | raise NotImplementedError 16 | 17 | def save(self, global_step=None): 18 | raise NotImplementedError 19 | 20 | @property 21 | def save_path(self): 22 | return '%s/model.ckpt' % self.path 23 | 24 | @property 25 | def logs_path(self): 26 | return '%s/logs' % self.path 27 | 28 | 29 | class Vocabulary: 30 | def __init__(self, token_list): 31 | token_list = ['PAD'] + token_list 32 | self.vocab = {} 33 | for idx, token in enumerate(token_list): 34 | self.vocab[token] = idx 35 | self.vocab[idx] = token 36 | 37 | @property 38 | def size(self): 39 | return len(self.vocab) // 2 40 | 41 | def get_code(self, token_list): 42 | return [self.vocab[token] for token in token_list] 43 | 44 | def get_token(self, code_list): 45 | return [self.vocab[code] for code in code_list] 46 | 47 | @property 48 | def pad_code(self): 49 | return self.vocab['PAD'] 50 | 51 | 52 | def embedding(_input, vocab_size, embedding_dim, name='embedding'): 53 | """ 54 | _input: [batch_size, max_num_steps] 55 | output: [batch_size, max_num_steps, embedding_dim] 56 | """ 57 | # embedding 58 | embedding_var = 
tf.get_variable( 59 | name=name, 60 | shape=[vocab_size, embedding_dim], 61 | initializer=tf.random_uniform_initializer(-np.sqrt(3), np.sqrt(3)), 62 | dtype=tf.float32, 63 | ) # Initialize embeddings to have variance=1. 64 | output = tf.nn.embedding_lookup(embedding_var, _input) 65 | return output 66 | 67 | 68 | def build_cell(units, cell_type='lstm', num_layers=1): 69 | if num_layers > 1: 70 | cell = rnn.MultiRNNCell([ 71 | build_cell(units, cell_type, 1) for _ in range(num_layers) 72 | ]) 73 | else: 74 | if cell_type == "lstm": 75 | cell = rnn.LSTMCell(units) 76 | elif cell_type == "gru": 77 | cell = rnn.GRUCell(units) 78 | else: 79 | raise ValueError('Do not support %s' % cell_type) 80 | return cell 81 | 82 | 83 | def seq_len(sequence): 84 | """ 85 | assume padding with zero vectors 86 | sequence: [batch_size, num_steps, features] 87 | length: [batch_size] 88 | """ 89 | used = tf.sign(tf.reduce_max(tf.abs(sequence), 2)) 90 | length = tf.reduce_sum(used, 1) 91 | length = tf.cast(length, tf.int32) 92 | return length 93 | 94 | 95 | class EncoderNet: 96 | def __init__(self, num_steps, vocab, embedding_dim, rnn_units, rnn_type='bi_lstm', rnn_layers=1): 97 | self.num_steps = num_steps 98 | self.vocab = vocab 99 | self.embedding_dim = embedding_dim 100 | 101 | self.rnn_units = rnn_units 102 | self.rnn_type = rnn_type 103 | self.rnn_layers = rnn_layers 104 | 105 | # placeholder 106 | self.seq_len, self.input_seq = None, None 107 | # op 108 | self.encoder_output, self.encoder_state = None, None 109 | 110 | @property 111 | def bidirectional(self): 112 | return self.rnn_type.startswith('bi') 113 | 114 | @property 115 | def cell_type(self): 116 | return self.rnn_type.split('_')[-1] 117 | 118 | def _define_input(self): 119 | self.seq_len = tf.placeholder( 120 | tf.int32, 121 | [None], 122 | 'seq_len' 123 | ) # length of each sequence, shape = [batch_size, ] 124 | 125 | self.input_seq = tf.placeholder( 126 | tf.int32, 127 | [None, self.num_steps], 128 | 'input_seq' 129 | ) # input sequence, shape = [batch_size, num_steps] 130 | 131 | def build(self): 132 | self._define_input() 133 | 134 | output = self.input_seq 135 | output = embedding(output, self.vocab.size, self.embedding_dim, name='layer_embedding') 136 | input_dim = self.embedding_dim 137 | 138 | # Prepare data shape to match rnn function requirements 139 | # Current data input shape: [batch_size, num_steps, input_dim] 140 | # Required shape: 'num_steps' tensors list of shape [batch_size, input_dim] 141 | output = tf.transpose(output, [1, 0, 2]) 142 | output = tf.reshape(output, [-1, input_dim]) 143 | output = tf.split(output, self.num_steps, 0) 144 | 145 | if self.bidirectional: 146 | # 'num_steps' tensors list of shape [batch_size, rnn_units * 2] 147 | fw_cell = build_cell(self.rnn_units, self.cell_type, self.rnn_layers) 148 | bw_cell = build_cell(self.rnn_units, self.cell_type, self.rnn_layers) 149 | output, state_fw, state_bw = rnn.static_bidirectional_rnn( 150 | fw_cell, bw_cell, output, dtype=tf.float32, sequence_length=self.seq_len, scope='encoder') 151 | 152 | if isinstance(state_fw, tf.contrib.rnn.LSTMStateTuple): 153 | encoder_state_c = tf.concat([state_fw.c, state_bw.c], axis=1, name='bidirectional_concat_c') 154 | encoder_state_h = tf.concat([state_fw.h, state_bw.h], axis=1, name='bidirectional_concat_h') 155 | state = tf.contrib.rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h) 156 | elif isinstance(state_fw, tf.Tensor): 157 | state = tf.concat([state_fw, state_bw], axis=1, name='bidirectional_concat') 158 | else: 159 | 
raise ValueError 160 | else: 161 | # 'num_steps' tensors list of shape [batch_size, rnn_units] 162 | cell = build_cell(self.rnn_units, self.cell_type, self.rnn_layers) 163 | output, state = rnn.static_rnn(cell, output, dtype=tf.float32, sequence_length=self.seq_len, 164 | scope='encoder') 165 | 166 | output = tf.stack(output, axis=0) # [num_steps, batch_size, rnn_units] 167 | output = tf.transpose(output, [1, 0, 2]) # [batch_size, num_steps, rnn_units] 168 | self.encoder_output = output 169 | self.encoder_state = state 170 | return output, state 171 | 172 | 173 | class WiderActorNet: 174 | def __init__(self, out_dim, num_steps, net_type='simple', net_config=None): 175 | self.out_dim = out_dim 176 | self.num_steps = num_steps 177 | self.net_type = net_type 178 | self.net_config = net_config 179 | 180 | # placeholder 181 | self.decision, self.probs = None, None 182 | 183 | def build_forward(self, _input): 184 | output = _input # [batch_size, num_steps, rnn_units] 185 | feature_dim = int(output.get_shape()[2]) # rnn_units 186 | output = tf.reshape(output, [-1, feature_dim]) # [batch_size * num_steps, rnn_units] 187 | final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax' 188 | if self.net_type == 'simple': 189 | net_config = [] if self.net_config is None else self.net_config 190 | with tf.variable_scope('wider_actor'): 191 | for layer in net_config: 192 | units, activation = layer.get('units'), layer.get('activation', 'relu') 193 | output = BasicModel.fc_layer(output, units, use_bias=True) 194 | output = BasicModel.activation(output, activation) 195 | logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True) # [batch_size * num_steps, out_dim] 196 | probs = BasicModel.activation(logits, final_activation) # [batch_size * num_steps, out_dim] 197 | probs_dim = self.out_dim 198 | if self.out_dim == 1: 199 | probs = tf.concat([1 - probs, probs], axis=1) 200 | probs_dim = 2 201 | 202 | self.decision = tf.multinomial(tf.log(probs), 1) # [batch_size * num_steps, 1] 203 | self.decision = tf.reshape(self.decision, [-1, self.num_steps]) # [batch_size, num_steps] 204 | self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim]) # [batch_size, num_steps, out_dim] 205 | else: 206 | raise ValueError('Do not support %s' % self.net_type) 207 | 208 | 209 | class DeeperActorNet: 210 | def __init__(self, decision_num, out_dims, embedding_dim, 211 | cell_type='lstm', rnn_layers=1, attention_config=None): 212 | self.decision_num = decision_num 213 | self.out_dims = out_dims 214 | self.embedding_dim = embedding_dim 215 | 216 | self.cell_type = cell_type 217 | self.rnn_layers = rnn_layers 218 | self.attention_config = attention_config 219 | 220 | # placeholder 221 | self.block_layer_num = None 222 | # op 223 | self.decision, self.probs = None, None 224 | 225 | def _define_input(self): 226 | self.block_layer_num = tf.placeholder( 227 | tf.int32, 228 | shape=[None, self.out_dims[0]] 229 | ) # [batch_size, block_num] 230 | 231 | def build_decoder_cell(self, encoder_state): 232 | if isinstance(encoder_state, tf.contrib.rnn.LSTMStateTuple): 233 | rnn_units = int(encoder_state.c.get_shape()[1]) 234 | assert self.cell_type == 'lstm', 'Do not match' 235 | else: 236 | rnn_units = int(encoder_state.get_shape()[1]) 237 | cell = build_cell(rnn_units, self.cell_type, self.rnn_layers) 238 | return cell 239 | 240 | def build_forward(self, encoder_output, encoder_state, is_training, decision_trajectory): 241 | self._define_input() 242 | self.decision, self.probs = [], [] 243 | 244 | batch_size = 
array_ops.shape(encoder_output)[0]
245 |         if self.attention_config is None:
246 |             cell = self.build_decoder_cell(encoder_state)
247 |             cell_state = encoder_state
248 |             cell_input = tf.zeros(shape=[batch_size], dtype=tf.int32)
249 |             with tf.variable_scope('deeper_actor'):
250 |                 for _i in range(self.decision_num):
251 |                     cell_input_embed = embedding(cell_input, 1 if _i == 0 else self.out_dims[_i - 1],
252 |                                                  self.embedding_dim, name='deeper_actor_embedding_%d' % _i)
253 |                     with tf.variable_scope('rnn', reuse=(_i > 0)):
254 |                         cell_output, cell_state = cell(cell_input_embed, cell_state)
255 |                     with tf.variable_scope('classifier_%d' % _i):
256 |                         logits_i = BasicModel.fc_layer(cell_output, self.out_dims[_i], use_bias=True)
257 |                     act_i = 'softmax'
258 |                     probs_i = BasicModel.activation(logits_i, activation=act_i)  # [batch_size, out_dim_i]
259 |                     if _i == 1:
260 |                         # the second decision picks the layer index for the deeper actor,
261 |                         # so mask out positions beyond the chosen block's current depth
262 |                         one_hot_block_decision = tf.one_hot(cell_input, depth=self.out_dims[0], dtype=tf.int32)
263 |                         max_layer_num = tf.multiply(self.block_layer_num, one_hot_block_decision)
264 |                         max_layer_num = tf.reduce_max(max_layer_num, axis=1)  # [batch_size]
265 |                         layer_mask = tf.sequence_mask(max_layer_num, self.out_dims[1], dtype=tf.float32)
266 |                         probs_i = tf.multiply(probs_i, layer_mask)
267 |                         # rescale the masked probabilities so they sum to 1
268 |                         probs_i = tf.divide(probs_i, tf.reduce_sum(probs_i, axis=1, keep_dims=True))
269 |                     decision_i = tf.multinomial(tf.log(probs_i), 1)  # [batch_size, 1]
270 |                     decision_i = tf.cast(decision_i, tf.int32)
271 |                     decision_i = tf.reshape(decision_i, shape=[-1])  # [batch_size]
272 | 
273 |                     cell_input = tf.cond(
274 |                         is_training,
275 |                         lambda: decision_trajectory[:, _i],
276 |                         lambda: decision_i,
277 |                     )
278 |                     self.decision.append(decision_i)
279 |                     self.probs.append(probs_i)
280 |             self.decision = tf.stack(self.decision, axis=1)  # [batch_size, decision_num]
281 |         else:
282 |             raise NotImplementedError
283 | 
284 | 
285 | 
--------------------------------------------------------------------------------
/code/meta_controller/rl_controller.py:
--------------------------------------------------------------------------------
1 | from meta_controller.base_controller import WiderActorNet, DeeperActorNet, EncoderNet, BaseController
2 | import tensorflow as tf
3 | import os
4 | from tensorflow.python.ops import array_ops
5 | from models.basic_model import BasicModel
6 | import shutil
7 | import numpy as np
8 | 
9 | 
10 | class RLNet2NetController(BaseController):
11 |     def save(self, global_step=None):
12 |         self.saver.save(self.sess, self.save_path, global_step=global_step)
13 | 
14 |     def load(self):
15 |         if os.path.isfile('%s/model.ckpt.index' % self.path):
16 |             try:
17 |                 self.saver.restore(self.sess, self.save_path)
18 |             except Exception:
19 |                 print('Failed to load model from save path: %s' % self.save_path)
20 |                 return
21 |             print('Successfully loaded model from save path: %s' % self.save_path)
22 |         else:
23 |             print('No model files in ' + '%s/model.ckpt.index' % self.path)
24 | 
25 |     def __init__(self, path, entropy_penalty,
26 |                  encoder: EncoderNet, wider_actor: WiderActorNet, deeper_actor: DeeperActorNet, opt_config):
27 |         BaseController.__init__(self, path)
28 |         self.entropy_penalty = entropy_penalty
29 | 
30 |         self.encoder = encoder
31 |         self.wider_actor = wider_actor
32 |         self.deeper_actor = deeper_actor
33 |         self.opt_config = opt_config
34 | 
35 |         self.graph = tf.Graph()
36 |         self.obj, self.train_step = None, None
37 |         with self.graph.as_default():
38 |             self._define_input()
39 |
self.build_forward() 40 | self.build_training_process() 41 | self.global_variables_initializer = tf.global_variables_initializer() 42 | self.saver = tf.train.Saver() 43 | self._initialize_session() 44 | 45 | def _define_input(self): 46 | self.learning_rate = tf.placeholder( 47 | tf.float32, 48 | shape=[], 49 | name='learning_rate') 50 | self.is_training = tf.placeholder(tf.bool, shape=[], name='is_training') 51 | self.wider_seg_deeper = tf.placeholder(tf.int32, shape=[], name='wider_seg_deeper') 52 | 53 | self.wider_decision_trajectory = tf.placeholder( 54 | tf.int32, 55 | shape=[None, self.encoder.num_steps], 56 | name='wider_decision_trajectory', 57 | ) # [wider_batch_size, num_steps] 58 | self.wider_decision_mask = tf.placeholder( 59 | tf.float32, 60 | shape=[None, self.encoder.num_steps], 61 | name='wider_decision_mask', 62 | ) # [wider_batch_size, num_steps] 63 | 64 | self.deeper_decision_trajectory = tf.placeholder( 65 | tf.int32, 66 | shape=[None, self.deeper_actor.decision_num], 67 | name='deeper_decision_trajectory', 68 | ) # [deeper_batch_size, deeper_decision_num] 69 | 70 | self.deeper_decision_mask = tf.placeholder( 71 | tf.float32, 72 | shape=[None, self.deeper_actor.decision_num], 73 | name='deeper_decision_mask', 74 | ) # [deeper_batch_size, deeper_decision_num] 75 | 76 | self.reward = tf.placeholder( 77 | tf.float32, 78 | shape=[None], 79 | name='reward', 80 | ) # [batch_size] 81 | self.has_deeper = tf.placeholder( 82 | tf.bool, 83 | shape=[], 84 | name='has_deeper', 85 | ) 86 | 87 | def update_controller(self, learning_rate, wider_seg_deeper, wider_decision_trajectory, wider_decision_mask, 88 | deeper_decision_trajectory, deeper_decison_mask, reward, block_layer_num, input_seq, seq_len): 89 | has_deeper = wider_seg_deeper < len(input_seq) 90 | feed_dict = { 91 | self.learning_rate: learning_rate, 92 | self.wider_seg_deeper: wider_seg_deeper, 93 | self.wider_decision_trajectory: wider_decision_trajectory, 94 | self.wider_decision_mask: wider_decision_mask, 95 | self.deeper_decision_trajectory: deeper_decision_trajectory, 96 | self.deeper_decision_mask: deeper_decison_mask, 97 | self.reward: reward, 98 | self.is_training: True and has_deeper, 99 | self.deeper_actor.block_layer_num: block_layer_num, 100 | self.encoder.input_seq: input_seq, 101 | self.encoder.seq_len: seq_len, 102 | self.has_deeper: has_deeper, 103 | } 104 | self.sess.run(self.train_step, feed_dict=feed_dict) 105 | 106 | def build_forward(self): 107 | encoder_output, encoder_state = self.encoder.build() 108 | feed2wider_output = encoder_output[:self.wider_seg_deeper] 109 | feed2deeper_output = encoder_output[self.wider_seg_deeper:] 110 | if isinstance(encoder_state, tf.contrib.rnn.LSTMStateTuple): 111 | encoder_state_c = encoder_state.c 112 | encoder_state_h = encoder_state.h 113 | 114 | feed2wider_c = encoder_state_c[:self.wider_seg_deeper] 115 | feed2wider_h = encoder_state_h[:self.wider_seg_deeper] 116 | feed2wider_state = tf.contrib.rnn.LSTMStateTuple(c=feed2wider_c, h=feed2wider_h) 117 | 118 | feed2deeper_c = encoder_state_c[self.wider_seg_deeper:] 119 | feed2deeper_h = encoder_state_h[self.wider_seg_deeper:] 120 | feed2deeper_state = tf.contrib.rnn.LSTMStateTuple(c=feed2deeper_c, h=feed2deeper_h) 121 | elif isinstance(encoder_state, tf.Tensor): 122 | feed2wider_state = encoder_state[:self.wider_seg_deeper] 123 | feed2deeper_state = encoder_state[self.wider_seg_deeper:] 124 | else: 125 | raise ValueError 126 | 127 | self.wider_actor.build_forward(feed2wider_output) 128 | 
self.deeper_actor.build_forward(feed2deeper_output, feed2deeper_state, self.is_training, 129 | self.deeper_decision_trajectory) 130 | 131 | def build_training_process(self): 132 | raise NotImplementedError 133 | 134 | def sample_wider_decision(self, input_seq, seq_len): 135 | batch_size = len(seq_len) 136 | wider_decision, wider_probs = self.sess.run( 137 | fetches=[self.wider_actor.decision, self.wider_actor.probs], 138 | feed_dict={ 139 | self.encoder.input_seq: input_seq, 140 | self.encoder.seq_len: seq_len, 141 | self.wider_seg_deeper: batch_size, 142 | } 143 | ) # [batch_size, num_steps] 144 | return wider_decision, wider_probs 145 | 146 | def sample_deeper_decision(self, input_seq, seq_len, block_layer_num): 147 | deeper_decision, deeper_probs = self.sess.run( 148 | fetches=[self.deeper_actor.decision, self.deeper_actor.probs], 149 | feed_dict={ 150 | self.encoder.input_seq: input_seq, 151 | self.encoder.seq_len: seq_len, 152 | self.wider_seg_deeper: 0, 153 | self.is_training: False, 154 | self.deeper_actor.block_layer_num: block_layer_num, 155 | self.deeper_decision_trajectory: -np.ones([len(seq_len), self.deeper_actor.decision_num]) 156 | } 157 | ) # [batch_size, decision_num] 158 | return deeper_decision, deeper_probs 159 | 160 | def _initialize_session(self): 161 | config = tf.ConfigProto() 162 | # restrict model GPU memory utilization to min required 163 | config.gpu_options.allow_growth = True 164 | self.sess = tf.Session(graph=self.graph, config=config) 165 | 166 | self.sess.run(self.global_variables_initializer) 167 | shutil.rmtree(self.logs_path, ignore_errors=True) 168 | self.summary_writer = tf.summary.FileWriter(self.logs_path, graph=self.graph) 169 | 170 | def get_wider_entropy(self): 171 | wider_entropy = -tf.multiply(tf.log(self.wider_actor.probs), self.wider_actor.probs) 172 | wider_entropy = tf.reduce_sum(wider_entropy, axis=2) 173 | wider_entropy = tf.multiply(wider_entropy, self.wider_decision_mask) 174 | wider_entropy = tf.div(tf.reduce_sum(wider_entropy, axis=1), tf.reduce_sum(self.wider_decision_mask, axis=1)) 175 | wider_entropy = tf.reduce_mean(wider_entropy) 176 | return wider_entropy 177 | 178 | def get_deeper_entropy(self): 179 | deeper_entropy = [] 180 | for _i in range(self.deeper_actor.decision_num): 181 | deeper_probs = self.deeper_actor.probs[_i] 182 | entropy = -tf.multiply(tf.log(deeper_probs + 1e-10), deeper_probs) 183 | entropy = tf.reduce_sum(entropy, axis=1) 184 | deeper_entropy.append(entropy) 185 | deeper_entropy = tf.reduce_mean(deeper_entropy) 186 | return deeper_entropy 187 | 188 | 189 | class ReinforceNet2NetController(RLNet2NetController): 190 | def build_training_process(self): 191 | wider_side_obj, wider_entropy = tf.cond( 192 | tf.greater(self.wider_seg_deeper, 0), 193 | lambda: self.get_wider_side_obj(), 194 | lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)) 195 | ) 196 | batch_size = array_ops.shape(self.reward)[0] 197 | deeper_side_obj, deeper_entropy = tf.cond( 198 | self.has_deeper, 199 | lambda: self.get_deeper_side_obj(), 200 | lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)) 201 | ) 202 | self.obj = wider_side_obj + deeper_side_obj 203 | entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \ 204 | deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32) 205 | entropy_term /= tf.cast(batch_size, tf.float32) 206 | 207 | optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1]) 208 | 
self.train_step = optimizer.minimize(- self.obj - self.entropy_penalty * entropy_term) 209 | 210 | def get_wider_side_obj(self): 211 | wider_side_reward = self.reward[:self.wider_seg_deeper] 212 | 213 | # obj from wider side 214 | wider_trajectory = tf.one_hot(self.wider_decision_trajectory, depth=max(self.wider_actor.out_dim, 2)) 215 | wider_probs = tf.reduce_max(tf.multiply(wider_trajectory, self.wider_actor.probs), axis=2) 216 | wider_probs = tf.log(wider_probs) # [wider_batch_size, num_steps] 217 | wider_probs = tf.multiply(wider_probs, self.wider_decision_mask) 218 | wider_probs = tf.multiply(wider_probs, tf.reshape(wider_side_reward, shape=[-1, 1])) 219 | 220 | wider_side_obj = tf.reduce_sum(wider_probs) 221 | return wider_side_obj, self.get_wider_entropy() 222 | 223 | def get_deeper_side_obj(self): 224 | deeper_side_reward = self.reward[self.wider_seg_deeper:] 225 | 226 | # obj from deeper side 227 | deeper_side_obj = [] 228 | for _i in range(self.deeper_actor.decision_num): 229 | decision_trajectory = self.deeper_decision_trajectory[:, _i] 230 | deeper_decision_mask = self.deeper_decision_mask[:, _i] 231 | decision_trajectory = tf.one_hot(decision_trajectory, depth=self.deeper_actor.out_dims[_i]) 232 | deeper_probs = tf.reduce_max(tf.multiply(decision_trajectory, self.deeper_actor.probs[_i]), axis=1) 233 | deeper_probs = tf.log(deeper_probs) # [deeper_batch_size] 234 | deeper_probs = tf.multiply(deeper_probs, deeper_decision_mask) 235 | deeper_probs = tf.multiply(deeper_probs, deeper_side_reward) 236 | 237 | deeper_side_obj.append(tf.reduce_sum(deeper_probs)) 238 | deeper_side_obj = tf.reduce_sum(deeper_side_obj) 239 | return deeper_side_obj, self.get_deeper_entropy() 240 | 241 | -------------------------------------------------------------------------------- /code/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/models/__init__.py -------------------------------------------------------------------------------- /code/models/basic_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tensorflow as tf 4 | import numpy as np 5 | import time 6 | from datetime import timedelta 7 | import json 8 | import pickle 9 | 10 | 11 | class BasicModel: 12 | def __init__(self, path, data_provider, run_config, net_config, pure=False, only_forward=False): 13 | if only_forward: pure = True 14 | self.graph = tf.Graph() 15 | 16 | self.data_provider = data_provider 17 | self._path = path 18 | self.run_config = run_config 19 | self.net_config = net_config 20 | 21 | self.data_shape = data_provider.data_shape 22 | self.n_classes = data_provider.n_classes 23 | 24 | self._save_path, self._logs_path = None, None 25 | self.batches_step = 0 26 | 27 | self.cross_entropy, self.train_step, self.accuracy = None, None, None 28 | with self.graph.as_default(): 29 | self._define_inputs() 30 | self._build_graph(only_forward=only_forward) 31 | self.global_variables_initializer = tf.global_variables_initializer() 32 | if not pure: 33 | self._count_trainable_params() 34 | self.saver = tf.train.Saver() 35 | self._initialize_session(set_logs=(not pure)) 36 | 37 | @property 38 | def save_path(self): 39 | if self._save_path is None: 40 | save_path = '%s/checkpoint' % self._path 41 | os.makedirs(save_path, exist_ok=True) 42 | save_path = os.path.join(save_path, 'model.ckpt') 43 | self._save_path = save_path 
44 | return self._save_path 45 | 46 | @property 47 | def logs_path(self): 48 | if self._logs_path is None: 49 | logs_path = '%s/logs' % self._path 50 | if self.run_config.renew_logs: 51 | shutil.rmtree(logs_path, ignore_errors=True) 52 | os.makedirs(logs_path, exist_ok=True) 53 | self._logs_path = logs_path 54 | return self._logs_path 55 | 56 | def _build_graph(self, only_forward=False): 57 | raise NotImplementedError 58 | 59 | def _define_inputs(self): 60 | shape = [None] 61 | shape.extend(self.data_shape) 62 | self.images = tf.placeholder( 63 | tf.float32, 64 | shape=shape, 65 | name='input_images') 66 | self.labels = tf.placeholder( 67 | tf.float32, 68 | shape=[None, self.n_classes], 69 | name='labels') 70 | self.learning_rate = tf.placeholder( 71 | tf.float32, 72 | shape=[], 73 | name='learning_rate') 74 | self.is_training = tf.placeholder(tf.bool, shape=[], name='is_training') 75 | 76 | def _initialize_session(self, set_logs=True): 77 | """Initialize session, variables""" 78 | config = tf.ConfigProto() 79 | # restrict model GPU memory utilization to min required 80 | config.gpu_options.allow_growth = True 81 | self.sess = tf.Session(graph=self.graph, config=config) 82 | 83 | self.sess.run(self.global_variables_initializer) 84 | if set_logs: 85 | logswriter = tf.summary.FileWriter 86 | self.summary_writer = logswriter(self.logs_path, graph=self.graph) 87 | 88 | def train_all_epochs(self, start_epoch=1): 89 | n_epochs = self.run_config.n_epochs 90 | learning_rate = self.run_config.init_lr 91 | batch_size = self.run_config.batch_size 92 | 93 | total_start_time = time.time() 94 | for epoch in range(start_epoch, n_epochs + 1): 95 | print('\n', '-' * 30, 'Train epoch: %d' % epoch, '-' * 30, '\n') 96 | start_time = time.time() 97 | new_lr = self.run_config.learning_rate(epoch) 98 | if new_lr != learning_rate: 99 | learning_rate = new_lr 100 | print('Decrease learning rate, new lr = %f' % learning_rate) 101 | 102 | print('Training...') 103 | loss, acc = self.train_one_epoch( 104 | self.data_provider.train, batch_size, learning_rate) 105 | # save logs about "loss" and "acc" if the option is true 106 | if self.run_config.should_save_logs: 107 | self.log_loss_accuracy(loss, acc, epoch, prefix='train') 108 | 109 | if self.run_config.validation_frequency and epoch % self.run_config.validation_frequency == 0: 110 | print('Validation...') 111 | loss, acc = self.test(self.data_provider.validation, batch_size) 112 | if self.run_config.should_save_logs: 113 | self.log_loss_accuracy(loss, acc, epoch, prefix='valid') 114 | if self.run_config.should_save_model: 115 | self.save_model() 116 | json.dump({'epoch': epoch + 1}, open('%s/checkpoint/epoch.info' % self._path, 'w')) 117 | 118 | time_per_epoch = time.time() - start_time 119 | seconds_left = int((n_epochs - epoch) * time_per_epoch) 120 | print('Time per epoch: %s, Est. 
complete in: %s' % ( 121 | str(timedelta(seconds=time_per_epoch)), 122 | str(timedelta(seconds=seconds_left)))) 123 | 124 | if self.run_config.should_save_model: 125 | self.save_model() 126 | 127 | total_training_time = time.time() - total_start_time 128 | print('\nTotal training time: %s' % str(timedelta( 129 | seconds=total_training_time))) 130 | 131 | def train_one_epoch(self, data, batch_size, learning_rate): 132 | num_examples = data.num_examples 133 | total_loss = [] 134 | total_accuracy = [] 135 | for i in range(num_examples // batch_size): 136 | batch = data.next_batch(batch_size) 137 | images, labels = batch 138 | feed_dict = { 139 | self.images: images, 140 | self.labels: labels, 141 | self.learning_rate: learning_rate, 142 | self.is_training: True, 143 | } 144 | fetches = [self.train_step, self.cross_entropy, self.accuracy] 145 | result = self.sess.run(fetches, feed_dict=feed_dict) 146 | _, loss, accuracy = result 147 | total_loss.append(loss) 148 | total_accuracy.append(accuracy) 149 | # save logs about "loss" and "acc" if the option is true 150 | if self.run_config.should_save_logs: 151 | self.batches_step += 1 152 | self.log_loss_accuracy( 153 | loss, accuracy, self.batches_step, prefix='per_batch', 154 | should_print=False) 155 | mean_loss = np.mean(total_loss) 156 | mean_accuracy = np.mean(total_accuracy) 157 | return mean_loss, mean_accuracy 158 | 159 | def test(self, data, batch_size): 160 | num_examples = data.num_examples 161 | total_loss = [] 162 | total_accuracy = [] 163 | for i in range(num_examples // batch_size): 164 | batch = data.next_batch(batch_size) 165 | feed_dict = { 166 | self.images: batch[0], 167 | self.labels: batch[1], 168 | self.is_training: False, 169 | } 170 | fetches = [self.cross_entropy, self.accuracy] 171 | loss, accuracy = self.sess.run(fetches, feed_dict=feed_dict) 172 | total_loss.append(loss) 173 | total_accuracy.append(accuracy) 174 | mean_loss = np.mean(total_loss) 175 | mean_accuracy = np.mean(total_accuracy) 176 | remain_num = num_examples % batch_size 177 | if remain_num != 0: 178 | batch = data.next_batch(remain_num) 179 | feed_dict = { 180 | self.images: batch[0], 181 | self.labels: batch[1], 182 | self.is_training: False, 183 | } 184 | fetches = [self.cross_entropy, self.accuracy] 185 | loss, accuracy = self.sess.run(fetches, feed_dict=feed_dict) 186 | 187 | mean_loss = (mean_loss * (num_examples - remain_num) + loss * remain_num) / num_examples 188 | mean_accuracy = (mean_accuracy * (num_examples - remain_num) + accuracy * remain_num) / num_examples 189 | return mean_loss, mean_accuracy 190 | 191 | def save_config(self, save_path, print_info=True): 192 | os.makedirs(save_path, exist_ok=True) 193 | net_save_path = os.path.join(save_path, 'net.config') 194 | json.dump(self.net_config.get_config(), open(net_save_path, 'w'), indent=4) 195 | if print_info: print('Network configs dump to %s' % save_path) 196 | run_save_path = os.path.join(save_path, 'run.config') 197 | json.dump(self.run_config.get_config(), open(run_save_path, 'w'), indent=4) 198 | if print_info: print('Run configs dump to %s' % run_save_path) 199 | 200 | def save_init(self, save_path, print_info=True): 201 | os.makedirs(save_path, exist_ok=True) 202 | save_path = os.path.join(save_path, 'init') 203 | to_save_init = self.net_config.renew_init(self) 204 | to_save_init['dataset'] = self.run_config.dataset 205 | pickle.dump(to_save_init, open(save_path, 'wb')) 206 | if print_info: print('Network weights dump to %s' % save_path) 207 | 208 | def pure_train(self): 209 | 
n_epochs = self.run_config.n_epochs 210 | batch_size = self.run_config.batch_size 211 | 212 | for epoch in range(1, n_epochs + 1): 213 | learning_rate = self.run_config.learning_rate(epoch) 214 | 215 | # train one epoch 216 | data = self.data_provider.train 217 | num_examples = data.num_examples 218 | for i in range(num_examples // batch_size): 219 | batch = data.next_batch(batch_size) 220 | images, labels = batch 221 | feed_dict = { 222 | self.images: images, 223 | self.labels: labels, 224 | self.learning_rate: learning_rate, 225 | self.is_training: True, 226 | } 227 | fetches = self.train_step 228 | self.sess.run(fetches, feed_dict=feed_dict) 229 | 230 | def save_model(self, global_step=None): 231 | self.saver.save(self.sess, self.save_path, global_step=global_step) 232 | 233 | def load_model(self): 234 | try: 235 | self.saver.restore(self.sess, self.save_path) 236 | except Exception: 237 | raise IOError('Failed to to load model ' 238 | 'from save path: %s' % self.save_path) 239 | print('Successfully load model from save path: %s' % self.save_path) 240 | 241 | def log_loss_accuracy(self, loss, accuracy, epoch, prefix, should_print=True, write2file=True): 242 | if should_print: 243 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 244 | summary = tf.Summary(value=[ 245 | tf.Summary.Value( 246 | tag='loss_%s' % prefix, simple_value=float(loss)), 247 | tf.Summary.Value( 248 | tag='accuracy_%s' % prefix, simple_value=float(accuracy)) 249 | ]) 250 | self.summary_writer.add_summary(summary, epoch) 251 | if write2file and prefix == 'valid': 252 | with open('%s/console.txt' % self.logs_path, 'a') as fout: 253 | fout.write('%d: mean cross_entropy: %f, mean accuracy: %f\n' % (epoch, loss, accuracy)) 254 | 255 | @staticmethod 256 | def _count_trainable_params(): 257 | total_parameters = 0 258 | for variable in tf.trainable_variables(): 259 | shape = variable.get_shape() 260 | variable_parameters = 1 261 | for dim in shape: 262 | variable_parameters *= dim.value 263 | total_parameters += variable_parameters 264 | print('Total training params: %.2fM' % (total_parameters / 1e6)) 265 | 266 | @staticmethod 267 | def dropout(_input, keep_prob, is_training): 268 | if keep_prob < 1: 269 | output = tf.cond( 270 | is_training, 271 | lambda: tf.nn.dropout(_input, keep_prob), 272 | lambda: _input 273 | ) 274 | else: 275 | output = _input 276 | return output 277 | 278 | @staticmethod 279 | def weight_variable(shape, name, initializer): 280 | return tf.get_variable( 281 | name, 282 | shape=shape, 283 | initializer=initializer, 284 | ) 285 | 286 | @staticmethod 287 | def avg_pool(_input, k=2, s=2): 288 | ksize = [1, k, k, 1] 289 | strides = [1, s, s, 1] 290 | padding = 'VALID' 291 | # if stride = 1, keep the image size unchanged 292 | if s == 1: padding = 'SAME' 293 | output = tf.nn.avg_pool(_input, ksize, strides, padding) 294 | return output 295 | 296 | @staticmethod 297 | def max_pool(_input, k=2, s=2): 298 | ksize = [1, k, k, 1] 299 | strides = [1, s, s, 1] 300 | padding = 'VALID' 301 | # if stride = 1, keep the image size unchanged 302 | if s == 1: padding = 'SAME' 303 | output = tf.nn.max_pool(_input, ksize, strides, padding) 304 | return output 305 | 306 | @staticmethod 307 | def conv2d(_input, out_features, kernel_size, strides=1, padding='SAME', param_initializer=None): 308 | if kernel_size == 1: padding = 'VALID' 309 | 310 | in_features = int(_input.get_shape()[-1]) 311 | if not param_initializer: param_initializer = {} 312 | kernel = BasicModel.weight_variable( 313 | [kernel_size, 
kernel_size, in_features, out_features], 314 | name='kernel', 315 | initializer=param_initializer.get('kernel', tf.contrib.layers.variance_scaling_initializer()) 316 | ) 317 | output = tf.nn.conv2d(_input, kernel, [1, strides, strides, 1], padding) 318 | return output 319 | 320 | @staticmethod 321 | def fc_layer(_input, out_units, use_bias=False, param_initializer=None): 322 | features_total = int(_input.get_shape()[-1]) 323 | if not param_initializer: param_initializer = {} 324 | W = BasicModel.weight_variable( 325 | [features_total, out_units], name='W', 326 | initializer=param_initializer.get('W', tf.contrib.layers.xavier_initializer()) 327 | ) 328 | output = tf.matmul(_input, W) 329 | if use_bias: 330 | bias = BasicModel.weight_variable( 331 | [out_units], name='bias', 332 | initializer=param_initializer.get('bias', tf.constant_initializer([0.0] * out_units)) 333 | ) 334 | output += bias 335 | return output 336 | 337 | @staticmethod 338 | def batch_norm(_input, is_training, epsilon=1e-3, decay=0.999, param_initializer=None): 339 | output = tf.contrib.layers.batch_norm( 340 | _input, scale=True, is_training=is_training, param_initializers=param_initializer, 341 | updates_collections=None, epsilon=epsilon, decay=decay) 342 | return output 343 | 344 | @staticmethod 345 | def activation(_input, activation='relu'): 346 | if activation == 'relu': 347 | return tf.nn.relu(_input) 348 | elif activation == 'tanh': 349 | return tf.tanh(_input) 350 | elif activation == 'sigmoid': 351 | return tf.sigmoid(_input) 352 | elif activation == 'softmax': 353 | return tf.nn.softmax(_input) 354 | elif activation is None: 355 | return _input 356 | else: 357 | raise ValueError('Do not support %s' % activation) 358 | 359 | @staticmethod 360 | def build_optimizer(learning_rate, opt_name, opt_param): 361 | if opt_name == 'momentum': 362 | return tf.train.MomentumOptimizer(learning_rate, **opt_param) 363 | elif opt_name == 'adam': 364 | return tf.train.AdamOptimizer(learning_rate, **opt_param) 365 | else: 366 | raise ValueError('Do not support the optimizer type: %s' % opt_name) 367 | 368 | @staticmethod 369 | def flatten(_input): 370 | input_shape = _input.shape.as_list() 371 | if len(input_shape) != 2: 372 | return tf.reshape(_input, [-1, np.prod(input_shape[1:])]) 373 | else: 374 | return _input 375 | -------------------------------------------------------------------------------- /code/models/convnet.py: -------------------------------------------------------------------------------- 1 | from models.basic_model import BasicModel 2 | from data_providers.base_provider import DataProvider 3 | from models.layers import ConvLayer, PoolLayer, FCLayer 4 | from models.layer_cascade import LayerCascade 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | class SimpleConvnetConfig: 10 | def __init__(self): 11 | self.net_config = { 12 | 'weight_decay': None, 13 | 'bn_epsilon': None, 14 | 'bn_decay': None, 15 | 'drop_scheme': None, 16 | } 17 | self.layer_cascade = None 18 | 19 | @property 20 | def weight_decay(self): return self.net_config['weight_decay'] 21 | 22 | @property 23 | def bn_epsilon(self): return self.net_config['bn_epsilon'] 24 | 25 | @property 26 | def bn_decay(self): return self.net_config['bn_decay'] 27 | 28 | @property 29 | def drop_scheme(self): return self.net_config['drop_scheme'] 30 | 31 | @property 32 | def depth(self): return self.layer_cascade.depth 33 | 34 | def get_config(self): 35 | return { 36 | 'name': 'SimpleConvnet', 37 | **self.net_config, 38 | 'layer_cascade': 
self.layer_cascade.get_config() 39 | } 40 | 41 | def copy(self): 42 | net_config = SimpleConvnetConfig() 43 | net_config.set_net_from_config(self.get_config(), self.renew_init(None), print_info=False) 44 | return net_config 45 | 46 | def renew_init(self, convnet): 47 | return { 48 | 'layer_cascade': self.layer_cascade.renew_init(convnet) 49 | } 50 | 51 | def set_standard_convnet(self, data_provider: DataProvider, conv_blocks_config, fc_block_config, weight_decay, 52 | drop_scheme, bn_epsilon, bn_decay, print_info=True, **kwargs): 53 | self.net_config = { 54 | 'weight_decay': weight_decay, 55 | 'bn_epsilon': bn_epsilon, 56 | 'bn_decay': bn_decay, 57 | 'drop_scheme': drop_scheme, 58 | } 59 | 60 | image_size = data_provider.data_shape[0] 61 | 62 | layers = [] 63 | conv_id = 0 64 | for _i, block_config in enumerate(conv_blocks_config): 65 | num_layers, kernel_size, filter_num = block_config 66 | for _j in range(num_layers): 67 | keep_prob = 1.0 68 | if 'conv' in drop_scheme['type']: 69 | keep_prob = 1.0 if _i + _j == 0 else drop_scheme.get('conv_drop', 1.0) 70 | conv_layer = ConvLayer('conv_%d' % conv_id, filter_num, kernel_size=kernel_size, keep_prob=keep_prob, 71 | pre_activation=False) 72 | conv_id += 1 73 | layers.append(conv_layer) 74 | if _i < len(conv_blocks_config) - 1: 75 | keep_prob = 1.0 76 | if 'pool' in drop_scheme['type']: 77 | keep_prob = drop_scheme.get('pool_drop', 1.0) 78 | pool_layer = PoolLayer('pool_%d' % _i, 'max', keep_prob=keep_prob, pre_activation=False) 79 | layers.append(pool_layer) 80 | image_size = image_size // 2 81 | global_avg_pool = PoolLayer('pool_%d' % len(conv_blocks_config), 'avg', 82 | kernel_size=image_size, strides=image_size, pre_activation=False) 83 | layers.append(global_avg_pool) 84 | for _i, units in enumerate(fc_block_config): 85 | keep_prob = 1.0 86 | if 'fc' in drop_scheme['type']: 87 | keep_prob = drop_scheme.get('fc_drop', 1.0) 88 | fc_layer = FCLayer('fc_%d' % _i, units, keep_prob=keep_prob) 89 | layers.append(fc_layer) 90 | final_fc_layer = FCLayer('fc_%d' % len(fc_block_config), data_provider.n_classes, use_bn=False, use_bias=True, 91 | activation=None) 92 | layers.append(final_fc_layer) 93 | self.layer_cascade = LayerCascade('SimpleConvNet', layers) 94 | 95 | if print_info: 96 | pass 97 | return self 98 | 99 | def set_net_from_config(self, net_config_json, init=None, print_info=True): 100 | for key in self.net_config.keys(): 101 | self.net_config[key] = net_config_json[key] 102 | init = init['layer_cascade'] if init is not None else None 103 | self.layer_cascade = LayerCascade.set_from_config(net_config_json['layer_cascade'], init) 104 | if print_info: 105 | pass 106 | return self 107 | 108 | def widen(self, layer_idx, new_width, widen_type='output_dim', noise=None): 109 | change_out_dim, _, _ = self.layer_cascade.widen(layer_idx, new_width, widen_type, noise) 110 | if change_out_dim: 111 | raise ValueError('Can not change the final logits number') 112 | 113 | def deepen(self, layer_idx, new_layer_config): 114 | return self.layer_cascade.deepen(layer_idx, new_layer_config, None) 115 | 116 | def set_identity4deepen(self, to_set_layers, data_provider, batch_size, batch_num=1, strict=True, noise=None): 117 | """ 118 | to_set_layers = [(new_layer, prev_layer), ...] 
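Each (new_layer, prev_layer) pair inserts `new_layer` as an identity mapping. When `new_layer` uses batch norm and `strict` is True, the moving mean/variance of `prev_layer`'s output are first estimated from `batch_num` batches of `batch_size` training images through a forward-only copy of the network, so the deepened network initially computes the same function.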
119 | """ 120 | task_list = {} 121 | for new_layer, prev_layer in to_set_layers: 122 | if new_layer.ready: continue 123 | if new_layer.use_bn and strict: 124 | task_id = id(prev_layer) 125 | if task_id in task_list: 126 | task_list[task_id][1].append(new_layer) 127 | else: 128 | task_list[task_id] = (prev_layer, [new_layer]) 129 | else: 130 | new_layer.set_identity_layer(strict=strict, noise=noise) 131 | if len(task_list) > 0: 132 | model = SimpleConvnet(None, data_provider, None, net_config=self, only_forward=True) 133 | task_list = list(task_list.values()) 134 | fetches = [prev_layer.output_op for prev_layer, _ in task_list] 135 | statistics = [[0, 0] for _ in task_list] 136 | for _i in range(batch_num): 137 | input_images, _ = data_provider.train.next_batch(batch_size) 138 | outputs = model.sess.run(fetches, feed_dict={model.images: input_images, model.is_training: False}) 139 | for _j, out in enumerate(outputs): 140 | out = out.astype('float32') 141 | axis = tuple(range(len(out.shape) - 1)) 142 | mean = np.mean(out, axis=axis, keepdims=True) 143 | variance = np.mean(np.square(out - mean), axis=axis, keepdims=True) 144 | mean, variance = np.squeeze(mean), np.squeeze(variance) 145 | statistics[_j][0] += mean 146 | statistics[_j][1] += variance 147 | for _j, (prev_layer, new_layers) in enumerate(task_list): 148 | mean, variance = statistics[_j][0] / batch_num, statistics[_j][1] / batch_num 149 | for new_layer in new_layers: 150 | if new_layer.ready: continue 151 | param = { 152 | 'moving_mean': mean, 153 | 'moving_variance': variance, 154 | 'epsilon': self.bn_epsilon, 155 | } 156 | new_layer.set_identity_layer(strict=strict, param=param, noise=noise) 157 | 158 | 159 | class SimpleConvnet(BasicModel): 160 | def _build_graph(self, only_forward=False): 161 | _input = self.images 162 | output = _input 163 | 164 | output = self.net_config.layer_cascade.build(output, self, store_output_op=only_forward) 165 | 166 | if not only_forward: 167 | logits = output 168 | with tf.variable_scope('L2_Loss'): 169 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 170 | 171 | prediction = tf.nn.softmax(logits) 172 | 173 | # losses 174 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( 175 | logits=logits, labels=self.labels)) 176 | self.cross_entropy = cross_entropy 177 | 178 | # optimizer and train step 179 | optimizer = self.build_optimizer(self.learning_rate, 180 | self.run_config.opt_config[0], self.run_config.opt_config[1]) 181 | self.train_step = optimizer.minimize( 182 | cross_entropy + l2_loss * self.net_config.weight_decay) 183 | correct_prediction = tf.equal( 184 | tf.argmax(prediction, 1), 185 | tf.argmax(self.labels, 1)) 186 | self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 187 | -------------------------------------------------------------------------------- /code/models/dense_net.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.basic_model import BasicModel 3 | from models.layers import ConvLayer, FCLayer, PoolLayer, get_magnifier, apply_noise 4 | from data_providers.base_provider import DataProvider 5 | from models.layer_cascade import LayerCascade 6 | from models.layer_multi_branch import LayerMultiBranch 7 | import numpy as np 8 | 9 | 10 | def get_block_by_name(name): 11 | if name == 'transition': 12 | return TransitionBlock 13 | elif name == 'dense_block': 14 | return DenseBlock 15 | else: 16 | raise ValueError('Unsupported block type: 
%s' % name) 17 | 18 | 19 | class TransitionBlock(LayerCascade): 20 | def get_config(self): 21 | return { 22 | 'name': 'transition', 23 | **super(TransitionBlock, self).get_config(), 24 | } 25 | 26 | @staticmethod 27 | def set_from_config(config_json, init=None, return_class=True): 28 | _id, layers = LayerCascade.set_from_config(config_json, init, return_class=False) 29 | return TransitionBlock(_id, layers) 30 | 31 | def prev_widen(self, indices, magnifier, noise=None): 32 | super(TransitionBlock, self).prev_widen(indices, magnifier, noise=noise) 33 | return False, None, None 34 | 35 | def widen(self, loc, new_width, widen_type='output_dim', noise=None, input_dim=None): 36 | return super(TransitionBlock, self).widen(loc['layer'], new_width, widen_type, noise=noise) 37 | 38 | def deepen(self, loc, new_layer_config, input_dim): 39 | return super(TransitionBlock, self).deepen(loc['layer'], new_layer_config, input_dim) 40 | 41 | 42 | class DenseBlock: 43 | def __init__(self, _id, miniblocks): 44 | self._id = _id 45 | self.miniblocks = miniblocks 46 | 47 | self.output_op = None 48 | 49 | @property 50 | def id(self): 51 | return self._id 52 | 53 | @id.setter 54 | def id(self, value): 55 | self._id = value 56 | 57 | @property 58 | def depth(self): 59 | depth = 0 60 | for miniblock in self.miniblocks: 61 | depth += miniblock.depth 62 | return depth 63 | 64 | def out_features_dim(self, in_features_dim): 65 | out_features_dim = in_features_dim 66 | for miniblock in self.miniblocks: 67 | out_features_dim += miniblock.out_features_dim 68 | return out_features_dim 69 | 70 | def build(self, _input, densenet, store_output_op=False): 71 | output = _input 72 | with tf.variable_scope(self._id): 73 | for miniblock in self.miniblocks: 74 | comp_out = miniblock.build(output, densenet, store_output_op=store_output_op) 75 | output = tf.concat(axis=3, values=(output, comp_out)) 76 | if store_output_op: 77 | self.output_op = output 78 | return output 79 | 80 | def get_config(self): 81 | return { 82 | 'name': 'dense_block', 83 | '_id': self._id, 84 | 'miniblocks': [miniblock.get_config() for miniblock in self.miniblocks] 85 | } 86 | 87 | def renew_init(self, densenet): 88 | return { 89 | '_id': self._id, 90 | 'miniblocks': [miniblock.renew_init(densenet) for miniblock in self.miniblocks] 91 | } 92 | 93 | @staticmethod 94 | def set_from_config(config_json, init=None): 95 | _id = config_json['_id'] 96 | miniblocks = [] 97 | for _i, miniblock_config in enumerate(config_json['miniblocks']): 98 | miniblock_init = init['miniblocks'][_i] if init is not None else None 99 | miniblock = LayerMultiBranch.set_from_config(miniblock_config, miniblock_init) 100 | miniblocks.append(miniblock) 101 | return DenseBlock(_id, miniblocks) 102 | 103 | """ 104 | Network Transformation Operations 105 | """ 106 | def insert_miniblock(self, idx, miniblock_config, input_dim, noise=None, scheme=0): 107 | assert 0 <= idx < len(self.miniblocks), 'Invalid miniblock index %d' % idx 108 | if miniblock_config['bc_mode']: 109 | # DenseNet-BC 110 | if scheme == 0: 111 | copy_idx = idx 112 | copy_miniblock = self.miniblocks[copy_idx] 113 | new_in_bottle = copy_miniblock.in_bottle.copy() 114 | new_in_layer = new_in_bottle.layers[0] 115 | pad_kernel_shape = list(new_in_layer.init['kernel'].shape) 116 | pad_kernel_shape[2] = copy_miniblock.out_features_dim 117 | new_in_layer.init['kernel'] = \ 118 | np.concatenate([new_in_layer.init['kernel'], np.zeros(pad_kernel_shape)], axis=2) 119 | if new_in_layer.pre_activation and new_in_layer.use_bn: 120 | 
new_in_layer.init['beta'] = \ 121 | np.concatenate([new_in_layer.init['beta'], np.zeros([copy_miniblock.out_features_dim])]) 122 | new_in_layer.init['gamma'] = \ 123 | np.concatenate([new_in_layer.init['gamma'], np.ones([copy_miniblock.out_features_dim])]) 124 | new_in_layer.init['moving_mean'] = \ 125 | np.concatenate([new_in_layer.init['moving_mean'], np.zeros([copy_miniblock.out_features_dim])]) 126 | new_in_layer.init['moving_variance'] = \ 127 | np.concatenate([new_in_layer.init['moving_variance'], np.ones([copy_miniblock.out_features_dim])]) 128 | new_in_layer.init['kernel'] = apply_noise(new_in_layer.init['kernel'], noise.get('wider')) 129 | if copy_miniblock.out_bottle is None: 130 | new_branches, indices = copy_miniblock.remapped_branches(noise=noise) 131 | new_miniblock = LayerMultiBranch('M_%d' % (idx + 2), new_branches, 132 | merge=copy_miniblock.merge, in_bottle=new_in_bottle) 133 | old_size = len(indices) 134 | indices = np.concatenate([np.arange(old_size), indices]) 135 | magnifier = get_magnifier(old_size, indices) 136 | 137 | prev_miniblock_out_dim = input_dim 138 | for _i in range(0, idx): 139 | prev_miniblock_out_dim += self.miniblocks[_i].out_features_dim 140 | indices = np.concatenate([ 141 | np.arange(prev_miniblock_out_dim), 142 | indices + prev_miniblock_out_dim, 143 | ]) 144 | magnifier = np.concatenate([ 145 | [1] * prev_miniblock_out_dim, 146 | magnifier, 147 | ]) 148 | prev_miniblock_out_dim += old_size 149 | for _i in range(idx + 1, len(self.miniblocks)): 150 | miniblock_out_dim = self.miniblocks[_i].out_features_dim 151 | self.miniblocks[_i].id = 'M_%d' % (_i + 2) 152 | self.miniblocks[_i].prev_widen(indices, magnifier, noise=noise) 153 | indices = np.concatenate([ 154 | indices, 155 | np.arange(prev_miniblock_out_dim, prev_miniblock_out_dim + miniblock_out_dim) 156 | ]) 157 | magnifier = np.concatenate([ 158 | magnifier, 159 | [1] * miniblock_out_dim, 160 | ]) 161 | prev_miniblock_out_dim += miniblock_out_dim 162 | self.miniblocks = self.miniblocks[:idx + 1] + [new_miniblock] + self.miniblocks[idx + 1:] 163 | return indices, magnifier 164 | else: 165 | raise NotImplementedError 166 | else: 167 | # identity scheme 168 | raise NotImplementedError 169 | else: 170 | # DenseNet without BC 171 | raise NotImplementedError 172 | 173 | def prev_widen(self, indices, magnifier, noise=None): 174 | old_size = np.max(indices) + 1 175 | prev_miniblock_out_dim = old_size 176 | for miniblock in self.miniblocks: 177 | miniblock_out_dim = miniblock.out_features_dim 178 | miniblock.prev_widen(indices, magnifier, noise=noise) 179 | indices = np.concatenate([ 180 | indices, 181 | np.arange(prev_miniblock_out_dim, prev_miniblock_out_dim + miniblock_out_dim) 182 | ]) 183 | magnifier = np.concatenate([ 184 | magnifier, 185 | [1] * miniblock_out_dim, 186 | ]) 187 | prev_miniblock_out_dim += miniblock_out_dim 188 | return True, indices, magnifier 189 | 190 | def widen(self, loc, new_width, widen_type='output_dim', noise=None, input_dim=3): 191 | miniblock_idx = loc['miniblock'] 192 | miniblock = self.miniblocks[miniblock_idx] 193 | old_miniblock_out_dim = miniblock.out_features_dim 194 | change_out_dim, indices, magnifier = miniblock.widen(loc, new_width, widen_type, noise=noise) 195 | if change_out_dim: 196 | prev_miniblock_out_dim = input_dim 197 | for _i in range(0, miniblock_idx): 198 | prev_miniblock_out_dim += self.miniblocks[_i].out_features_dim 199 | indices = np.concatenate([ 200 | np.arange(prev_miniblock_out_dim), 201 | indices + prev_miniblock_out_dim, 202 | ]) 203 | 
magnifier = np.concatenate([ 204 | [1] * prev_miniblock_out_dim, 205 | magnifier, 206 | ]) 207 | prev_miniblock_out_dim += old_miniblock_out_dim 208 | for _i in range(miniblock_idx + 1, len(self.miniblocks)): 209 | miniblock_out_dim = self.miniblocks[_i].out_features_dim 210 | self.miniblocks[_i].prev_widen(indices, magnifier, noise=noise) 211 | indices = np.concatenate([ 212 | indices, 213 | np.arange(prev_miniblock_out_dim, prev_miniblock_out_dim + miniblock_out_dim) 214 | ]) 215 | magnifier = np.concatenate([ 216 | magnifier, 217 | [1] * miniblock_out_dim, 218 | ]) 219 | prev_miniblock_out_dim += miniblock_out_dim 220 | return True, indices, magnifier 221 | else: 222 | return False, None, None 223 | 224 | def deepen(self, loc, new_layer_config, input_dim): 225 | miniblock_idx = loc['miniblock'] 226 | for _i in range(0, miniblock_idx): 227 | input_dim += self.miniblocks[_i].out_features_dim 228 | return self.miniblocks[miniblock_idx].deepen(loc, new_layer_config, input_dim) 229 | 230 | 231 | class DenseNetConfig: 232 | def __init__(self): 233 | self.net_config = { 234 | 'model_type': None, 235 | 'weight_decay': None, 236 | 'first_ratio': None, 237 | 'reduction': None, 238 | 'bc_ratio': None, 239 | 'bn_epsilon': None, 240 | 'bn_decay': None, 241 | 'pre_activation': None, 242 | } 243 | self.blocks = None 244 | 245 | @property 246 | def model_type(self): return self.net_config['model_type'] 247 | 248 | @property 249 | def weight_decay(self): return self.net_config['weight_decay'] 250 | 251 | @property 252 | def first_ratio(self): return self.net_config['first_ratio'] 253 | 254 | @property 255 | def reduction(self): return self.net_config['reduction'] 256 | 257 | @property 258 | def bc_ratio(self): return self.net_config['bc_ratio'] 259 | 260 | @property 261 | def bn_epsilon(self): return self.net_config['bn_epsilon'] 262 | 263 | @property 264 | def bn_decay(self): return self.net_config['bn_decay'] 265 | 266 | @property 267 | def depth(self): 268 | depth = 0 269 | for block in self.blocks: 270 | depth += block.depth 271 | return depth 272 | 273 | @property 274 | def average_growth_rate(self): 275 | growth_rate_list = [] 276 | for block in self.blocks: 277 | if isinstance(block, DenseBlock): 278 | for miniblock in block.miniblocks: 279 | growth_rate = miniblock.out_features_dim 280 | growth_rate_list.append(growth_rate) 281 | return np.mean(growth_rate_list) 282 | 283 | def copy(self): 284 | net_config = DenseNetConfig() 285 | net_config.set_net_from_config(self.get_config(), self.renew_init(None), print_info=False) 286 | return net_config 287 | 288 | def get_config(self): 289 | return { 290 | 'name': 'DenseNet', 291 | **self.net_config, 292 | 'blocks': [block.get_config() for block in self.blocks] 293 | } 294 | 295 | def renew_init(self, densenet): 296 | return { 297 | 'blocks': [block.renew_init(densenet) for block in self.blocks] 298 | } 299 | 300 | def set_standard_dense_net(self, data_provider: DataProvider, growth_rate, depth, total_blocks, 301 | keep_prob, weight_decay, model_type, 302 | first_ratio=2, reduction=1.0, bc_ratio=4, 303 | bn_epsilon=1e-5, bn_decay=0.9, print_info=True, 304 | pre_activation=True, **kwargs): 305 | self.net_config = { 306 | 'model_type': model_type, 307 | 'weight_decay': weight_decay, 308 | 'first_ratio': first_ratio, 309 | 'reduction': reduction, 310 | 'bc_ratio': bc_ratio, 311 | 'bn_epsilon': bn_epsilon, 312 | 'bn_decay': bn_decay, 313 | 'pre_activation': pre_activation, 314 | } 315 | 316 | image_size = data_provider.data_shape[0] 317 | 318 | 
first_output_features = growth_rate * first_ratio 319 | bc_mode = (model_type == 'DenseNet-BC') 320 | layers_per_block = (depth - (total_blocks + 1)) // total_blocks 321 | if bc_mode: layers_per_block = layers_per_block // 2 322 | 323 | # initial conv 324 | if pre_activation: 325 | init_conv_layer = ConvLayer('conv_0', first_output_features, kernel_size=3, activation=None, use_bn=False) 326 | else: 327 | init_conv_layer = ConvLayer('conv_0', first_output_features, kernel_size=3, pre_activation=False) 328 | init_transition = TransitionBlock('T_0_first', [init_conv_layer]) 329 | self.blocks = [init_transition] 330 | 331 | # Dense Blocks 332 | in_features_dim = first_output_features 333 | for block_idx in range(1, total_blocks + 1): 334 | miniblocks = [] 335 | block_id = 'D_%d' % block_idx 336 | for miniblock_idx in range(1, layers_per_block + 1): 337 | miniblock_id = 'M_%d' % miniblock_idx 338 | in_bottle = None 339 | if bc_mode: 340 | bottelneck_layer = ConvLayer('conv_0', growth_rate * bc_ratio, kernel_size=1, keep_prob=keep_prob, 341 | pre_activation=pre_activation) 342 | in_bottle = LayerCascade('in_bottle', [bottelneck_layer]) 343 | 344 | branch_0 = LayerCascade('B_0', [ 345 | ConvLayer('conv_0', growth_rate, kernel_size=3, 346 | keep_prob=keep_prob, pre_activation=pre_activation) 347 | ]) 348 | miniblocks.append(LayerMultiBranch(miniblock_id, [branch_0], in_bottle=in_bottle)) 349 | dense_block = DenseBlock(block_id, miniblocks) 350 | self.blocks += [dense_block] 351 | 352 | out_features_dim = dense_block.out_features_dim(in_features_dim) 353 | if block_idx != total_blocks: 354 | out_features_dim = int(out_features_dim * reduction) 355 | transition_id = 'T_%d_middle' % block_idx 356 | conv_layer = ConvLayer('conv_0', out_features_dim, kernel_size=1, keep_prob=keep_prob, 357 | pre_activation=pre_activation) 358 | avg_pool_layer = PoolLayer('pool_0', 'avg', kernel_size=2, strides=2) 359 | transition = TransitionBlock(transition_id, [conv_layer, avg_pool_layer]) 360 | self.blocks.append(transition) 361 | image_size = image_size // 2 362 | in_features_dim = out_features_dim 363 | 364 | # Transition to classes 365 | if pre_activation: 366 | global_avg_pool = PoolLayer('pool_0', 'avg', kernel_size=image_size, strides=image_size, 367 | activation='relu', use_bn=True) 368 | else: 369 | global_avg_pool = PoolLayer('pool_0', 'avg', kernel_size=image_size, strides=image_size, 370 | pre_activation=False) 371 | final_fc_layer = FCLayer('fc_0', data_provider.n_classes, use_bn=False, use_bias=True, activation=None) 372 | transition_to_classes = TransitionBlock('T_to_classes', [global_avg_pool, final_fc_layer]) 373 | self.blocks.append(transition_to_classes) 374 | 375 | # print information about the network 376 | if print_info: 377 | print('Set Standard %s' % model_type) 378 | 379 | if not bc_mode: 380 | print('Build %s model with %d blocks, ' 381 | '%d composite layers each.' % (model_type, total_blocks, layers_per_block)) 382 | if bc_mode: 383 | print('Build %s model with %d blocks, ' 384 | '%d bottleneck layers and %d composite layers each.' 
% ( 385 | model_type, total_blocks, layers_per_block, layers_per_block)) 386 | print('Reduction at transition layers: %.2f' % reduction) 387 | return self 388 | 389 | def set_net_from_config(self, net_config_json, init=None, print_info=True): 390 | # load config and init (if exist) 391 | for key in self.net_config.keys(): 392 | self.net_config[key] = net_config_json[key] 393 | self.blocks = [] 394 | for _i, block_config in enumerate(net_config_json['blocks']): 395 | block_init = init['blocks'][_i] if init is not None else None 396 | block = get_block_by_name(block_config['name']) 397 | self.blocks.append(block.set_from_config(block_config, block_init)) 398 | if print_info: 399 | print('Set DenseNet from config:') 400 | for k, v in self.net_config.items(): 401 | print('\t%s: %s' % (k, v)) 402 | print('\t%s: %d' % ('depth', self.depth)) 403 | return self 404 | 405 | def widen(self, loc, new_width, widen_type='output_dim', noise=None, image_channel=3): 406 | """ 407 | widen_type: "output_dim" or "kernel" 408 | """ 409 | block_idx = loc['block'] 410 | if block_idx == 0: 411 | input_dim = image_channel 412 | elif isinstance(self.blocks[block_idx - 1], TransitionBlock): 413 | input_dim = self.blocks[block_idx - 1].out_features_dim 414 | else: 415 | input_dim = self.blocks[block_idx - 1].out_features_dim(self.blocks[block_idx - 2].out_features_dim) 416 | 417 | change_out_dim, indices, magnifier = \ 418 | self.blocks[block_idx].widen(loc, new_width, widen_type, noise=noise, input_dim=input_dim) 419 | while change_out_dim: 420 | change_out_dim, indices, magnifier = self.blocks[block_idx + 1].prev_widen(indices, magnifier, noise=noise) 421 | block_idx += 1 422 | 423 | def deepen(self, loc, new_layer_config, image_channel=3): 424 | new_layer_config['pre_activation'] = self.net_config['pre_activation'] 425 | block_idx = loc['block'] 426 | if block_idx == 0: 427 | input_dim = image_channel 428 | elif isinstance(self.blocks[block_idx - 1], TransitionBlock): 429 | input_dim = self.blocks[block_idx - 1].out_features_dim 430 | else: 431 | input_dim = self.blocks[block_idx - 1].out_features_dim(self.blocks[block_idx - 2].out_features_dim) 432 | 433 | return self.blocks[block_idx].deepen(loc, new_layer_config, input_dim) 434 | 435 | def set_identity4deepen(self, to_set_layers, data_provider, batch_size, batch_num=1, strict=True, noise=None): 436 | """ 437 | to_set_layers = [(new_layer, prev_layer), ...] 
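Analogous to SimpleConvnetConfig.set_identity4deepen: layers that are not yet ready are set to identity, and strict batch-norm layers obtain their moving statistics from `batch_num` forward batches through a temporary DenseNet built with only_forward=True.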
438 | """ 439 | task_list = {} 440 | for new_layer, prev_layer in to_set_layers: 441 | if new_layer.ready: continue 442 | if new_layer.use_bn and strict: 443 | task_id = id(prev_layer) 444 | if task_id in task_list: 445 | task_list[task_id][1].append(new_layer) 446 | else: 447 | task_list[task_id] = (prev_layer, [new_layer]) 448 | else: 449 | new_layer.set_identity_layer(strict=strict, noise=noise) 450 | if len(task_list) > 0: 451 | model = DenseNet(None, data_provider, None, net_config=self, only_forward=True) 452 | task_list = list(task_list.values()) 453 | fetches = [prev_layer.output_op for prev_layer, _ in task_list] 454 | statistics = [[0, 0] for _ in task_list] 455 | for _i in range(batch_num): 456 | input_images, _ = data_provider.train.next_batch(batch_size) 457 | outputs = model.sess.run(fetches, feed_dict={model.images: input_images, model.is_training: False}) 458 | for _j, out in enumerate(outputs): 459 | out = out.astype('float32') 460 | axis = tuple(range(len(out.shape) - 1)) 461 | mean = np.mean(out, axis=axis, keepdims=True) 462 | variance = np.mean(np.square(out - mean), axis=axis, keepdims=True) 463 | mean, variance = np.squeeze(mean), np.squeeze(variance) 464 | statistics[_j][0] += mean 465 | statistics[_j][1] += variance 466 | for _j, (prev_layer, new_layers) in enumerate(task_list): 467 | mean, variance = statistics[_j][0] / batch_num, statistics[_j][1] / batch_num 468 | for new_layer in new_layers: 469 | if new_layer.ready: continue 470 | param = { 471 | 'moving_mean': mean, 472 | 'moving_variance': variance, 473 | 'epsilon': self.bn_epsilon, 474 | } 475 | new_layer.set_identity_layer(strict=strict, param=param, noise=noise) 476 | 477 | def insert_miniblock(self, loc, miniblock_config, image_channel=3, noise=None): 478 | block_idx = loc['block'] 479 | if block_idx == 0: 480 | input_dim = image_channel 481 | elif isinstance(self.blocks[block_idx - 1], TransitionBlock): 482 | input_dim = self.blocks[block_idx - 1].out_features_dim 483 | else: 484 | input_dim = self.blocks[block_idx - 1].out_features_dim(self.blocks[block_idx - 2].out_features_dim) 485 | 486 | assert isinstance(self.blocks[block_idx], DenseBlock), 'Invalid' 487 | indices, magnifier = \ 488 | self.blocks[block_idx].insert_miniblock(loc['miniblock'], miniblock_config, input_dim, noise=noise) 489 | self.blocks[block_idx + 1].prev_widen(indices, magnifier, noise=noise) 490 | 491 | 492 | class DenseNet(BasicModel): 493 | def _build_graph(self, only_forward=False): 494 | _input = self.images 495 | output = _input 496 | # building blocks (transition and dense) 497 | for block in self.net_config.blocks: 498 | output = block.build(output, self, store_output_op=only_forward) 499 | 500 | if not only_forward: 501 | logits = output 502 | with tf.variable_scope('L2_Loss'): 503 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 504 | 505 | prediction = tf.nn.softmax(logits) 506 | 507 | # losses 508 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( 509 | logits=logits, labels=self.labels)) 510 | self.cross_entropy = cross_entropy 511 | 512 | # optimizer and train step 513 | optimizer = self.build_optimizer(self.learning_rate, 514 | self.run_config.opt_config[0], self.run_config.opt_config[1]) 515 | self.train_step = optimizer.minimize( 516 | cross_entropy + l2_loss * self.net_config.weight_decay) 517 | correct_prediction = tf.equal( 518 | tf.argmax(prediction, 1), 519 | tf.argmax(self.labels, 1)) 520 | self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 
tf.float32)) 521 | -------------------------------------------------------------------------------- /code/models/layer_cascade.py: -------------------------------------------------------------------------------- 1 | from models.layers import ConvLayer, FCLayer, PoolLayer, get_layer_by_name 2 | import tensorflow as tf 3 | 4 | 5 | class LayerCascade: 6 | def __init__(self, _id, layers): 7 | self._id = _id 8 | self.layers = layers 9 | 10 | self.output_op = None 11 | 12 | @property 13 | def id(self): 14 | return self._id 15 | 16 | @id.setter 17 | def id(self, value): 18 | self._id = value 19 | 20 | @property 21 | def out_features_dim(self): 22 | for layer in self.layers[::-1]: 23 | if isinstance(layer, ConvLayer): 24 | return layer.filter_num 25 | elif isinstance(layer, FCLayer): 26 | return layer.units 27 | return None 28 | 29 | @property 30 | def depth(self): 31 | depth = 0 32 | for layer in self.layers: 33 | if isinstance(layer, ConvLayer) or isinstance(layer, FCLayer): 34 | depth += 1 35 | return depth 36 | 37 | def get_str(self): 38 | layers_str = [layer.layer_str for layer in self.layers] 39 | return '-'.join(layers_str) 40 | 41 | def build(self, _input, densenet, store_output_op=False): 42 | output = _input 43 | with tf.variable_scope(self._id): 44 | for layer in self.layers: 45 | output = layer.build(output, densenet, store_output_op=store_output_op) 46 | if store_output_op: 47 | self.output_op = output 48 | return output 49 | 50 | def get_config(self): 51 | return { 52 | '_id': self._id, 53 | 'layers': [layer.get_config() for layer in self.layers] 54 | } 55 | 56 | def renew_init(self, densenet): 57 | return { 58 | '_id': self._id, 59 | 'layers': [layer.renew_init(densenet) for layer in self.layers] 60 | } 61 | 62 | def copy(self): 63 | return self.set_from_config(self.get_config(), init=self.renew_init(None)) 64 | 65 | @staticmethod 66 | def set_from_config(config_json, init=None, return_class=True): 67 | _id = config_json['_id'] 68 | layers = [] 69 | for _i, layer_config in enumerate(config_json['layers']): 70 | layer_init = init['layers'][_i] if init is not None else None 71 | layer = get_layer_by_name(layer_config['name']) 72 | layers.append(layer.set_from_config(layer_config, layer_init)) 73 | if return_class: 74 | return LayerCascade(_id, layers) 75 | else: 76 | return _id, layers 77 | 78 | """ 79 | Network Transformation Operations 80 | """ 81 | 82 | def prev_widen(self, indices, magnifier, noise=None): 83 | for layer in self.layers: 84 | if isinstance(layer, ConvLayer) or isinstance(layer, FCLayer): 85 | layer.prev_widen(indices, magnifier, noise=noise) 86 | break 87 | else: 88 | layer.prev_widen(indices, magnifier, noise=noise) 89 | 90 | def widen(self, idx, new_width, widen_type='output_dim', noise=None): 91 | assert idx < len(self.layers), 'Index out of range: %d' % idx 92 | if widen_type == 'output_dim': 93 | assert isinstance(self.layers[idx], ConvLayer) or \ 94 | isinstance(self.layers[idx], FCLayer), 'Operation not available' 95 | to_widen_layer = self.layers[idx] 96 | 97 | if isinstance(to_widen_layer, ConvLayer): 98 | indices, magnifier = to_widen_layer.widen_filters(new_filter_num=new_width, noise=noise) 99 | else: 100 | indices, magnifier = to_widen_layer.widen_units(new_units_num=new_width, noise=noise) 101 | after_widen_layer = None 102 | for _i in range(idx + 1, len(self.layers)): 103 | if isinstance(self.layers[_i], ConvLayer) or isinstance(self.layers[_i], FCLayer): 104 | self.layers[_i].prev_widen(indices, magnifier, noise=noise) 105 | after_widen_layer = 
self.layers[_i] 106 | break 107 | else: 108 | self.layers[_i].prev_widen(indices, magnifier, noise=noise) 109 | return after_widen_layer is None, indices, magnifier 110 | else: 111 | raise ValueError('%s is not supported' % widen_type) 112 | 113 | def deepen(self, idx, new_layer_config, input_dim): 114 | assert idx < len(self.layers), 'Index out of range: %d' % idx 115 | if new_layer_config['name'] == 'fc': 116 | assert idx == len(self.layers) - 1 or isinstance(self.layers[idx + 1], FCLayer), 'Invalid' 117 | assert isinstance(self.layers[idx], FCLayer) or isinstance(self.layers[idx], PoolLayer), 'Invalid' 118 | # prepare the new fc layer 119 | units = input_dim 120 | for _i in range(idx, -1, -1): 121 | if isinstance(self.layers[_i], FCLayer): 122 | units = self.layers[_i].units 123 | break 124 | elif isinstance(self.layers[_i], ConvLayer): 125 | units = self.layers[_i].filter_num 126 | break 127 | fc_idx = 0 128 | for _i in range(0, idx + 1): 129 | if isinstance(self.layers[_i], FCLayer): 130 | fc_idx += 1 131 | _id = 'fc_%d' % fc_idx 132 | # change the id of following fc layers 133 | for _i in range(idx + 1, len(self.layers)): 134 | if isinstance(self.layers[_i], FCLayer): 135 | self.layers[_i].id = 'fc_%d' % (fc_idx + 1) 136 | fc_idx += 1 137 | prev_layer = None 138 | for _i in range(idx, -1, -1): 139 | if self.layers[_i].ready: 140 | prev_layer = self.layers[_i] 141 | break 142 | assert prev_layer is not None, 'Invalid' 143 | new_fc_layer = FCLayer(_id, units, ready=False, **new_layer_config) 144 | # insert the new layer into the cascade 145 | self.layers = self.layers[:idx + 1] + [new_fc_layer] + self.layers[idx + 1:] 146 | return new_fc_layer, prev_layer 147 | elif new_layer_config['name'] == 'conv': 148 | assert idx == len(self.layers) - 1 or not isinstance(self.layers[idx + 1], FCLayer), 'Invalid' 149 | assert isinstance(self.layers[idx], ConvLayer) or isinstance(self.layers[idx], FCLayer), 'Invalid' 150 | # prepare the new conv layer 151 | filter_num = input_dim 152 | for _i in range(idx, -1, -1): 153 | if isinstance(self.layers[_i], ConvLayer): 154 | filter_num = self.layers[_i].filter_num 155 | break 156 | conv_idx = 0 157 | for _i in range(0, idx + 1): 158 | if isinstance(self.layers[_i], ConvLayer): 159 | conv_idx += 1 160 | _id = 'conv_%d' % conv_idx 161 | # change the id of following conv layers 162 | for _i in range(idx + 1, len(self.layers)): 163 | if isinstance(self.layers[_i], ConvLayer): 164 | self.layers[_i].id = 'conv_%d' % (conv_idx + 1) 165 | conv_idx += 1 166 | prev_layer = None 167 | for _i in range(idx, -1, -1): 168 | if self.layers[_i].ready: 169 | prev_layer = self.layers[_i] 170 | break 171 | assert prev_layer is not None, 'Invalid' 172 | new_conv_layer = ConvLayer(_id, filter_num, ready=False, **new_layer_config) 173 | self.layers = self.layers[:idx + 1] + [new_conv_layer] + self.layers[idx + 1:] 174 | return new_conv_layer, prev_layer 175 | else: 176 | raise ValueError('Not support to insert a %s layer' % new_layer_config['name']) 177 | -------------------------------------------------------------------------------- /code/models/layer_multi_branch.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from models.layer_cascade import LayerCascade 4 | 5 | 6 | class LayerMultiBranch: 7 | def __init__(self, _id, branches, merge=None, in_bottle=None, out_bottle=None): 8 | self._id = _id 9 | self.in_bottle = in_bottle 10 | self.branches = branches 11 | self.out_bottle = out_bottle 
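# Note (editor): a LayerMultiBranch is an optional `in_bottle` cascade, a set of parallel `branches`, and an optional
# `out_bottle` cascade; `merge` ('concat', 'add', or None when there is a single branch) decides how build() combines
# the branch outputs.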
12 | self.merge = merge 13 | if self.merge == 'add': 14 | out_dim = [] 15 | for branch in self.branches: 16 | out_dim.append(branch.out_features_dim) 17 | assert np.std(out_dim) == 0, '<%s> require the output dim of all branches are the same' % self.merge 18 | elif self.merge is None: 19 | assert len(self.branches) == 1, 'Invalid' 20 | 21 | self.output_op = None 22 | 23 | @property 24 | def id(self): 25 | return self._id 26 | 27 | @id.setter 28 | def id(self, value): 29 | self._id = value 30 | 31 | @property 32 | def out_features_dim(self): 33 | if self.out_bottle: 34 | return self.out_bottle.out_features_dim 35 | out_dim = [] 36 | for branch in self.branches: 37 | out_dim.append(branch.out_features_dim) 38 | if self.merge == 'concat': 39 | return np.sum(out_dim) 40 | elif self.merge == 'add' or self.merge is None: 41 | return out_dim[0] 42 | else: 43 | pass 44 | 45 | @property 46 | def depth(self): 47 | depth = 0 48 | if self.in_bottle: 49 | depth += self.in_bottle.depth 50 | if self.out_bottle: 51 | depth += self.out_bottle.depth 52 | branch_depth = [] 53 | for branch in self.branches: 54 | branch_depth.append(branch.depth) 55 | depth += np.max(branch_depth) 56 | return depth 57 | 58 | def get_str(self): 59 | in_bottle_str = 'N' if self.in_bottle is None else self.in_bottle.get_str() 60 | branches_str = [branch.get_str() for branch in self.branches] 61 | branches_str = '+'.join(branches_str) 62 | out_bottle_str = 'N' if self.out_bottle is None else self.out_bottle.get_str() 63 | return '%s~%s~%s' % (in_bottle_str, branches_str, out_bottle_str) 64 | 65 | def build(self, _input, densenet, store_output_op=False): 66 | with tf.variable_scope(self._id): 67 | output = _input 68 | # in bottle 69 | if self.in_bottle: 70 | output = self.in_bottle.build(output, densenet, store_output_op=store_output_op) 71 | # branches 72 | branch_out = [] 73 | for branch in self.branches: 74 | branch_out.append(branch.build(output, densenet, store_output_op=store_output_op)) 75 | if self.merge == 'concat': 76 | output = tf.concat(branch_out, axis=3) 77 | elif self.merge == 'add': 78 | output = tf.add_n(branch_out) 79 | elif self.merge is None: 80 | output = branch_out[0] 81 | else: 82 | raise ValueError('Do not support <%s>' % self.merge) 83 | # out bottle 84 | if self.out_bottle: 85 | output = self.out_bottle.build(output, densenet, store_output_op=store_output_op) 86 | if store_output_op: 87 | self.output_op = output 88 | return output 89 | 90 | def get_config(self): 91 | return { 92 | '_id': self._id, 93 | 'merge': self.merge, 94 | 'branches': [branch.get_config() for branch in self.branches], 95 | 'in_bottle': None if self.in_bottle is None else self.in_bottle.get_config(), 96 | 'out_bottle': None if self.out_bottle is None else self.out_bottle.get_config(), 97 | } 98 | 99 | def renew_init(self, densenet): 100 | return { 101 | '_id': self._id, 102 | 'branches': [branch.renew_init(densenet) for branch in self.branches], 103 | 'in_bottle': None if self.in_bottle is None else self.in_bottle.renew_init(densenet), 104 | 'out_bottle': None if self.out_bottle is None else self.out_bottle.renew_init(densenet), 105 | } 106 | 107 | @staticmethod 108 | def set_from_config(config_json, init=None): 109 | _id = config_json['_id'] 110 | merge = config_json['merge'] 111 | branches = [] 112 | for _i, branch_config in enumerate(config_json['branches']): 113 | branch_init = init['branches'][_i] if init is not None else None 114 | branch = LayerCascade.set_from_config(branch_config, branch_init) 115 | branches.append(branch) 116 
| in_bottle = config_json['in_bottle'] 117 | if in_bottle: 118 | in_bottle_init = init['in_bottle'] if init is not None else None 119 | in_bottle = LayerCascade.set_from_config(in_bottle, in_bottle_init) 120 | out_bottle = config_json['out_bottle'] 121 | if out_bottle: 122 | out_bottle_init = init['out_bottle'] if init is not None else None 123 | out_bottle = LayerCascade.set_from_config(out_bottle, out_bottle_init) 124 | return LayerMultiBranch(_id, branches, merge, in_bottle=in_bottle, out_bottle=out_bottle) 125 | 126 | """ 127 | Network Transformation Operations 128 | """ 129 | 130 | def prev_widen(self, indices, magnifier, noise=None): 131 | if self.in_bottle: 132 | self.in_bottle.prev_widen(indices, magnifier, noise=noise) 133 | else: 134 | for branch in self.branches: 135 | branch.prev_widen(indices, magnifier, noise=noise) 136 | 137 | def widen(self, loc, new_width, widen_type='output_dim', noise=None): 138 | if loc['multi-branch'] == 'in_bottle': 139 | assert self.in_bottle is not None, 'Invalid' 140 | change_out_dim, indices, magnifier = self.in_bottle.widen(loc['layer'], new_width, widen_type, noise=noise) 141 | if change_out_dim: 142 | for branch in self.branches: 143 | branch.prev_widen(indices, magnifier, noise=noise) 144 | return False, None, None 145 | elif loc['multi-branch'] == 'out_bottle': 146 | assert self.out_bottle is not None, 'Invalid' 147 | change_out_dim, indices, magnifier = self.out_bottle.widen(loc['layer'], new_width, widen_type, noise=noise) 148 | return change_out_dim, indices, magnifier 149 | elif loc['multi-branch'] == 'branch': 150 | branch_idx = loc['branch'] 151 | branch = self.branches[branch_idx] 152 | old_branch_out_dim = branch.out_features_dim 153 | change_out_dim, indices, magnifier = branch.widen(loc['layer'], new_width, widen_type, noise=noise) 154 | if change_out_dim: 155 | assert self.merge != 'add', 'Invalid' 156 | prev_branch_out_dim = 0 157 | for _i in range(0, branch_idx): 158 | prev_branch_out_dim += self.branches[_i].out_features_dim 159 | post_branch_out_dim = 0 160 | for _i in range(branch_idx + 1, len(self.branches)): 161 | post_branch_out_dim += self.branches[_i].out_features_dim 162 | old_size = prev_branch_out_dim + old_branch_out_dim + post_branch_out_dim 163 | base = np.arange(old_size) 164 | indices = np.concatenate([ 165 | base[:prev_branch_out_dim], 166 | indices + prev_branch_out_dim, 167 | base[prev_branch_out_dim + old_branch_out_dim:] 168 | ]) 169 | magnifier = np.concatenate([ 170 | [1] * prev_branch_out_dim, 171 | magnifier, 172 | [1] * post_branch_out_dim, 173 | ]) 174 | if self.out_bottle is None: 175 | return True, indices, magnifier 176 | else: 177 | self.out_bottle.prev_widen(indices, magnifier, noise=noise) 178 | return False, None, None 179 | else: 180 | return False, None, None 181 | else: 182 | raise ValueError('Do not support %s' % loc['multi-branch']) 183 | 184 | def deepen(self, loc, new_layer_config, input_dim): 185 | if loc['multi-branch'] == 'in_bottle': 186 | assert self.in_bottle is not None, 'Invalid' 187 | return self.in_bottle.deepen(loc['layer'], new_layer_config, input_dim) 188 | elif loc['multi-branch'] == 'out_bottle': 189 | assert self.out_bottle is not None, 'Invalid' 190 | if self.merge == 'concat': input_dim = np.sum([branch.out_features_dim for branch in self.branches]) 191 | else: input_dim = self.branches[0].out_features_dim 192 | return self.out_bottle.deepen(loc['layer'], new_layer_config, input_dim) 193 | elif loc['multi-branch'] == 'branch': 194 | if self.in_bottle is not None: 
input_dim = self.in_bottle.out_features_dim 195 | return self.branches[loc['branch']].deepen(loc['layer'], new_layer_config, input_dim) 196 | else: 197 | raise ValueError('Do not support %s' % loc['multi-branch']) 198 | 199 | def remapped_branches(self, noise=None): 200 | if self.merge == 'add' or self.merge is None: 201 | size = self.out_features_dim 202 | indices = np.random.choice(np.arange(size), size) 203 | new_branches = [] 204 | for branch in self.branches: 205 | new_layers = [layer.copy() for layer in branch.layers[:-1]] 206 | last_layer = branch.layers[-1].copy().remap(indices, noise=noise) 207 | new_layers.append(last_layer) 208 | new_branch = LayerCascade(branch.id, new_layers) 209 | new_branches.append(new_branch) 210 | elif self.merge == 'concat': 211 | new_branches = [] 212 | offset = 0 213 | indices = [] 214 | for branch in self.branches: 215 | size = branch.out_features_dim 216 | sub_indices = np.random.choice(np.arange(size), size) 217 | new_layers = [layer.copy() for layer in branch.layers[:-1]] 218 | last_layer = branch.layers[-1].copy().remap(sub_indices, noise=noise) 219 | new_layers.append(last_layer) 220 | new_branch = LayerCascade(branch.id, new_layers) 221 | new_branches.append(new_branch) 222 | indices.append(sub_indices + offset) 223 | offset += size 224 | indices = np.concatenate(indices) 225 | else: 226 | raise NotImplementedError 227 | return new_branches, indices 228 | -------------------------------------------------------------------------------- /code/models/layers.py: -------------------------------------------------------------------------------- 1 | from models.basic_model import BasicModel 2 | import tensorflow as tf 3 | import numpy as np 4 | import copy 5 | 6 | 7 | def apply_noise(weights, noise_config): 8 | if noise_config is None: 9 | return weights 10 | noise_type = noise_config.get('type', 'normal') 11 | if noise_type == 'normal': 12 | ratio = noise_config.get('ratio', 1e-3) 13 | std = np.std(weights) 14 | noise = np.random.normal(0, std * ratio, size=weights.shape) 15 | elif noise_type == 'uniform': 16 | ratio = noise_config.get('ratio', 1e-3) 17 | mean, _max = np.mean(weights), np.max(weights) 18 | width = (_max - mean) * ratio 19 | noise = np.random.uniform(-width, width, size=weights.shape) 20 | else: 21 | raise NotImplementedError 22 | return weights + noise 23 | 24 | 25 | def get_layer_by_name(name): 26 | if name == 'conv': 27 | return ConvLayer 28 | elif name == 'fc': 29 | return FCLayer 30 | elif name == 'pool': 31 | return PoolLayer 32 | else: 33 | raise ValueError('Unknown layer type: %s' % name) 34 | 35 | 36 | def get_magnifier(old_size, indices): 37 | _l = np.zeros(old_size) 38 | for x in indices: 39 | _l[x] += 1 40 | magnifier = (1.0 / _l)[indices] 41 | return magnifier 42 | 43 | 44 | def get_random_remapping(old_size, new_size): 45 | base = np.arange(old_size) 46 | indices = np.concatenate([base, np.random.choice(base, new_size - old_size)]) 47 | 48 | magnifier = get_magnifier(old_size, indices) 49 | return indices, magnifier 50 | 51 | 52 | class BaseLayer: 53 | """ 54 | _id, batch normalization, activation, dropout, ready 55 | """ 56 | def __init__(self, _id, use_bn=True, activation='relu', keep_prob=1.0, ready=True, pre_activation=True): 57 | self._id = _id 58 | self.use_bn = use_bn 59 | self.activation = activation 60 | self.keep_prob = keep_prob 61 | self.ready = ready 62 | self.pre_activation = pre_activation 63 | 64 | self._scope = None 65 | self._init = None 66 | self.output_op = None 67 | 68 | @property 69 | def id(self): 
return self._id 70 | 71 | @id.setter 72 | def id(self, value): self._id = value 73 | 74 | @property 75 | def init(self): 76 | return self._init 77 | 78 | @property 79 | def param_initializer(self): 80 | if self._init is None: 81 | return None 82 | param_initializer = {} 83 | for key in self.variable_list.keys(): 84 | if self._init[key] is not None: 85 | param_initializer[key] = tf.constant_initializer(self._init[key]) 86 | if len(param_initializer) == 0: 87 | param_initializer = None 88 | return param_initializer 89 | 90 | def renew_init(self, net: BasicModel): 91 | if net is None: 92 | return copy.deepcopy(self._init) 93 | 94 | self._init = {} 95 | for key, var_name in self.variable_list.items(): 96 | var = net.graph.get_tensor_by_name('%s/%s' % (self._scope, var_name)) 97 | self._init[key] = net.sess.run(var) 98 | if len(self._init) == 0: 99 | self._init = None 100 | return copy.deepcopy(self._init) 101 | 102 | def copy(self): 103 | return self.set_from_config(self.get_config(), layer_init=copy.deepcopy(self._init)) 104 | 105 | def get_config(self): 106 | return { 107 | '_id': self.id, 108 | 'use_bn': self.use_bn, 109 | 'activation': self.activation, 110 | 'keep_prob': self.keep_prob, 111 | 'pre_activation': self.pre_activation, 112 | } 113 | 114 | @property 115 | def variable_list(self): 116 | """ 117 | beta: mean scale 118 | gamma: variance scale 119 | y = gamma * (x - moving_mean) / sqrt(epsilon + moving_variance) + beta 120 | """ 121 | if self.use_bn: 122 | return { 123 | 'moving_mean': 'BatchNorm/moving_mean:0', 124 | 'moving_variance': 'BatchNorm/moving_variance:0', 125 | 'beta': 'BatchNorm/beta:0', 126 | 'gamma': 'BatchNorm/gamma:0', 127 | } 128 | else: 129 | return {} 130 | 131 | @staticmethod 132 | def set_from_config(layer_config, layer_init): 133 | raise NotImplementedError 134 | 135 | def build(self, _input, net, store_output_op): 136 | raise NotImplementedError 137 | 138 | def prev_widen(self, indices, magnifier, noise=None): 139 | raise NotImplementedError 140 | 141 | def set_identity_layer(self, strict, param, noise): 142 | raise NotImplementedError 143 | 144 | def widen_bn(self, indices, magnifier, noise=None): 145 | if self.use_bn: 146 | self._init['beta'] = self._init['beta'][indices] 147 | self._init['gamma'] = self._init['gamma'][indices] 148 | self._init['moving_mean'] = self._init['moving_mean'][indices] 149 | self._init['moving_variance'] = self._init['moving_variance'][indices] 150 | 151 | def set_bn_identity(self, strict=True, param=None, noise=None): 152 | if self.use_bn: 153 | if strict: 154 | self._init['moving_mean'] = param['moving_mean'] 155 | self._init['moving_variance'] = param['moving_variance'] 156 | self._init['beta'] = self._init['moving_mean'] 157 | self._init['gamma'] = np.sqrt(self._init['moving_variance'] + param['epsilon']) 158 | else: 159 | # use default initialization for batch normalization layer 160 | self._init['moving_mean'], self._init['moving_variance'] = None, None 161 | self._init['beta'], self._init['gamma'] = None, None 162 | 163 | 164 | class ConvLayer(BaseLayer): 165 | def __init__(self, _id, filter_num, kernel_size=3, strides=1, 166 | use_bn=True, activation='relu', keep_prob=1.0, ready=True, pre_activation=True, **kwargs): 167 | BaseLayer.__init__(self, _id, use_bn, activation, keep_prob, ready, pre_activation) 168 | self.filter_num = filter_num 169 | self.kernel_size = kernel_size 170 | self.strides = strides 171 | 172 | @property 173 | def layer_str(self): 174 | return 'C%d,%d,%d' % (self.filter_num, self.kernel_size, 
self.strides) 175 | 176 | @property 177 | def variable_list(self): 178 | var_list = {'kernel': 'kernel:0'} 179 | var_list.update(super(ConvLayer, self).variable_list) 180 | return var_list 181 | 182 | def get_config(self): 183 | return { 184 | 'name': 'conv', 185 | 'filter_num': self.filter_num, 186 | 'kernel_size': self.kernel_size, 187 | 'strides': self.strides, 188 | **super(ConvLayer, self).get_config(), 189 | } 190 | 191 | @staticmethod 192 | def set_from_config(layer_config, layer_init=None): 193 | conv_layer = ConvLayer(**layer_config) 194 | conv_layer._init = layer_init 195 | return conv_layer 196 | 197 | def build(self, _input, net: BasicModel, store_output_op=False): 198 | output = _input 199 | if not self.ready: 200 | return output 201 | with tf.variable_scope(self._id): 202 | self._scope = tf.get_variable_scope().name 203 | param_initializer = self.param_initializer 204 | if self.pre_activation: 205 | # batch normalization 206 | if self.use_bn: 207 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 208 | net.net_config.bn_decay, param_initializer=param_initializer) 209 | # activation 210 | output = BasicModel.activation(output, self.activation) 211 | # convolutional 212 | output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides, 213 | param_initializer=param_initializer) 214 | else: 215 | # convolutional 216 | output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides, 217 | param_initializer=param_initializer) 218 | # batch normalization 219 | if self.use_bn: 220 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 221 | net.net_config.bn_decay, param_initializer=param_initializer) 222 | # activation 223 | output = BasicModel.activation(output, self.activation) 224 | # dropout 225 | output = BasicModel.dropout(output, self.keep_prob, net.is_training) 226 | if store_output_op: 227 | self.output_op = output 228 | return output 229 | 230 | def widen_filters(self, new_filter_num, noise=None): 231 | """ 232 | Increase the filter number of a conv layer while preserving the functionality 233 | Proposed in 'Net2Net': https://arxiv.org/abs/1511.05641 234 | """ 235 | assert new_filter_num > self.filter_num, 'Invalid new filter number: %d' % new_filter_num 236 | assert self._init is not None, 'Uninitialized layer' 237 | old_size, new_size = self.filter_num, new_filter_num 238 | indices, magnifier = get_random_remapping(old_size, new_size) 239 | # more filters 240 | self.filter_num = new_filter_num 241 | new_kernel = self._init['kernel'][:, :, :, indices] 242 | new_kernel[:, :, :, old_size:] = apply_noise(new_kernel[:, :, :, old_size:], noise.get('wider')) 243 | self._init['kernel'] = new_kernel 244 | if not self.pre_activation: 245 | # widen batch norm variables if use batch norm 246 | self.widen_bn(indices, magnifier, noise=noise) 247 | return indices, magnifier 248 | 249 | def prev_widen(self, indices, magnifier, noise=None): 250 | assert self._init is not None, 'Uninitialized layer' 251 | # rescale kernel 252 | self._init['kernel'] = self._init['kernel'][:, :, indices, :] * magnifier.reshape([1, 1, -1, 1]) 253 | if self.pre_activation: 254 | self.widen_bn(indices, magnifier, noise=noise) 255 | 256 | def set_identity_layer(self, strict=True, param=None, noise=None): 257 | self._init = {} 258 | self.set_bn_identity(strict, param, noise=noise) 259 | mid = self.kernel_size // 2 260 | self._init['kernel'] = np.zeros([self.kernel_size, self.kernel_size, self.filter_num, 
self.filter_num]) 261 | self._init['kernel'][mid, mid] = np.eye(self.filter_num) 262 | self._init['kernel'] = apply_noise(self._init['kernel'], noise.get('deeper')) 263 | self.ready = True 264 | 265 | def remap(self, indices, noise=None): 266 | self.filter_num = len(indices) 267 | self._init['kernel'] = self._init['kernel'][:, :, :, indices] 268 | self._init['kernel'] = apply_noise(self._init['kernel'], noise.get('wider')) 269 | if not self.pre_activation: 270 | self.widen_bn(indices, None, noise=noise) 271 | return self 272 | 273 | 274 | class FCLayer(BaseLayer): 275 | def __init__(self, _id, units, use_bn=True, use_bias=False, activation='relu', keep_prob=1.0, ready=True, 276 | pre_activation=False, **kwargs): 277 | BaseLayer.__init__(self, _id, use_bn, activation, keep_prob, ready, pre_activation) 278 | self.units = units 279 | self.use_bias = use_bias 280 | 281 | @property 282 | def layer_str(self): 283 | return 'FC%d' % self.units 284 | 285 | @property 286 | def variable_list(self): 287 | var_list = {'W': 'W:0'} 288 | if self.use_bias: 289 | var_list['bias'] = 'bias:0' 290 | var_list.update(super(FCLayer, self).variable_list) 291 | return var_list 292 | 293 | def get_config(self): 294 | return { 295 | 'name': 'fc', 296 | 'units': self.units, 297 | 'use_bias': self.use_bias, 298 | **super(FCLayer, self).get_config(), 299 | } 300 | 301 | @staticmethod 302 | def set_from_config(layer_config, layer_init=None): 303 | fc_layer = FCLayer(**layer_config) 304 | fc_layer._init = layer_init 305 | return fc_layer 306 | 307 | def build(self, _input, net: BasicModel, store_output_op=False): 308 | output = _input 309 | if not self.ready: 310 | return output 311 | with tf.variable_scope(self._id): 312 | self._scope = tf.get_variable_scope().name 313 | param_initializer = self.param_initializer 314 | # flatten if not 315 | output = BasicModel.flatten(output) 316 | if self.pre_activation: 317 | # batch normalization 318 | if self.use_bn: 319 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 320 | net.net_config.bn_decay, param_initializer=param_initializer) 321 | # activation 322 | output = BasicModel.activation(output, self.activation) 323 | # FC 324 | output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer) 325 | else: 326 | # FC 327 | output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer) 328 | # batch normalization 329 | if self.use_bn: 330 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 331 | net.net_config.bn_decay, param_initializer=param_initializer) 332 | # activation 333 | output = BasicModel.activation(output, self.activation) 334 | # dropout 335 | output = BasicModel.dropout(output, self.keep_prob, net.is_training) 336 | if store_output_op: 337 | self.output_op = output 338 | return output 339 | 340 | def widen_units(self, new_units_num, noise=None): 341 | """ 342 | Increase the units number of a fc layer while preserving the functionality 343 | Proposed in 'Net2Net': https://arxiv.org/abs/1511.05641 344 | W: [in_dim, out_units] 345 | bias: [out_units] 346 | """ 347 | assert new_units_num > self.units, 'Invalid new units number: %d' % new_units_num 348 | assert self._init is not None, 'Uninitialized layer' 349 | old_size, new_size = self.units, new_units_num 350 | indices, magnifier = get_random_remapping(old_size, new_size) 351 | # more units 352 | self._init['W'] = self._init['W'][:, indices] 353 | self._init['W'][:, old_size:] = 
apply_noise(self._init['W'][:, old_size:], noise.get('wider')) 354 | self.units = new_units_num 355 | # widen bias variable if exist 356 | if self.use_bias: 357 | self._init['bias'] = self._init['bias'][indices] 358 | self._init['bias'][old_size:] = apply_noise(self._init['bias'][old_size:], noise.get('wider')) 359 | if not self.pre_activation: 360 | # widen batch norm variables if use batch norm 361 | self.widen_bn(indices, magnifier, noise=noise) 362 | return indices, magnifier 363 | 364 | def prev_widen(self, indices, magnifier, noise=None): 365 | assert self._init is not None, 'Uninitialized layer' 366 | # rescale W 367 | self._init['W'] = self._init['W'][indices] * magnifier.reshape([-1, 1]) 368 | if self.pre_activation: 369 | self.widen_bn(indices, magnifier, noise=noise) 370 | 371 | def set_identity_layer(self, strict=True, param=None, noise=None): 372 | self._init = {} 373 | self.set_bn_identity(strict, param, noise=noise) 374 | if self.use_bias: 375 | self._init['bias'] = [0.0] * self.units 376 | self._init['W'] = np.eye(self.units) 377 | self._init['W'] = apply_noise(self._init['W'], noise.get('deeper')) 378 | self.ready = True 379 | 380 | def remap(self, indices, noise=None): 381 | self.units = len(indices) 382 | self._init['W'] = self._init['W'][:, indices] 383 | self._init['W'] = apply_noise(self._init['W'], noise.get('wider')) 384 | if self.use_bias: 385 | self._init['bias'] = self._init['bias'][indices] 386 | if not self.pre_activation: 387 | self.widen_bn(indices, None, noise=noise) 388 | return self 389 | 390 | 391 | class PoolLayer(BaseLayer): 392 | def __init__(self, _id, _type, kernel_size=2, strides=2, use_bn=False, activation=None, keep_prob=1.0, 393 | ready=True, pre_activation=True, **kwargs): 394 | BaseLayer.__init__(self, _id, use_bn, activation, keep_prob, ready, pre_activation) 395 | 396 | self._type = _type 397 | self.kernel_size = kernel_size 398 | self.strides = strides 399 | 400 | @property 401 | def layer_str(self): 402 | return 'P%d,%d' % (self.kernel_size, self.strides) 403 | 404 | def get_config(self): 405 | return { 406 | 'name': 'pool', 407 | '_type': self._type, 408 | 'kernel_size': self.kernel_size, 409 | 'strides': self.strides, 410 | **super(PoolLayer, self).get_config(), 411 | } 412 | 413 | @staticmethod 414 | def set_from_config(layer_config, layer_init=None): 415 | pool_layer = PoolLayer(**layer_config) 416 | pool_layer._init = layer_init 417 | return pool_layer 418 | 419 | def build(self, _input, net: BasicModel, store_output_op=False): 420 | output = _input 421 | if not self.ready: 422 | return output 423 | with tf.variable_scope(self._id): 424 | self._scope = tf.get_variable_scope().name 425 | param_initializer = self.param_initializer 426 | if self.pre_activation: 427 | # batch normalization 428 | if self.use_bn: 429 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 430 | net.net_config.bn_decay, param_initializer=param_initializer) 431 | # activation 432 | output = BasicModel.activation(output, self.activation) 433 | # Pooling 434 | if self._type == 'avg': 435 | output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides) 436 | elif self._type == 'max': 437 | output = BasicModel.max_pool(output, k=self.kernel_size, s=self.strides) 438 | else: 439 | raise ValueError('Do not support the pooling type: %s' % self._type) 440 | else: 441 | # Pooling 442 | if self._type == 'avg': 443 | output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides) 444 | elif self._type == 'max': 445 | output 
= BasicModel.max_pool(output, k=self.kernel_size, s=self.strides) 446 | else: 447 | raise ValueError('Do not support the pooling type: %s' % self._type) 448 | # batch normalization 449 | if self.use_bn: 450 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 451 | net.net_config.bn_decay, param_initializer=param_initializer) 452 | # activation 453 | output = BasicModel.activation(output, self.activation) 454 | # dropout 455 | output = BasicModel.dropout(output, self.keep_prob, net.is_training) 456 | if store_output_op: 457 | self.output_op = output 458 | return output 459 | 460 | def set_identity_layer(self, strict=True, param=None, noise=None): 461 | raise ValueError('Pooling layer can never be an identity layer') 462 | 463 | def prev_widen(self, indices, magnifier, noise=None): 464 | self.widen_bn(indices, magnifier, noise=noise) 465 | -------------------------------------------------------------------------------- /code/models/utils.py: -------------------------------------------------------------------------------- 1 | from models.dense_net import DenseNetConfig, DenseNet 2 | from models.convnet import SimpleConvnetConfig, SimpleConvnet 3 | import numpy as np 4 | 5 | 6 | def get_model_config_by_name(name): 7 | if name == 'DenseNet': 8 | return DenseNetConfig 9 | elif name == 'SimpleConvnet': 10 | return SimpleConvnetConfig 11 | else: 12 | raise ValueError('Unknown model type %s' % name) 13 | 14 | 15 | def get_model_by_name(name): 16 | if name == 'DenseNet': 17 | return DenseNet 18 | elif name == 'SimpleConvnet': 19 | return SimpleConvnet 20 | else: 21 | raise ValueError('Unknown model type %s' % name) 22 | 23 | 24 | class RunConfig: 25 | def __init__(self, batch_size, n_epochs, init_lr, reduce_lr_epochs, reduce_lr_factors, opt_config, 26 | dataset, validation_size, validation_frequency, shuffle, normalization, should_save_logs, 27 | should_save_model, renew_logs=False, other_lr_schedule=None, include_extra=True, **kwargs): 28 | 29 | self.batch_size = batch_size 30 | self.n_epochs = n_epochs 31 | self.init_lr = init_lr 32 | self.reduce_lr_epochs = reduce_lr_epochs 33 | self.reduce_lr_factors = reduce_lr_factors 34 | self.opt_config = opt_config 35 | self.dataset = dataset 36 | self.validation_size = validation_size 37 | self.validation_frequency = validation_frequency 38 | self.shuffle = shuffle 39 | self.normalization = normalization 40 | self.should_save_logs = should_save_logs 41 | self.should_save_model = should_save_model 42 | self.renew_logs = renew_logs 43 | self.other_lr_schedule = other_lr_schedule 44 | self.include_extra = include_extra 45 | 46 | def get_config(self): 47 | return self.__dict__ 48 | 49 | def update(self, new_config): 50 | self.__dict__.update(new_config) 51 | 52 | def copy(self): 53 | return RunConfig(**self.get_config()) 54 | 55 | def learning_rate(self, epoch): 56 | if self.other_lr_schedule is None or self.other_lr_schedule.get('type') is None: 57 | lr = self.init_lr 58 | for reduce_lr_epoch, reduce_factor in zip(self.reduce_lr_epochs, self.reduce_lr_factors): 59 | if epoch >= reduce_lr_epoch * self.n_epochs: 60 | lr /= reduce_factor 61 | else: 62 | if self.other_lr_schedule['type'] == 'cosine': 63 | lr_max = self.init_lr 64 | lr_min = self.other_lr_schedule.get('lr_min', 0) 65 | lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + np.cos((epoch - 1) / self.n_epochs * np.pi)) 66 | else: 67 | raise ValueError('Do not support %s' % self.other_lr_schedule['type']) 68 | return lr 69 | 70 | @staticmethod 71 | def 
get_default_run_config(dataset='C10+'): 72 | if dataset in ['C10', 'C10+', 'C100', 'C100+']: 73 | run_config = { 74 | 'batch_size': 64, 75 | 'n_epochs': 300, 76 | 'init_lr': 0.1, 77 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 78 | 'reduce_lr_factors': [10, 10], 79 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 80 | 'dataset': dataset, # choices = [C10, C10+, C100, C100+] 81 | 'validation_size': None, # None or int 82 | 'validation_frequency': 10, 83 | 'shuffle': 'every_epoch', # None, once_prior_train, every_epoch 84 | 'normalization': 'by_channels', # None, divide_256, divide_255, by_channels 85 | 'should_save_logs': True, 86 | 'should_save_model': True, 87 | 'renew_logs': True, 88 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 89 | } 90 | elif dataset in ['SVHN']: 91 | run_config = { 92 | 'batch_size': 64, 93 | 'n_epochs': 40, 94 | 'init_lr': 0.1, 95 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 96 | 'reduce_lr_factors': [10, 10], 97 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 98 | 'dataset': dataset, # choices = [C10, C10+, C100, C100+] 99 | 'validation_size': None, # None or int 100 | 'validation_frequency': 1, 101 | 'shuffle': True, 102 | 'normalization': 'divide_255', # None, divide_256, divide_255, by_channels 103 | 'should_save_logs': True, 104 | 'should_save_model': True, 105 | 'renew_logs': True, 106 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 107 | 'include_extra': False, 108 | } 109 | else: 110 | raise ValueError 111 | return run_config 112 | 113 | -------------------------------------------------------------------------------- /code/run_dense_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from models.dense_net import DenseNet, DenseNetConfig 4 | from data_providers.utils import get_data_provider_by_name 5 | from models.utils import RunConfig 6 | import json 7 | 8 | run_config_cifar = { 9 | 'batch_size': 64, 10 | 'n_epochs': 300, 11 | 'init_lr': 0.1, 12 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 13 | 'reduce_lr_factors': [10, 10], 14 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 15 | 'dataset': 'C10+', # choices = [C10, C10+, C100, C100+] 16 | 'validation_size': 10000, # None or int 17 | 'validation_frequency': 10, 18 | 'shuffle': 'every_epoch', # None, once_prior_train, every_epoch 19 | 'normalization': 'by_channels', # None, divide_256, divide_255, by_channels 20 | 'should_save_logs': True, 21 | 'should_save_model': True, 22 | 'renew_logs': True, 23 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 24 | } 25 | 26 | standard_net_config_cifar = { 27 | 'model_type': 'DenseNet-BC', 28 | 'weight_decay': 1e-4, 29 | 'first_ratio': 2, 30 | 'reduction': 0.5, 31 | 'bc_ratio': 4, 32 | 'bn_epsilon': 1e-5, 33 | 'bn_decay': 0.9, 34 | 'growth_rate': 4, 35 | 'depth': 10, 36 | 'total_blocks': 3, 37 | 'keep_prob': 0.8, 38 | 'pre_activation': True, 39 | } 40 | 41 | 42 | if __name__ == '__main__': 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument( 45 | '--train', action='store_true') 46 | parser.add_argument( 47 | '--test', action='store_true', 48 | help='Test model for required dataset if pretrained model exists.') 49 | parser.add_argument( 50 | '--dataset', type=str, default='C10+', choices=['C10', 'C10+', 'C100', 'C100+'], 51 | ) 52 | 53 | parser.add_argument('--path', type=str, default='') 54 | 
parser.add_argument('--save_config', action='store_true', help='Whether to save config in the path') 55 | parser.add_argument('--save_init', action='store_true') 56 | parser.add_argument('--load_model', action='store_true') 57 | 58 | args = parser.parse_args() 59 | if args.dataset in ['C10', 'C100', 'C10+', 'C100+']: 60 | run_config_cifar['dataset'] = args.dataset 61 | run_config = RunConfig(**run_config_cifar) 62 | net_config = standard_net_config_cifar 63 | else: 64 | raise ValueError 65 | if len(args.path) == 0: 66 | args.path = '../trained_nets/DenseNet/vs=%s_%s_%s_L=%d_K=%d_%s' % \ 67 | (run_config.validation_size, os.uname()[1], net_config['model_type'], net_config['depth'], 68 | net_config['growth_rate'], run_config.dataset) 69 | 70 | if run_config.dataset in ['C10+', 'C100+']: 71 | net_config['keep_prob'] = 1.0 72 | if standard_net_config_cifar['model_type'] == 'DenseNet': 73 | net_config['reduction'] = 1.0 74 | if args.test: args.load_model = True 75 | 76 | # print configurations 77 | print('Run config:') 78 | for k, v in run_config.get_config().items(): 79 | print('\t%s: %s' % (k, v)) 80 | print('Network config:') 81 | for k, v in net_config.items(): 82 | print('\t%s: %s' % (k, v)) 83 | 84 | print('Prepare training data...') 85 | data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 86 | 87 | # set net config 88 | net_config = DenseNetConfig().set_standard_dense_net(data_provider=data_provider, **net_config) 89 | print('Initialize the model...') 90 | model = DenseNet(args.path, data_provider, run_config, net_config) 91 | 92 | # save configs 93 | if args.save_config: 94 | model.save_config(args.path) 95 | 96 | if args.load_model: model.load_model() 97 | if args.test: 98 | # test 99 | print('Data provider test images: ', data_provider.test.num_examples) 100 | print('Testing...') 101 | loss, accuracy = model.test(data_provider.test, batch_size=200) 102 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 103 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 104 | elif args.train: 105 | # train the model 106 | print('Data provider train images: ', data_provider.train.num_examples) 107 | model.train_all_epochs() 108 | print('Data provider test images: ', data_provider.test.num_examples) 109 | print('Testing...') 110 | loss, accuracy = model.test(data_provider.test, batch_size=200) 111 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 112 | 113 | # save inits 114 | if args.save_init: 115 | model.save_init(os.path.join(args.path, 'snapshot')) 116 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 117 | 118 | 119 | -------------------------------------------------------------------------------- /code/run_simple_convnet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from models.convnet import SimpleConvnetConfig, SimpleConvnet 4 | from data_providers.utils import get_data_provider_by_name 5 | from models.utils import RunConfig 6 | import json 7 | import copy 8 | 9 | 10 | run_config_cifar = { 11 | 'batch_size': 64, 12 | 'n_epochs': 300, 13 | 'init_lr': 0.1, 14 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 15 | 'reduce_lr_factors': [10, 10], 16 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 17 | 'dataset': 'C10+', # choices = [C10, C10+, C100, C100+] 18 | 'validation_size': 10000, # None or int 
19 | 'validation_frequency': 10, 20 | 'shuffle': 'every_epoch', # None, once_prior_train, every_epoch 21 | 'normalization': 'by_channels', # None, divide_256, divide_255, by_channels 22 | 'should_save_logs': True, 23 | 'should_save_model': True, 24 | 'renew_logs': True, 25 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 26 | } 27 | 28 | standard_net_config = { 29 | 'conv_blocks_config': [ 30 | [1, 3, 4], 31 | [1, 3, 4], 32 | [1, 3, 4], 33 | [1, 3, 4], 34 | ], 35 | 'fc_block_config': [8], 36 | 'weight_decay': 1e-4, 37 | 'drop_scheme': {'type': 'conv', 'conv_drop': 0.8, 'pool_drop': 0.7, 'fc_drop': 0.5}, 38 | 'bn_epsilon': 1e-5, 39 | 'bn_decay': 0.9, 40 | } 41 | 42 | run_config_svhn = copy.deepcopy(run_config_cifar) 43 | run_config_svhn.update({ 44 | 'n_epochs': 40, 45 | 'dataset': 'SVHN', 46 | 'validation_size': 10000, 47 | 'validation_frequency': 1, 48 | 'normalization': 'divide_255', 49 | 'other_lr_schedule': {'type': 'cosine'}, 50 | 'include_extra': False, 51 | 'shuffle': True, 52 | }) 53 | 54 | 55 | run_str = '_'.join(['{}-{}-{}'.format(*block_config) for block_config in 56 | standard_net_config['conv_blocks_config']]) 57 | run_str += '_%s' % '_'.join([str(units) for units in standard_net_config['fc_block_config']]) 58 | 59 | if __name__ == '__main__': 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument( 62 | '--train', action='store_true') 63 | parser.add_argument( 64 | '--test', action='store_true', 65 | help='Test model for required dataset if pretrained model exists.') 66 | parser.add_argument( 67 | '--dataset', type=str, default='C10+', choices=['C10', 'C10+', 'C100', 'C100+', 'SVHN'], 68 | ) 69 | parser.add_argument('--path', type=str, default='') 70 | parser.add_argument('--save_config', action='store_true', help='Whether to save config in the path') 71 | parser.add_argument('--save_init', action='store_true') 72 | parser.add_argument('--load_model', action='store_true') 73 | 74 | args = parser.parse_args() 75 | if args.dataset in ['C10', 'C100', 'C10+', 'C100+']: 76 | run_config_cifar['dataset'] = args.dataset 77 | run_config = RunConfig(**run_config_cifar) 78 | elif args.dataset in ['SVHN']: 79 | run_config = RunConfig(**run_config_svhn) 80 | else: 81 | raise ValueError 82 | if len(args.path) == 0: 83 | args.path = '../trained_nets/Convnet/vs=%s_Convnet_%s_%s_%s' % \ 84 | (run_config.validation_size, os.uname()[1], run_str, run_config.dataset) 85 | if args.test: args.load_model = True 86 | 87 | # print configurations 88 | print('Run config:') 89 | for k, v in run_config.get_config().items(): 90 | print('\t%s: %s' % (k, v)) 91 | print('Network config:') 92 | for k, v in standard_net_config.items(): 93 | print('\t%s: %s' % (k, v)) 94 | 95 | print('Prepare training data...') 96 | data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 97 | 98 | # set net config 99 | net_config = SimpleConvnetConfig() 100 | net_config.set_standard_convnet(data_provider=data_provider, **standard_net_config) 101 | print('Initialize the model...') 102 | model = SimpleConvnet(args.path, data_provider, run_config, net_config) 103 | 104 | # save configs 105 | if args.save_config: 106 | model.save_config(args.path) 107 | 108 | if args.load_model: model.load_model() 109 | if args.test: 110 | # test 111 | print('Data provider test images: ', data_provider.test.num_examples) 112 | print('Testing...') 113 | loss, accuracy = model.test(data_provider.test, batch_size=200) 114 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 115 | 
json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 116 | elif args.train: 117 | # train the model 118 | print('Data provider train images: ', data_provider.train.num_examples) 119 | model.train_all_epochs() 120 | print('Data provider test images: ', data_provider.test.num_examples) 121 | print('Testing...') 122 | loss, accuracy = model.test(data_provider.test, batch_size=200) 123 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 124 | 125 | # save inits 126 | if args.save_init: 127 | model.save_init(os.path.join(args.path, 'snapshot')) 128 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 129 | -------------------------------------------------------------------------------- /code/server_config: -------------------------------------------------------------------------------- 1 | [ 2 | ["", , "/client.py"], 3 | ["", , "/client.py"], 4 | ["", , "/client.py"] 5 | ] -------------------------------------------------------------------------------- /figures/result_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/figures/result_sample.png -------------------------------------------------------------------------------- /start_nets/start_net_convnet_small_C10+/init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/start_nets/start_net_convnet_small_C10+/init -------------------------------------------------------------------------------- /start_nets/start_net_convnet_small_C10+/net.config: -------------------------------------------------------------------------------- 1 | { 2 | "name": "SimpleConvnet", 3 | "weight_decay": 0.0001, 4 | "bn_epsilon": 1e-05, 5 | "bn_decay": 0.9, 6 | "drop_scheme": { 7 | "type": "conv", 8 | "conv_drop": 1.0, 9 | "pool_drop": 0.7, 10 | "fc_drop": 0.5 11 | }, 12 | "layer_cascade": { 13 | "_id": "SimpleConvNet", 14 | "layers": [ 15 | { 16 | "name": "conv", 17 | "filter_num": 4, 18 | "kernel_size": 3, 19 | "strides": 1, 20 | "_id": "conv_0", 21 | "use_bn": true, 22 | "activation": "relu", 23 | "keep_prob": 1.0, 24 | "pre_activation": false 25 | }, 26 | { 27 | "name": "pool", 28 | "_type": "max", 29 | "kernel_size": 2, 30 | "strides": 2, 31 | "_id": "pool_0", 32 | "use_bn": false, 33 | "activation": null, 34 | "keep_prob": 1.0, 35 | "pre_activation": false 36 | }, 37 | { 38 | "name": "conv", 39 | "filter_num": 4, 40 | "kernel_size": 3, 41 | "strides": 1, 42 | "_id": "conv_1", 43 | "use_bn": true, 44 | "activation": "relu", 45 | "keep_prob": 1.0, 46 | "pre_activation": false 47 | }, 48 | { 49 | "name": "pool", 50 | "_type": "max", 51 | "kernel_size": 2, 52 | "strides": 2, 53 | "_id": "pool_1", 54 | "use_bn": false, 55 | "activation": null, 56 | "keep_prob": 1.0, 57 | "pre_activation": false 58 | }, 59 | { 60 | "name": "conv", 61 | "filter_num": 4, 62 | "kernel_size": 3, 63 | "strides": 1, 64 | "_id": "conv_2", 65 | "use_bn": true, 66 | "activation": "relu", 67 | "keep_prob": 1.0, 68 | "pre_activation": false 69 | }, 70 | { 71 | "name": "pool", 72 | "_type": "max", 73 | "kernel_size": 2, 74 | "strides": 2, 75 | "_id": "pool_2", 76 | "use_bn": false, 77 | "activation": null, 78 | "keep_prob": 1.0, 79 | "pre_activation": false 80 | }, 81 | { 82 | "name": "conv", 83 | "filter_num": 4, 84 | "kernel_size": 3, 
85 | "strides": 1, 86 | "_id": "conv_3", 87 | "use_bn": true, 88 | "activation": "relu", 89 | "keep_prob": 1.0, 90 | "pre_activation": false 91 | }, 92 | { 93 | "name": "pool", 94 | "_type": "avg", 95 | "kernel_size": 4, 96 | "strides": 4, 97 | "_id": "pool_4", 98 | "use_bn": false, 99 | "activation": null, 100 | "keep_prob": 1.0, 101 | "pre_activation": false 102 | }, 103 | { 104 | "name": "fc", 105 | "units": 8, 106 | "use_bias": false, 107 | "_id": "fc_0", 108 | "use_bn": true, 109 | "activation": "relu", 110 | "keep_prob": 1.0, 111 | "pre_activation": false 112 | }, 113 | { 114 | "name": "fc", 115 | "units": 10, 116 | "use_bias": true, 117 | "_id": "fc_1", 118 | "use_bn": false, 119 | "activation": null, 120 | "keep_prob": 1.0, 121 | "pre_activation": false 122 | } 123 | ] 124 | } 125 | } --------------------------------------------------------------------------------
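
The start net above is described entirely by this JSON config. As a quick sanity check, the layer cascade it defines can be summarized with a few lines of standard-library Python. The snippet below is a sketch that is not part of the repository: it assumes it is run from the repository root, uses only the fields visible in the listing, and is purely illustrative — the project itself parses these configs through its own model and config classes (e.g. `SimpleConvnetConfig`).

```python
import json

# Load the start net's config shown in the listing above.
# Path assumes the repository layout from this dump and running from the repo root.
with open('start_nets/start_net_convnet_small_C10+/net.config') as f:
    net_config = json.load(f)

print(net_config['name'], '| weight_decay =', net_config['weight_decay'])

# Print one line per layer in the cascade, using the keys each layer type carries.
for layer in net_config['layer_cascade']['layers']:
    if layer['name'] == 'conv':
        desc = '%dx%d conv, %d filters' % (
            layer['kernel_size'], layer['kernel_size'], layer['filter_num'])
    elif layer['name'] == 'pool':
        desc = '%s pool, %dx%d, stride %d' % (
            layer['_type'], layer['kernel_size'], layer['kernel_size'], layer['strides'])
    else:  # fc
        desc = 'fc, %d units' % layer['units']
    print('%-8s %s' % (layer['_id'], desc))
```

Running it simply lists the conv/pool/fc layers of the small C10+ start net (four 3x3 conv layers with 4 filters each, interleaved pooling, then an 8-unit and a 10-unit fully connected layer), which matches the architecture that the search in `arch_search_convnet_net2net.py` grows from.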