├── .gitignore ├── LICENSE ├── README.md ├── code ├── README.md ├── arch_search.py ├── arch_search │ ├── __init__.py │ ├── arch_search_convnet_net2net.py │ └── arch_search_densenet_net2net.py ├── client.py ├── data_providers │ ├── __init__.py │ ├── base_provider.py │ ├── cifar.py │ ├── downloader.py │ ├── svhn.py │ └── utils.py ├── expdir_monitor │ ├── __init__.py │ ├── arch_manager.py │ ├── distributed.py │ └── expdir_monitor.py ├── main.py ├── meta_controller │ ├── __init__.py │ ├── base_controller.py │ └── rl_controller.py ├── models │ ├── __init__.py │ ├── basic_model.py │ ├── convnet.py │ ├── dense_net.py │ ├── layer_cascade.py │ ├── layer_multi_branch.py │ ├── layers.py │ └── utils.py ├── run_dense_net.py ├── run_simple_convnet.py └── server_config ├── figures └── result_sample.png └── start_nets └── start_net_convnet_small_C10+ ├── init └── net.config /.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | 10 | ## Intermediate documents: 11 | *.dvi 12 | *-converted-to.* 13 | # these rules might exclude image files for figures etc. 14 | # *.ps 15 | # *.eps 16 | # *.pdf 17 | /Datasets 18 | 19 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 20 | *.bbl 21 | *.bcf 22 | *.blg 23 | *-blx.aux 24 | *-blx.bib 25 | *.brf 26 | *.run.xml 27 | 28 | ## Build tool auxiliary files: 29 | *.fdb_latexmk 30 | *.synctex 31 | *.synctex.gz 32 | *.synctex.gz(busy) 33 | *.pdfsync 34 | 35 | ## Auxiliary and intermediate files from other packages: 36 | 37 | 38 | # algorithms 39 | *.alg 40 | *.loa 41 | 42 | # achemso 43 | acs-*.bib 44 | 45 | # amsthm 46 | *.thm 47 | 48 | # beamer 49 | *.nav 50 | *.snm 51 | *.vrb 52 | 53 | #(e)ledmac/(e)ledpar 54 | *.end 55 | *.[1-9] 56 | *.[1-9][0-9] 57 | *.[1-9][0-9][0-9] 58 | *.[1-9]R 59 | *.[1-9][0-9]R 60 | *.[1-9][0-9][0-9]R 61 | *.eledsec[1-9] 62 | *.eledsec[1-9]R 63 | *.eledsec[1-9][0-9] 64 | *.eledsec[1-9][0-9]R 65 | *.eledsec[1-9][0-9][0-9] 66 | *.eledsec[1-9][0-9][0-9]R 67 | 68 | # glossaries 69 | *.acn 70 | *.acr 71 | *.glg 72 | *.glo 73 | *.gls 74 | 75 | # gnuplottex 76 | *-gnuplottex-* 77 | 78 | # hyperref 79 | 80 | # knitr 81 | *-concordance.tex 82 | *.tikz 83 | *-tikzDictionary 84 | 85 | # listings 86 | *.lol 87 | 88 | # makeidx 89 | *.idx 90 | *.ilg 91 | *.ind 92 | *.ist 93 | tex/rl-meta.pdf 94 | 95 | # minitoc 96 | *.maf 97 | *.mtc 98 | *.mtc[0-9] 99 | *.mtc[1-9][0-9] 100 | 101 | # minted 102 | _minted* 103 | *.pyg 104 | 105 | # morewrites 106 | *.mw 107 | 108 | # mylatexformat 109 | *.fmt 110 | 111 | # nomencl 112 | *.nlo 113 | 114 | # sagetex 115 | *.sagetex.sage 116 | *.sagetex.py 117 | *.sagetex.scmd 118 | 119 | # sympy 120 | *.sout 121 | *.sympy 122 | sympy-plots-for-*.tex/ 123 | 124 | # TikZ & PGF 125 | *.dpth 126 | *.md5 127 | *.auxlock 128 | 129 | # todonotes 130 | *.tdo 131 | 132 | # xindy 133 | *.xdy 134 | 135 | # WinEdt 136 | *.bak 137 | *.sav 138 | *.DS_Store 139 | /data 140 | */.idea/ 141 | /output/ 142 | /exp/ 143 | /backup/ 144 | 145 | # python 146 | __pycache__ 147 | .pyc 148 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Han Cai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software 
without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient Architecture Search by Network Transformation 2 | 3 | Code for the paper [Efficient Architecture Search by Network Transformation](https://arxiv.org/abs/1707.04873) in AAAI 2018. 4 | 5 | ## Reference 6 | ```bash 7 | @inproceedings{cai2018efficient, 8 | title={Efficient Architecture Search by Network Transformation}, 9 | author={Cai, Han and Chen, Tianyao and Zhang, Weinan and Yu, Yong and Wang, Jun}, 10 | booktitle={AAAI}, 11 | year={2018} 12 | } 13 | ``` 14 | 15 | ## Related Projects 16 | - [Path-Level Network Transformation for Efficient Architecture Search](https://arxiv.org/abs/1806.02639), in ICML 2018. [Code](https://github.com/han-cai/PathLevel-EAS). 17 | 18 | ## Dependencies 19 | 20 | * Python 3.6 21 | * Tensorflow 1.3.0 22 | 23 | ## Top Nets 24 | 25 | | nets | test accuracy (%) | Dataset | 26 | | ----------------------- | ------------- | ----- | 27 | | [C10+_Conv_Depth_20](https://drive.google.com/open?id=1BaSHPXSTxKO5avmtzJGwinLUkSPbwJYf) | 95.77 | C10+ | 28 | | [C10+_DenseNet_Depth_76](https://drive.google.com/open?id=1zXTB_DmS7i9HiDAxmzrBLmwjmZmfXI2n) | 96.56 | C10+ | 29 | | [C10_DenseNet_Depth_70](https://drive.google.com/open?id=1T0UMowk6lN9GzDmWcjwMG6lmbh9rogXx) | 95.34 | C10 | 30 | | [SVHN_Conv_Depth_20](https://drive.google.com/open?id=14CoT52n6Q-dOXSHQPGNGlIh_0SjXE6q7) | 98.27 | SVHN | 31 | 32 | For checking these networks, please download the corresponding model files and run the following command under the folder of **code**: 33 | ```bash 34 | $ python3 main.py --test --path= 35 | ``` 36 | 37 | For example, by running 38 | ```bash 39 | $ python3 main.py --test --path=../final_nets/C10+_Conv_Depth_20 40 | ``` 41 | you will get 42 | ```bash 43 | Testing... 44 | mean cross_entropy: 0.210500, mean accuracy: 0.957700 45 | test performance: 0.9577 46 | ``` 47 | 48 | ## Acknowledgement 49 | The DenseNet part of this code is based on the [repository by Illarion](https://github.com/ikhlestov/vision_networks). Many thanks to [Illarion](https://github.com/ikhlestov). 50 | 51 | -------------------------------------------------------------------------------- /code/README.md: -------------------------------------------------------------------------------- 1 | ## Architecture Search and Distributed Running 2 | To run architecture search experiments, you should first set up your 3 | environment for distributed running. 
Suppose there is a server 4 | and multiple GPU clients, each of which the server can access 5 | via **ssh**. 6 | 7 | On the server side, you should have a configuration file **server_config** 8 | under the folder of **code**. An example of the **server_config** file is: 9 | ```bash 10 | [ 11 | ["<client_1 address>", <number of GPUs on client_1>, "<path to code folder on client_1>/client.py"], 12 | ["<client_2 address>", <number of GPUs on client_2>, "<path to code folder on client_2>/client.py"], 13 | ["<client_3 address>", <number of GPUs on client_3>, "<path to code folder on client_3>/client.py"] 14 | ] 15 | ``` 16 | Once the **server_config** file is ready, you can run the following command under the folder of 17 | **code** on the server side to start the experiment: 18 | ```bash 19 | python3 arch_search.py --setting=convnet 20 | ``` 21 | 22 | 23 | When a remote GPU, e.g. GPU_0 on client 1, is chosen 24 | by the server, the following command is executed: 25 | ```bash 26 | ssh <client_1 address> CUDA_VISIBLE_DEVICES=0 python3 <path to code folder on client_1>/client.py 27 | ``` 28 | Make sure that 29 | - you can reach each client via **ssh** from the server without a password. 30 | [ssh-copy-id](https://www.ssh.com/ssh/copy-id) may be helpful if you have problems with the password. 31 | - the command "CUDA_VISIBLE_DEVICES=0 python3 <path to code folder>/client.py" can be 32 | executed correctly on the client side. 33 | 34 | For further details, please refer to **code/expdir_monitor/distributed.py**. 35 | 36 | By running the code with the small network, i.e. 37 | **start_nets/start_net_convnet_small_C10+**, as the starting point, 38 | you can get results like: 39 | 40 | ![](../figures/result_sample.png) 41 | -------------------------------------------------------------------------------- /code/arch_search.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from arch_search.arch_search_densenet_net2net import arch_search_densenet 4 | from arch_search.arch_search_convnet_net2net import arch_search_convnet 5 | 6 | _SEED = 110 7 | np.random.seed(_SEED) 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | '--setting', type=str, default='convnet', choices=['convnet', 'densenet'], 13 | ) 14 | 15 | args = parser.parse_args() 16 | if args.setting == 'convnet': 17 | """ 18 | Architecture Search on Convnet 19 | """ 20 | arch_search_convnet( 21 | start_net_path='../start_nets/start_net_convnet_small_C10+', 22 | arch_search_folder='../arch_search/Convnet/C10+/Conv_C10+_rl_small', 23 | net_pool_folder='../net_pool/Convnet/C10+/Conv_C10+_rl_small', 24 | max_episodes=15, 25 | random=False, 26 | ) 27 | elif args.setting == 'densenet': 28 | """ 29 | Architecture Search on DenseNet 30 | """ 31 | arch_search_densenet( 32 | start_net_path='placeholder', 33 | arch_search_folder='placeholder', 34 | net_pool_folder='placeholder', 35 | max_episodes=15, 36 | ) 37 | else: 38 | pass 39 | -------------------------------------------------------------------------------- /code/arch_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/arch_search/__init__.py -------------------------------------------------------------------------------- /code/arch_search/arch_search_convnet_net2net.py: -------------------------------------------------------------------------------- 1 | from expdir_monitor.arch_manager import ArchManager 2 | from meta_controller.base_controller import Vocabulary, EncoderNet, WiderActorNet, DeeperActorNet 3 | from meta_controller.rl_controller import ReinforceNet2NetController 4 | from time import gmtime, strftime, time 5 | from datetime import timedelta 6 |
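# Note (illustrative): the helpers below serialize a ConvNet configuration into a flat token string,
# where each ConvLayer becomes 'conv-<filter_num>-<kernel_size>', each FCLayer becomes 'fc-<units>',
# pooling layers become 'pool', and tokens are joined by '_'. With hypothetical layer sizes, an
# encoding would look like 'conv-16-3_conv-16-3_pool_fc-64'; get_net_seq() then maps such strings to
# integer codes padded with vocabulary.pad_code up to num_steps before feeding the encoder RNN.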
from models.layers import ConvLayer, FCLayer, PoolLayer 7 | import re 8 | import numpy as np 9 | 10 | 11 | def get_net_str(net_configs): 12 | if isinstance(net_configs, list): 13 | if len(net_configs) == 1: 14 | net_config = net_configs[0] 15 | net_str = [] 16 | for layer in net_config.layer_cascade.layers[:-1]: 17 | if isinstance(layer, ConvLayer): 18 | net_str.append('conv-%d-%d' % (layer.filter_num, layer.kernel_size)) 19 | elif isinstance(layer, FCLayer): 20 | net_str.append('fc-%d' % layer.units) 21 | else: 22 | net_str.append('pool') 23 | return ['_'.join(net_str)] 24 | else: 25 | net_str_list = [] 26 | for net_config in net_configs: 27 | net_str_list += get_net_str([net_config]) 28 | return net_str_list 29 | else: 30 | return get_net_str([net_configs])[0] 31 | 32 | 33 | def get_net_seq(net_configs, vocabulary, num_steps): 34 | net_str_list = get_net_str(net_configs) 35 | net_seq = [] 36 | seq_len = [] 37 | for net_str in net_str_list: 38 | net_str = re.split('_', net_str) 39 | net_code = vocabulary.get_code(net_str) 40 | _len = len(net_code) 41 | net_code += [vocabulary.pad_code for _ in range(len(net_code), num_steps)] 42 | net_seq.append(net_code) 43 | seq_len.append(_len) 44 | return np.array(net_seq), np.array(seq_len) 45 | 46 | 47 | def get_block_layer_num(net_configs): 48 | if len(net_configs) == 1: 49 | net_config = net_configs[0] 50 | block_layer_num = [] 51 | _count = 0 52 | for layer in net_config.layer_cascade.layers[:-1]: 53 | if isinstance(layer, PoolLayer): 54 | block_layer_num.append(_count) 55 | _count = 0 56 | else: 57 | _count += 1 58 | block_layer_num.append(_count) 59 | return np.array([block_layer_num]) 60 | else: 61 | block_layer_num = [] 62 | for net_config in net_configs: 63 | block_layer_num.append(get_block_layer_num([net_config])) 64 | return np.concatenate(block_layer_num, axis=0) 65 | 66 | 67 | def apply_wider_decision(wider_decision, net_configs, filter_num_list, units_num_list, noise): 68 | if len(net_configs) == 1: 69 | decision = wider_decision[0] 70 | net_config = net_configs[0] 71 | decision_mask = [] 72 | for _i, layer in enumerate(net_config.layer_cascade.layers[:-1]): 73 | if isinstance(layer, ConvLayer): 74 | if layer.filter_num >= filter_num_list[-1]: 75 | decision_mask.append(0.0) 76 | else: 77 | decision_mask.append(1.0) 78 | if decision[_i]: 79 | new_filter_number = layer.filter_num 80 | for fn in filter_num_list: 81 | if fn > new_filter_number: 82 | new_filter_number = fn 83 | break 84 | net_config.widen( 85 | layer_idx=_i, new_width=new_filter_number, noise=noise 86 | ) 87 | elif isinstance(layer, FCLayer): 88 | if layer.units >= units_num_list[-1]: 89 | decision_mask.append(0.0) 90 | else: 91 | decision_mask.append(1.0) 92 | if decision[_i]: 93 | new_units_num = layer.units 94 | for un in units_num_list: 95 | if un > new_units_num: 96 | new_units_num = un 97 | break 98 | net_config.widen( 99 | layer_idx=_i, new_width=new_units_num, noise=noise, 100 | ) 101 | else: 102 | decision_mask.append(0.0) 103 | decision_mask += [0.0] * (len(decision) - len(decision_mask)) 104 | return np.array([decision_mask]) 105 | else: 106 | decision_mask = [] 107 | for _i, net_config in enumerate(net_configs): 108 | decision = wider_decision[_i] 109 | mask = apply_wider_decision([decision], [net_config], filter_num_list, units_num_list, noise) 110 | decision_mask.append(mask) 111 | return np.concatenate(decision_mask, axis=0) 112 | 113 | 114 | def apply_deeper_decision(deeper_decision, net_configs, kernel_size_list, noise): 115 | if len(net_configs) == 1: 116 
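# Each row of deeper_decision is a triple [block index, layer offset within that block, index into
# kernel_size_list]; pooling layers mark block boundaries, which is what `_pt` counts below.
# Illustrative (hypothetical) example: a decision of [1, 2, 0] grows the second block at its third
# layer, and the newly inserted layer uses kernel size kernel_size_list[0] when that layer is a ConvLayer.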
| decision = deeper_decision[0] 117 | net_config = net_configs[0] 118 | 119 | block_decision, layer_idx_decision, ks_decision = decision 120 | decision_mask = [1.0, 1.0] 121 | block_idx, _pt = 0, 0 122 | to_set_layers = [] 123 | for _i, layer in enumerate(net_config.layer_cascade.layers[:-1]): 124 | if _pt == block_decision: 125 | real_layer_idx = _i + layer_idx_decision 126 | prev_layer = net_config.layer_cascade.layers[real_layer_idx] 127 | if isinstance(prev_layer, ConvLayer): 128 | if 'conv' in net_config.drop_scheme['type']: 129 | keep_prob = net_config.drop_scheme.get('conv_drop', 1.0) 130 | else: 131 | keep_prob = 1.0 132 | decision_mask.append(1.0) 133 | ks = kernel_size_list[ks_decision] 134 | new_layer, prev_layer = net_config.deepen( 135 | layer_idx=real_layer_idx, 136 | new_layer_config={'name': 'conv', 'kernel_size': ks, 'pre_activation': False, 137 | 'keep_prob': keep_prob}, 138 | ) 139 | to_set_layers.append([new_layer, prev_layer]) 140 | elif isinstance(prev_layer, FCLayer): 141 | if 'fc' in net_config.drop_scheme['type']: 142 | keep_prob = net_config.drop_scheme.get('fc_drop', 1.0) 143 | else: 144 | keep_prob = 1.0 145 | decision_mask.append(0.0) 146 | new_layer, prev_layer = net_config.deepen( 147 | layer_idx=real_layer_idx, 148 | new_layer_config={'name': 'fc', 'keep_prob': keep_prob}, 149 | ) 150 | to_set_layers.append([new_layer, prev_layer]) 151 | else: 152 | raise ValueError 153 | break 154 | if isinstance(layer, PoolLayer): 155 | _pt += 1 156 | return np.array([decision_mask]), to_set_layers 157 | else: 158 | decision_mask = [] 159 | to_set_layers = [] 160 | for _i, net_config in enumerate(net_configs): 161 | decision = deeper_decision[_i] 162 | mask, to_set = apply_deeper_decision([decision], [net_config], kernel_size_list, noise) 163 | decision_mask.append(mask) 164 | to_set_layers.append(to_set) 165 | return np.concatenate(decision_mask, axis=0), to_set_layers 166 | 167 | 168 | def arch_search_convnet(start_net_path, arch_search_folder, net_pool_folder, max_episodes, random=False): 169 | filter_num_list = [_i for _i in range(4, 44, 4)] 170 | units_num_list = [_i for _i in range(8, 88, 8)] 171 | # filter_num_list = [16, 32, 64, 96, 128, 192, 256, 320, 384, 448, 512, 576, 640] 172 | # units_num_list = [64, 128, 256, 384, 512, 640, 768, 896, 1024, 1152, 1280] 173 | kernel_size_list = [1, 3, 5] 174 | 175 | # encoder config 176 | layer_token_list = ['conv-%d-%d' % (f, k) for f in filter_num_list for k in [1, 3, 5]] 177 | layer_token_list += ['fc-%d' % u for u in units_num_list] + ['pool'] 178 | encoder_config = { 179 | 'num_steps': 50, 180 | 'vocab': Vocabulary(layer_token_list), 181 | 'embedding_dim': 16, 182 | 'rnn_units': 50, 183 | 'rnn_type': 'bi_lstm', 184 | 'rnn_layers': 1, 185 | } 186 | 187 | # wider actor config 188 | wider_actor_config = { 189 | 'out_dim': 1, 190 | 'num_steps': encoder_config['num_steps'], 191 | 'net_type': 'simple', 192 | 'net_config': None, 193 | } 194 | 195 | # deeper actor config 196 | deeper_actor_config = { 197 | 'decision_num': 3, 198 | 'out_dims': [5, 10, len(kernel_size_list)], 199 | 'embedding_dim': encoder_config['embedding_dim'], 200 | 'cell_type': 'lstm', 201 | 'rnn_layers': 1, 202 | 'attention_config': None, 203 | } 204 | 205 | # meta-controller config 206 | entropy_penalty = 1e-5 207 | learning_rate = 2e-3 208 | opt_config = ['adam', {}] 209 | 210 | # net2net noise config 211 | noise_config = { 212 | 'wider': {'type': 'normal', 'ratio': 1e-2}, 213 | 'deeper': {'type': 'normal', 'ratio': 1e-3}, 214 | } 215 | 216 | # episode 
config 217 | episode_config = { 218 | 'batch_size': 10, 219 | 'wider_action_num': 4, 220 | 'deeper_action_num': 5, 221 | } 222 | 223 | # arch search run config 224 | arch_search_run_config = { 225 | 'n_epochs': 20, 226 | 'init_lr': 0.02, 227 | 'validation_size': 5000, 228 | 'other_lr_schedule': {'type': 'cosine'}, 229 | 'batch_size': 64, 230 | 'include_extra': False, 231 | } 232 | 233 | # reward config 234 | reward_config = { 235 | 'func': 'tan', 236 | 'decay': 0.95, 237 | } 238 | 239 | arch_manager = ArchManager(start_net_path, arch_search_folder, net_pool_folder) 240 | _, run_config, _ = arch_manager.get_start_net() 241 | run_config.update(arch_search_run_config) 242 | 243 | encoder = EncoderNet(**encoder_config) 244 | wider_actor = WiderActorNet(**wider_actor_config) 245 | deeper_actor = DeeperActorNet(**deeper_actor_config) 246 | meta_controller = ReinforceNet2NetController(arch_manager.meta_controller_path, entropy_penalty, 247 | encoder, wider_actor, deeper_actor, opt_config) 248 | meta_controller.load() 249 | 250 | for _i in range(arch_manager.episode + 1, max_episodes + 1): 251 | print('episode. %d start. current time: %s' % (_i, strftime("%a, %d %b %Y %H:%M:%S", gmtime()))) 252 | start_time = time() 253 | 254 | nets = [arch_manager.get_start_net(copy=True) for _ in range(episode_config['batch_size'])] 255 | net_configs = [net_config for net_config, _, _ in nets] 256 | 257 | # feed_dict for update the controller 258 | wider_decision_trajectory, wider_decision_mask = [], [] 259 | deeper_decision_trajectory, deeper_decision_mask = [], [] 260 | deeper_block_layer_num = [] 261 | encoder_input_seq, encoder_seq_len = [], [] 262 | wider_seg_deeper = 0 263 | 264 | if random: 265 | # random search 266 | remain_wider_num = episode_config['wider_action_num'] 267 | remain_deeper_num = episode_config['deeper_action_num'] 268 | while remain_wider_num > 0 or remain_deeper_num > 0: 269 | rand_idx = np.random.randint(0, remain_wider_num + remain_deeper_num) 270 | if rand_idx < remain_wider_num: 271 | wider_decision = np.random.choice(2, [episode_config['batch_size'], encoder.num_steps]) 272 | apply_wider_decision(wider_decision, net_configs, filter_num_list, units_num_list, noise_config) 273 | remain_wider_num -= 1 274 | else: 275 | block_layer_num = get_block_layer_num(net_configs) 276 | deeper_decision = np.zeros([episode_config['batch_size'], deeper_actor.decision_num], np.int) 277 | deeper_decision[:, 0] = np.random.choice(deeper_actor.out_dims[0], deeper_decision[:, 0].shape) 278 | for _k, block_decision in enumerate(deeper_decision[:, 0]): 279 | available_layer_num = block_layer_num[_k, block_decision] 280 | deeper_decision[_k, 1] = np.random.randint(0, available_layer_num) 281 | deeper_decision[:, 2] = np.random.choice(deeper_actor.out_dims[2], deeper_decision[:, 2].shape) 282 | 283 | _, to_set_layers = apply_deeper_decision(deeper_decision, net_configs, 284 | kernel_size_list, noise_config) 285 | for _k, net_config in enumerate(net_configs): 286 | net_config.set_identity4deepen(to_set_layers[_k], arch_manager.data_provider, 287 | batch_size=64, batch_num=1, noise=noise_config) 288 | remain_deeper_num -= 1 289 | else: 290 | # on-policy training 291 | for _j in range(episode_config['wider_action_num']): 292 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 293 | wider_decision, wider_probs = meta_controller.sample_wider_decision(input_seq, seq_len) 294 | # modify net config according to wider_decision 295 | wider_mask = apply_wider_decision(wider_decision, 
net_configs, filter_num_list, 296 | units_num_list, noise_config) 297 | 298 | wider_decision_trajectory.append(wider_decision) 299 | wider_decision_mask.append(wider_mask) 300 | wider_seg_deeper += len(net_configs) 301 | encoder_input_seq.append(input_seq) 302 | encoder_seq_len.append(seq_len) 303 | 304 | to_set_layers = [[] for _ in range(episode_config['batch_size'])] 305 | for _j in range(episode_config['deeper_action_num']): 306 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 307 | block_layer_num = get_block_layer_num(net_configs) 308 | deeper_decision, deeper_probs = meta_controller.sample_deeper_decision(input_seq, seq_len, 309 | block_layer_num) 310 | # modify net config according to deeper_decision 311 | deeper_mask, to_set = apply_deeper_decision(deeper_decision, net_configs, 312 | kernel_size_list, noise_config) 313 | for _k in range(episode_config['batch_size']): 314 | to_set_layers[_k] += to_set[_k] 315 | 316 | deeper_decision_trajectory.append(deeper_decision) 317 | deeper_decision_mask.append(deeper_mask) 318 | deeper_block_layer_num.append(block_layer_num) 319 | encoder_input_seq.append(input_seq) 320 | encoder_seq_len.append(seq_len) 321 | 322 | for _k, net_config in enumerate(net_configs): 323 | net_config.set_identity4deepen(to_set_layers[_k], arch_manager.data_provider, 324 | batch_size=64, batch_num=1, noise=noise_config) 325 | # prepare feed dict 326 | encoder_input_seq = np.concatenate(encoder_input_seq, axis=0) 327 | encoder_seq_len = np.concatenate(encoder_seq_len, axis=0) 328 | if episode_config['wider_action_num'] > 0: 329 | wider_decision_trajectory = np.concatenate(wider_decision_trajectory, axis=0) 330 | wider_decision_mask = np.concatenate(wider_decision_mask, axis=0) 331 | else: 332 | wider_decision_trajectory = -np.ones([1, meta_controller.encoder.num_steps]) 333 | wider_decision_mask = -np.ones([1, meta_controller.encoder.num_steps]) 334 | if episode_config['deeper_action_num'] > 0: 335 | deeper_decision_trajectory = np.concatenate(deeper_decision_trajectory, axis=0) 336 | deeper_decision_mask = np.concatenate(deeper_decision_mask, axis=0) 337 | deeper_block_layer_num = np.concatenate(deeper_block_layer_num, axis=0) 338 | else: 339 | deeper_decision_trajectory = - np.ones([1, meta_controller.deeper_actor.decision_num]) 340 | deeper_decision_mask = - np.ones([1, meta_controller.deeper_actor.decision_num]) 341 | deeper_block_layer_num = np.ones([1, meta_controller.deeper_actor.out_dims[0]]) 342 | 343 | run_configs = [run_config] * len(net_configs) 344 | net_str_list = get_net_str(net_configs) 345 | 346 | net_vals = arch_manager.get_net_vals(net_str_list, net_configs, run_configs) 347 | rewards = arch_manager.reward(net_vals, reward_config) 348 | 349 | rewards = np.concatenate([rewards for _ in range(episode_config['wider_action_num'] + 350 | episode_config['deeper_action_num'])]) 351 | rewards /= episode_config['batch_size'] 352 | 353 | # update the agent 354 | if not random: 355 | meta_controller.update_controller(learning_rate, wider_seg_deeper, wider_decision_trajectory, 356 | wider_decision_mask, deeper_decision_trajectory, deeper_decision_mask, 357 | rewards, deeper_block_layer_num, encoder_input_seq, encoder_seq_len) 358 | 359 | meta_controller.save() 360 | # episode end 361 | time_per_episode = time() - start_time 362 | seconds_left = int((max_episodes - _i) * time_per_episode) 363 | print('Time per Episode: %s, Est. 
complete in: %s' % ( 364 | str(timedelta(seconds=time_per_episode)), 365 | str(timedelta(seconds=seconds_left)))) 366 | -------------------------------------------------------------------------------- /code/arch_search/arch_search_densenet_net2net.py: -------------------------------------------------------------------------------- 1 | from expdir_monitor.arch_manager import ArchManager 2 | from meta_controller.base_controller import Vocabulary, EncoderNet, WiderActorNet, DeeperActorNet 3 | from meta_controller.rl_controller import ReinforceNet2NetController 4 | from time import gmtime, strftime, time 5 | from datetime import timedelta 6 | from models.dense_net import DenseBlock, TransitionBlock 7 | import re 8 | import numpy as np 9 | 10 | 11 | def get_net_str(net_configs): 12 | if len(net_configs) == 1: 13 | net_config = net_configs[0] 14 | net_str = [] 15 | for block in net_config.blocks: 16 | if isinstance(block, DenseBlock): 17 | block_str = [] 18 | for miniblock in block.miniblocks: 19 | block_str.append('g%d' % miniblock.out_features_dim) 20 | block_str = '-'.join(block_str) 21 | net_str.append(block_str) 22 | else: 23 | net_str.append('t') 24 | return ['_'.join(net_str)] 25 | else: 26 | net_str_list = [] 27 | for net_config in net_configs: 28 | net_str_list += get_net_str([net_config]) 29 | return net_str_list 30 | 31 | 32 | def get_net_seq(net_configs, vocabulary, num_steps): 33 | net_str_list = get_net_str(net_configs) 34 | net_seq = [] 35 | seq_len = [] 36 | for net_str in net_str_list: 37 | net_str = re.split('_|-', net_str) 38 | net_code = vocabulary.get_code(net_str) 39 | _len = len(net_code) 40 | net_code += [vocabulary.pad_code for _ in range(len(net_code), num_steps)] 41 | net_seq.append(net_code) 42 | seq_len.append(_len) 43 | return np.array(net_seq), np.array(seq_len) 44 | 45 | 46 | def get_block_layer_num(net_configs): 47 | if len(net_configs) == 1: 48 | net_config = net_configs[0] 49 | block_layer_num = [] 50 | for block in net_config.blocks: 51 | if isinstance(block, DenseBlock): 52 | block_layer_num.append(len(block.miniblocks)) 53 | return np.array([block_layer_num]) 54 | else: 55 | block_layer_num = [] 56 | for net_config in net_configs: 57 | block_layer_num.append(get_block_layer_num([net_config])) 58 | return np.concatenate(block_layer_num, axis=0) 59 | 60 | 61 | def apply_wider_decision(wider_decision, net_configs, growth_rate_list, noise): 62 | if len(net_configs) == 1: 63 | decision = wider_decision[0] 64 | net_config = net_configs[0] 65 | _pt = 0 66 | decision_mask = [] 67 | for block_idx, block in enumerate(net_config.blocks): 68 | if isinstance(block, DenseBlock): 69 | for miniblock_idx, miniblock in enumerate(block.miniblocks): 70 | growth_rate = miniblock.out_features_dim 71 | if growth_rate >= growth_rate_list[-1]: 72 | decision_mask.append(0.0) 73 | else: 74 | decision_mask.append(1.0) 75 | new_gr = growth_rate 76 | for gr in growth_rate_list: 77 | if gr > new_gr: 78 | new_gr = gr 79 | break 80 | if decision[_pt]: 81 | net_config.widen( 82 | loc={'block': block_idx, 'miniblock': miniblock_idx, 83 | 'multi-branch': 'in_bottle', 'layer': 0}, 84 | new_width=net_config.bc_ratio * new_gr, 85 | noise=noise, 86 | ) 87 | net_config.widen( 88 | loc={'block': block_idx, 'miniblock': miniblock_idx, 89 | 'multi-branch': 'branch', 'branch': 0, 'layer': 0}, 90 | new_width=new_gr, 91 | noise=noise, 92 | ) 93 | _pt += 1 94 | else: 95 | decision_mask.append(0.0) 96 | _pt += 1 97 | decision_mask += [0.0] * (len(decision) - len(decision_mask)) 98 | return 
np.array([decision_mask]) 99 | else: 100 | decision_mask = [] 101 | for _i, net_config in enumerate(net_configs): 102 | decision = wider_decision[_i] 103 | mask = apply_wider_decision([decision], [net_config], growth_rate_list, noise) 104 | decision_mask.append(mask) 105 | return np.concatenate(decision_mask, axis=0) 106 | 107 | 108 | def apply_deeper_decision(deeper_decision, net_configs, noise): 109 | if len(net_configs) == 1: 110 | decision = deeper_decision[0] 111 | net_config = net_configs[0] 112 | 113 | block_decision, layer_idx_decision = decision 114 | decision_mask = [1.0, 1.0] 115 | block_idx, _pt = 0, 0 116 | for _i, block in enumerate(net_config.blocks): 117 | if isinstance(block, DenseBlock): 118 | if _pt == block_decision: 119 | block_idx = _i 120 | break 121 | _pt += 1 122 | net_config.insert_miniblock( 123 | loc={'block': block_idx, 'miniblock': layer_idx_decision}, 124 | miniblock_config={'bc_mode': True}, 125 | noise=noise, 126 | ) 127 | return np.array([decision_mask]) 128 | else: 129 | decision_mask = [] 130 | for _i, net_config in enumerate(net_configs): 131 | decision = deeper_decision[_i] 132 | mask = apply_deeper_decision([decision], [net_config], noise) 133 | decision_mask.append(mask) 134 | return np.concatenate(decision_mask, axis=0) 135 | 136 | 137 | def widen_transition(net_configs, noise): 138 | for net_config in net_configs: 139 | new_out_dim = int(net_config.average_growth_rate * net_config.first_ratio) 140 | if new_out_dim > net_config.blocks[0].out_features_dim: 141 | net_config.widen( 142 | loc={'block': 0, 'layer': 0}, 143 | new_width=new_out_dim, 144 | noise=noise, 145 | ) 146 | out_features_dim = new_out_dim 147 | for _i, block in enumerate(net_config.blocks[2:-1], 2): 148 | if isinstance(block, TransitionBlock): 149 | new_out_dim = int(net_config.blocks[_i - 1].out_features_dim(net_config.blocks[_i - 2].out_features_dim) 150 | * net_config.reduction) 151 | if new_out_dim > block.out_features_dim: 152 | net_config.widen( 153 | loc={'block': _i, 'layer': 0}, 154 | new_width=new_out_dim, 155 | noise=noise, 156 | ) 157 | out_features_dim = block.out_features_dim 158 | else: 159 | out_features_dim = block.out_features_dim(out_features_dim) 160 | 161 | 162 | def arch_search_densenet(start_net_path, arch_search_folder, net_pool_folder, max_episodes): 163 | growth_rate_list = [_i for _i in range(4, 50, 2)] 164 | # encoder config 165 | layer_token_list = ['g%d' % growth_rate for growth_rate in growth_rate_list] 166 | encoder_config = { 167 | 'num_steps': 50, 168 | 'vocab': Vocabulary(layer_token_list + ['t']), 169 | 'embedding_dim': 16, 170 | 'rnn_units': 50, 171 | 'rnn_type': 'bi_lstm', 172 | 'rnn_layers': 1, 173 | } 174 | 175 | # wider actor config 176 | wider_actor_config = { 177 | 'out_dim': 1, 178 | 'num_steps': encoder_config['num_steps'], 179 | 'net_type': 'simple', 180 | 'net_config': None, 181 | } 182 | 183 | # deeper actor config 184 | deeper_actor_config = { 185 | 'decision_num': 2, 186 | 'out_dims': [3, 20], 187 | 'embedding_dim': encoder_config['embedding_dim'], 188 | 'cell_type': 'lstm', 189 | 'rnn_layers': 1, 190 | 'attention_config': None, 191 | } 192 | 193 | # meta-controller config 194 | entropy_penalty = 1e-5 195 | learning_rate = 2e-3 196 | opt_config = ['adam', {}] 197 | 198 | # net2net noise config 199 | noise_config = { 200 | 'wider': {'type': 'normal', 'ratio': 1e-2}, 201 | 'deeper': {'type': 'normal', 'ratio': 1e-3}, 202 | } 203 | 204 | # episode config 205 | episode_config = { 206 | 'batch_size': 10, 207 | 'wider_action_num': 10, 208 
| 'deeper_action_num': 5, 209 | } 210 | 211 | # arch search run config 212 | arch_search_run_config = { 213 | 'n_epochs': 20, 214 | 'init_lr': 0.02, 215 | 'validation_size': 5000, 216 | 'other_lr_schedule': {'type': 'cosine'}, 217 | 'batch_size': 64, 218 | 'include_extra': False, 219 | } 220 | 221 | # reward config 222 | reward_config = { 223 | 'func': 'tan', 224 | 'decay': 0.95, 225 | } 226 | 227 | arch_manager = ArchManager(start_net_path, arch_search_folder, net_pool_folder) 228 | _, run_config, _ = arch_manager.get_start_net() 229 | run_config.update(arch_search_run_config) 230 | 231 | encoder = EncoderNet(**encoder_config) 232 | wider_actor = WiderActorNet(**wider_actor_config) 233 | deeper_actor = DeeperActorNet(**deeper_actor_config) 234 | meta_controller = ReinforceNet2NetController(arch_manager.meta_controller_path, entropy_penalty, 235 | encoder, wider_actor, deeper_actor, opt_config) 236 | meta_controller.load() 237 | 238 | for _i in range(arch_manager.episode + 1, max_episodes + 1): 239 | print('episode. %d start. current time: %s' % (_i, strftime("%a, %d %b %Y %H:%M:%S", gmtime()))) 240 | start_time = time() 241 | 242 | nets = [arch_manager.get_start_net(copy=True) for _ in range(episode_config['batch_size'])] 243 | net_configs = [net_config for net_config, _, _ in nets] 244 | 245 | # feed_dict for update the controller 246 | wider_decision_trajectory, wider_decision_mask = [], [] 247 | deeper_decision_trajectory, deeper_decision_mask = [], [] 248 | deeper_block_layer_num = [] 249 | encoder_input_seq, encoder_seq_len = [], [] 250 | wider_seg_deeper = 0 251 | 252 | # on-policy training 253 | for _j in range(episode_config['wider_action_num']): 254 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 255 | wider_decision, wider_probs = meta_controller.sample_wider_decision(input_seq, seq_len) 256 | # modify net config according to wider_decision 257 | wider_mask = apply_wider_decision(wider_decision, net_configs, growth_rate_list, noise_config) 258 | 259 | wider_decision_trajectory.append(wider_decision) 260 | wider_decision_mask.append(wider_mask) 261 | wider_seg_deeper += len(net_configs) 262 | encoder_input_seq.append(input_seq) 263 | encoder_seq_len.append(seq_len) 264 | 265 | for _j in range(episode_config['deeper_action_num']): 266 | input_seq, seq_len = get_net_seq(net_configs, encoder.vocab, encoder.num_steps) 267 | block_layer_num = get_block_layer_num(net_configs) 268 | deeper_decision, deeper_probs = meta_controller.sample_deeper_decision(input_seq, seq_len, block_layer_num) 269 | # modify net config according to deeper_decision 270 | deeper_mask = apply_deeper_decision(deeper_decision, net_configs, noise_config) 271 | 272 | deeper_decision_trajectory.append(deeper_decision) 273 | deeper_decision_mask.append(deeper_mask) 274 | deeper_block_layer_num.append(block_layer_num) 275 | encoder_input_seq.append(input_seq) 276 | encoder_seq_len.append(seq_len) 277 | 278 | widen_transition(net_configs, noise_config) 279 | 280 | run_configs = [run_config] * len(net_configs) 281 | net_str_list = get_net_str(net_configs) 282 | 283 | net_vals = arch_manager.get_net_vals(net_str_list, net_configs, run_configs) 284 | rewards = arch_manager.reward(net_vals, reward_config) 285 | 286 | # prepare feed dict 287 | encoder_input_seq = np.concatenate(encoder_input_seq, axis=0) 288 | encoder_seq_len = np.concatenate(encoder_seq_len, axis=0) 289 | if episode_config['wider_action_num'] > 0: 290 | wider_decision_trajectory = np.concatenate(wider_decision_trajectory, 
axis=0) 291 | wider_decision_mask = np.concatenate(wider_decision_mask, axis=0) 292 | else: 293 | wider_decision_trajectory = -np.ones([1, meta_controller.encoder.num_steps]) 294 | wider_decision_mask = -np.ones([1, meta_controller.encoder.num_steps]) 295 | if episode_config['deeper_action_num'] > 0: 296 | deeper_decision_trajectory = np.concatenate(deeper_decision_trajectory, axis=0) 297 | deeper_decision_mask = np.concatenate(deeper_decision_mask, axis=0) 298 | deeper_block_layer_num = np.concatenate(deeper_block_layer_num, axis=0) 299 | else: 300 | deeper_decision_trajectory = - np.ones([1, meta_controller.deeper_actor.decision_num]) 301 | deeper_decision_mask = - np.ones([1, meta_controller.deeper_actor.decision_num]) 302 | deeper_block_layer_num = np.ones([1, meta_controller.deeper_actor.out_dims[0]]) 303 | rewards = np.concatenate([rewards for _ in range(episode_config['wider_action_num'] + 304 | episode_config['deeper_action_num'])]) 305 | rewards /= episode_config['batch_size'] 306 | 307 | # update the agent 308 | meta_controller.update_controller(learning_rate, wider_seg_deeper, wider_decision_trajectory, 309 | wider_decision_mask, deeper_decision_trajectory, deeper_decision_mask, 310 | rewards, deeper_block_layer_num, encoder_input_seq, encoder_seq_len) 311 | 312 | meta_controller.save() 313 | # episode end 314 | time_per_episode = time() - start_time 315 | seconds_left = int((max_episodes - _i) * time_per_episode) 316 | print('Time per Episode: %s, Est. complete in: %s' % ( 317 | str(timedelta(seconds=time_per_episode)), 318 | str(timedelta(seconds=seconds_left)))) 319 | 320 | -------------------------------------------------------------------------------- /code/client.py: -------------------------------------------------------------------------------- 1 | """ 2 | The file to run in the client side 3 | Train the network and return the validation performance 4 | """ 5 | import os 6 | from expdir_monitor.expdir_monitor import ExpdirMonitor 7 | import time 8 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 9 | 10 | 11 | def run(expdir): 12 | start_time = time.time() 13 | expdir_monitor = ExpdirMonitor(expdir) 14 | valid_performance = expdir_monitor.run(pure=True, restore=False) 15 | end_time = time.time() 16 | print('running time: %s' % (end_time - start_time)) 17 | print('valid performance: %s' % valid_performance) 18 | 19 | 20 | def main(): 21 | expdir = input().strip('\n') 22 | run(expdir) 23 | 24 | 25 | if __name__ == "__main__": 26 | try: 27 | main() 28 | except KeyboardInterrupt: 29 | pass 30 | -------------------------------------------------------------------------------- /code/data_providers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/data_providers/__init__.py -------------------------------------------------------------------------------- /code/data_providers/base_provider.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class DataSet: 5 | """Class to represent some dataset: train, validation, test""" 6 | 7 | @property 8 | def num_examples(self): 9 | """Return qtty of examples in dataset""" 10 | raise NotImplementedError 11 | 12 | def next_batch(self, batch_size): 13 | """Return batch of required size of data, labels""" 14 | raise NotImplementedError 15 | 16 | 17 | class ImagesDataSet(DataSet): 18 | """Dataset for images that provide some often used methods""" 19 | 20 
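# Note: measure_mean_and_std() below returns per-channel means and standard deviations; the CIFAR
# provider computes them once on the training images and reuses the same statistics to normalize
# the validation and test splits.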
| @staticmethod 21 | def measure_mean_and_std(images): 22 | # for every channel in image 23 | means = [] 24 | stds = [] 25 | # for every channel in image(assume this is last dimension) 26 | for ch in range(images.shape[-1]): 27 | means.append(np.mean(images[:, :, :, ch])) 28 | stds.append(np.std(images[:, :, :, ch])) 29 | return means, stds 30 | 31 | @staticmethod 32 | def shuffle_images_and_labels(images, labels): 33 | rand_indexes = np.random.permutation(images.shape[0]) 34 | shuffled_images = images[rand_indexes] 35 | shuffled_labels = labels[rand_indexes] 36 | return shuffled_images, shuffled_labels 37 | 38 | @staticmethod 39 | def normalize_images(images, normalization_type, meanstd=None): 40 | """ 41 | Args: 42 | images: numpy 4D array 43 | normalization_type: `str`, available choices: 44 | - divide_255 45 | - divide_256 46 | - by_channels 47 | meanstd 48 | """ 49 | if normalization_type is not None: 50 | if normalization_type == 'divide_255': 51 | images = images / 255 52 | elif normalization_type == 'divide_256': 53 | images = images / 256 54 | elif normalization_type == 'by_channels': 55 | images = images.astype('float64') 56 | # for every channel in image(assume this is last dimension) 57 | means, stds = meanstd 58 | for i in range(images.shape[-1]): 59 | images[:, :, :, i] = ((images[:, :, :, i] - means[i]) / stds[i]) 60 | else: 61 | raise Exception('Unknown type of normalization') 62 | return images 63 | 64 | 65 | class DataProvider: 66 | _SEED = 88 67 | 68 | @property 69 | def data_shape(self): 70 | """Return shape as python list of one data entry""" 71 | raise NotImplementedError 72 | 73 | @property 74 | def n_classes(self): 75 | """Return `int` of num classes""" 76 | raise NotImplementedError 77 | 78 | def labels_to_one_hot(self, labels): 79 | """Convert 1D array of labels to one hot representation 80 | 81 | Args: 82 | labels: 1D numpy array 83 | """ 84 | new_labels = np.zeros((labels.shape[0], self.n_classes)) 85 | new_labels[range(labels.shape[0]), labels] = np.ones(labels.shape) 86 | return new_labels 87 | 88 | @staticmethod 89 | def labels_from_one_hot(labels): 90 | """Convert 2D array of labels to 1D class based representation 91 | 92 | Args: 93 | labels: 2D numpy array 94 | """ 95 | return np.argmax(labels, axis=1) 96 | -------------------------------------------------------------------------------- /code/data_providers/cifar.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | import pickle 4 | import random 5 | 6 | import numpy as np 7 | 8 | from data_providers.base_provider import ImagesDataSet, DataProvider 9 | from data_providers.downloader import download_data_url 10 | 11 | 12 | def augment_image(image, pad): 13 | """Perform zero padding, randomly crop image to original size, 14 | maybe mirror horizontally""" 15 | init_shape = image.shape 16 | new_shape = [init_shape[0] + pad * 2, 17 | init_shape[1] + pad * 2, 18 | init_shape[2]] 19 | zeros_padded = np.zeros(new_shape) 20 | zeros_padded[pad:init_shape[0] + pad, pad:init_shape[1] + pad, :] = image 21 | # randomly crop to original size 22 | init_x = np.random.randint(0, pad * 2) 23 | init_y = np.random.randint(0, pad * 2) 24 | cropped = zeros_padded[ 25 | init_x: init_x + init_shape[0], 26 | init_y: init_y + init_shape[1], 27 | :] 28 | flip = random.getrandbits(1) 29 | if flip: 30 | cropped = cropped[:, ::-1, :] 31 | return cropped 32 | 33 | 34 | def augment_all_images(initial_images, pad=4): 35 | new_images = np.zeros(initial_images.shape) 36 | 
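# Standard CIFAR-style augmentation: each image is zero-padded by `pad` pixels on every side,
# randomly cropped back to its original size, and mirrored horizontally with probability 0.5
# (see augment_image above).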
for i in range(initial_images.shape[0]): 37 | new_images[i] = augment_image(initial_images[i], pad) 38 | return new_images 39 | 40 | 41 | class CifarDataSet(ImagesDataSet): 42 | def __init__(self, images, labels, n_classes, shuffle, normalization, augmentation, meanstd): 43 | """ 44 | Args: 45 | images: 4D numpy array 46 | labels: 2D or 1D numpy array 47 | n_classes: `int`, number of cifar classes - 10 or 100 48 | shuffle: `str` or None 49 | None: no any shuffling 50 | once_prior_train: shuffle train data only once prior train 51 | every_epoch: shuffle train data prior every epoch 52 | normalization: `str` or None 53 | None: no any normalization 54 | divide_255: divide all pixels by 255 55 | divide_256: divide all pixels by 256 56 | by_channels: substract mean of every chanel and divide each 57 | chanel data by it's standard deviation 58 | augmentation: `bool` 59 | """ 60 | if shuffle is None: 61 | self.shuffle_every_epoch = False 62 | elif shuffle == 'once_prior_train': 63 | self.shuffle_every_epoch = False 64 | images, labels = self.shuffle_images_and_labels(images, labels) 65 | elif shuffle == 'every_epoch': 66 | self.shuffle_every_epoch = True 67 | else: 68 | raise Exception('Unknown type of shuffling') 69 | self._batch_counter, self.epoch_images, self.epoch_labels = 0, None, None 70 | 71 | self.images = images 72 | self.labels = labels 73 | self.n_classes = n_classes 74 | self.augmentation = augmentation 75 | self.normalization = normalization 76 | self.meanstd = meanstd 77 | self.images = self.normalize_images(images, self.normalization, self.meanstd) 78 | self.start_new_epoch() 79 | 80 | def start_new_epoch(self): 81 | self._batch_counter = 0 82 | if self.shuffle_every_epoch: 83 | images, labels = self.shuffle_images_and_labels( 84 | self.images, self.labels) 85 | else: 86 | images, labels = self.images, self.labels 87 | if self.augmentation: 88 | images = augment_all_images(images, pad=4) 89 | self.epoch_images = images 90 | self.epoch_labels = labels 91 | 92 | @property 93 | def num_examples(self): 94 | return self.labels.shape[0] 95 | 96 | def next_batch(self, batch_size): 97 | start = self._batch_counter * batch_size 98 | end = (self._batch_counter + 1) * batch_size 99 | self._batch_counter += 1 100 | images_slice = self.epoch_images[start: end] 101 | labels_slice = self.epoch_labels[start: end] 102 | if images_slice.shape[0] != batch_size: 103 | self.start_new_epoch() 104 | return self.next_batch(batch_size) 105 | else: 106 | return images_slice, labels_slice 107 | 108 | 109 | class CifarDataProvider(DataProvider): 110 | """Abstract class for cifar readers""" 111 | def __init__(self, save_path=None, validation_size=None, shuffle=None, normalization=None, 112 | one_hot=True, **kwargs): 113 | """ 114 | Args: 115 | save_path: `str` 116 | validation_set: `bool`. 117 | validation_split: `float` or None 118 | float: chunk of `train set` will be marked as `validation set`. 
119 | None: if 'validation set' == True, `validation set` will be 120 | copy of `test set` 121 | shuffle: `str` or None 122 | None: no any shuffling 123 | once_prior_train: shuffle train data only once prior train 124 | every_epoch: shuffle train data prior every epoch 125 | normalization: `str` or None 126 | None: no any normalization 127 | divide_255: divide all pixels by 255 128 | divide_256: divide all pixels by 256 129 | by_channels: substract mean of every chanel and divide each 130 | chanel data by it's standard deviation 131 | one_hot: `bool`, return laels one hot encoded 132 | """ 133 | self._save_path = save_path 134 | self.one_hot = one_hot 135 | download_data_url(self.data_url, self.save_path) 136 | train_fnames, test_fnames = self.get_filenames(self.save_path) 137 | 138 | # add train and validations datasets 139 | images, labels = self.read_cifar(train_fnames) 140 | train_meanstd = ImagesDataSet.measure_mean_and_std(images) 141 | if validation_size is not None: 142 | np.random.seed(DataProvider._SEED) 143 | rand_indexes = np.random.permutation(images.shape[0]) 144 | valid_indexes = rand_indexes[:validation_size] 145 | train_indexes = rand_indexes[validation_size:] 146 | self.train = CifarDataSet( 147 | images=images[train_indexes], labels=labels[train_indexes], 148 | n_classes=self.n_classes, shuffle=shuffle, 149 | normalization=normalization, 150 | augmentation=self.data_augmentation, meanstd=train_meanstd) 151 | self.validation = CifarDataSet( 152 | images=images[valid_indexes], labels=labels[valid_indexes], 153 | n_classes=self.n_classes, shuffle=None, 154 | normalization=normalization, 155 | augmentation=False, meanstd=train_meanstd) 156 | else: 157 | self.train = CifarDataSet( 158 | images=images, labels=labels, 159 | n_classes=self.n_classes, shuffle=shuffle, 160 | normalization=normalization, 161 | augmentation=self.data_augmentation, meanstd=train_meanstd) 162 | 163 | # add test set 164 | images, labels = self.read_cifar(test_fnames) 165 | self.test = CifarDataSet( 166 | images=images, labels=labels, 167 | shuffle=None, n_classes=self.n_classes, 168 | normalization=normalization, 169 | augmentation=False, meanstd=train_meanstd) 170 | 171 | if validation_size is None: 172 | self.validation = self.test 173 | 174 | @property 175 | def save_path(self): 176 | if self._save_path is None: 177 | self._save_path = os.path.join( 178 | tempfile.gettempdir(), 'cifar%d' % self.n_classes) 179 | return self._save_path 180 | 181 | @property 182 | def data_url(self): 183 | """Return url for downloaded data depends on cifar class""" 184 | data_url = ('http://www.cs.toronto.edu/' 185 | '~kriz/cifar-%d-python.tar.gz' % self.n_classes) 186 | return data_url 187 | 188 | @property 189 | def data_shape(self): 190 | return 32, 32, 3 191 | 192 | @property 193 | def n_classes(self): 194 | return self._n_classes 195 | 196 | def get_filenames(self, save_path): 197 | """Return two lists of train and test filenames for dataset""" 198 | raise NotImplementedError 199 | 200 | def read_cifar(self, filenames): 201 | if self.n_classes == 10: 202 | labels_key = b'labels' 203 | elif self.n_classes == 100: 204 | labels_key = b'fine_labels' 205 | 206 | images_res = [] 207 | labels_res = [] 208 | for fname in filenames: 209 | with open(fname, 'rb') as f: 210 | images_and_labels = pickle.load(f, encoding='bytes') 211 | images = images_and_labels[b'data'] 212 | images = images.reshape(-1, 3, 32, 32) 213 | images = images.swapaxes(1, 3).swapaxes(1, 2) 214 | images_res.append(images) 215 | 
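# At this point `images` has shape (N, 32, 32, 3): the raw CIFAR rows of 3072 bytes were reshaped
# to (N, 3, 32, 32) and the channel axis was moved last by the two swapaxes calls above.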
labels_res.append(images_and_labels[labels_key]) 216 | images_res = np.vstack(images_res) 217 | labels_res = np.hstack(labels_res) 218 | if self.one_hot: 219 | labels_res = self.labels_to_one_hot(labels_res) 220 | return images_res, labels_res 221 | 222 | 223 | class Cifar10DataProvider(CifarDataProvider): 224 | _n_classes = 10 225 | data_augmentation = False 226 | 227 | def get_filenames(self, save_path): 228 | sub_save_path = os.path.join(save_path, 'cifar-10-batches-py') 229 | train_filenames = [ 230 | os.path.join( 231 | sub_save_path, 232 | 'data_batch_%d' % i) for i in range(1, 6)] 233 | test_filenames = [os.path.join(sub_save_path, 'test_batch')] 234 | return train_filenames, test_filenames 235 | 236 | 237 | class Cifar100DataProvider(CifarDataProvider): 238 | _n_classes = 100 239 | data_augmentation = False 240 | 241 | def get_filenames(self, save_path): 242 | sub_save_path = os.path.join(save_path, 'cifar-100-python') 243 | train_filenames = [os.path.join(sub_save_path, 'train')] 244 | test_filenames = [os.path.join(sub_save_path, 'test')] 245 | return train_filenames, test_filenames 246 | 247 | 248 | class Cifar10AugmentedDataProvider(Cifar10DataProvider): 249 | _n_classes = 10 250 | data_augmentation = True 251 | 252 | 253 | class Cifar100AugmentedDataProvider(Cifar100DataProvider): 254 | _n_classes = 100 255 | data_augmentation = True 256 | 257 | -------------------------------------------------------------------------------- /code/data_providers/downloader.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import urllib.request 4 | import tarfile 5 | import zipfile 6 | 7 | 8 | def report_download_progress(count, block_size, total_size): 9 | pct_complete = float(count * block_size) / total_size 10 | msg = '\r {0:.1%} already downloaded'.format(pct_complete) 11 | sys.stdout.write(msg) 12 | sys.stdout.flush() 13 | 14 | 15 | def download_data_url(url, download_dir): 16 | filename = url.split('/')[-1] 17 | file_path = os.path.join(download_dir, filename) 18 | 19 | if not os.path.exists(file_path): 20 | os.makedirs(download_dir, exist_ok=True) 21 | 22 | print('Download %s to %s' % (url, file_path)) 23 | file_path, _ = urllib.request.urlretrieve( 24 | url=url, 25 | filename=file_path, 26 | reporthook=report_download_progress) 27 | 28 | print('\nExtracting files') 29 | if file_path.endswith('.zip'): 30 | zipfile.ZipFile(file=file_path, mode='r').extractall(download_dir) 31 | elif file_path.endswith(('.tar.gz', '.tgz')): 32 | tarfile.open(name=file_path, mode='r:gz').extractall(download_dir) 33 | -------------------------------------------------------------------------------- /code/data_providers/svhn.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import os 3 | import scipy.io 4 | 5 | import numpy as np 6 | 7 | from data_providers.base_provider import ImagesDataSet, DataProvider 8 | from data_providers.downloader import download_data_url 9 | 10 | 11 | class SVHNDataSet(ImagesDataSet): 12 | n_classes = 10 13 | 14 | def __init__(self, images, labels, shuffle, normalization): 15 | """ 16 | Args: 17 | images: 4D numpy array 18 | labels: 2D or 1D numpy array 19 | shuffle: `bool`, should shuffle data or not 20 | normalization: `str` or None 21 | None: no any normalization 22 | divide_255: divide all pixels by 255 23 | divide_256: divide all pixels by 256 24 | by_channels: substract mean of every chanel and divide each 25 | chanel data by it's standard deviation 26 
| """ 27 | self._batch_counter, self.epoch_images, self.epoch_labels = 0, None, None 28 | 29 | self.shuffle = shuffle 30 | self.images = images 31 | self.labels = labels 32 | self.normalization = normalization 33 | self.start_new_epoch() 34 | 35 | def start_new_epoch(self): 36 | self._batch_counter = 0 37 | if self.shuffle: 38 | self.epoch_images, self.epoch_labels = self.shuffle_images_and_labels( 39 | self.images, self.labels) 40 | else: 41 | self.epoch_images, self.epoch_labels = self.images, self.labels 42 | 43 | @property 44 | def num_examples(self): 45 | return self.labels.shape[0] 46 | 47 | def next_batch(self, batch_size): 48 | start = self._batch_counter * batch_size 49 | end = (self._batch_counter + 1) * batch_size 50 | self._batch_counter += 1 51 | images_slice = self.epoch_images[start: end] 52 | labels_slice = self.epoch_labels[start: end] 53 | # due to memory error it should be done inside batch 54 | if self.normalization is not None: 55 | images_slice = self.normalize_images( 56 | images_slice, self.normalization) 57 | if images_slice.shape[0] != batch_size: 58 | self.start_new_epoch() 59 | return self.next_batch(batch_size) 60 | else: 61 | return images_slice, labels_slice 62 | 63 | 64 | class SVHNDataProvider(DataProvider): 65 | def __init__(self, save_path=None, validation_size=None, shuffle=False, normalization=None, 66 | one_hot=True, include_extra=True, **kwargs): 67 | """ 68 | Args: 69 | save_path: `str` 70 | validation_set: `bool`. 71 | validation_split: `int` or None 72 | float: chunk of `train set` will be marked as `validation set`. 73 | None: if 'validation set' == True, `validation set` will be 74 | copy of `test set` 75 | shuffle: `bool`, should shuffle data or not 76 | normalization: `str` or None 77 | None: no any normalization 78 | divide_255: divide all pixels by 255 79 | divide_256: divide all pixels by 256 80 | by_chanels: substract mean of every chanel and divide each 81 | chanel data by it's standart deviation 82 | one_hot: `bool`, return lasels one hot encoded 83 | """ 84 | self._save_path = save_path 85 | train_images = [] 86 | train_labels = [] 87 | if include_extra: 88 | train_data_src = ['train', 'extra'] 89 | else: 90 | train_data_src = ['train'] 91 | for part in train_data_src: 92 | images, labels = self.get_images_and_labels(part, one_hot) 93 | train_images.append(images) 94 | train_labels.append(labels) 95 | train_images = np.vstack(train_images) 96 | if one_hot: 97 | train_labels = np.vstack(train_labels) 98 | else: 99 | train_labels = np.hstack(train_labels) 100 | if validation_size is not None: 101 | np.random.seed(DataProvider._SEED) 102 | rand_indexes = np.random.permutation(train_images.shape[0]) 103 | valid_indexes = rand_indexes[:validation_size] 104 | train_indexes = rand_indexes[validation_size:] 105 | valid_images, valid_labels = train_images[valid_indexes], train_labels[valid_indexes] 106 | train_images, train_labels = train_images[train_indexes], train_labels[train_indexes] 107 | self.validation = SVHNDataSet( 108 | valid_images, valid_labels, False, normalization) 109 | 110 | self.train = SVHNDataSet( 111 | train_images, train_labels, shuffle, normalization) 112 | 113 | test_images, test_labels = self.get_images_and_labels('test', one_hot) 114 | self.test = SVHNDataSet(test_images, test_labels, False, normalization) 115 | 116 | if validation_size is None: 117 | self.validation = self.test 118 | 119 | def get_images_and_labels(self, name_part, one_hot=False): 120 | url = self.data_url + name_part + '_32x32.mat' 121 | 
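# The SVHN .mat files store images as a (32, 32, 3, N) array and label the digit 0 as class 10;
# the transpose(3, 0, 1, 2) and the `labels[labels == 10] = 0` remapping below convert them to the
# (N, 32, 32, 3) layout and 0-9 labels used by the rest of the code.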
download_data_url(url, self.save_path) 122 | filename = os.path.join(self.save_path, name_part + '_32x32.mat') 123 | data = scipy.io.loadmat(filename) 124 | images = data['X'].transpose(3, 0, 1, 2) 125 | labels = data['y'].reshape((-1)) 126 | labels[labels == 10] = 0 127 | if one_hot: 128 | labels = self.labels_to_one_hot(labels) 129 | return images, labels 130 | 131 | @property 132 | def n_classes(self): 133 | return 10 134 | 135 | @property 136 | def save_path(self): 137 | if self._save_path is None: 138 | self._save_path = os.path.join(tempfile.gettempdir(), 'svhn') 139 | return self._save_path 140 | 141 | @property 142 | def data_url(self): 143 | return 'http://ufldl.stanford.edu/housenumbers/' 144 | 145 | @property 146 | def data_shape(self): 147 | return 32, 32, 3 148 | -------------------------------------------------------------------------------- /code/data_providers/utils.py: -------------------------------------------------------------------------------- 1 | from data_providers.cifar import Cifar10DataProvider, Cifar100DataProvider, \ 2 | Cifar10AugmentedDataProvider, Cifar100AugmentedDataProvider 3 | from data_providers.svhn import SVHNDataProvider 4 | 5 | 6 | def get_data_provider_by_name(name, train_params): 7 | """Return required data provider class""" 8 | if name == 'C10': 9 | return Cifar10DataProvider(**train_params) 10 | if name == 'C10+': 11 | return Cifar10AugmentedDataProvider(**train_params) 12 | if name == 'C100': 13 | return Cifar100DataProvider(**train_params) 14 | if name == 'C100+': 15 | return Cifar100AugmentedDataProvider(**train_params) 16 | if name == 'SVHN': 17 | return SVHNDataProvider(**train_params) 18 | else: 19 | print('Sorry, data provider for `%s` dataset ' 20 | 'was not implemented yet' % name) 21 | exit() 22 | -------------------------------------------------------------------------------- /code/expdir_monitor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/expdir_monitor/__init__.py -------------------------------------------------------------------------------- /code/expdir_monitor/arch_manager.py: -------------------------------------------------------------------------------- 1 | """ 2 | Manage the folder for architecture search 3 | """ 4 | import os 5 | import subprocess 6 | import json 7 | import pickle 8 | import numpy as np 9 | from data_providers.utils import get_data_provider_by_name 10 | from expdir_monitor.expdir_monitor import ExpdirMonitor 11 | from expdir_monitor import distributed 12 | 13 | 14 | class NetPool: 15 | def __init__(self, path): 16 | self.path = os.path.realpath(path) 17 | os.makedirs(self.path, exist_ok=True) 18 | 19 | self.net_str2id = {} 20 | self.net_id2val = {} 21 | self.running_set = {'stone': 0} 22 | 23 | self.on_load() 24 | 25 | @property 26 | def str2id_path(self): 27 | return '%s/net.str2id' % self.path 28 | 29 | @property 30 | def id2val_path(self): 31 | return '%s/net.id2val' % self.path 32 | 33 | def on_load(self): 34 | if os.path.isfile(self.str2id_path): 35 | self.net_str2id = json.load(open(self.str2id_path, 'r')) 36 | if os.path.isfile(self.id2val_path): 37 | net_id2val = json.load(open(self.id2val_path, 'r')) 38 | for key in net_id2val: 39 | self.net_id2val[int(key)] = net_id2val[key] 40 | to_rename = [] 41 | for folder in os.listdir(self.path): 42 | if folder.startswith('#'): 43 | out_file = '%s/%s/output' % (self.path, folder) 44 | if not os.path.isfile(out_file): 
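# A '#...' folder without an 'output' file is the leftover of an interrupted run and is removed;
# finished runs are re-registered in net_str2id / net_id2val and their folders renamed to
# '#<net_id>' in the else branch below.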
45 | subprocess.run(['rm', '-rf', os.path.join(self.path, folder)]) 46 | else: 47 | net_str = json.load(open('%s/%s/net.str' % (self.path, folder), 'r'))['net_str'] 48 | if self.net_str2id.get(net_str) is None: 49 | record = json.load(open(out_file, 'r')) 50 | net_val = float(record['valid_acc']) 51 | net_id = self.add_net(net_str, net_val) 52 | folder_path = self.get_net_folder(net_id) 53 | else: 54 | net_id = self.net_str2id[net_str] 55 | folder_path = self.get_net_folder(net_id) 56 | if folder_path != folder: 57 | to_rename.append([folder, folder_path]) 58 | for src_folder, dst_folder in to_rename: 59 | src_folder = os.path.join(self.path, src_folder) 60 | dst_folder = os.path.join(self.path, dst_folder) 61 | os.rename(src_folder, dst_folder) 62 | 63 | def add_net(self, net_str, net_val): 64 | assert self.net_str2id.get(net_str) is None, '%s exists' % net_str 65 | net_id = net_str.__hash__() 66 | while net_id in self.net_id2val: 67 | net_id += 1 68 | self.net_str2id[net_str] = net_id 69 | self.net_id2val[net_id] = net_val 70 | return net_id 71 | 72 | def get_net_val(self, net_str): 73 | net_id = self.net_str2id.get(net_str) 74 | if net_id is None: 75 | if net_str in self.running_set: 76 | running_id = self.running_set[net_str] 77 | else: 78 | running_id = net_str.__hash__() 79 | while running_id in self.running_set.values(): 80 | running_id += 1 81 | self.running_set[net_str] = running_id 82 | net_folder = '%s/#Running_%s' % (self.path, running_id) 83 | return None, net_folder 84 | else: 85 | net_val = self.net_id2val[net_id] 86 | net_folder = '%s/%s' % (self.path, self.get_net_folder(net_id)) 87 | return net_val, net_folder 88 | 89 | def on_running_finished(self, net_str, net_folder, net_val): 90 | net_id = self.add_net(net_str, net_val) 91 | # folder_path = self.get_net_folder(net_id) 92 | # self.running_set.pop(net_str) 93 | # os.rename(net_folder, os.path.join(self.path, folder_path)) 94 | 95 | def save(self): 96 | json.dump(self.net_str2id, open(self.str2id_path, 'w'), indent=4) 97 | json.dump(self.net_id2val, open(self.id2val_path, 'w'), indent=4) 98 | 99 | @staticmethod 100 | def get_net_folder(net_id): 101 | return '#%s' % net_id 102 | 103 | 104 | class ArchManager: 105 | def __init__(self, start_net_path, arch_path, net_pool_path): 106 | self.start_net_monitor = ExpdirMonitor(start_net_path) 107 | self.start_net_config, self.data_provider = None, None 108 | 109 | self.net_pool = NetPool(net_pool_path) 110 | 111 | self.arch_path = os.path.realpath(arch_path) 112 | os.makedirs(self.arch_path, exist_ok=True) 113 | 114 | self.episode = 0 115 | self.net_val_wrt_episode = [] 116 | 117 | self.val_log_writer = open(self.val_logs_path, 'a') 118 | self.net_log_writer = open(self.net_logs_path, 'a') 119 | self.on_load() 120 | 121 | @property 122 | def meta_controller_path(self): 123 | return '%s/controller' % self.arch_path 124 | 125 | @property 126 | def val_logs_path(self): 127 | return '%s/val.log' % self.arch_path 128 | 129 | @property 130 | def net_logs_path(self): 131 | return '%s/net.log' % self.arch_path 132 | 133 | def on_load(self): 134 | if os.path.isfile(self.val_logs_path): 135 | with open(self.val_logs_path, 'r') as fin: 136 | for line in fin.readlines(): 137 | line = line[:-1] 138 | self.episode += 1 139 | net_val_list = line.split('\t')[4:] 140 | net_val_list = [float(net_val) for net_val in net_val_list] 141 | self.net_val_wrt_episode.append(net_val_list) 142 | 143 | def get_start_net(self, copy=False): 144 | if self.start_net_config is None: 145 | # prepare start net 146 
| print('Load start net from %s' % self.start_net_monitor.expdir) 147 | init = self.start_net_monitor.load_init() 148 | dataset = 'C10+' if init is None else init.get('dataset', 'C10+') 149 | run_config = self.start_net_monitor.load_run_config(print_info=True, dataset=dataset) 150 | run_config.renew_logs = False 151 | 152 | net_config, model_name = self.start_net_monitor.load_net_config(init, print_info=True) 153 | self.data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 154 | self.start_net_config = [net_config, run_config, model_name] 155 | if copy: 156 | net_config, run_config, model_name = self.start_net_config[:3] 157 | return [ 158 | net_config.copy(), run_config.copy(), model_name 159 | ] 160 | else: 161 | return self.start_net_config 162 | 163 | @staticmethod 164 | def prepare_folder_for_valid(net_str, net_config, run_config, exp_dir): 165 | os.makedirs(exp_dir, exist_ok=True) 166 | monitor = ExpdirMonitor(exp_dir) 167 | json.dump(net_config.get_config(), open(monitor.net_config_path, 'w'), indent=4) 168 | json.dump(run_config.get_config(), open(monitor.run_config_path, 'w'), indent=4) 169 | pickle.dump(net_config.renew_init(None), open(monitor.init, 'wb')) 170 | json.dump({'net_str': net_str}, open(os.path.join(monitor.expdir, 'net.str'), 'w'), indent=4) 171 | 172 | def get_net_vals(self, net_str_list, net_configs, run_configs): 173 | net_val_list = [-1] * len(net_str_list) 174 | 175 | to_run = {} 176 | for _i, net_str in enumerate(net_str_list): 177 | net_val, net_folder = self.net_pool.get_net_val(net_str) 178 | if net_val is None: 179 | if net_folder in to_run: to_run[net_folder] += [_i] 180 | else: 181 | to_run[net_folder] = [_i] 182 | self.prepare_folder_for_valid(net_str, net_configs[_i], run_configs[_i], net_folder) 183 | else: 184 | net_val_list[_i] = net_val 185 | 186 | task_list = [[net_folder, to_run[net_folder]] for net_folder in to_run] 187 | distributed.run(task_list) 188 | episode_total_running_time = 0 189 | for net_folder, idx, net_val in task_list: 190 | net_str = net_str_list[idx[0]] 191 | net_val, running_time = net_val 192 | episode_total_running_time += running_time 193 | self.net_pool.on_running_finished(net_str, net_folder, net_val) 194 | for _id in idx: 195 | net_val_list[_id] = net_val 196 | self.log_nets(net_str_list, episode_total_running_time) 197 | self.net_pool.save() 198 | return net_val_list 199 | 200 | def val2reward(self, net_val_list, func=None): 201 | rewards = [] 202 | for net_val in net_val_list: 203 | if func is None: 204 | rewards.append(net_val) 205 | elif func == 'tan': 206 | reward = np.tan(net_val * np.pi / 2) 207 | rewards.append(reward) 208 | else: 209 | raise NotImplementedError 210 | return rewards 211 | 212 | def reward(self, net_val_list, reward_config): 213 | rewards = self.val2reward(net_val_list, reward_config.get('func')) 214 | rewards = np.array(rewards) 215 | # baseline function 216 | decay = reward_config['decay'] 217 | if 'exp_moving_avg' not in self.__dict__: 218 | self.exp_moving_avg = 0 219 | for old_net_val_list in self.net_val_wrt_episode[:-1]: 220 | old_rewards = self.val2reward(old_net_val_list, reward_config.get('func')) 221 | self.exp_moving_avg += decay * (np.mean(old_rewards) - self.exp_moving_avg) 222 | self.exp_moving_avg += decay * (np.mean(rewards) - self.exp_moving_avg) 223 | return rewards - self.exp_moving_avg 224 | 225 | def log_nets(self, net_str_list, running_time, print_info=True): 226 | net_id_list = [self.net_pool.net_str2id[net_str] for net_str in net_str_list] 
227 | nets_num = len(net_id_list) 228 | new_nets_num = len(set(net_id_list)) 229 | 230 | net_val_list = [self.net_pool.net_id2val[net_id] for net_id in net_id_list] 231 | mean_val, max_val = np.mean(net_val_list), np.max(net_val_list) 232 | self.net_log_writer.write('%d.\t nets=%d (total=%d)\t%s\n' % (self.episode, new_nets_num, nets_num, 233 | '\t'.join([str(net_id) for net_id in net_id_list]))) 234 | log_str = '%d.\t nets=%d (total=%d)\t mean_val=%s (max_val=%s)\t using %s(min)\t%s' % \ 235 | (self.episode + 1, new_nets_num, nets_num, mean_val, max_val, running_time, 236 | '\t'.join([str(net_val) for net_val in net_val_list])) 237 | if print_info: 238 | print(log_str) 239 | self.val_log_writer.write(log_str + '\n') 240 | 241 | self.val_log_writer.flush() 242 | self.net_log_writer.flush() 243 | self.net_val_wrt_episode.append(net_val_list) 244 | self.episode += 1 245 | -------------------------------------------------------------------------------- /code/expdir_monitor/distributed.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen, PIPE 2 | from threading import Thread, Lock 3 | from queue import Queue 4 | from time import sleep 5 | from sys import stderr 6 | import re 7 | import json 8 | import shlex 9 | 10 | max_running_machine = 5 11 | 12 | _max_used_mem = 0.3 13 | _max_used_gpu = 0.3 14 | config_file = 'server_config' 15 | 16 | 17 | class GpuChecker: 18 | def __init__(self, nvidia_getter, gpuid): 19 | self.nvidia_getter = nvidia_getter 20 | self.gpuid = gpuid 21 | 22 | def state_parser(self, state_str): 23 | result = [] 24 | for line in state_str.split('\n'): 25 | # .*?(\d*)C.*\|(.*?)MiB.*?/(.*?)MiB.*?\|.*?(\d*)\% 26 | # .*?(\d*)C.*\|(.*?)MiB.*?/(.*?)MiB.*?\|.*?(\d*)% 27 | pattern = re.search('.*?(\d*)C.*\|(.*?)MiB.*?/(.*?)MiB.*?\|.*?(\d*)%', line) 28 | if pattern is not None: 29 | result.append([int(x) for x in pattern.groups()]) 30 | if self.gpuid >= len(result): 31 | return None 32 | # assert self.gpuid < len(result), 'Parsing error or not enough gpus.' 
33 | return result[self.gpuid] 34 | 35 | def instance_available(self, state_str): 36 | parse_result = self.state_parser(state_str) 37 | if parse_result is None: return False 38 | _, used_mem, total_mem, occupation = parse_result 39 | occupation /= 100 40 | return used_mem / total_mem < _max_used_mem and occupation < _max_used_gpu 41 | 42 | def check(self): 43 | _check_times = 3 44 | try: 45 | for _i in range(_check_times): 46 | assert self.instance_available(self.nvidia_getter()) 47 | if _i < _check_times - 1: 48 | sleep(0.5) 49 | except AssertionError: 50 | return False 51 | return True 52 | 53 | def is_on(self): 54 | try: 55 | parse_result = self.state_parser(self.nvidia_getter()) 56 | if parse_result is None: 57 | return False 58 | else: 59 | return True 60 | except Exception: 61 | return False 62 | 63 | 64 | class RemoteController: 65 | def __init__(self, remote, gpuid, executive): 66 | self.remote = remote 67 | self.gpuid = gpuid 68 | self.executive = executive 69 | 70 | self.gpu_checker = GpuChecker(lambda: self.run('nvidia-smi'), self.gpuid) 71 | 72 | self._lock = Lock() 73 | self._occupied = False 74 | self._on_running = None 75 | 76 | @property 77 | def occupied(self): 78 | with self._lock: 79 | return self._occupied 80 | 81 | @occupied.setter 82 | def occupied(self, val): 83 | assert isinstance(val, bool), 'Occupied must be True or False, but {} received.'.format(val) 84 | with self._lock: 85 | self._occupied = val 86 | 87 | def run(self, cmd, stdin=None): 88 | proc = Popen('ssh {} {}'.format(self.remote, shlex.quote(cmd)), shell=True, stdin=PIPE, stdout=PIPE, 89 | universal_newlines=True) 90 | return proc.communicate(input=stdin)[0] 91 | 92 | @property 93 | def gpu_state(self): 94 | return self.gpu_checker.check() 95 | 96 | @property 97 | def exe_cmd(self): 98 | return 'CUDA_VISIBLE_DEVICES={gpuid} python3 {executive}'.format( 99 | executive=self.executive, 100 | gpuid=self.gpuid 101 | ) 102 | 103 | def check_on(self, queue): 104 | if not self.gpu_checker.is_on(): 105 | if self._on_running is not None: 106 | queue.put(self._on_running) 107 | self._on_running = None 108 | print('Remote Error.') 109 | return False 110 | return True 111 | 112 | def remote_executer(self, idx, expdir, queue): 113 | self.occupied = True 114 | cmd = self.exe_cmd 115 | print('{}: {} {}'.format(self.remote, cmd, expdir), file=stderr) 116 | result = self.run(cmd, stdin=expdir) 117 | try: 118 | result = str(result).split('\n') 119 | used_time = result[-3] 120 | result = result[-2] 121 | assert result.startswith('valid performance: ') and used_time.startswith('running time: '), \ 122 | 'Invalid return: %s, %s' % (used_time, result) 123 | used_time = used_time[len('running time: '):] 124 | used_time = float(used_time) / 60 # minutes 125 | result = result[len('valid performance: '):] 126 | result = float(result) 127 | queue.put([idx, (result, used_time)]) 128 | print('{}th task: {} is successfully executed, result is {}, using {} min.'. 
129 |                   format(idx, expdir, result, used_time), file=stderr)
130 |         except Exception:
131 |             queue.put([idx, expdir])
132 |             print('{}th task: {} fails, with return: {}.'.format(idx, expdir, result), file=stderr)
133 |         self.occupied = False
134 | 
135 |     def execute(self, idx, expdir, queue):
136 |         if self.occupied or not self.gpu_state:
137 |             queue.put([idx, expdir])
138 |         else:
139 |             self._on_running = [idx, expdir]
140 |             thr = Thread(target=self.remote_executer, args=(idx, expdir, queue))
141 |             thr.start()
142 |             self._on_running = None
143 | 
144 | 
145 | class ClusterController:
146 |     def __init__(self, config_list):
147 |         self.cluster = [RemoteController(*config) for config in config_list]
148 |         self._pt = 0
149 | 
150 |     def choice(self, queue):
151 |         remotes_available, occupy_num = self.get_available(queue)
152 |         while occupy_num >= max_running_machine:
153 |             sleep(0.5)
154 |             remotes_available, occupy_num = self.get_available(queue)
155 |         while not remotes_available[self._pt]:
156 |             self._pt = (self._pt + 1) % len(self.cluster)
157 |         choose_remote = self.cluster[self._pt]
158 |         self._pt = (self._pt + 1) % len(self.cluster)
159 |         return choose_remote
160 |         # return random.choice(self.cluster)
161 | 
162 |     def get_available(self, queue):
163 |         remotes_available = [False] * len(self.cluster)
164 |         occupy_num = len(self.cluster)
165 |         for _i, remote in enumerate(self.cluster):
166 |             if not remote.check_on(queue):
167 |                 occupy_num -= 1
168 |                 continue
169 |             if not remote.occupied:
170 |                 remotes_available[_i] = True
171 |                 occupy_num -= 1
172 |         return remotes_available, occupy_num
173 | 
174 |     def execute(self, idx, expdir, queue):
175 |         self.choice(queue).execute(idx, expdir, queue)
176 | 
177 | 
178 | def run_tasks(config_list, expdir_list):
179 |     controller = ClusterController(config_list)
180 |     result_list = [None for _ in expdir_list]
181 | 
182 |     queue = Queue()
183 |     for idx, expdir in enumerate(expdir_list):
184 |         queue.put([idx, expdir])
185 | 
186 |     remained = len(result_list)
187 |     while remained > 0:
188 |         idx, val = queue.get()
189 |         if isinstance(val, str):
190 |             # expdir, need to execute
191 |             controller.execute(idx, val, queue)
192 |         elif isinstance(val, tuple):
193 |             # result, need to be put in result_list
194 |             result_list[idx] = val
195 |             remained -= 1
196 |     return result_list
197 | 
198 | 
199 | def run(task_list):
200 |     with open(config_file, 'r') as f:
201 |         config_list = json.load(f)
202 |     expdir_list = [expdir for expdir, *_ in task_list]
203 |     result_list = run_tasks(config_list, expdir_list)
204 |     for idx, _ in enumerate(task_list):
205 |         task_list[idx].append(result_list[idx])
206 | 
--------------------------------------------------------------------------------
/code/expdir_monitor/expdir_monitor.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import subprocess
4 | from models.utils import RunConfig, get_model_config_by_name, get_model_by_name
5 | from data_providers.utils import get_data_provider_by_name
6 | import pickle
7 | 
8 | 
9 | class ExpdirMonitor:
10 |     def __init__(self, expdir):
11 |         self.expdir = os.path.realpath(expdir)
12 |         os.makedirs(self.expdir, exist_ok=True)
13 | 
14 |     @property
15 |     def logs(self): return '%s/logs' % self.expdir
16 | 
17 |     @property
18 |     def checkpoint(self): return '%s/checkpoint' % self.expdir
19 | 
20 |     @property
21 |     def snapshot(self): return '%s/snapshot' % self.expdir
22 | 
23 |     @property
24 |     def output(self): return '%s/output' % self.expdir
25 | 
26 |     @property
27 |     def
init(self): return '%s/init' % self.expdir 28 | 29 | @property 30 | def run_config_path(self): return '%s/run.config' % self.expdir 31 | 32 | @property 33 | def net_config_path(self): return '%s/net.config' % self.expdir 34 | 35 | def load_run_config(self, print_info=False, dataset='C10+'): 36 | if os.path.isfile(self.run_config_path): 37 | run_config = json.load(open(self.run_config_path, 'r')) 38 | else: 39 | print('Use Default Run Config for %s' % dataset) 40 | run_config = RunConfig.get_default_run_config(dataset) 41 | if print_info: 42 | print('Run config:') 43 | for k, v in run_config.items(): 44 | print('\t%s: %s' % (k, v)) 45 | return RunConfig(**run_config) 46 | 47 | def load_init(self): 48 | init_path = '%s/init' % self.expdir 49 | if os.path.isfile(init_path): 50 | return pickle.load(open(self.init, 'rb')) 51 | else: 52 | return None 53 | 54 | def load_net_config(self, init, print_info=False): 55 | assert os.path.isfile(self.net_config_path), \ 56 | 'Net configs do not exist in the given expdir <%s>' % self.expdir 57 | net_config_json = json.load(open(self.net_config_path, 'r')) 58 | net_config = get_model_config_by_name(net_config_json['name'])() 59 | net_config.set_net_from_config(net_config_json, init=init, print_info=print_info) 60 | return net_config, net_config_json['name'] 61 | 62 | def run(self, pure=True, restore=False, test=False, valid=False, valid_size=-1): 63 | if not restore: 64 | _clear_files = ['logs', 'checkpoint', 'snapshot', 'output'] 65 | for file in _clear_files: 66 | subprocess.run(['rm', '-rf', os.path.join(self.expdir, file)]) 67 | init = self.load_init() 68 | dataset = 'C10+' if init is None else init.get('dataset', 'C10+') 69 | run_config = self.load_run_config(print_info=(not pure), dataset=dataset) 70 | run_config.renew_logs = False 71 | if valid_size > 0: 72 | run_config.validation_size = valid_size 73 | 74 | data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 75 | net_config, model_name = self.load_net_config(init, print_info=(not pure)) 76 | model = get_model_by_name(model_name)(self.expdir, data_provider, run_config, net_config, pure=pure) 77 | start_epoch = 1 78 | if restore: 79 | model.load_model() 80 | epoch_info_file = '%s/checkpoint/epoch.info' % self.expdir 81 | if os.path.isfile(epoch_info_file): 82 | start_epoch = json.load(open(epoch_info_file, 'r'))['epoch'] 83 | if not pure: 84 | print('start epoch: %d' % start_epoch) 85 | if test: 86 | print('Testing...') 87 | loss, accuracy = model.test(data_provider.test, batch_size=200) 88 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 89 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open(self.output, 'w')) 90 | elif valid: 91 | print('validating...') 92 | loss, accuracy = model.test(data_provider.validation, batch_size=200) 93 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 94 | json.dump({'valid_loss': '%s' % loss, 'valid_acc': '%s' % accuracy}, open(self.output, 'w')) 95 | elif pure: 96 | model.pure_train() 97 | loss, accuracy = model.test(data_provider.validation, batch_size=200) 98 | json.dump({'valid_loss': '%s' % loss, 'valid_acc': '%s' % accuracy}, open(self.output, 'w')) 99 | model.save_init(self.snapshot, print_info=(not pure)) 100 | model.save_config(self.expdir, print_info=(not pure)) 101 | else: 102 | # train the model 103 | print('Data provider train images: ', data_provider.train.num_examples) 104 | model.train_all_epochs(start_epoch) 105 | print('Data provider test images: ', 
data_provider.test.num_examples) 106 | print('Testing...') 107 | loss, accuracy = model.test(data_provider.test, batch_size=200) 108 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 109 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open(self.output, 'w')) 110 | model.save_init(self.snapshot, print_info=(not pure)) 111 | model.save_config(self.expdir, print_info=(not pure)) 112 | return accuracy 113 | -------------------------------------------------------------------------------- /code/main.py: -------------------------------------------------------------------------------- 1 | from expdir_monitor.expdir_monitor import ExpdirMonitor 2 | import argparse 3 | 4 | 5 | """ 6 | Given a expdir, run the exp 7 | """ 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument( 10 | '--test', action='store_true', 11 | help='Test model for required dataset if pretrained model exists.' 12 | ) 13 | parser.add_argument( 14 | '--valid', action='store_true', 15 | ) 16 | parser.add_argument( 17 | '--valid_size', type=int, default=-1, 18 | ) 19 | parser.add_argument('--path', type=str) 20 | parser.add_argument('--restore', action='store_true') 21 | args = parser.parse_args() 22 | expdir_monitor = ExpdirMonitor(args.path) 23 | test_performance = expdir_monitor.run(pure=False, restore=args.restore, test=args.test, valid=args.valid, 24 | valid_size=args.valid_size) 25 | if args.valid: 26 | print('validation performance: %s' % test_performance) 27 | else: 28 | print('test performance: %s' % test_performance) 29 | -------------------------------------------------------------------------------- /code/meta_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/meta_controller/__init__.py -------------------------------------------------------------------------------- /code/meta_controller/base_controller.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import rnn 3 | from tensorflow.python.ops import array_ops 4 | from models.basic_model import BasicModel 5 | import numpy as np 6 | import os 7 | 8 | 9 | class BaseController: 10 | def __init__(self, path): 11 | self.path = os.path.realpath(path) 12 | os.makedirs(self.path, exist_ok=True) 13 | 14 | def load(self): 15 | raise NotImplementedError 16 | 17 | def save(self, global_step=None): 18 | raise NotImplementedError 19 | 20 | @property 21 | def save_path(self): 22 | return '%s/model.ckpt' % self.path 23 | 24 | @property 25 | def logs_path(self): 26 | return '%s/logs' % self.path 27 | 28 | 29 | class Vocabulary: 30 | def __init__(self, token_list): 31 | token_list = ['PAD'] + token_list 32 | self.vocab = {} 33 | for idx, token in enumerate(token_list): 34 | self.vocab[token] = idx 35 | self.vocab[idx] = token 36 | 37 | @property 38 | def size(self): 39 | return len(self.vocab) // 2 40 | 41 | def get_code(self, token_list): 42 | return [self.vocab[token] for token in token_list] 43 | 44 | def get_token(self, code_list): 45 | return [self.vocab[code] for code in code_list] 46 | 47 | @property 48 | def pad_code(self): 49 | return self.vocab['PAD'] 50 | 51 | 52 | def embedding(_input, vocab_size, embedding_dim, name='embedding'): 53 | """ 54 | _input: [batch_size, max_num_steps] 55 | output: [batch_size, max_num_steps, embedding_dim] 56 | """ 57 | # embedding 58 | embedding_var = 
tf.get_variable( 59 | name=name, 60 | shape=[vocab_size, embedding_dim], 61 | initializer=tf.random_uniform_initializer(-np.sqrt(3), np.sqrt(3)), 62 | dtype=tf.float32, 63 | ) # Initialize embeddings to have variance=1. 64 | output = tf.nn.embedding_lookup(embedding_var, _input) 65 | return output 66 | 67 | 68 | def build_cell(units, cell_type='lstm', num_layers=1): 69 | if num_layers > 1: 70 | cell = rnn.MultiRNNCell([ 71 | build_cell(units, cell_type, 1) for _ in range(num_layers) 72 | ]) 73 | else: 74 | if cell_type == "lstm": 75 | cell = rnn.LSTMCell(units) 76 | elif cell_type == "gru": 77 | cell = rnn.GRUCell(units) 78 | else: 79 | raise ValueError('Do not support %s' % cell_type) 80 | return cell 81 | 82 | 83 | def seq_len(sequence): 84 | """ 85 | assume padding with zero vectors 86 | sequence: [batch_size, num_steps, features] 87 | length: [batch_size] 88 | """ 89 | used = tf.sign(tf.reduce_max(tf.abs(sequence), 2)) 90 | length = tf.reduce_sum(used, 1) 91 | length = tf.cast(length, tf.int32) 92 | return length 93 | 94 | 95 | class EncoderNet: 96 | def __init__(self, num_steps, vocab, embedding_dim, rnn_units, rnn_type='bi_lstm', rnn_layers=1): 97 | self.num_steps = num_steps 98 | self.vocab = vocab 99 | self.embedding_dim = embedding_dim 100 | 101 | self.rnn_units = rnn_units 102 | self.rnn_type = rnn_type 103 | self.rnn_layers = rnn_layers 104 | 105 | # placeholder 106 | self.seq_len, self.input_seq = None, None 107 | # op 108 | self.encoder_output, self.encoder_state = None, None 109 | 110 | @property 111 | def bidirectional(self): 112 | return self.rnn_type.startswith('bi') 113 | 114 | @property 115 | def cell_type(self): 116 | return self.rnn_type.split('_')[-1] 117 | 118 | def _define_input(self): 119 | self.seq_len = tf.placeholder( 120 | tf.int32, 121 | [None], 122 | 'seq_len' 123 | ) # length of each sequence, shape = [batch_size, ] 124 | 125 | self.input_seq = tf.placeholder( 126 | tf.int32, 127 | [None, self.num_steps], 128 | 'input_seq' 129 | ) # input sequence, shape = [batch_size, num_steps] 130 | 131 | def build(self): 132 | self._define_input() 133 | 134 | output = self.input_seq 135 | output = embedding(output, self.vocab.size, self.embedding_dim, name='layer_embedding') 136 | input_dim = self.embedding_dim 137 | 138 | # Prepare data shape to match rnn function requirements 139 | # Current data input shape: [batch_size, num_steps, input_dim] 140 | # Required shape: 'num_steps' tensors list of shape [batch_size, input_dim] 141 | output = tf.transpose(output, [1, 0, 2]) 142 | output = tf.reshape(output, [-1, input_dim]) 143 | output = tf.split(output, self.num_steps, 0) 144 | 145 | if self.bidirectional: 146 | # 'num_steps' tensors list of shape [batch_size, rnn_units * 2] 147 | fw_cell = build_cell(self.rnn_units, self.cell_type, self.rnn_layers) 148 | bw_cell = build_cell(self.rnn_units, self.cell_type, self.rnn_layers) 149 | output, state_fw, state_bw = rnn.static_bidirectional_rnn( 150 | fw_cell, bw_cell, output, dtype=tf.float32, sequence_length=self.seq_len, scope='encoder') 151 | 152 | if isinstance(state_fw, tf.contrib.rnn.LSTMStateTuple): 153 | encoder_state_c = tf.concat([state_fw.c, state_bw.c], axis=1, name='bidirectional_concat_c') 154 | encoder_state_h = tf.concat([state_fw.h, state_bw.h], axis=1, name='bidirectional_concat_h') 155 | state = tf.contrib.rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h) 156 | elif isinstance(state_fw, tf.Tensor): 157 | state = tf.concat([state_fw, state_bw], axis=1, name='bidirectional_concat') 158 | else: 159 | 
raise ValueError 160 | else: 161 | # 'num_steps' tensors list of shape [batch_size, rnn_units] 162 | cell = build_cell(self.rnn_units, self.cell_type, self.rnn_layers) 163 | output, state = rnn.static_rnn(cell, output, dtype=tf.float32, sequence_length=self.seq_len, 164 | scope='encoder') 165 | 166 | output = tf.stack(output, axis=0) # [num_steps, batch_size, rnn_units] 167 | output = tf.transpose(output, [1, 0, 2]) # [batch_size, num_steps, rnn_units] 168 | self.encoder_output = output 169 | self.encoder_state = state 170 | return output, state 171 | 172 | 173 | class WiderActorNet: 174 | def __init__(self, out_dim, num_steps, net_type='simple', net_config=None): 175 | self.out_dim = out_dim 176 | self.num_steps = num_steps 177 | self.net_type = net_type 178 | self.net_config = net_config 179 | 180 | # placeholder 181 | self.decision, self.probs = None, None 182 | 183 | def build_forward(self, _input): 184 | output = _input # [batch_size, num_steps, rnn_units] 185 | feature_dim = int(output.get_shape()[2]) # rnn_units 186 | output = tf.reshape(output, [-1, feature_dim]) # [batch_size * num_steps, rnn_units] 187 | final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax' 188 | if self.net_type == 'simple': 189 | net_config = [] if self.net_config is None else self.net_config 190 | with tf.variable_scope('wider_actor'): 191 | for layer in net_config: 192 | units, activation = layer.get('units'), layer.get('activation', 'relu') 193 | output = BasicModel.fc_layer(output, units, use_bias=True) 194 | output = BasicModel.activation(output, activation) 195 | logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True) # [batch_size * num_steps, out_dim] 196 | probs = BasicModel.activation(logits, final_activation) # [batch_size * num_steps, out_dim] 197 | probs_dim = self.out_dim 198 | if self.out_dim == 1: 199 | probs = tf.concat([1 - probs, probs], axis=1) 200 | probs_dim = 2 201 | 202 | self.decision = tf.multinomial(tf.log(probs), 1) # [batch_size * num_steps, 1] 203 | self.decision = tf.reshape(self.decision, [-1, self.num_steps]) # [batch_size, num_steps] 204 | self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim]) # [batch_size, num_steps, out_dim] 205 | else: 206 | raise ValueError('Do not support %s' % self.net_type) 207 | 208 | 209 | class DeeperActorNet: 210 | def __init__(self, decision_num, out_dims, embedding_dim, 211 | cell_type='lstm', rnn_layers=1, attention_config=None): 212 | self.decision_num = decision_num 213 | self.out_dims = out_dims 214 | self.embedding_dim = embedding_dim 215 | 216 | self.cell_type = cell_type 217 | self.rnn_layers = rnn_layers 218 | self.attention_config = attention_config 219 | 220 | # placeholder 221 | self.block_layer_num = None 222 | # op 223 | self.decision, self.probs = None, None 224 | 225 | def _define_input(self): 226 | self.block_layer_num = tf.placeholder( 227 | tf.int32, 228 | shape=[None, self.out_dims[0]] 229 | ) # [batch_size, block_num] 230 | 231 | def build_decoder_cell(self, encoder_state): 232 | if isinstance(encoder_state, tf.contrib.rnn.LSTMStateTuple): 233 | rnn_units = int(encoder_state.c.get_shape()[1]) 234 | assert self.cell_type == 'lstm', 'Do not match' 235 | else: 236 | rnn_units = int(encoder_state.get_shape()[1]) 237 | cell = build_cell(rnn_units, self.cell_type, self.rnn_layers) 238 | return cell 239 | 240 | def build_forward(self, encoder_output, encoder_state, is_training, decision_trajectory): 241 | self._define_input() 242 | self.decision, self.probs = [], [] 243 | 244 | batch_size = 
array_ops.shape(encoder_output)[0]
245 |         if self.attention_config is None:
246 |             cell = self.build_decoder_cell(encoder_state)
247 |             cell_state = encoder_state
248 |             cell_input = tf.zeros(shape=[batch_size], dtype=tf.int32)
249 |             with tf.variable_scope('deeper_actor'):
250 |                 for _i in range(self.decision_num):
251 |                     cell_input_embed = embedding(cell_input, 1 if _i == 0 else self.out_dims[_i - 1],
252 |                                                  self.embedding_dim, name='deeper_actor_embedding_%d' % _i)
253 |                     with tf.variable_scope('rnn', reuse=(_i > 0)):
254 |                         cell_output, cell_state = cell(cell_input_embed, cell_state)
255 |                     with tf.variable_scope('classifier_%d' % _i):
256 |                         logits_i = BasicModel.fc_layer(cell_output, self.out_dims[_i], use_bias=True)
257 |                     act_i = 'softmax'
258 |                     probs_i = BasicModel.activation(logits_i, activation=act_i)  # [batch_size, out_dim_i]
259 |                     if _i == 1:
260 |                         # the second decision picks the layer index for the deeper actor,
261 |                         # so mask out positions beyond the chosen block's current depth
262 |                         one_hot_block_decision = tf.one_hot(cell_input, depth=self.out_dims[0], dtype=tf.int32)
263 |                         max_layer_num = tf.multiply(self.block_layer_num, one_hot_block_decision)
264 |                         max_layer_num = tf.reduce_max(max_layer_num, axis=1)  # [batch_size]
265 |                         layer_mask = tf.sequence_mask(max_layer_num, self.out_dims[1], dtype=tf.float32)
266 |                         probs_i = tf.multiply(probs_i, layer_mask)
267 |                         # rescale the masked probabilities so they sum to 1
268 |                         probs_i = tf.divide(probs_i, tf.reduce_sum(probs_i, axis=1, keep_dims=True))
269 |                     decision_i = tf.multinomial(tf.log(probs_i), 1)  # [batch_size, 1]
270 |                     decision_i = tf.cast(decision_i, tf.int32)
271 |                     decision_i = tf.reshape(decision_i, shape=[-1])  # [batch_size]
272 | 
273 |                     cell_input = tf.cond(
274 |                         is_training,
275 |                         lambda: decision_trajectory[:, _i],
276 |                         lambda: decision_i,
277 |                     )
278 |                     self.decision.append(decision_i)
279 |                     self.probs.append(probs_i)
280 |             self.decision = tf.stack(self.decision, axis=1)  # [batch_size, decision_num]
281 |         else:
282 |             raise NotImplementedError
283 | 
284 | 
285 | 
--------------------------------------------------------------------------------
/code/meta_controller/rl_controller.py:
--------------------------------------------------------------------------------
1 | from meta_controller.base_controller import WiderActorNet, DeeperActorNet, EncoderNet, BaseController
2 | import tensorflow as tf
3 | import os
4 | from tensorflow.python.ops import array_ops
5 | from models.basic_model import BasicModel
6 | import shutil
7 | import numpy as np
8 | 
9 | 
10 | class RLNet2NetController(BaseController):
11 |     def save(self, global_step=None):
12 |         self.saver.save(self.sess, self.save_path, global_step=global_step)
13 | 
14 |     def load(self):
15 |         if os.path.isfile('%s/model.ckpt.index' % self.path):
16 |             try:
17 |                 self.saver.restore(self.sess, self.save_path)
18 |             except Exception:
19 |                 print('Failed to load model from save path: %s' % self.save_path)
20 |                 return
21 |             print('Successfully loaded model from save path: %s' % self.save_path)
22 |         else:
23 |             print('No model files in ' + '%s/model.ckpt.index' % self.path)
24 | 
25 |     def __init__(self, path, entropy_penalty,
26 |                  encoder: EncoderNet, wider_actor: WiderActorNet, deeper_actor: DeeperActorNet, opt_config):
27 |         BaseController.__init__(self, path)
28 |         self.entropy_penalty = entropy_penalty
29 | 
30 |         self.encoder = encoder
31 |         self.wider_actor = wider_actor
32 |         self.deeper_actor = deeper_actor
33 |         self.opt_config = opt_config
34 | 
35 |         self.graph = tf.Graph()
36 |         self.obj, self.train_step = None, None
37 |         with self.graph.as_default():
38 |             self._define_input()
39 |
self.build_forward() 40 | self.build_training_process() 41 | self.global_variables_initializer = tf.global_variables_initializer() 42 | self.saver = tf.train.Saver() 43 | self._initialize_session() 44 | 45 | def _define_input(self): 46 | self.learning_rate = tf.placeholder( 47 | tf.float32, 48 | shape=[], 49 | name='learning_rate') 50 | self.is_training = tf.placeholder(tf.bool, shape=[], name='is_training') 51 | self.wider_seg_deeper = tf.placeholder(tf.int32, shape=[], name='wider_seg_deeper') 52 | 53 | self.wider_decision_trajectory = tf.placeholder( 54 | tf.int32, 55 | shape=[None, self.encoder.num_steps], 56 | name='wider_decision_trajectory', 57 | ) # [wider_batch_size, num_steps] 58 | self.wider_decision_mask = tf.placeholder( 59 | tf.float32, 60 | shape=[None, self.encoder.num_steps], 61 | name='wider_decision_mask', 62 | ) # [wider_batch_size, num_steps] 63 | 64 | self.deeper_decision_trajectory = tf.placeholder( 65 | tf.int32, 66 | shape=[None, self.deeper_actor.decision_num], 67 | name='deeper_decision_trajectory', 68 | ) # [deeper_batch_size, deeper_decision_num] 69 | 70 | self.deeper_decision_mask = tf.placeholder( 71 | tf.float32, 72 | shape=[None, self.deeper_actor.decision_num], 73 | name='deeper_decision_mask', 74 | ) # [deeper_batch_size, deeper_decision_num] 75 | 76 | self.reward = tf.placeholder( 77 | tf.float32, 78 | shape=[None], 79 | name='reward', 80 | ) # [batch_size] 81 | self.has_deeper = tf.placeholder( 82 | tf.bool, 83 | shape=[], 84 | name='has_deeper', 85 | ) 86 | 87 | def update_controller(self, learning_rate, wider_seg_deeper, wider_decision_trajectory, wider_decision_mask, 88 | deeper_decision_trajectory, deeper_decison_mask, reward, block_layer_num, input_seq, seq_len): 89 | has_deeper = wider_seg_deeper < len(input_seq) 90 | feed_dict = { 91 | self.learning_rate: learning_rate, 92 | self.wider_seg_deeper: wider_seg_deeper, 93 | self.wider_decision_trajectory: wider_decision_trajectory, 94 | self.wider_decision_mask: wider_decision_mask, 95 | self.deeper_decision_trajectory: deeper_decision_trajectory, 96 | self.deeper_decision_mask: deeper_decison_mask, 97 | self.reward: reward, 98 | self.is_training: True and has_deeper, 99 | self.deeper_actor.block_layer_num: block_layer_num, 100 | self.encoder.input_seq: input_seq, 101 | self.encoder.seq_len: seq_len, 102 | self.has_deeper: has_deeper, 103 | } 104 | self.sess.run(self.train_step, feed_dict=feed_dict) 105 | 106 | def build_forward(self): 107 | encoder_output, encoder_state = self.encoder.build() 108 | feed2wider_output = encoder_output[:self.wider_seg_deeper] 109 | feed2deeper_output = encoder_output[self.wider_seg_deeper:] 110 | if isinstance(encoder_state, tf.contrib.rnn.LSTMStateTuple): 111 | encoder_state_c = encoder_state.c 112 | encoder_state_h = encoder_state.h 113 | 114 | feed2wider_c = encoder_state_c[:self.wider_seg_deeper] 115 | feed2wider_h = encoder_state_h[:self.wider_seg_deeper] 116 | feed2wider_state = tf.contrib.rnn.LSTMStateTuple(c=feed2wider_c, h=feed2wider_h) 117 | 118 | feed2deeper_c = encoder_state_c[self.wider_seg_deeper:] 119 | feed2deeper_h = encoder_state_h[self.wider_seg_deeper:] 120 | feed2deeper_state = tf.contrib.rnn.LSTMStateTuple(c=feed2deeper_c, h=feed2deeper_h) 121 | elif isinstance(encoder_state, tf.Tensor): 122 | feed2wider_state = encoder_state[:self.wider_seg_deeper] 123 | feed2deeper_state = encoder_state[self.wider_seg_deeper:] 124 | else: 125 | raise ValueError 126 | 127 | self.wider_actor.build_forward(feed2wider_output) 128 | 
self.deeper_actor.build_forward(feed2deeper_output, feed2deeper_state, self.is_training, 129 | self.deeper_decision_trajectory) 130 | 131 | def build_training_process(self): 132 | raise NotImplementedError 133 | 134 | def sample_wider_decision(self, input_seq, seq_len): 135 | batch_size = len(seq_len) 136 | wider_decision, wider_probs = self.sess.run( 137 | fetches=[self.wider_actor.decision, self.wider_actor.probs], 138 | feed_dict={ 139 | self.encoder.input_seq: input_seq, 140 | self.encoder.seq_len: seq_len, 141 | self.wider_seg_deeper: batch_size, 142 | } 143 | ) # [batch_size, num_steps] 144 | return wider_decision, wider_probs 145 | 146 | def sample_deeper_decision(self, input_seq, seq_len, block_layer_num): 147 | deeper_decision, deeper_probs = self.sess.run( 148 | fetches=[self.deeper_actor.decision, self.deeper_actor.probs], 149 | feed_dict={ 150 | self.encoder.input_seq: input_seq, 151 | self.encoder.seq_len: seq_len, 152 | self.wider_seg_deeper: 0, 153 | self.is_training: False, 154 | self.deeper_actor.block_layer_num: block_layer_num, 155 | self.deeper_decision_trajectory: -np.ones([len(seq_len), self.deeper_actor.decision_num]) 156 | } 157 | ) # [batch_size, decision_num] 158 | return deeper_decision, deeper_probs 159 | 160 | def _initialize_session(self): 161 | config = tf.ConfigProto() 162 | # restrict model GPU memory utilization to min required 163 | config.gpu_options.allow_growth = True 164 | self.sess = tf.Session(graph=self.graph, config=config) 165 | 166 | self.sess.run(self.global_variables_initializer) 167 | shutil.rmtree(self.logs_path, ignore_errors=True) 168 | self.summary_writer = tf.summary.FileWriter(self.logs_path, graph=self.graph) 169 | 170 | def get_wider_entropy(self): 171 | wider_entropy = -tf.multiply(tf.log(self.wider_actor.probs), self.wider_actor.probs) 172 | wider_entropy = tf.reduce_sum(wider_entropy, axis=2) 173 | wider_entropy = tf.multiply(wider_entropy, self.wider_decision_mask) 174 | wider_entropy = tf.div(tf.reduce_sum(wider_entropy, axis=1), tf.reduce_sum(self.wider_decision_mask, axis=1)) 175 | wider_entropy = tf.reduce_mean(wider_entropy) 176 | return wider_entropy 177 | 178 | def get_deeper_entropy(self): 179 | deeper_entropy = [] 180 | for _i in range(self.deeper_actor.decision_num): 181 | deeper_probs = self.deeper_actor.probs[_i] 182 | entropy = -tf.multiply(tf.log(deeper_probs + 1e-10), deeper_probs) 183 | entropy = tf.reduce_sum(entropy, axis=1) 184 | deeper_entropy.append(entropy) 185 | deeper_entropy = tf.reduce_mean(deeper_entropy) 186 | return deeper_entropy 187 | 188 | 189 | class ReinforceNet2NetController(RLNet2NetController): 190 | def build_training_process(self): 191 | wider_side_obj, wider_entropy = tf.cond( 192 | tf.greater(self.wider_seg_deeper, 0), 193 | lambda: self.get_wider_side_obj(), 194 | lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)) 195 | ) 196 | batch_size = array_ops.shape(self.reward)[0] 197 | deeper_side_obj, deeper_entropy = tf.cond( 198 | self.has_deeper, 199 | lambda: self.get_deeper_side_obj(), 200 | lambda: (tf.constant(0.0, dtype=tf.float32), tf.constant(0.0, dtype=tf.float32)) 201 | ) 202 | self.obj = wider_side_obj + deeper_side_obj 203 | entropy_term = wider_entropy * tf.cast(self.wider_seg_deeper, tf.float32) + \ 204 | deeper_entropy * tf.cast(batch_size - self.wider_seg_deeper, tf.float32) 205 | entropy_term /= tf.cast(batch_size, tf.float32) 206 | 207 | optimizer = BasicModel.build_optimizer(self.learning_rate, self.opt_config[0], self.opt_config[1]) 208 | 
self.train_step = optimizer.minimize(- self.obj - self.entropy_penalty * entropy_term) 209 | 210 | def get_wider_side_obj(self): 211 | wider_side_reward = self.reward[:self.wider_seg_deeper] 212 | 213 | # obj from wider side 214 | wider_trajectory = tf.one_hot(self.wider_decision_trajectory, depth=max(self.wider_actor.out_dim, 2)) 215 | wider_probs = tf.reduce_max(tf.multiply(wider_trajectory, self.wider_actor.probs), axis=2) 216 | wider_probs = tf.log(wider_probs) # [wider_batch_size, num_steps] 217 | wider_probs = tf.multiply(wider_probs, self.wider_decision_mask) 218 | wider_probs = tf.multiply(wider_probs, tf.reshape(wider_side_reward, shape=[-1, 1])) 219 | 220 | wider_side_obj = tf.reduce_sum(wider_probs) 221 | return wider_side_obj, self.get_wider_entropy() 222 | 223 | def get_deeper_side_obj(self): 224 | deeper_side_reward = self.reward[self.wider_seg_deeper:] 225 | 226 | # obj from deeper side 227 | deeper_side_obj = [] 228 | for _i in range(self.deeper_actor.decision_num): 229 | decision_trajectory = self.deeper_decision_trajectory[:, _i] 230 | deeper_decision_mask = self.deeper_decision_mask[:, _i] 231 | decision_trajectory = tf.one_hot(decision_trajectory, depth=self.deeper_actor.out_dims[_i]) 232 | deeper_probs = tf.reduce_max(tf.multiply(decision_trajectory, self.deeper_actor.probs[_i]), axis=1) 233 | deeper_probs = tf.log(deeper_probs) # [deeper_batch_size] 234 | deeper_probs = tf.multiply(deeper_probs, deeper_decision_mask) 235 | deeper_probs = tf.multiply(deeper_probs, deeper_side_reward) 236 | 237 | deeper_side_obj.append(tf.reduce_sum(deeper_probs)) 238 | deeper_side_obj = tf.reduce_sum(deeper_side_obj) 239 | return deeper_side_obj, self.get_deeper_entropy() 240 | 241 | -------------------------------------------------------------------------------- /code/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/code/models/__init__.py -------------------------------------------------------------------------------- /code/models/basic_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tensorflow as tf 4 | import numpy as np 5 | import time 6 | from datetime import timedelta 7 | import json 8 | import pickle 9 | 10 | 11 | class BasicModel: 12 | def __init__(self, path, data_provider, run_config, net_config, pure=False, only_forward=False): 13 | if only_forward: pure = True 14 | self.graph = tf.Graph() 15 | 16 | self.data_provider = data_provider 17 | self._path = path 18 | self.run_config = run_config 19 | self.net_config = net_config 20 | 21 | self.data_shape = data_provider.data_shape 22 | self.n_classes = data_provider.n_classes 23 | 24 | self._save_path, self._logs_path = None, None 25 | self.batches_step = 0 26 | 27 | self.cross_entropy, self.train_step, self.accuracy = None, None, None 28 | with self.graph.as_default(): 29 | self._define_inputs() 30 | self._build_graph(only_forward=only_forward) 31 | self.global_variables_initializer = tf.global_variables_initializer() 32 | if not pure: 33 | self._count_trainable_params() 34 | self.saver = tf.train.Saver() 35 | self._initialize_session(set_logs=(not pure)) 36 | 37 | @property 38 | def save_path(self): 39 | if self._save_path is None: 40 | save_path = '%s/checkpoint' % self._path 41 | os.makedirs(save_path, exist_ok=True) 42 | save_path = os.path.join(save_path, 'model.ckpt') 43 | self._save_path = save_path 
44 | return self._save_path 45 | 46 | @property 47 | def logs_path(self): 48 | if self._logs_path is None: 49 | logs_path = '%s/logs' % self._path 50 | if self.run_config.renew_logs: 51 | shutil.rmtree(logs_path, ignore_errors=True) 52 | os.makedirs(logs_path, exist_ok=True) 53 | self._logs_path = logs_path 54 | return self._logs_path 55 | 56 | def _build_graph(self, only_forward=False): 57 | raise NotImplementedError 58 | 59 | def _define_inputs(self): 60 | shape = [None] 61 | shape.extend(self.data_shape) 62 | self.images = tf.placeholder( 63 | tf.float32, 64 | shape=shape, 65 | name='input_images') 66 | self.labels = tf.placeholder( 67 | tf.float32, 68 | shape=[None, self.n_classes], 69 | name='labels') 70 | self.learning_rate = tf.placeholder( 71 | tf.float32, 72 | shape=[], 73 | name='learning_rate') 74 | self.is_training = tf.placeholder(tf.bool, shape=[], name='is_training') 75 | 76 | def _initialize_session(self, set_logs=True): 77 | """Initialize session, variables""" 78 | config = tf.ConfigProto() 79 | # restrict model GPU memory utilization to min required 80 | config.gpu_options.allow_growth = True 81 | self.sess = tf.Session(graph=self.graph, config=config) 82 | 83 | self.sess.run(self.global_variables_initializer) 84 | if set_logs: 85 | logswriter = tf.summary.FileWriter 86 | self.summary_writer = logswriter(self.logs_path, graph=self.graph) 87 | 88 | def train_all_epochs(self, start_epoch=1): 89 | n_epochs = self.run_config.n_epochs 90 | learning_rate = self.run_config.init_lr 91 | batch_size = self.run_config.batch_size 92 | 93 | total_start_time = time.time() 94 | for epoch in range(start_epoch, n_epochs + 1): 95 | print('\n', '-' * 30, 'Train epoch: %d' % epoch, '-' * 30, '\n') 96 | start_time = time.time() 97 | new_lr = self.run_config.learning_rate(epoch) 98 | if new_lr != learning_rate: 99 | learning_rate = new_lr 100 | print('Decrease learning rate, new lr = %f' % learning_rate) 101 | 102 | print('Training...') 103 | loss, acc = self.train_one_epoch( 104 | self.data_provider.train, batch_size, learning_rate) 105 | # save logs about "loss" and "acc" if the option is true 106 | if self.run_config.should_save_logs: 107 | self.log_loss_accuracy(loss, acc, epoch, prefix='train') 108 | 109 | if self.run_config.validation_frequency and epoch % self.run_config.validation_frequency == 0: 110 | print('Validation...') 111 | loss, acc = self.test(self.data_provider.validation, batch_size) 112 | if self.run_config.should_save_logs: 113 | self.log_loss_accuracy(loss, acc, epoch, prefix='valid') 114 | if self.run_config.should_save_model: 115 | self.save_model() 116 | json.dump({'epoch': epoch + 1}, open('%s/checkpoint/epoch.info' % self._path, 'w')) 117 | 118 | time_per_epoch = time.time() - start_time 119 | seconds_left = int((n_epochs - epoch) * time_per_epoch) 120 | print('Time per epoch: %s, Est. 
complete in: %s' % ( 121 | str(timedelta(seconds=time_per_epoch)), 122 | str(timedelta(seconds=seconds_left)))) 123 | 124 | if self.run_config.should_save_model: 125 | self.save_model() 126 | 127 | total_training_time = time.time() - total_start_time 128 | print('\nTotal training time: %s' % str(timedelta( 129 | seconds=total_training_time))) 130 | 131 | def train_one_epoch(self, data, batch_size, learning_rate): 132 | num_examples = data.num_examples 133 | total_loss = [] 134 | total_accuracy = [] 135 | for i in range(num_examples // batch_size): 136 | batch = data.next_batch(batch_size) 137 | images, labels = batch 138 | feed_dict = { 139 | self.images: images, 140 | self.labels: labels, 141 | self.learning_rate: learning_rate, 142 | self.is_training: True, 143 | } 144 | fetches = [self.train_step, self.cross_entropy, self.accuracy] 145 | result = self.sess.run(fetches, feed_dict=feed_dict) 146 | _, loss, accuracy = result 147 | total_loss.append(loss) 148 | total_accuracy.append(accuracy) 149 | # save logs about "loss" and "acc" if the option is true 150 | if self.run_config.should_save_logs: 151 | self.batches_step += 1 152 | self.log_loss_accuracy( 153 | loss, accuracy, self.batches_step, prefix='per_batch', 154 | should_print=False) 155 | mean_loss = np.mean(total_loss) 156 | mean_accuracy = np.mean(total_accuracy) 157 | return mean_loss, mean_accuracy 158 | 159 | def test(self, data, batch_size): 160 | num_examples = data.num_examples 161 | total_loss = [] 162 | total_accuracy = [] 163 | for i in range(num_examples // batch_size): 164 | batch = data.next_batch(batch_size) 165 | feed_dict = { 166 | self.images: batch[0], 167 | self.labels: batch[1], 168 | self.is_training: False, 169 | } 170 | fetches = [self.cross_entropy, self.accuracy] 171 | loss, accuracy = self.sess.run(fetches, feed_dict=feed_dict) 172 | total_loss.append(loss) 173 | total_accuracy.append(accuracy) 174 | mean_loss = np.mean(total_loss) 175 | mean_accuracy = np.mean(total_accuracy) 176 | remain_num = num_examples % batch_size 177 | if remain_num != 0: 178 | batch = data.next_batch(remain_num) 179 | feed_dict = { 180 | self.images: batch[0], 181 | self.labels: batch[1], 182 | self.is_training: False, 183 | } 184 | fetches = [self.cross_entropy, self.accuracy] 185 | loss, accuracy = self.sess.run(fetches, feed_dict=feed_dict) 186 | 187 | mean_loss = (mean_loss * (num_examples - remain_num) + loss * remain_num) / num_examples 188 | mean_accuracy = (mean_accuracy * (num_examples - remain_num) + accuracy * remain_num) / num_examples 189 | return mean_loss, mean_accuracy 190 | 191 | def save_config(self, save_path, print_info=True): 192 | os.makedirs(save_path, exist_ok=True) 193 | net_save_path = os.path.join(save_path, 'net.config') 194 | json.dump(self.net_config.get_config(), open(net_save_path, 'w'), indent=4) 195 | if print_info: print('Network configs dump to %s' % save_path) 196 | run_save_path = os.path.join(save_path, 'run.config') 197 | json.dump(self.run_config.get_config(), open(run_save_path, 'w'), indent=4) 198 | if print_info: print('Run configs dump to %s' % run_save_path) 199 | 200 | def save_init(self, save_path, print_info=True): 201 | os.makedirs(save_path, exist_ok=True) 202 | save_path = os.path.join(save_path, 'init') 203 | to_save_init = self.net_config.renew_init(self) 204 | to_save_init['dataset'] = self.run_config.dataset 205 | pickle.dump(to_save_init, open(save_path, 'wb')) 206 | if print_info: print('Network weights dump to %s' % save_path) 207 | 208 | def pure_train(self): 209 | 
n_epochs = self.run_config.n_epochs 210 | batch_size = self.run_config.batch_size 211 | 212 | for epoch in range(1, n_epochs + 1): 213 | learning_rate = self.run_config.learning_rate(epoch) 214 | 215 | # train one epoch 216 | data = self.data_provider.train 217 | num_examples = data.num_examples 218 | for i in range(num_examples // batch_size): 219 | batch = data.next_batch(batch_size) 220 | images, labels = batch 221 | feed_dict = { 222 | self.images: images, 223 | self.labels: labels, 224 | self.learning_rate: learning_rate, 225 | self.is_training: True, 226 | } 227 | fetches = self.train_step 228 | self.sess.run(fetches, feed_dict=feed_dict) 229 | 230 | def save_model(self, global_step=None): 231 | self.saver.save(self.sess, self.save_path, global_step=global_step) 232 | 233 | def load_model(self): 234 | try: 235 | self.saver.restore(self.sess, self.save_path) 236 | except Exception: 237 | raise IOError('Failed to to load model ' 238 | 'from save path: %s' % self.save_path) 239 | print('Successfully load model from save path: %s' % self.save_path) 240 | 241 | def log_loss_accuracy(self, loss, accuracy, epoch, prefix, should_print=True, write2file=True): 242 | if should_print: 243 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 244 | summary = tf.Summary(value=[ 245 | tf.Summary.Value( 246 | tag='loss_%s' % prefix, simple_value=float(loss)), 247 | tf.Summary.Value( 248 | tag='accuracy_%s' % prefix, simple_value=float(accuracy)) 249 | ]) 250 | self.summary_writer.add_summary(summary, epoch) 251 | if write2file and prefix == 'valid': 252 | with open('%s/console.txt' % self.logs_path, 'a') as fout: 253 | fout.write('%d: mean cross_entropy: %f, mean accuracy: %f\n' % (epoch, loss, accuracy)) 254 | 255 | @staticmethod 256 | def _count_trainable_params(): 257 | total_parameters = 0 258 | for variable in tf.trainable_variables(): 259 | shape = variable.get_shape() 260 | variable_parameters = 1 261 | for dim in shape: 262 | variable_parameters *= dim.value 263 | total_parameters += variable_parameters 264 | print('Total training params: %.2fM' % (total_parameters / 1e6)) 265 | 266 | @staticmethod 267 | def dropout(_input, keep_prob, is_training): 268 | if keep_prob < 1: 269 | output = tf.cond( 270 | is_training, 271 | lambda: tf.nn.dropout(_input, keep_prob), 272 | lambda: _input 273 | ) 274 | else: 275 | output = _input 276 | return output 277 | 278 | @staticmethod 279 | def weight_variable(shape, name, initializer): 280 | return tf.get_variable( 281 | name, 282 | shape=shape, 283 | initializer=initializer, 284 | ) 285 | 286 | @staticmethod 287 | def avg_pool(_input, k=2, s=2): 288 | ksize = [1, k, k, 1] 289 | strides = [1, s, s, 1] 290 | padding = 'VALID' 291 | # if stride = 1, keep the image size unchanged 292 | if s == 1: padding = 'SAME' 293 | output = tf.nn.avg_pool(_input, ksize, strides, padding) 294 | return output 295 | 296 | @staticmethod 297 | def max_pool(_input, k=2, s=2): 298 | ksize = [1, k, k, 1] 299 | strides = [1, s, s, 1] 300 | padding = 'VALID' 301 | # if stride = 1, keep the image size unchanged 302 | if s == 1: padding = 'SAME' 303 | output = tf.nn.max_pool(_input, ksize, strides, padding) 304 | return output 305 | 306 | @staticmethod 307 | def conv2d(_input, out_features, kernel_size, strides=1, padding='SAME', param_initializer=None): 308 | if kernel_size == 1: padding = 'VALID' 309 | 310 | in_features = int(_input.get_shape()[-1]) 311 | if not param_initializer: param_initializer = {} 312 | kernel = BasicModel.weight_variable( 313 | [kernel_size, 
kernel_size, in_features, out_features], 314 | name='kernel', 315 | initializer=param_initializer.get('kernel', tf.contrib.layers.variance_scaling_initializer()) 316 | ) 317 | output = tf.nn.conv2d(_input, kernel, [1, strides, strides, 1], padding) 318 | return output 319 | 320 | @staticmethod 321 | def fc_layer(_input, out_units, use_bias=False, param_initializer=None): 322 | features_total = int(_input.get_shape()[-1]) 323 | if not param_initializer: param_initializer = {} 324 | W = BasicModel.weight_variable( 325 | [features_total, out_units], name='W', 326 | initializer=param_initializer.get('W', tf.contrib.layers.xavier_initializer()) 327 | ) 328 | output = tf.matmul(_input, W) 329 | if use_bias: 330 | bias = BasicModel.weight_variable( 331 | [out_units], name='bias', 332 | initializer=param_initializer.get('bias', tf.constant_initializer([0.0] * out_units)) 333 | ) 334 | output += bias 335 | return output 336 | 337 | @staticmethod 338 | def batch_norm(_input, is_training, epsilon=1e-3, decay=0.999, param_initializer=None): 339 | output = tf.contrib.layers.batch_norm( 340 | _input, scale=True, is_training=is_training, param_initializers=param_initializer, 341 | updates_collections=None, epsilon=epsilon, decay=decay) 342 | return output 343 | 344 | @staticmethod 345 | def activation(_input, activation='relu'): 346 | if activation == 'relu': 347 | return tf.nn.relu(_input) 348 | elif activation == 'tanh': 349 | return tf.tanh(_input) 350 | elif activation == 'sigmoid': 351 | return tf.sigmoid(_input) 352 | elif activation == 'softmax': 353 | return tf.nn.softmax(_input) 354 | elif activation is None: 355 | return _input 356 | else: 357 | raise ValueError('Do not support %s' % activation) 358 | 359 | @staticmethod 360 | def build_optimizer(learning_rate, opt_name, opt_param): 361 | if opt_name == 'momentum': 362 | return tf.train.MomentumOptimizer(learning_rate, **opt_param) 363 | elif opt_name == 'adam': 364 | return tf.train.AdamOptimizer(learning_rate, **opt_param) 365 | else: 366 | raise ValueError('Do not support the optimizer type: %s' % opt_name) 367 | 368 | @staticmethod 369 | def flatten(_input): 370 | input_shape = _input.shape.as_list() 371 | if len(input_shape) != 2: 372 | return tf.reshape(_input, [-1, np.prod(input_shape[1:])]) 373 | else: 374 | return _input 375 | -------------------------------------------------------------------------------- /code/models/convnet.py: -------------------------------------------------------------------------------- 1 | from models.basic_model import BasicModel 2 | from data_providers.base_provider import DataProvider 3 | from models.layers import ConvLayer, PoolLayer, FCLayer 4 | from models.layer_cascade import LayerCascade 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | class SimpleConvnetConfig: 10 | def __init__(self): 11 | self.net_config = { 12 | 'weight_decay': None, 13 | 'bn_epsilon': None, 14 | 'bn_decay': None, 15 | 'drop_scheme': None, 16 | } 17 | self.layer_cascade = None 18 | 19 | @property 20 | def weight_decay(self): return self.net_config['weight_decay'] 21 | 22 | @property 23 | def bn_epsilon(self): return self.net_config['bn_epsilon'] 24 | 25 | @property 26 | def bn_decay(self): return self.net_config['bn_decay'] 27 | 28 | @property 29 | def drop_scheme(self): return self.net_config['drop_scheme'] 30 | 31 | @property 32 | def depth(self): return self.layer_cascade.depth 33 | 34 | def get_config(self): 35 | return { 36 | 'name': 'SimpleConvnet', 37 | **self.net_config, 38 | 'layer_cascade': 
self.layer_cascade.get_config() 39 | } 40 | 41 | def copy(self): 42 | net_config = SimpleConvnetConfig() 43 | net_config.set_net_from_config(self.get_config(), self.renew_init(None), print_info=False) 44 | return net_config 45 | 46 | def renew_init(self, convnet): 47 | return { 48 | 'layer_cascade': self.layer_cascade.renew_init(convnet) 49 | } 50 | 51 | def set_standard_convnet(self, data_provider: DataProvider, conv_blocks_config, fc_block_config, weight_decay, 52 | drop_scheme, bn_epsilon, bn_decay, print_info=True, **kwargs): 53 | self.net_config = { 54 | 'weight_decay': weight_decay, 55 | 'bn_epsilon': bn_epsilon, 56 | 'bn_decay': bn_decay, 57 | 'drop_scheme': drop_scheme, 58 | } 59 | 60 | image_size = data_provider.data_shape[0] 61 | 62 | layers = [] 63 | conv_id = 0 64 | for _i, block_config in enumerate(conv_blocks_config): 65 | num_layers, kernel_size, filter_num = block_config 66 | for _j in range(num_layers): 67 | keep_prob = 1.0 68 | if 'conv' in drop_scheme['type']: 69 | keep_prob = 1.0 if _i + _j == 0 else drop_scheme.get('conv_drop', 1.0) 70 | conv_layer = ConvLayer('conv_%d' % conv_id, filter_num, kernel_size=kernel_size, keep_prob=keep_prob, 71 | pre_activation=False) 72 | conv_id += 1 73 | layers.append(conv_layer) 74 | if _i < len(conv_blocks_config) - 1: 75 | keep_prob = 1.0 76 | if 'pool' in drop_scheme['type']: 77 | keep_prob = drop_scheme.get('pool_drop', 1.0) 78 | pool_layer = PoolLayer('pool_%d' % _i, 'max', keep_prob=keep_prob, pre_activation=False) 79 | layers.append(pool_layer) 80 | image_size = image_size // 2 81 | global_avg_pool = PoolLayer('pool_%d' % len(conv_blocks_config), 'avg', 82 | kernel_size=image_size, strides=image_size, pre_activation=False) 83 | layers.append(global_avg_pool) 84 | for _i, units in enumerate(fc_block_config): 85 | keep_prob = 1.0 86 | if 'fc' in drop_scheme['type']: 87 | keep_prob = drop_scheme.get('fc_drop', 1.0) 88 | fc_layer = FCLayer('fc_%d' % _i, units, keep_prob=keep_prob) 89 | layers.append(fc_layer) 90 | final_fc_layer = FCLayer('fc_%d' % len(fc_block_config), data_provider.n_classes, use_bn=False, use_bias=True, 91 | activation=None) 92 | layers.append(final_fc_layer) 93 | self.layer_cascade = LayerCascade('SimpleConvNet', layers) 94 | 95 | if print_info: 96 | pass 97 | return self 98 | 99 | def set_net_from_config(self, net_config_json, init=None, print_info=True): 100 | for key in self.net_config.keys(): 101 | self.net_config[key] = net_config_json[key] 102 | init = init['layer_cascade'] if init is not None else None 103 | self.layer_cascade = LayerCascade.set_from_config(net_config_json['layer_cascade'], init) 104 | if print_info: 105 | pass 106 | return self 107 | 108 | def widen(self, layer_idx, new_width, widen_type='output_dim', noise=None): 109 | change_out_dim, _, _ = self.layer_cascade.widen(layer_idx, new_width, widen_type, noise) 110 | if change_out_dim: 111 | raise ValueError('Can not change the final logits number') 112 | 113 | def deepen(self, layer_idx, new_layer_config): 114 | return self.layer_cascade.deepen(layer_idx, new_layer_config, None) 115 | 116 | def set_identity4deepen(self, to_set_layers, data_provider, batch_size, batch_num=1, strict=True, noise=None): 117 | """ 118 | to_set_layers = [(new_layer, prev_layer), ...] 
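Each (new_layer, prev_layer) pair inserts `new_layer` as an identity mapping. When `new_layer` uses batch norm and `strict` is True, the moving mean/variance of `prev_layer`'s output are first estimated from `batch_num` batches of `batch_size` training images through a forward-only copy of the network, so the deepened network initially computes the same function.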
119 | """ 120 | task_list = {} 121 | for new_layer, prev_layer in to_set_layers: 122 | if new_layer.ready: continue 123 | if new_layer.use_bn and strict: 124 | task_id = id(prev_layer) 125 | if task_id in task_list: 126 | task_list[task_id][1].append(new_layer) 127 | else: 128 | task_list[task_id] = (prev_layer, [new_layer]) 129 | else: 130 | new_layer.set_identity_layer(strict=strict, noise=noise) 131 | if len(task_list) > 0: 132 | model = SimpleConvnet(None, data_provider, None, net_config=self, only_forward=True) 133 | task_list = list(task_list.values()) 134 | fetches = [prev_layer.output_op for prev_layer, _ in task_list] 135 | statistics = [[0, 0] for _ in task_list] 136 | for _i in range(batch_num): 137 | input_images, _ = data_provider.train.next_batch(batch_size) 138 | outputs = model.sess.run(fetches, feed_dict={model.images: input_images, model.is_training: False}) 139 | for _j, out in enumerate(outputs): 140 | out = out.astype('float32') 141 | axis = tuple(range(len(out.shape) - 1)) 142 | mean = np.mean(out, axis=axis, keepdims=True) 143 | variance = np.mean(np.square(out - mean), axis=axis, keepdims=True) 144 | mean, variance = np.squeeze(mean), np.squeeze(variance) 145 | statistics[_j][0] += mean 146 | statistics[_j][1] += variance 147 | for _j, (prev_layer, new_layers) in enumerate(task_list): 148 | mean, variance = statistics[_j][0] / batch_num, statistics[_j][1] / batch_num 149 | for new_layer in new_layers: 150 | if new_layer.ready: continue 151 | param = { 152 | 'moving_mean': mean, 153 | 'moving_variance': variance, 154 | 'epsilon': self.bn_epsilon, 155 | } 156 | new_layer.set_identity_layer(strict=strict, param=param, noise=noise) 157 | 158 | 159 | class SimpleConvnet(BasicModel): 160 | def _build_graph(self, only_forward=False): 161 | _input = self.images 162 | output = _input 163 | 164 | output = self.net_config.layer_cascade.build(output, self, store_output_op=only_forward) 165 | 166 | if not only_forward: 167 | logits = output 168 | with tf.variable_scope('L2_Loss'): 169 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 170 | 171 | prediction = tf.nn.softmax(logits) 172 | 173 | # losses 174 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( 175 | logits=logits, labels=self.labels)) 176 | self.cross_entropy = cross_entropy 177 | 178 | # optimizer and train step 179 | optimizer = self.build_optimizer(self.learning_rate, 180 | self.run_config.opt_config[0], self.run_config.opt_config[1]) 181 | self.train_step = optimizer.minimize( 182 | cross_entropy + l2_loss * self.net_config.weight_decay) 183 | correct_prediction = tf.equal( 184 | tf.argmax(prediction, 1), 185 | tf.argmax(self.labels, 1)) 186 | self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 187 | -------------------------------------------------------------------------------- /code/models/dense_net.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.basic_model import BasicModel 3 | from models.layers import ConvLayer, FCLayer, PoolLayer, get_magnifier, apply_noise 4 | from data_providers.base_provider import DataProvider 5 | from models.layer_cascade import LayerCascade 6 | from models.layer_multi_branch import LayerMultiBranch 7 | import numpy as np 8 | 9 | 10 | def get_block_by_name(name): 11 | if name == 'transition': 12 | return TransitionBlock 13 | elif name == 'dense_block': 14 | return DenseBlock 15 | else: 16 | raise ValueError('Unsupported block type: 
%s' % name) 17 | 18 | 19 | class TransitionBlock(LayerCascade): 20 | def get_config(self): 21 | return { 22 | 'name': 'transition', 23 | **super(TransitionBlock, self).get_config(), 24 | } 25 | 26 | @staticmethod 27 | def set_from_config(config_json, init=None, return_class=True): 28 | _id, layers = LayerCascade.set_from_config(config_json, init, return_class=False) 29 | return TransitionBlock(_id, layers) 30 | 31 | def prev_widen(self, indices, magnifier, noise=None): 32 | super(TransitionBlock, self).prev_widen(indices, magnifier, noise=noise) 33 | return False, None, None 34 | 35 | def widen(self, loc, new_width, widen_type='output_dim', noise=None, input_dim=None): 36 | return super(TransitionBlock, self).widen(loc['layer'], new_width, widen_type, noise=noise) 37 | 38 | def deepen(self, loc, new_layer_config, input_dim): 39 | return super(TransitionBlock, self).deepen(loc['layer'], new_layer_config, input_dim) 40 | 41 | 42 | class DenseBlock: 43 | def __init__(self, _id, miniblocks): 44 | self._id = _id 45 | self.miniblocks = miniblocks 46 | 47 | self.output_op = None 48 | 49 | @property 50 | def id(self): 51 | return self._id 52 | 53 | @id.setter 54 | def id(self, value): 55 | self._id = value 56 | 57 | @property 58 | def depth(self): 59 | depth = 0 60 | for miniblock in self.miniblocks: 61 | depth += miniblock.depth 62 | return depth 63 | 64 | def out_features_dim(self, in_features_dim): 65 | out_features_dim = in_features_dim 66 | for miniblock in self.miniblocks: 67 | out_features_dim += miniblock.out_features_dim 68 | return out_features_dim 69 | 70 | def build(self, _input, densenet, store_output_op=False): 71 | output = _input 72 | with tf.variable_scope(self._id): 73 | for miniblock in self.miniblocks: 74 | comp_out = miniblock.build(output, densenet, store_output_op=store_output_op) 75 | output = tf.concat(axis=3, values=(output, comp_out)) 76 | if store_output_op: 77 | self.output_op = output 78 | return output 79 | 80 | def get_config(self): 81 | return { 82 | 'name': 'dense_block', 83 | '_id': self._id, 84 | 'miniblocks': [miniblock.get_config() for miniblock in self.miniblocks] 85 | } 86 | 87 | def renew_init(self, densenet): 88 | return { 89 | '_id': self._id, 90 | 'miniblocks': [miniblock.renew_init(densenet) for miniblock in self.miniblocks] 91 | } 92 | 93 | @staticmethod 94 | def set_from_config(config_json, init=None): 95 | _id = config_json['_id'] 96 | miniblocks = [] 97 | for _i, miniblock_config in enumerate(config_json['miniblocks']): 98 | miniblock_init = init['miniblocks'][_i] if init is not None else None 99 | miniblock = LayerMultiBranch.set_from_config(miniblock_config, miniblock_init) 100 | miniblocks.append(miniblock) 101 | return DenseBlock(_id, miniblocks) 102 | 103 | """ 104 | Network Transformation Operations 105 | """ 106 | def insert_miniblock(self, idx, miniblock_config, input_dim, noise=None, scheme=0): 107 | assert 0 <= idx < len(self.miniblocks), 'Invalid miniblock index %d' % idx 108 | if miniblock_config['bc_mode']: 109 | # DenseNet-BC 110 | if scheme == 0: 111 | copy_idx = idx 112 | copy_miniblock = self.miniblocks[copy_idx] 113 | new_in_bottle = copy_miniblock.in_bottle.copy() 114 | new_in_layer = new_in_bottle.layers[0] 115 | pad_kernel_shape = list(new_in_layer.init['kernel'].shape) 116 | pad_kernel_shape[2] = copy_miniblock.out_features_dim 117 | new_in_layer.init['kernel'] = \ 118 | np.concatenate([new_in_layer.init['kernel'], np.zeros(pad_kernel_shape)], axis=2) 119 | if new_in_layer.pre_activation and new_in_layer.use_bn: 120 | 
new_in_layer.init['beta'] = \ 121 | np.concatenate([new_in_layer.init['beta'], np.zeros([copy_miniblock.out_features_dim])]) 122 | new_in_layer.init['gamma'] = \ 123 | np.concatenate([new_in_layer.init['gamma'], np.ones([copy_miniblock.out_features_dim])]) 124 | new_in_layer.init['moving_mean'] = \ 125 | np.concatenate([new_in_layer.init['moving_mean'], np.zeros([copy_miniblock.out_features_dim])]) 126 | new_in_layer.init['moving_variance'] = \ 127 | np.concatenate([new_in_layer.init['moving_variance'], np.ones([copy_miniblock.out_features_dim])]) 128 | new_in_layer.init['kernel'] = apply_noise(new_in_layer.init['kernel'], noise.get('wider')) 129 | if copy_miniblock.out_bottle is None: 130 | new_branches, indices = copy_miniblock.remapped_branches(noise=noise) 131 | new_miniblock = LayerMultiBranch('M_%d' % (idx + 2), new_branches, 132 | merge=copy_miniblock.merge, in_bottle=new_in_bottle) 133 | old_size = len(indices) 134 | indices = np.concatenate([np.arange(old_size), indices]) 135 | magnifier = get_magnifier(old_size, indices) 136 | 137 | prev_miniblock_out_dim = input_dim 138 | for _i in range(0, idx): 139 | prev_miniblock_out_dim += self.miniblocks[_i].out_features_dim 140 | indices = np.concatenate([ 141 | np.arange(prev_miniblock_out_dim), 142 | indices + prev_miniblock_out_dim, 143 | ]) 144 | magnifier = np.concatenate([ 145 | [1] * prev_miniblock_out_dim, 146 | magnifier, 147 | ]) 148 | prev_miniblock_out_dim += old_size 149 | for _i in range(idx + 1, len(self.miniblocks)): 150 | miniblock_out_dim = self.miniblocks[_i].out_features_dim 151 | self.miniblocks[_i].id = 'M_%d' % (_i + 2) 152 | self.miniblocks[_i].prev_widen(indices, magnifier, noise=noise) 153 | indices = np.concatenate([ 154 | indices, 155 | np.arange(prev_miniblock_out_dim, prev_miniblock_out_dim + miniblock_out_dim) 156 | ]) 157 | magnifier = np.concatenate([ 158 | magnifier, 159 | [1] * miniblock_out_dim, 160 | ]) 161 | prev_miniblock_out_dim += miniblock_out_dim 162 | self.miniblocks = self.miniblocks[:idx + 1] + [new_miniblock] + self.miniblocks[idx + 1:] 163 | return indices, magnifier 164 | else: 165 | raise NotImplementedError 166 | else: 167 | # identity scheme 168 | raise NotImplementedError 169 | else: 170 | # DenseNet without BC 171 | raise NotImplementedError 172 | 173 | def prev_widen(self, indices, magnifier, noise=None): 174 | old_size = np.max(indices) + 1 175 | prev_miniblock_out_dim = old_size 176 | for miniblock in self.miniblocks: 177 | miniblock_out_dim = miniblock.out_features_dim 178 | miniblock.prev_widen(indices, magnifier, noise=noise) 179 | indices = np.concatenate([ 180 | indices, 181 | np.arange(prev_miniblock_out_dim, prev_miniblock_out_dim + miniblock_out_dim) 182 | ]) 183 | magnifier = np.concatenate([ 184 | magnifier, 185 | [1] * miniblock_out_dim, 186 | ]) 187 | prev_miniblock_out_dim += miniblock_out_dim 188 | return True, indices, magnifier 189 | 190 | def widen(self, loc, new_width, widen_type='output_dim', noise=None, input_dim=3): 191 | miniblock_idx = loc['miniblock'] 192 | miniblock = self.miniblocks[miniblock_idx] 193 | old_miniblock_out_dim = miniblock.out_features_dim 194 | change_out_dim, indices, magnifier = miniblock.widen(loc, new_width, widen_type, noise=noise) 195 | if change_out_dim: 196 | prev_miniblock_out_dim = input_dim 197 | for _i in range(0, miniblock_idx): 198 | prev_miniblock_out_dim += self.miniblocks[_i].out_features_dim 199 | indices = np.concatenate([ 200 | np.arange(prev_miniblock_out_dim), 201 | indices + prev_miniblock_out_dim, 202 | ]) 203 | 
magnifier = np.concatenate([ 204 | [1] * prev_miniblock_out_dim, 205 | magnifier, 206 | ]) 207 | prev_miniblock_out_dim += old_miniblock_out_dim 208 | for _i in range(miniblock_idx + 1, len(self.miniblocks)): 209 | miniblock_out_dim = self.miniblocks[_i].out_features_dim 210 | self.miniblocks[_i].prev_widen(indices, magnifier, noise=noise) 211 | indices = np.concatenate([ 212 | indices, 213 | np.arange(prev_miniblock_out_dim, prev_miniblock_out_dim + miniblock_out_dim) 214 | ]) 215 | magnifier = np.concatenate([ 216 | magnifier, 217 | [1] * miniblock_out_dim, 218 | ]) 219 | prev_miniblock_out_dim += miniblock_out_dim 220 | return True, indices, magnifier 221 | else: 222 | return False, None, None 223 | 224 | def deepen(self, loc, new_layer_config, input_dim): 225 | miniblock_idx = loc['miniblock'] 226 | for _i in range(0, miniblock_idx): 227 | input_dim += self.miniblocks[_i].out_features_dim 228 | return self.miniblocks[miniblock_idx].deepen(loc, new_layer_config, input_dim) 229 | 230 | 231 | class DenseNetConfig: 232 | def __init__(self): 233 | self.net_config = { 234 | 'model_type': None, 235 | 'weight_decay': None, 236 | 'first_ratio': None, 237 | 'reduction': None, 238 | 'bc_ratio': None, 239 | 'bn_epsilon': None, 240 | 'bn_decay': None, 241 | 'pre_activation': None, 242 | } 243 | self.blocks = None 244 | 245 | @property 246 | def model_type(self): return self.net_config['model_type'] 247 | 248 | @property 249 | def weight_decay(self): return self.net_config['weight_decay'] 250 | 251 | @property 252 | def first_ratio(self): return self.net_config['first_ratio'] 253 | 254 | @property 255 | def reduction(self): return self.net_config['reduction'] 256 | 257 | @property 258 | def bc_ratio(self): return self.net_config['bc_ratio'] 259 | 260 | @property 261 | def bn_epsilon(self): return self.net_config['bn_epsilon'] 262 | 263 | @property 264 | def bn_decay(self): return self.net_config['bn_decay'] 265 | 266 | @property 267 | def depth(self): 268 | depth = 0 269 | for block in self.blocks: 270 | depth += block.depth 271 | return depth 272 | 273 | @property 274 | def average_growth_rate(self): 275 | growth_rate_list = [] 276 | for block in self.blocks: 277 | if isinstance(block, DenseBlock): 278 | for miniblock in block.miniblocks: 279 | growth_rate = miniblock.out_features_dim 280 | growth_rate_list.append(growth_rate) 281 | return np.mean(growth_rate_list) 282 | 283 | def copy(self): 284 | net_config = DenseNetConfig() 285 | net_config.set_net_from_config(self.get_config(), self.renew_init(None), print_info=False) 286 | return net_config 287 | 288 | def get_config(self): 289 | return { 290 | 'name': 'DenseNet', 291 | **self.net_config, 292 | 'blocks': [block.get_config() for block in self.blocks] 293 | } 294 | 295 | def renew_init(self, densenet): 296 | return { 297 | 'blocks': [block.renew_init(densenet) for block in self.blocks] 298 | } 299 | 300 | def set_standard_dense_net(self, data_provider: DataProvider, growth_rate, depth, total_blocks, 301 | keep_prob, weight_decay, model_type, 302 | first_ratio=2, reduction=1.0, bc_ratio=4, 303 | bn_epsilon=1e-5, bn_decay=0.9, print_info=True, 304 | pre_activation=True, **kwargs): 305 | self.net_config = { 306 | 'model_type': model_type, 307 | 'weight_decay': weight_decay, 308 | 'first_ratio': first_ratio, 309 | 'reduction': reduction, 310 | 'bc_ratio': bc_ratio, 311 | 'bn_epsilon': bn_epsilon, 312 | 'bn_decay': bn_decay, 313 | 'pre_activation': pre_activation, 314 | } 315 | 316 | image_size = data_provider.data_shape[0] 317 | 318 | 
first_output_features = growth_rate * first_ratio 319 | bc_mode = (model_type == 'DenseNet-BC') 320 | layers_per_block = (depth - (total_blocks + 1)) // total_blocks 321 | if bc_mode: layers_per_block = layers_per_block // 2 322 | 323 | # initial conv 324 | if pre_activation: 325 | init_conv_layer = ConvLayer('conv_0', first_output_features, kernel_size=3, activation=None, use_bn=False) 326 | else: 327 | init_conv_layer = ConvLayer('conv_0', first_output_features, kernel_size=3, pre_activation=False) 328 | init_transition = TransitionBlock('T_0_first', [init_conv_layer]) 329 | self.blocks = [init_transition] 330 | 331 | # Dense Blocks 332 | in_features_dim = first_output_features 333 | for block_idx in range(1, total_blocks + 1): 334 | miniblocks = [] 335 | block_id = 'D_%d' % block_idx 336 | for miniblock_idx in range(1, layers_per_block + 1): 337 | miniblock_id = 'M_%d' % miniblock_idx 338 | in_bottle = None 339 | if bc_mode: 340 | bottelneck_layer = ConvLayer('conv_0', growth_rate * bc_ratio, kernel_size=1, keep_prob=keep_prob, 341 | pre_activation=pre_activation) 342 | in_bottle = LayerCascade('in_bottle', [bottelneck_layer]) 343 | 344 | branch_0 = LayerCascade('B_0', [ 345 | ConvLayer('conv_0', growth_rate, kernel_size=3, 346 | keep_prob=keep_prob, pre_activation=pre_activation) 347 | ]) 348 | miniblocks.append(LayerMultiBranch(miniblock_id, [branch_0], in_bottle=in_bottle)) 349 | dense_block = DenseBlock(block_id, miniblocks) 350 | self.blocks += [dense_block] 351 | 352 | out_features_dim = dense_block.out_features_dim(in_features_dim) 353 | if block_idx != total_blocks: 354 | out_features_dim = int(out_features_dim * reduction) 355 | transition_id = 'T_%d_middle' % block_idx 356 | conv_layer = ConvLayer('conv_0', out_features_dim, kernel_size=1, keep_prob=keep_prob, 357 | pre_activation=pre_activation) 358 | avg_pool_layer = PoolLayer('pool_0', 'avg', kernel_size=2, strides=2) 359 | transition = TransitionBlock(transition_id, [conv_layer, avg_pool_layer]) 360 | self.blocks.append(transition) 361 | image_size = image_size // 2 362 | in_features_dim = out_features_dim 363 | 364 | # Transition to classes 365 | if pre_activation: 366 | global_avg_pool = PoolLayer('pool_0', 'avg', kernel_size=image_size, strides=image_size, 367 | activation='relu', use_bn=True) 368 | else: 369 | global_avg_pool = PoolLayer('pool_0', 'avg', kernel_size=image_size, strides=image_size, 370 | pre_activation=False) 371 | final_fc_layer = FCLayer('fc_0', data_provider.n_classes, use_bn=False, use_bias=True, activation=None) 372 | transition_to_classes = TransitionBlock('T_to_classes', [global_avg_pool, final_fc_layer]) 373 | self.blocks.append(transition_to_classes) 374 | 375 | # print information about the network 376 | if print_info: 377 | print('Set Standard %s' % model_type) 378 | 379 | if not bc_mode: 380 | print('Build %s model with %d blocks, ' 381 | '%d composite layers each.' % (model_type, total_blocks, layers_per_block)) 382 | if bc_mode: 383 | print('Build %s model with %d blocks, ' 384 | '%d bottleneck layers and %d composite layers each.' 
% ( 385 | model_type, total_blocks, layers_per_block, layers_per_block)) 386 | print('Reduction at transition layers: %.2f' % reduction) 387 | return self 388 | 389 | def set_net_from_config(self, net_config_json, init=None, print_info=True): 390 | # load config and init (if exist) 391 | for key in self.net_config.keys(): 392 | self.net_config[key] = net_config_json[key] 393 | self.blocks = [] 394 | for _i, block_config in enumerate(net_config_json['blocks']): 395 | block_init = init['blocks'][_i] if init is not None else None 396 | block = get_block_by_name(block_config['name']) 397 | self.blocks.append(block.set_from_config(block_config, block_init)) 398 | if print_info: 399 | print('Set DenseNet from config:') 400 | for k, v in self.net_config.items(): 401 | print('\t%s: %s' % (k, v)) 402 | print('\t%s: %d' % ('depth', self.depth)) 403 | return self 404 | 405 | def widen(self, loc, new_width, widen_type='output_dim', noise=None, image_channel=3): 406 | """ 407 | widen_type: "output_dim" or "kernel" 408 | """ 409 | block_idx = loc['block'] 410 | if block_idx == 0: 411 | input_dim = image_channel 412 | elif isinstance(self.blocks[block_idx - 1], TransitionBlock): 413 | input_dim = self.blocks[block_idx - 1].out_features_dim 414 | else: 415 | input_dim = self.blocks[block_idx - 1].out_features_dim(self.blocks[block_idx - 2].out_features_dim) 416 | 417 | change_out_dim, indices, magnifier = \ 418 | self.blocks[block_idx].widen(loc, new_width, widen_type, noise=noise, input_dim=input_dim) 419 | while change_out_dim: 420 | change_out_dim, indices, magnifier = self.blocks[block_idx + 1].prev_widen(indices, magnifier, noise=noise) 421 | block_idx += 1 422 | 423 | def deepen(self, loc, new_layer_config, image_channel=3): 424 | new_layer_config['pre_activation'] = self.net_config['pre_activation'] 425 | block_idx = loc['block'] 426 | if block_idx == 0: 427 | input_dim = image_channel 428 | elif isinstance(self.blocks[block_idx - 1], TransitionBlock): 429 | input_dim = self.blocks[block_idx - 1].out_features_dim 430 | else: 431 | input_dim = self.blocks[block_idx - 1].out_features_dim(self.blocks[block_idx - 2].out_features_dim) 432 | 433 | return self.blocks[block_idx].deepen(loc, new_layer_config, input_dim) 434 | 435 | def set_identity4deepen(self, to_set_layers, data_provider, batch_size, batch_num=1, strict=True, noise=None): 436 | """ 437 | to_set_layers = [(new_layer, prev_layer), ...] 
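Analogous to SimpleConvnetConfig.set_identity4deepen: layers that are not yet ready are set to identity, and strict batch-norm layers obtain their moving statistics from `batch_num` forward batches through a temporary DenseNet built with only_forward=True.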
438 | """ 439 | task_list = {} 440 | for new_layer, prev_layer in to_set_layers: 441 | if new_layer.ready: continue 442 | if new_layer.use_bn and strict: 443 | task_id = id(prev_layer) 444 | if task_id in task_list: 445 | task_list[task_id][1].append(new_layer) 446 | else: 447 | task_list[task_id] = (prev_layer, [new_layer]) 448 | else: 449 | new_layer.set_identity_layer(strict=strict, noise=noise) 450 | if len(task_list) > 0: 451 | model = DenseNet(None, data_provider, None, net_config=self, only_forward=True) 452 | task_list = list(task_list.values()) 453 | fetches = [prev_layer.output_op for prev_layer, _ in task_list] 454 | statistics = [[0, 0] for _ in task_list] 455 | for _i in range(batch_num): 456 | input_images, _ = data_provider.train.next_batch(batch_size) 457 | outputs = model.sess.run(fetches, feed_dict={model.images: input_images, model.is_training: False}) 458 | for _j, out in enumerate(outputs): 459 | out = out.astype('float32') 460 | axis = tuple(range(len(out.shape) - 1)) 461 | mean = np.mean(out, axis=axis, keepdims=True) 462 | variance = np.mean(np.square(out - mean), axis=axis, keepdims=True) 463 | mean, variance = np.squeeze(mean), np.squeeze(variance) 464 | statistics[_j][0] += mean 465 | statistics[_j][1] += variance 466 | for _j, (prev_layer, new_layers) in enumerate(task_list): 467 | mean, variance = statistics[_j][0] / batch_num, statistics[_j][1] / batch_num 468 | for new_layer in new_layers: 469 | if new_layer.ready: continue 470 | param = { 471 | 'moving_mean': mean, 472 | 'moving_variance': variance, 473 | 'epsilon': self.bn_epsilon, 474 | } 475 | new_layer.set_identity_layer(strict=strict, param=param, noise=noise) 476 | 477 | def insert_miniblock(self, loc, miniblock_config, image_channel=3, noise=None): 478 | block_idx = loc['block'] 479 | if block_idx == 0: 480 | input_dim = image_channel 481 | elif isinstance(self.blocks[block_idx - 1], TransitionBlock): 482 | input_dim = self.blocks[block_idx - 1].out_features_dim 483 | else: 484 | input_dim = self.blocks[block_idx - 1].out_features_dim(self.blocks[block_idx - 2].out_features_dim) 485 | 486 | assert isinstance(self.blocks[block_idx], DenseBlock), 'Invalid' 487 | indices, magnifier = \ 488 | self.blocks[block_idx].insert_miniblock(loc['miniblock'], miniblock_config, input_dim, noise=noise) 489 | self.blocks[block_idx + 1].prev_widen(indices, magnifier, noise=noise) 490 | 491 | 492 | class DenseNet(BasicModel): 493 | def _build_graph(self, only_forward=False): 494 | _input = self.images 495 | output = _input 496 | # building blocks (transition and dense) 497 | for block in self.net_config.blocks: 498 | output = block.build(output, self, store_output_op=only_forward) 499 | 500 | if not only_forward: 501 | logits = output 502 | with tf.variable_scope('L2_Loss'): 503 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 504 | 505 | prediction = tf.nn.softmax(logits) 506 | 507 | # losses 508 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( 509 | logits=logits, labels=self.labels)) 510 | self.cross_entropy = cross_entropy 511 | 512 | # optimizer and train step 513 | optimizer = self.build_optimizer(self.learning_rate, 514 | self.run_config.opt_config[0], self.run_config.opt_config[1]) 515 | self.train_step = optimizer.minimize( 516 | cross_entropy + l2_loss * self.net_config.weight_decay) 517 | correct_prediction = tf.equal( 518 | tf.argmax(prediction, 1), 519 | tf.argmax(self.labels, 1)) 520 | self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 
tf.float32)) 521 | -------------------------------------------------------------------------------- /code/models/layer_cascade.py: -------------------------------------------------------------------------------- 1 | from models.layers import ConvLayer, FCLayer, PoolLayer, get_layer_by_name 2 | import tensorflow as tf 3 | 4 | 5 | class LayerCascade: 6 | def __init__(self, _id, layers): 7 | self._id = _id 8 | self.layers = layers 9 | 10 | self.output_op = None 11 | 12 | @property 13 | def id(self): 14 | return self._id 15 | 16 | @id.setter 17 | def id(self, value): 18 | self._id = value 19 | 20 | @property 21 | def out_features_dim(self): 22 | for layer in self.layers[::-1]: 23 | if isinstance(layer, ConvLayer): 24 | return layer.filter_num 25 | elif isinstance(layer, FCLayer): 26 | return layer.units 27 | return None 28 | 29 | @property 30 | def depth(self): 31 | depth = 0 32 | for layer in self.layers: 33 | if isinstance(layer, ConvLayer) or isinstance(layer, FCLayer): 34 | depth += 1 35 | return depth 36 | 37 | def get_str(self): 38 | layers_str = [layer.layer_str for layer in self.layers] 39 | return '-'.join(layers_str) 40 | 41 | def build(self, _input, densenet, store_output_op=False): 42 | output = _input 43 | with tf.variable_scope(self._id): 44 | for layer in self.layers: 45 | output = layer.build(output, densenet, store_output_op=store_output_op) 46 | if store_output_op: 47 | self.output_op = output 48 | return output 49 | 50 | def get_config(self): 51 | return { 52 | '_id': self._id, 53 | 'layers': [layer.get_config() for layer in self.layers] 54 | } 55 | 56 | def renew_init(self, densenet): 57 | return { 58 | '_id': self._id, 59 | 'layers': [layer.renew_init(densenet) for layer in self.layers] 60 | } 61 | 62 | def copy(self): 63 | return self.set_from_config(self.get_config(), init=self.renew_init(None)) 64 | 65 | @staticmethod 66 | def set_from_config(config_json, init=None, return_class=True): 67 | _id = config_json['_id'] 68 | layers = [] 69 | for _i, layer_config in enumerate(config_json['layers']): 70 | layer_init = init['layers'][_i] if init is not None else None 71 | layer = get_layer_by_name(layer_config['name']) 72 | layers.append(layer.set_from_config(layer_config, layer_init)) 73 | if return_class: 74 | return LayerCascade(_id, layers) 75 | else: 76 | return _id, layers 77 | 78 | """ 79 | Network Transformation Operations 80 | """ 81 | 82 | def prev_widen(self, indices, magnifier, noise=None): 83 | for layer in self.layers: 84 | if isinstance(layer, ConvLayer) or isinstance(layer, FCLayer): 85 | layer.prev_widen(indices, magnifier, noise=noise) 86 | break 87 | else: 88 | layer.prev_widen(indices, magnifier, noise=noise) 89 | 90 | def widen(self, idx, new_width, widen_type='output_dim', noise=None): 91 | assert idx < len(self.layers), 'Index out of range: %d' % idx 92 | if widen_type == 'output_dim': 93 | assert isinstance(self.layers[idx], ConvLayer) or \ 94 | isinstance(self.layers[idx], FCLayer), 'Operation not available' 95 | to_widen_layer = self.layers[idx] 96 | 97 | if isinstance(to_widen_layer, ConvLayer): 98 | indices, magnifier = to_widen_layer.widen_filters(new_filter_num=new_width, noise=noise) 99 | else: 100 | indices, magnifier = to_widen_layer.widen_units(new_units_num=new_width, noise=noise) 101 | after_widen_layer = None 102 | for _i in range(idx + 1, len(self.layers)): 103 | if isinstance(self.layers[_i], ConvLayer) or isinstance(self.layers[_i], FCLayer): 104 | self.layers[_i].prev_widen(indices, magnifier, noise=noise) 105 | after_widen_layer = 
self.layers[_i] 106 | break 107 | else: 108 | self.layers[_i].prev_widen(indices, magnifier, noise=noise) 109 | return after_widen_layer is None, indices, magnifier 110 | else: 111 | raise ValueError('%s is not supported' % widen_type) 112 | 113 | def deepen(self, idx, new_layer_config, input_dim): 114 | assert idx < len(self.layers), 'Index out of range: %d' % idx 115 | if new_layer_config['name'] == 'fc': 116 | assert idx == len(self.layers) - 1 or isinstance(self.layers[idx + 1], FCLayer), 'Invalid' 117 | assert isinstance(self.layers[idx], FCLayer) or isinstance(self.layers[idx], PoolLayer), 'Invalid' 118 | # prepare the new fc layer 119 | units = input_dim 120 | for _i in range(idx, -1, -1): 121 | if isinstance(self.layers[_i], FCLayer): 122 | units = self.layers[_i].units 123 | break 124 | elif isinstance(self.layers[_i], ConvLayer): 125 | units = self.layers[_i].filter_num 126 | break 127 | fc_idx = 0 128 | for _i in range(0, idx + 1): 129 | if isinstance(self.layers[_i], FCLayer): 130 | fc_idx += 1 131 | _id = 'fc_%d' % fc_idx 132 | # change the id of following fc layers 133 | for _i in range(idx + 1, len(self.layers)): 134 | if isinstance(self.layers[_i], FCLayer): 135 | self.layers[_i].id = 'fc_%d' % (fc_idx + 1) 136 | fc_idx += 1 137 | prev_layer = None 138 | for _i in range(idx, -1, -1): 139 | if self.layers[_i].ready: 140 | prev_layer = self.layers[_i] 141 | break 142 | assert prev_layer is not None, 'Invalid' 143 | new_fc_layer = FCLayer(_id, units, ready=False, **new_layer_config) 144 | # insert the new layer into the cascade 145 | self.layers = self.layers[:idx + 1] + [new_fc_layer] + self.layers[idx + 1:] 146 | return new_fc_layer, prev_layer 147 | elif new_layer_config['name'] == 'conv': 148 | assert idx == len(self.layers) - 1 or not isinstance(self.layers[idx + 1], FCLayer), 'Invalid' 149 | assert isinstance(self.layers[idx], ConvLayer) or isinstance(self.layers[idx], FCLayer), 'Invalid' 150 | # prepare the new conv layer 151 | filter_num = input_dim 152 | for _i in range(idx, -1, -1): 153 | if isinstance(self.layers[_i], ConvLayer): 154 | filter_num = self.layers[_i].filter_num 155 | break 156 | conv_idx = 0 157 | for _i in range(0, idx + 1): 158 | if isinstance(self.layers[_i], ConvLayer): 159 | conv_idx += 1 160 | _id = 'conv_%d' % conv_idx 161 | # change the id of following conv layers 162 | for _i in range(idx + 1, len(self.layers)): 163 | if isinstance(self.layers[_i], ConvLayer): 164 | self.layers[_i].id = 'conv_%d' % (conv_idx + 1) 165 | conv_idx += 1 166 | prev_layer = None 167 | for _i in range(idx, -1, -1): 168 | if self.layers[_i].ready: 169 | prev_layer = self.layers[_i] 170 | break 171 | assert prev_layer is not None, 'Invalid' 172 | new_conv_layer = ConvLayer(_id, filter_num, ready=False, **new_layer_config) 173 | self.layers = self.layers[:idx + 1] + [new_conv_layer] + self.layers[idx + 1:] 174 | return new_conv_layer, prev_layer 175 | else: 176 | raise ValueError('Not support to insert a %s layer' % new_layer_config['name']) 177 | -------------------------------------------------------------------------------- /code/models/layer_multi_branch.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from models.layer_cascade import LayerCascade 4 | 5 | 6 | class LayerMultiBranch: 7 | def __init__(self, _id, branches, merge=None, in_bottle=None, out_bottle=None): 8 | self._id = _id 9 | self.in_bottle = in_bottle 10 | self.branches = branches 11 | self.out_bottle = out_bottle 
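# Note (editor): a LayerMultiBranch is an optional `in_bottle` cascade, a set of parallel `branches`, and an optional
# `out_bottle` cascade; `merge` ('concat', 'add', or None when there is a single branch) decides how build() combines
# the branch outputs.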
12 | self.merge = merge 13 | if self.merge == 'add': 14 | out_dim = [] 15 | for branch in self.branches: 16 | out_dim.append(branch.out_features_dim) 17 | assert np.std(out_dim) == 0, '<%s> require the output dim of all branches are the same' % self.merge 18 | elif self.merge is None: 19 | assert len(self.branches) == 1, 'Invalid' 20 | 21 | self.output_op = None 22 | 23 | @property 24 | def id(self): 25 | return self._id 26 | 27 | @id.setter 28 | def id(self, value): 29 | self._id = value 30 | 31 | @property 32 | def out_features_dim(self): 33 | if self.out_bottle: 34 | return self.out_bottle.out_features_dim 35 | out_dim = [] 36 | for branch in self.branches: 37 | out_dim.append(branch.out_features_dim) 38 | if self.merge == 'concat': 39 | return np.sum(out_dim) 40 | elif self.merge == 'add' or self.merge is None: 41 | return out_dim[0] 42 | else: 43 | pass 44 | 45 | @property 46 | def depth(self): 47 | depth = 0 48 | if self.in_bottle: 49 | depth += self.in_bottle.depth 50 | if self.out_bottle: 51 | depth += self.out_bottle.depth 52 | branch_depth = [] 53 | for branch in self.branches: 54 | branch_depth.append(branch.depth) 55 | depth += np.max(branch_depth) 56 | return depth 57 | 58 | def get_str(self): 59 | in_bottle_str = 'N' if self.in_bottle is None else self.in_bottle.get_str() 60 | branches_str = [branch.get_str() for branch in self.branches] 61 | branches_str = '+'.join(branches_str) 62 | out_bottle_str = 'N' if self.out_bottle is None else self.out_bottle.get_str() 63 | return '%s~%s~%s' % (in_bottle_str, branches_str, out_bottle_str) 64 | 65 | def build(self, _input, densenet, store_output_op=False): 66 | with tf.variable_scope(self._id): 67 | output = _input 68 | # in bottle 69 | if self.in_bottle: 70 | output = self.in_bottle.build(output, densenet, store_output_op=store_output_op) 71 | # branches 72 | branch_out = [] 73 | for branch in self.branches: 74 | branch_out.append(branch.build(output, densenet, store_output_op=store_output_op)) 75 | if self.merge == 'concat': 76 | output = tf.concat(branch_out, axis=3) 77 | elif self.merge == 'add': 78 | output = tf.add_n(branch_out) 79 | elif self.merge is None: 80 | output = branch_out[0] 81 | else: 82 | raise ValueError('Do not support <%s>' % self.merge) 83 | # out bottle 84 | if self.out_bottle: 85 | output = self.out_bottle.build(output, densenet, store_output_op=store_output_op) 86 | if store_output_op: 87 | self.output_op = output 88 | return output 89 | 90 | def get_config(self): 91 | return { 92 | '_id': self._id, 93 | 'merge': self.merge, 94 | 'branches': [branch.get_config() for branch in self.branches], 95 | 'in_bottle': None if self.in_bottle is None else self.in_bottle.get_config(), 96 | 'out_bottle': None if self.out_bottle is None else self.out_bottle.get_config(), 97 | } 98 | 99 | def renew_init(self, densenet): 100 | return { 101 | '_id': self._id, 102 | 'branches': [branch.renew_init(densenet) for branch in self.branches], 103 | 'in_bottle': None if self.in_bottle is None else self.in_bottle.renew_init(densenet), 104 | 'out_bottle': None if self.out_bottle is None else self.out_bottle.renew_init(densenet), 105 | } 106 | 107 | @staticmethod 108 | def set_from_config(config_json, init=None): 109 | _id = config_json['_id'] 110 | merge = config_json['merge'] 111 | branches = [] 112 | for _i, branch_config in enumerate(config_json['branches']): 113 | branch_init = init['branches'][_i] if init is not None else None 114 | branch = LayerCascade.set_from_config(branch_config, branch_init) 115 | branches.append(branch) 116 
| in_bottle = config_json['in_bottle'] 117 | if in_bottle: 118 | in_bottle_init = init['in_bottle'] if init is not None else None 119 | in_bottle = LayerCascade.set_from_config(in_bottle, in_bottle_init) 120 | out_bottle = config_json['out_bottle'] 121 | if out_bottle: 122 | out_bottle_init = init['out_bottle'] if init is not None else None 123 | out_bottle = LayerCascade.set_from_config(out_bottle, out_bottle_init) 124 | return LayerMultiBranch(_id, branches, merge, in_bottle=in_bottle, out_bottle=out_bottle) 125 | 126 | """ 127 | Network Transformation Operations 128 | """ 129 | 130 | def prev_widen(self, indices, magnifier, noise=None): 131 | if self.in_bottle: 132 | self.in_bottle.prev_widen(indices, magnifier, noise=noise) 133 | else: 134 | for branch in self.branches: 135 | branch.prev_widen(indices, magnifier, noise=noise) 136 | 137 | def widen(self, loc, new_width, widen_type='output_dim', noise=None): 138 | if loc['multi-branch'] == 'in_bottle': 139 | assert self.in_bottle is not None, 'Invalid' 140 | change_out_dim, indices, magnifier = self.in_bottle.widen(loc['layer'], new_width, widen_type, noise=noise) 141 | if change_out_dim: 142 | for branch in self.branches: 143 | branch.prev_widen(indices, magnifier, noise=noise) 144 | return False, None, None 145 | elif loc['multi-branch'] == 'out_bottle': 146 | assert self.out_bottle is not None, 'Invalid' 147 | change_out_dim, indices, magnifier = self.out_bottle.widen(loc['layer'], new_width, widen_type, noise=noise) 148 | return change_out_dim, indices, magnifier 149 | elif loc['multi-branch'] == 'branch': 150 | branch_idx = loc['branch'] 151 | branch = self.branches[branch_idx] 152 | old_branch_out_dim = branch.out_features_dim 153 | change_out_dim, indices, magnifier = branch.widen(loc['layer'], new_width, widen_type, noise=noise) 154 | if change_out_dim: 155 | assert self.merge != 'add', 'Invalid' 156 | prev_branch_out_dim = 0 157 | for _i in range(0, branch_idx): 158 | prev_branch_out_dim += self.branches[_i].out_features_dim 159 | post_branch_out_dim = 0 160 | for _i in range(branch_idx + 1, len(self.branches)): 161 | post_branch_out_dim += self.branches[_i].out_features_dim 162 | old_size = prev_branch_out_dim + old_branch_out_dim + post_branch_out_dim 163 | base = np.arange(old_size) 164 | indices = np.concatenate([ 165 | base[:prev_branch_out_dim], 166 | indices + prev_branch_out_dim, 167 | base[prev_branch_out_dim + old_branch_out_dim:] 168 | ]) 169 | magnifier = np.concatenate([ 170 | [1] * prev_branch_out_dim, 171 | magnifier, 172 | [1] * post_branch_out_dim, 173 | ]) 174 | if self.out_bottle is None: 175 | return True, indices, magnifier 176 | else: 177 | self.out_bottle.prev_widen(indices, magnifier, noise=noise) 178 | return False, None, None 179 | else: 180 | return False, None, None 181 | else: 182 | raise ValueError('Do not support %s' % loc['multi-branch']) 183 | 184 | def deepen(self, loc, new_layer_config, input_dim): 185 | if loc['multi-branch'] == 'in_bottle': 186 | assert self.in_bottle is not None, 'Invalid' 187 | return self.in_bottle.deepen(loc['layer'], new_layer_config, input_dim) 188 | elif loc['multi-branch'] == 'out_bottle': 189 | assert self.out_bottle is not None, 'Invalid' 190 | if self.merge == 'concat': input_dim = np.sum([branch.out_features_dim for branch in self.branches]) 191 | else: input_dim = self.branches[0].out_features_dim 192 | return self.out_bottle.deepen(loc['layer'], new_layer_config, input_dim) 193 | elif loc['multi-branch'] == 'branch': 194 | if self.in_bottle is not None: 
input_dim = self.in_bottle.out_features_dim 195 | return self.branches[loc['branch']].deepen(loc['layer'], new_layer_config, input_dim) 196 | else: 197 | raise ValueError('Do not support %s' % loc['multi-branch']) 198 | 199 | def remapped_branches(self, noise=None): 200 | if self.merge == 'add' or self.merge is None: 201 | size = self.out_features_dim 202 | indices = np.random.choice(np.arange(size), size) 203 | new_branches = [] 204 | for branch in self.branches: 205 | new_layers = [layer.copy() for layer in branch.layers[:-1]] 206 | last_layer = branch.layers[-1].copy().remap(indices, noise=noise) 207 | new_layers.append(last_layer) 208 | new_branch = LayerCascade(branch.id, new_layers) 209 | new_branches.append(new_branch) 210 | elif self.merge == 'concat': 211 | new_branches = [] 212 | offset = 0 213 | indices = [] 214 | for branch in self.branches: 215 | size = branch.out_features_dim 216 | sub_indices = np.random.choice(np.arange(size), size) 217 | new_layers = [layer.copy() for layer in branch.layers[:-1]] 218 | last_layer = branch.layers[-1].copy().remap(sub_indices, noise=noise) 219 | new_layers.append(last_layer) 220 | new_branch = LayerCascade(branch.id, new_layers) 221 | new_branches.append(new_branch) 222 | indices.append(sub_indices + offset) 223 | offset += size 224 | indices = np.concatenate(indices) 225 | else: 226 | raise NotImplementedError 227 | return new_branches, indices 228 | -------------------------------------------------------------------------------- /code/models/layers.py: -------------------------------------------------------------------------------- 1 | from models.basic_model import BasicModel 2 | import tensorflow as tf 3 | import numpy as np 4 | import copy 5 | 6 | 7 | def apply_noise(weights, noise_config): 8 | if noise_config is None: 9 | return weights 10 | noise_type = noise_config.get('type', 'normal') 11 | if noise_type == 'normal': 12 | ratio = noise_config.get('ratio', 1e-3) 13 | std = np.std(weights) 14 | noise = np.random.normal(0, std * ratio, size=weights.shape) 15 | elif noise_type == 'uniform': 16 | ratio = noise_config.get('ratio', 1e-3) 17 | mean, _max = np.mean(weights), np.max(weights) 18 | width = (_max - mean) * ratio 19 | noise = np.random.uniform(-width, width, size=weights.shape) 20 | else: 21 | raise NotImplementedError 22 | return weights + noise 23 | 24 | 25 | def get_layer_by_name(name): 26 | if name == 'conv': 27 | return ConvLayer 28 | elif name == 'fc': 29 | return FCLayer 30 | elif name == 'pool': 31 | return PoolLayer 32 | else: 33 | raise ValueError('Unknown layer type: %s' % name) 34 | 35 | 36 | def get_magnifier(old_size, indices): 37 | _l = np.zeros(old_size) 38 | for x in indices: 39 | _l[x] += 1 40 | magnifier = (1.0 / _l)[indices] 41 | return magnifier 42 | 43 | 44 | def get_random_remapping(old_size, new_size): 45 | base = np.arange(old_size) 46 | indices = np.concatenate([base, np.random.choice(base, new_size - old_size)]) 47 | 48 | magnifier = get_magnifier(old_size, indices) 49 | return indices, magnifier 50 | 51 | 52 | class BaseLayer: 53 | """ 54 | _id, batch normalization, activation, dropout, ready 55 | """ 56 | def __init__(self, _id, use_bn=True, activation='relu', keep_prob=1.0, ready=True, pre_activation=True): 57 | self._id = _id 58 | self.use_bn = use_bn 59 | self.activation = activation 60 | self.keep_prob = keep_prob 61 | self.ready = ready 62 | self.pre_activation = pre_activation 63 | 64 | self._scope = None 65 | self._init = None 66 | self.output_op = None 67 | 68 | @property 69 | def id(self): 
return self._id 70 | 71 | @id.setter 72 | def id(self, value): self._id = value 73 | 74 | @property 75 | def init(self): 76 | return self._init 77 | 78 | @property 79 | def param_initializer(self): 80 | if self._init is None: 81 | return None 82 | param_initializer = {} 83 | for key in self.variable_list.keys(): 84 | if self._init[key] is not None: 85 | param_initializer[key] = tf.constant_initializer(self._init[key]) 86 | if len(param_initializer) == 0: 87 | param_initializer = None 88 | return param_initializer 89 | 90 | def renew_init(self, net: BasicModel): 91 | if net is None: 92 | return copy.deepcopy(self._init) 93 | 94 | self._init = {} 95 | for key, var_name in self.variable_list.items(): 96 | var = net.graph.get_tensor_by_name('%s/%s' % (self._scope, var_name)) 97 | self._init[key] = net.sess.run(var) 98 | if len(self._init) == 0: 99 | self._init = None 100 | return copy.deepcopy(self._init) 101 | 102 | def copy(self): 103 | return self.set_from_config(self.get_config(), layer_init=copy.deepcopy(self._init)) 104 | 105 | def get_config(self): 106 | return { 107 | '_id': self.id, 108 | 'use_bn': self.use_bn, 109 | 'activation': self.activation, 110 | 'keep_prob': self.keep_prob, 111 | 'pre_activation': self.pre_activation, 112 | } 113 | 114 | @property 115 | def variable_list(self): 116 | """ 117 | beta: mean scale 118 | gamma: variance scale 119 | y = gamma * (x - moving_mean) / sqrt(epsilon + moving_variance) + beta 120 | """ 121 | if self.use_bn: 122 | return { 123 | 'moving_mean': 'BatchNorm/moving_mean:0', 124 | 'moving_variance': 'BatchNorm/moving_variance:0', 125 | 'beta': 'BatchNorm/beta:0', 126 | 'gamma': 'BatchNorm/gamma:0', 127 | } 128 | else: 129 | return {} 130 | 131 | @staticmethod 132 | def set_from_config(layer_config, layer_init): 133 | raise NotImplementedError 134 | 135 | def build(self, _input, net, store_output_op): 136 | raise NotImplementedError 137 | 138 | def prev_widen(self, indices, magnifier, noise=None): 139 | raise NotImplementedError 140 | 141 | def set_identity_layer(self, strict, param, noise): 142 | raise NotImplementedError 143 | 144 | def widen_bn(self, indices, magnifier, noise=None): 145 | if self.use_bn: 146 | self._init['beta'] = self._init['beta'][indices] 147 | self._init['gamma'] = self._init['gamma'][indices] 148 | self._init['moving_mean'] = self._init['moving_mean'][indices] 149 | self._init['moving_variance'] = self._init['moving_variance'][indices] 150 | 151 | def set_bn_identity(self, strict=True, param=None, noise=None): 152 | if self.use_bn: 153 | if strict: 154 | self._init['moving_mean'] = param['moving_mean'] 155 | self._init['moving_variance'] = param['moving_variance'] 156 | self._init['beta'] = self._init['moving_mean'] 157 | self._init['gamma'] = np.sqrt(self._init['moving_variance'] + param['epsilon']) 158 | else: 159 | # use default initialization for batch normalization layer 160 | self._init['moving_mean'], self._init['moving_variance'] = None, None 161 | self._init['beta'], self._init['gamma'] = None, None 162 | 163 | 164 | class ConvLayer(BaseLayer): 165 | def __init__(self, _id, filter_num, kernel_size=3, strides=1, 166 | use_bn=True, activation='relu', keep_prob=1.0, ready=True, pre_activation=True, **kwargs): 167 | BaseLayer.__init__(self, _id, use_bn, activation, keep_prob, ready, pre_activation) 168 | self.filter_num = filter_num 169 | self.kernel_size = kernel_size 170 | self.strides = strides 171 | 172 | @property 173 | def layer_str(self): 174 | return 'C%d,%d,%d' % (self.filter_num, self.kernel_size, 
self.strides) 175 | 176 | @property 177 | def variable_list(self): 178 | var_list = {'kernel': 'kernel:0'} 179 | var_list.update(super(ConvLayer, self).variable_list) 180 | return var_list 181 | 182 | def get_config(self): 183 | return { 184 | 'name': 'conv', 185 | 'filter_num': self.filter_num, 186 | 'kernel_size': self.kernel_size, 187 | 'strides': self.strides, 188 | **super(ConvLayer, self).get_config(), 189 | } 190 | 191 | @staticmethod 192 | def set_from_config(layer_config, layer_init=None): 193 | conv_layer = ConvLayer(**layer_config) 194 | conv_layer._init = layer_init 195 | return conv_layer 196 | 197 | def build(self, _input, net: BasicModel, store_output_op=False): 198 | output = _input 199 | if not self.ready: 200 | return output 201 | with tf.variable_scope(self._id): 202 | self._scope = tf.get_variable_scope().name 203 | param_initializer = self.param_initializer 204 | if self.pre_activation: 205 | # batch normalization 206 | if self.use_bn: 207 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 208 | net.net_config.bn_decay, param_initializer=param_initializer) 209 | # activation 210 | output = BasicModel.activation(output, self.activation) 211 | # convolutional 212 | output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides, 213 | param_initializer=param_initializer) 214 | else: 215 | # convolutional 216 | output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides, 217 | param_initializer=param_initializer) 218 | # batch normalization 219 | if self.use_bn: 220 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 221 | net.net_config.bn_decay, param_initializer=param_initializer) 222 | # activation 223 | output = BasicModel.activation(output, self.activation) 224 | # dropout 225 | output = BasicModel.dropout(output, self.keep_prob, net.is_training) 226 | if store_output_op: 227 | self.output_op = output 228 | return output 229 | 230 | def widen_filters(self, new_filter_num, noise=None): 231 | """ 232 | Increase the filter number of a conv layer while preserving the functionality 233 | Proposed in 'Net2Net': https://arxiv.org/abs/1511.05641 234 | """ 235 | assert new_filter_num > self.filter_num, 'Invalid new filter number: %d' % new_filter_num 236 | assert self._init is not None, 'Uninitialized layer' 237 | old_size, new_size = self.filter_num, new_filter_num 238 | indices, magnifier = get_random_remapping(old_size, new_size) 239 | # more filters 240 | self.filter_num = new_filter_num 241 | new_kernel = self._init['kernel'][:, :, :, indices] 242 | new_kernel[:, :, :, old_size:] = apply_noise(new_kernel[:, :, :, old_size:], noise.get('wider')) 243 | self._init['kernel'] = new_kernel 244 | if not self.pre_activation: 245 | # widen batch norm variables if use batch norm 246 | self.widen_bn(indices, magnifier, noise=noise) 247 | return indices, magnifier 248 | 249 | def prev_widen(self, indices, magnifier, noise=None): 250 | assert self._init is not None, 'Uninitialized layer' 251 | # rescale kernel 252 | self._init['kernel'] = self._init['kernel'][:, :, indices, :] * magnifier.reshape([1, 1, -1, 1]) 253 | if self.pre_activation: 254 | self.widen_bn(indices, magnifier, noise=noise) 255 | 256 | def set_identity_layer(self, strict=True, param=None, noise=None): 257 | self._init = {} 258 | self.set_bn_identity(strict, param, noise=noise) 259 | mid = self.kernel_size // 2 260 | self._init['kernel'] = np.zeros([self.kernel_size, self.kernel_size, self.filter_num, 
self.filter_num]) 261 | self._init['kernel'][mid, mid] = np.eye(self.filter_num) 262 | self._init['kernel'] = apply_noise(self._init['kernel'], noise.get('deeper')) 263 | self.ready = True 264 | 265 | def remap(self, indices, noise=None): 266 | self.filter_num = len(indices) 267 | self._init['kernel'] = self._init['kernel'][:, :, :, indices] 268 | self._init['kernel'] = apply_noise(self._init['kernel'], noise.get('wider')) 269 | if not self.pre_activation: 270 | self.widen_bn(indices, None, noise=noise) 271 | return self 272 | 273 | 274 | class FCLayer(BaseLayer): 275 | def __init__(self, _id, units, use_bn=True, use_bias=False, activation='relu', keep_prob=1.0, ready=True, 276 | pre_activation=False, **kwargs): 277 | BaseLayer.__init__(self, _id, use_bn, activation, keep_prob, ready, pre_activation) 278 | self.units = units 279 | self.use_bias = use_bias 280 | 281 | @property 282 | def layer_str(self): 283 | return 'FC%d' % self.units 284 | 285 | @property 286 | def variable_list(self): 287 | var_list = {'W': 'W:0'} 288 | if self.use_bias: 289 | var_list['bias'] = 'bias:0' 290 | var_list.update(super(FCLayer, self).variable_list) 291 | return var_list 292 | 293 | def get_config(self): 294 | return { 295 | 'name': 'fc', 296 | 'units': self.units, 297 | 'use_bias': self.use_bias, 298 | **super(FCLayer, self).get_config(), 299 | } 300 | 301 | @staticmethod 302 | def set_from_config(layer_config, layer_init=None): 303 | fc_layer = FCLayer(**layer_config) 304 | fc_layer._init = layer_init 305 | return fc_layer 306 | 307 | def build(self, _input, net: BasicModel, store_output_op=False): 308 | output = _input 309 | if not self.ready: 310 | return output 311 | with tf.variable_scope(self._id): 312 | self._scope = tf.get_variable_scope().name 313 | param_initializer = self.param_initializer 314 | # flatten if not 315 | output = BasicModel.flatten(output) 316 | if self.pre_activation: 317 | # batch normalization 318 | if self.use_bn: 319 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 320 | net.net_config.bn_decay, param_initializer=param_initializer) 321 | # activation 322 | output = BasicModel.activation(output, self.activation) 323 | # FC 324 | output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer) 325 | else: 326 | # FC 327 | output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer) 328 | # batch normalization 329 | if self.use_bn: 330 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 331 | net.net_config.bn_decay, param_initializer=param_initializer) 332 | # activation 333 | output = BasicModel.activation(output, self.activation) 334 | # dropout 335 | output = BasicModel.dropout(output, self.keep_prob, net.is_training) 336 | if store_output_op: 337 | self.output_op = output 338 | return output 339 | 340 | def widen_units(self, new_units_num, noise=None): 341 | """ 342 | Increase the units number of a fc layer while preserving the functionality 343 | Proposed in 'Net2Net': https://arxiv.org/abs/1511.05641 344 | W: [in_dim, out_units] 345 | bias: [out_units] 346 | """ 347 | assert new_units_num > self.units, 'Invalid new units number: %d' % new_units_num 348 | assert self._init is not None, 'Uninitialized layer' 349 | old_size, new_size = self.units, new_units_num 350 | indices, magnifier = get_random_remapping(old_size, new_size) 351 | # more units 352 | self._init['W'] = self._init['W'][:, indices] 353 | self._init['W'][:, old_size:] = 
apply_noise(self._init['W'][:, old_size:], noise.get('wider')) 354 | self.units = new_units_num 355 | # widen bias variable if exist 356 | if self.use_bias: 357 | self._init['bias'] = self._init['bias'][indices] 358 | self._init['bias'][old_size:] = apply_noise(self._init['bias'][old_size:], noise.get('wider')) 359 | if not self.pre_activation: 360 | # widen batch norm variables if use batch norm 361 | self.widen_bn(indices, magnifier, noise=noise) 362 | return indices, magnifier 363 | 364 | def prev_widen(self, indices, magnifier, noise=None): 365 | assert self._init is not None, 'Uninitialized layer' 366 | # rescale W 367 | self._init['W'] = self._init['W'][indices] * magnifier.reshape([-1, 1]) 368 | if self.pre_activation: 369 | self.widen_bn(indices, magnifier, noise=noise) 370 | 371 | def set_identity_layer(self, strict=True, param=None, noise=None): 372 | self._init = {} 373 | self.set_bn_identity(strict, param, noise=noise) 374 | if self.use_bias: 375 | self._init['bias'] = [0.0] * self.units 376 | self._init['W'] = np.eye(self.units) 377 | self._init['W'] = apply_noise(self._init['W'], noise.get('deeper')) 378 | self.ready = True 379 | 380 | def remap(self, indices, noise=None): 381 | self.units = len(indices) 382 | self._init['W'] = self._init['W'][:, indices] 383 | self._init['W'] = apply_noise(self._init['W'], noise.get('wider')) 384 | if self.use_bias: 385 | self._init['bias'] = self._init['bias'][indices] 386 | if not self.pre_activation: 387 | self.widen_bn(indices, None, noise=noise) 388 | return self 389 | 390 | 391 | class PoolLayer(BaseLayer): 392 | def __init__(self, _id, _type, kernel_size=2, strides=2, use_bn=False, activation=None, keep_prob=1.0, 393 | ready=True, pre_activation=True, **kwargs): 394 | BaseLayer.__init__(self, _id, use_bn, activation, keep_prob, ready, pre_activation) 395 | 396 | self._type = _type 397 | self.kernel_size = kernel_size 398 | self.strides = strides 399 | 400 | @property 401 | def layer_str(self): 402 | return 'P%d,%d' % (self.kernel_size, self.strides) 403 | 404 | def get_config(self): 405 | return { 406 | 'name': 'pool', 407 | '_type': self._type, 408 | 'kernel_size': self.kernel_size, 409 | 'strides': self.strides, 410 | **super(PoolLayer, self).get_config(), 411 | } 412 | 413 | @staticmethod 414 | def set_from_config(layer_config, layer_init=None): 415 | pool_layer = PoolLayer(**layer_config) 416 | pool_layer._init = layer_init 417 | return pool_layer 418 | 419 | def build(self, _input, net: BasicModel, store_output_op=False): 420 | output = _input 421 | if not self.ready: 422 | return output 423 | with tf.variable_scope(self._id): 424 | self._scope = tf.get_variable_scope().name 425 | param_initializer = self.param_initializer 426 | if self.pre_activation: 427 | # batch normalization 428 | if self.use_bn: 429 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 430 | net.net_config.bn_decay, param_initializer=param_initializer) 431 | # activation 432 | output = BasicModel.activation(output, self.activation) 433 | # Pooling 434 | if self._type == 'avg': 435 | output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides) 436 | elif self._type == 'max': 437 | output = BasicModel.max_pool(output, k=self.kernel_size, s=self.strides) 438 | else: 439 | raise ValueError('Do not support the pooling type: %s' % self._type) 440 | else: 441 | # Pooling 442 | if self._type == 'avg': 443 | output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides) 444 | elif self._type == 'max': 445 | output 
= BasicModel.max_pool(output, k=self.kernel_size, s=self.strides) 446 | else: 447 | raise ValueError('Do not support the pooling type: %s' % self._type) 448 | # batch normalization 449 | if self.use_bn: 450 | output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon, 451 | net.net_config.bn_decay, param_initializer=param_initializer) 452 | # activation 453 | output = BasicModel.activation(output, self.activation) 454 | # dropout 455 | output = BasicModel.dropout(output, self.keep_prob, net.is_training) 456 | if store_output_op: 457 | self.output_op = output 458 | return output 459 | 460 | def set_identity_layer(self, strict=True, param=None, noise=None): 461 | raise ValueError('Pooling layer can never be an identity layer') 462 | 463 | def prev_widen(self, indices, magnifier, noise=None): 464 | self.widen_bn(indices, magnifier, noise=noise) 465 | -------------------------------------------------------------------------------- /code/models/utils.py: -------------------------------------------------------------------------------- 1 | from models.dense_net import DenseNetConfig, DenseNet 2 | from models.convnet import SimpleConvnetConfig, SimpleConvnet 3 | import numpy as np 4 | 5 | 6 | def get_model_config_by_name(name): 7 | if name == 'DenseNet': 8 | return DenseNetConfig 9 | elif name == 'SimpleConvnet': 10 | return SimpleConvnetConfig 11 | else: 12 | raise ValueError('Unknown model type %s' % name) 13 | 14 | 15 | def get_model_by_name(name): 16 | if name == 'DenseNet': 17 | return DenseNet 18 | elif name == 'SimpleConvnet': 19 | return SimpleConvnet 20 | else: 21 | raise ValueError('Unknown model type %s' % name) 22 | 23 | 24 | class RunConfig: 25 | def __init__(self, batch_size, n_epochs, init_lr, reduce_lr_epochs, reduce_lr_factors, opt_config, 26 | dataset, validation_size, validation_frequency, shuffle, normalization, should_save_logs, 27 | should_save_model, renew_logs=False, other_lr_schedule=None, include_extra=True, **kwargs): 28 | 29 | self.batch_size = batch_size 30 | self.n_epochs = n_epochs 31 | self.init_lr = init_lr 32 | self.reduce_lr_epochs = reduce_lr_epochs 33 | self.reduce_lr_factors = reduce_lr_factors 34 | self.opt_config = opt_config 35 | self.dataset = dataset 36 | self.validation_size = validation_size 37 | self.validation_frequency = validation_frequency 38 | self.shuffle = shuffle 39 | self.normalization = normalization 40 | self.should_save_logs = should_save_logs 41 | self.should_save_model = should_save_model 42 | self.renew_logs = renew_logs 43 | self.other_lr_schedule = other_lr_schedule 44 | self.include_extra = include_extra 45 | 46 | def get_config(self): 47 | return self.__dict__ 48 | 49 | def update(self, new_config): 50 | self.__dict__.update(new_config) 51 | 52 | def copy(self): 53 | return RunConfig(**self.get_config()) 54 | 55 | def learning_rate(self, epoch): 56 | if self.other_lr_schedule is None or self.other_lr_schedule.get('type') is None: 57 | lr = self.init_lr 58 | for reduce_lr_epoch, reduce_factor in zip(self.reduce_lr_epochs, self.reduce_lr_factors): 59 | if epoch >= reduce_lr_epoch * self.n_epochs: 60 | lr /= reduce_factor 61 | else: 62 | if self.other_lr_schedule['type'] == 'cosine': 63 | lr_max = self.init_lr 64 | lr_min = self.other_lr_schedule.get('lr_min', 0) 65 | lr = lr_min + 0.5 * (lr_max - lr_min) * (1 + np.cos((epoch - 1) / self.n_epochs * np.pi)) 66 | else: 67 | raise ValueError('Do not support %s' % self.other_lr_schedule['type']) 68 | return lr 69 | 70 | @staticmethod 71 | def 
get_default_run_config(dataset='C10+'): 72 | if dataset in ['C10', 'C10+', 'C100', 'C100+']: 73 | run_config = { 74 | 'batch_size': 64, 75 | 'n_epochs': 300, 76 | 'init_lr': 0.1, 77 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 78 | 'reduce_lr_factors': [10, 10], 79 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 80 | 'dataset': dataset, # choices = [C10, C10+, C100, C100+] 81 | 'validation_size': None, # None or int 82 | 'validation_frequency': 10, 83 | 'shuffle': 'every_epoch', # None, once_prior_train, every_epoch 84 | 'normalization': 'by_channels', # None, divide_256, divide_255, by_channels 85 | 'should_save_logs': True, 86 | 'should_save_model': True, 87 | 'renew_logs': True, 88 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 89 | } 90 | elif dataset in ['SVHN']: 91 | run_config = { 92 | 'batch_size': 64, 93 | 'n_epochs': 40, 94 | 'init_lr': 0.1, 95 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 96 | 'reduce_lr_factors': [10, 10], 97 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 98 | 'dataset': dataset, # choices = [C10, C10+, C100, C100+] 99 | 'validation_size': None, # None or int 100 | 'validation_frequency': 1, 101 | 'shuffle': True, 102 | 'normalization': 'divide_255', # None, divide_256, divide_255, by_channels 103 | 'should_save_logs': True, 104 | 'should_save_model': True, 105 | 'renew_logs': True, 106 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 107 | 'include_extra': False, 108 | } 109 | else: 110 | raise ValueError 111 | return run_config 112 | 113 | -------------------------------------------------------------------------------- /code/run_dense_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from models.dense_net import DenseNet, DenseNetConfig 4 | from data_providers.utils import get_data_provider_by_name 5 | from models.utils import RunConfig 6 | import json 7 | 8 | run_config_cifar = { 9 | 'batch_size': 64, 10 | 'n_epochs': 300, 11 | 'init_lr': 0.1, 12 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 13 | 'reduce_lr_factors': [10, 10], 14 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 15 | 'dataset': 'C10+', # choices = [C10, C10+, C100, C100+] 16 | 'validation_size': 10000, # None or int 17 | 'validation_frequency': 10, 18 | 'shuffle': 'every_epoch', # None, once_prior_train, every_epoch 19 | 'normalization': 'by_channels', # None, divide_256, divide_255, by_channels 20 | 'should_save_logs': True, 21 | 'should_save_model': True, 22 | 'renew_logs': True, 23 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 24 | } 25 | 26 | standard_net_config_cifar = { 27 | 'model_type': 'DenseNet-BC', 28 | 'weight_decay': 1e-4, 29 | 'first_ratio': 2, 30 | 'reduction': 0.5, 31 | 'bc_ratio': 4, 32 | 'bn_epsilon': 1e-5, 33 | 'bn_decay': 0.9, 34 | 'growth_rate': 4, 35 | 'depth': 10, 36 | 'total_blocks': 3, 37 | 'keep_prob': 0.8, 38 | 'pre_activation': True, 39 | } 40 | 41 | 42 | if __name__ == '__main__': 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument( 45 | '--train', action='store_true') 46 | parser.add_argument( 47 | '--test', action='store_true', 48 | help='Test model for required dataset if pretrained model exists.') 49 | parser.add_argument( 50 | '--dataset', type=str, default='C10+', choices=['C10', 'C10+', 'C100', 'C100+'], 51 | ) 52 | 53 | parser.add_argument('--path', type=str, default='') 54 | 
parser.add_argument('--save_config', action='store_true', help='Whether to save config in the path') 55 | parser.add_argument('--save_init', action='store_true') 56 | parser.add_argument('--load_model', action='store_true') 57 | 58 | args = parser.parse_args() 59 | if args.dataset in ['C10', 'C100', 'C10+', 'C100+']: 60 | run_config_cifar['dataset'] = args.dataset 61 | run_config = RunConfig(**run_config_cifar) 62 | net_config = standard_net_config_cifar 63 | else: 64 | raise ValueError 65 | if len(args.path) == 0: 66 | args.path = '../trained_nets/DenseNet/vs=%s_%s_%s_L=%d_K=%d_%s' % \ 67 | (run_config.validation_size, os.uname()[1], net_config['model_type'], net_config['depth'], 68 | net_config['growth_rate'], run_config.dataset) 69 | 70 | if run_config.dataset in ['C10+', 'C100+']: 71 | net_config['keep_prob'] = 1.0 72 | if standard_net_config_cifar['model_type'] == 'DenseNet': 73 | net_config['reduction'] = 1.0 74 | if args.test: args.load_model = True 75 | 76 | # print configurations 77 | print('Run config:') 78 | for k, v in run_config.get_config().items(): 79 | print('\t%s: %s' % (k, v)) 80 | print('Network config:') 81 | for k, v in net_config.items(): 82 | print('\t%s: %s' % (k, v)) 83 | 84 | print('Prepare training data...') 85 | data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 86 | 87 | # set net config 88 | net_config = DenseNetConfig().set_standard_dense_net(data_provider=data_provider, **net_config) 89 | print('Initialize the model...') 90 | model = DenseNet(args.path, data_provider, run_config, net_config) 91 | 92 | # save configs 93 | if args.save_config: 94 | model.save_config(args.path) 95 | 96 | if args.load_model: model.load_model() 97 | if args.test: 98 | # test 99 | print('Data provider test images: ', data_provider.test.num_examples) 100 | print('Testing...') 101 | loss, accuracy = model.test(data_provider.test, batch_size=200) 102 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 103 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 104 | elif args.train: 105 | # train the model 106 | print('Data provider train images: ', data_provider.train.num_examples) 107 | model.train_all_epochs() 108 | print('Data provider test images: ', data_provider.test.num_examples) 109 | print('Testing...') 110 | loss, accuracy = model.test(data_provider.test, batch_size=200) 111 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 112 | 113 | # save inits 114 | if args.save_init: 115 | model.save_init(os.path.join(args.path, 'snapshot')) 116 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 117 | 118 | 119 | -------------------------------------------------------------------------------- /code/run_simple_convnet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from models.convnet import SimpleConvnetConfig, SimpleConvnet 4 | from data_providers.utils import get_data_provider_by_name 5 | from models.utils import RunConfig 6 | import json 7 | import copy 8 | 9 | 10 | run_config_cifar = { 11 | 'batch_size': 64, 12 | 'n_epochs': 300, 13 | 'init_lr': 0.1, 14 | 'reduce_lr_epochs': [0.5, 0.75], # epochs * 0.5, epochs * 0.75 15 | 'reduce_lr_factors': [10, 10], 16 | 'opt_config': ['momentum', {'momentum': 0.9, 'use_nesterov': True}], 17 | 'dataset': 'C10+', # choices = [C10, C10+, C100, C100+] 18 | 'validation_size': 10000, # None or int 
19 | 'validation_frequency': 10, 20 | 'shuffle': 'every_epoch', # None, once_prior_train, every_epoch 21 | 'normalization': 'by_channels', # None, divide_256, divide_255, by_channels 22 | 'should_save_logs': True, 23 | 'should_save_model': True, 24 | 'renew_logs': True, 25 | 'other_lr_schedule': {'type': 'cosine'}, # None, or cosine 26 | } 27 | 28 | standard_net_config = { 29 | 'conv_blocks_config': [ 30 | [1, 3, 4], 31 | [1, 3, 4], 32 | [1, 3, 4], 33 | [1, 3, 4], 34 | ], 35 | 'fc_block_config': [8], 36 | 'weight_decay': 1e-4, 37 | 'drop_scheme': {'type': 'conv', 'conv_drop': 0.8, 'pool_drop': 0.7, 'fc_drop': 0.5}, 38 | 'bn_epsilon': 1e-5, 39 | 'bn_decay': 0.9, 40 | } 41 | 42 | run_config_svhn = copy.deepcopy(run_config_cifar) 43 | run_config_svhn.update({ 44 | 'n_epochs': 40, 45 | 'dataset': 'SVHN', 46 | 'validation_size': 10000, 47 | 'validation_frequency': 1, 48 | 'normalization': 'divide_255', 49 | 'other_lr_schedule': {'type': 'cosine'}, 50 | 'include_extra': False, 51 | 'shuffle': True, 52 | }) 53 | 54 | 55 | run_str = '_'.join(['{}-{}-{}'.format(*block_config) for block_config in 56 | standard_net_config['conv_blocks_config']]) 57 | run_str += '_%s' % '_'.join([str(units) for units in standard_net_config['fc_block_config']]) 58 | 59 | if __name__ == '__main__': 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument( 62 | '--train', action='store_true') 63 | parser.add_argument( 64 | '--test', action='store_true', 65 | help='Test model for required dataset if pretrained model exists.') 66 | parser.add_argument( 67 | '--dataset', type=str, default='C10+', choices=['C10', 'C10+', 'C100', 'C100+', 'SVHN'], 68 | ) 69 | parser.add_argument('--path', type=str, default='') 70 | parser.add_argument('--save_config', action='store_true', help='Whether to save config in the path') 71 | parser.add_argument('--save_init', action='store_true') 72 | parser.add_argument('--load_model', action='store_true') 73 | 74 | args = parser.parse_args() 75 | if args.dataset in ['C10', 'C100', 'C10+', 'C100+']: 76 | run_config_cifar['dataset'] = args.dataset 77 | run_config = RunConfig(**run_config_cifar) 78 | elif args.dataset in ['SVHN']: 79 | run_config = RunConfig(**run_config_svhn) 80 | else: 81 | raise ValueError 82 | if len(args.path) == 0: 83 | args.path = '../trained_nets/Convnet/vs=%s_Convnet_%s_%s_%s' % \ 84 | (run_config.validation_size, os.uname()[1], run_str, run_config.dataset) 85 | if args.test: args.load_model = True 86 | 87 | # print configurations 88 | print('Run config:') 89 | for k, v in run_config.get_config().items(): 90 | print('\t%s: %s' % (k, v)) 91 | print('Network config:') 92 | for k, v in standard_net_config.items(): 93 | print('\t%s: %s' % (k, v)) 94 | 95 | print('Prepare training data...') 96 | data_provider = get_data_provider_by_name(run_config.dataset, run_config.get_config()) 97 | 98 | # set net config 99 | net_config = SimpleConvnetConfig() 100 | net_config.set_standard_convnet(data_provider=data_provider, **standard_net_config) 101 | print('Initialize the model...') 102 | model = SimpleConvnet(args.path, data_provider, run_config, net_config) 103 | 104 | # save configs 105 | if args.save_config: 106 | model.save_config(args.path) 107 | 108 | if args.load_model: model.load_model() 109 | if args.test: 110 | # test 111 | print('Data provider test images: ', data_provider.test.num_examples) 112 | print('Testing...') 113 | loss, accuracy = model.test(data_provider.test, batch_size=200) 114 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 115 | 
json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 116 | elif args.train: 117 | # train the model 118 | print('Data provider train images: ', data_provider.train.num_examples) 119 | model.train_all_epochs() 120 | print('Data provider test images: ', data_provider.test.num_examples) 121 | print('Testing...') 122 | loss, accuracy = model.test(data_provider.test, batch_size=200) 123 | print('mean cross_entropy: %f, mean accuracy: %f' % (loss, accuracy)) 124 | 125 | # save inits 126 | if args.save_init: 127 | model.save_init(os.path.join(args.path, 'snapshot')) 128 | json.dump({'test_loss': '%s' % loss, 'test_acc': '%s' % accuracy}, open('%s/output' % args.path, 'w')) 129 | -------------------------------------------------------------------------------- /code/server_config: -------------------------------------------------------------------------------- 1 | [ 2 | ["", , "/client.py"], 3 | ["", , "/client.py"], 4 | ["", , "/client.py"] 5 | ] -------------------------------------------------------------------------------- /figures/result_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/figures/result_sample.png -------------------------------------------------------------------------------- /start_nets/start_net_convnet_small_C10+/init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/han-cai/EAS/b20c48a3ee1c97fce4796eb17037de06ff05a595/start_nets/start_net_convnet_small_C10+/init -------------------------------------------------------------------------------- /start_nets/start_net_convnet_small_C10+/net.config: -------------------------------------------------------------------------------- 1 | { 2 | "name": "SimpleConvnet", 3 | "weight_decay": 0.0001, 4 | "bn_epsilon": 1e-05, 5 | "bn_decay": 0.9, 6 | "drop_scheme": { 7 | "type": "conv", 8 | "conv_drop": 1.0, 9 | "pool_drop": 0.7, 10 | "fc_drop": 0.5 11 | }, 12 | "layer_cascade": { 13 | "_id": "SimpleConvNet", 14 | "layers": [ 15 | { 16 | "name": "conv", 17 | "filter_num": 4, 18 | "kernel_size": 3, 19 | "strides": 1, 20 | "_id": "conv_0", 21 | "use_bn": true, 22 | "activation": "relu", 23 | "keep_prob": 1.0, 24 | "pre_activation": false 25 | }, 26 | { 27 | "name": "pool", 28 | "_type": "max", 29 | "kernel_size": 2, 30 | "strides": 2, 31 | "_id": "pool_0", 32 | "use_bn": false, 33 | "activation": null, 34 | "keep_prob": 1.0, 35 | "pre_activation": false 36 | }, 37 | { 38 | "name": "conv", 39 | "filter_num": 4, 40 | "kernel_size": 3, 41 | "strides": 1, 42 | "_id": "conv_1", 43 | "use_bn": true, 44 | "activation": "relu", 45 | "keep_prob": 1.0, 46 | "pre_activation": false 47 | }, 48 | { 49 | "name": "pool", 50 | "_type": "max", 51 | "kernel_size": 2, 52 | "strides": 2, 53 | "_id": "pool_1", 54 | "use_bn": false, 55 | "activation": null, 56 | "keep_prob": 1.0, 57 | "pre_activation": false 58 | }, 59 | { 60 | "name": "conv", 61 | "filter_num": 4, 62 | "kernel_size": 3, 63 | "strides": 1, 64 | "_id": "conv_2", 65 | "use_bn": true, 66 | "activation": "relu", 67 | "keep_prob": 1.0, 68 | "pre_activation": false 69 | }, 70 | { 71 | "name": "pool", 72 | "_type": "max", 73 | "kernel_size": 2, 74 | "strides": 2, 75 | "_id": "pool_2", 76 | "use_bn": false, 77 | "activation": null, 78 | "keep_prob": 1.0, 79 | "pre_activation": false 80 | }, 81 | { 82 | "name": "conv", 83 | "filter_num": 4, 84 | "kernel_size": 3, 
85 | "strides": 1, 86 | "_id": "conv_3", 87 | "use_bn": true, 88 | "activation": "relu", 89 | "keep_prob": 1.0, 90 | "pre_activation": false 91 | }, 92 | { 93 | "name": "pool", 94 | "_type": "avg", 95 | "kernel_size": 4, 96 | "strides": 4, 97 | "_id": "pool_4", 98 | "use_bn": false, 99 | "activation": null, 100 | "keep_prob": 1.0, 101 | "pre_activation": false 102 | }, 103 | { 104 | "name": "fc", 105 | "units": 8, 106 | "use_bias": false, 107 | "_id": "fc_0", 108 | "use_bn": true, 109 | "activation": "relu", 110 | "keep_prob": 1.0, 111 | "pre_activation": false 112 | }, 113 | { 114 | "name": "fc", 115 | "units": 10, 116 | "use_bias": true, 117 | "_id": "fc_1", 118 | "use_bn": false, 119 | "activation": null, 120 | "keep_prob": 1.0, 121 | "pre_activation": false 122 | } 123 | ] 124 | } 125 | } --------------------------------------------------------------------------------
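
The start net above is described entirely by this JSON config. As a quick sanity check, the layer cascade it defines can be summarized with a few lines of standard-library Python. The snippet below is a sketch that is not part of the repository: it assumes it is run from the repository root, uses only the fields visible in the listing, and is purely illustrative — the project itself parses these configs through its own model and config classes (e.g. `SimpleConvnetConfig`).

```python
import json

# Load the start net's config shown in the listing above.
# Path assumes the repository layout from this dump and running from the repo root.
with open('start_nets/start_net_convnet_small_C10+/net.config') as f:
    net_config = json.load(f)

print(net_config['name'], '| weight_decay =', net_config['weight_decay'])

# Print one line per layer in the cascade, using the keys each layer type carries.
for layer in net_config['layer_cascade']['layers']:
    if layer['name'] == 'conv':
        desc = '%dx%d conv, %d filters' % (
            layer['kernel_size'], layer['kernel_size'], layer['filter_num'])
    elif layer['name'] == 'pool':
        desc = '%s pool, %dx%d, stride %d' % (
            layer['_type'], layer['kernel_size'], layer['kernel_size'], layer['strides'])
    else:  # fc
        desc = 'fc, %d units' % layer['units']
    print('%-8s %s' % (layer['_id'], desc))
```

Running it simply lists the conv/pool/fc layers of the small C10+ start net (four 3x3 conv layers with 4 filters each, interleaved pooling, then an 8-unit and a 10-unit fully connected layer), which matches the architecture that the search in `arch_search_convnet_net2net.py` grows from.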