├── .DS_Store
├── README.md
├── cs234-final-report.pdf
├── real+world+CNN
│   ├── general-CNN
│   │   ├── batch_accuracy.png
│   │   ├── pg_continue.py
│   │   ├── used_models.png
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── __init__.pyc
│   │       ├── __pycache__
│   │       │   ├── __init__.cpython-36.pyc
│   │       │   └── general.cpython-36.pyc
│   │       ├── general.py
│   │       └── general.pyc
│   └── real world CNN
│       ├── .DS_Store
│       ├── config.py
│       ├── generate buffer
│       │   ├── controller.py
│       │   ├── gen_sample.py
│       │   ├── get_reward.py
│       │   ├── input_data.py
│       │   ├── main_cnn.py
│       │   ├── manager.py
│       │   ├── model.py
│       │   ├── models.py
│       │   ├── random_cnn.py
│       │   ├── sample.py
│       │   ├── test.py
│       │   └── train.py
│       ├── pg.py
│       └── pg_config.py
└── toy-model
    ├── __pycache__
    │   ├── config.cpython-36.pyc
    │   └── pg_config.cpython-36.pyc
    ├── action_average_reward_dict.json
    ├── action_reward_dict.json
    ├── backup
    │   ├── batch_accuracy.png
    │   ├── reward_function.png
    │   └── used_models.png
    ├── batch_accuracy.png
    ├── config.py
    ├── create_polynomial.py
    ├── generateAR.py
    ├── input_data.py
    ├── log.txt
    ├── manager.py
    ├── model.py
    ├── pg.py
    ├── pg_config.py
    ├── reward_function.png
    ├── used_models.png
    └── utils
        ├── __init__.py
        ├── __pycache__
        │   ├── __init__.cpython-36.pyc
        │   └── general.cpython-36.pyc
        └── general.py

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Neural-Architecture-Search
2 | This is a re-implementation of the first neural architecture search paper: https://arxiv.org/abs/1611.01578.
3 | 
4 | We test our implementation in three settings:
5 | 1. A customized toy environment, where the dataset is a multi-modal Gaussian;
6 | 2. A 3-layer fully-connected network on the Google speech commands dataset;
7 | 3. A simple CNN on the Google speech commands dataset.
8 | 
9 | 
10 | To run the code, simply go to the toy-model folder and run
11 | 
12 | python pg.py
13 | 
14 | Instructions on how to run the Google speech commands experiments will be added soon; however, all the scripts you need are already in this repo. Please take a look at our report,
15 | cs234-final-report.pdf, in this repo for more details.
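If you just want the core idea before reading `pg.py` / `pg_continue.py`: the controller RNN is trained with REINFORCE using a moving-average baseline, i.e. the `loss = -mean(logprob * advantage)` pattern that appears throughout the scripts in this repo. Below is a minimal, self-contained NumPy sketch of that update loop. It is not code from this repo: `reward_fn`, the number of decision slots and choices, the learning rate, and the baseline decay are all illustrative placeholders; in the real code the reward comes from training a child network and reading back its validation accuracy.

```python
import numpy as np

rng = np.random.default_rng(0)
num_slots, num_choices = 3, 4                 # e.g. 3 layers, 4 filter-size options per layer
logits = np.zeros((num_slots, num_choices))   # controller parameters: one softmax per decision
baseline, lr, decay = 0.0, 0.1, 0.6           # 0.6/0.4 moving-average baseline, as in pg_continue.py

def reward_fn(arch):
    # Hypothetical stand-in for "train the child network and return its validation accuracy".
    return float(arch.mean()) / (num_choices - 1)

for step in range(200):
    probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
    arch = np.array([rng.choice(num_choices, p=p) for p in probs])   # sample one architecture
    r = reward_fn(arch)
    baseline = r if step == 0 else decay * baseline + (1 - decay) * r
    adv = r - baseline                                               # advantage = reward - baseline
    for slot, choice in enumerate(arch):
        grad = -probs[slot]                                          # d log softmax(choice) / d logits
        grad[choice] += 1.0
        logits[slot] += lr * adv * grad                              # REINFORCE gradient-ascent step
```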
16 | -------------------------------------------------------------------------------- /cs234-final-report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/cs234-final-report.pdf -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/batch_accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/general-CNN/batch_accuracy.png -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/pg_continue.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | 3 | import os 4 | import sys 5 | import logging 6 | import time 7 | import numpy as np 8 | import tensorflow as tf 9 | import json 10 | import scipy.signal 11 | import os 12 | import time 13 | import inspect 14 | from utils.general import get_logger, Progbar, export_plot 15 | from pg_config import pg_config 16 | #import logz 17 | from config import * 18 | import pdb 19 | from scipy.stats import multivariate_normal 20 | import matplotlib.pyplot as plt 21 | 22 | 23 | data_mean = 95.74566997 24 | data_std = 1.27014307 25 | 26 | data_max = 102.65417672555384 27 | data_min = 64.0118744 28 | 29 | tf.set_random_seed(0) 30 | 31 | class PG(object): 32 | 33 | def __init__(self): 34 | self.lr = 5e-2 35 | self.batch_size = 500 36 | self.controller_cells = 128 37 | self.num_iterations = 5000 38 | self.observation_dim = 100 39 | self.action_dim_1 = 1 40 | self.action_dim_2 = 2 41 | self.action_dim_3 = 2 42 | self.num_layers = 3 43 | self.num_actions_per_layer = 3 44 | 45 | self.hasConstraint = False 46 | self.hardConstraint = False 47 | self.reg_weight = 1e-5 48 | self.reg_op = 1e-8 49 | self.weight_limit = 8000 50 | self.op_limit = 1e8 51 | 52 | self.temp1 = [] 53 | self.temp2 = [] 54 | 55 | self.action_buffer = [] 56 | self.state_buffer = [] 57 | self.logprob_buffer = [] 58 | self._dict = {} 59 | self._used_dict = {} 60 | self.log_acc = [] 61 | self.logger = get_logger('./log.txt') 62 | 63 | self._num_used_models = [] 64 | 65 | self._initial_baseline = 0 66 | self.max_filter = 100 67 | self._used_models = [] 68 | 69 | with open('./norm_inter_acc.json', 'r') as f: 70 | self._raw_dict = json.load(f) 71 | 72 | self.build() 73 | 74 | 75 | def analyze_data(self): 76 | dictt = [] 77 | for key in self._raw_dict.keys(): 78 | if self._raw_dict[key]*2 >= 4.07: 79 | dictt.append(key) 80 | pdb.set_trace() 81 | a=1 82 | 83 | 84 | 85 | def interpolate_continues_reward(self, d, input_dict): 86 | temp_dict = {} 87 | all_dict = {} 88 | for key in input_dict.keys(): 89 | params = key[1:-1].split(',') 90 | test1 = int(params[-3]) 91 | test2 = int(params[-6]) 92 | test3 = int(params[-9]) 93 | if test1>100 or test2>100 or test3>100: 94 | continue 95 | keyword = list(map(int, params)) 96 | new_key = str(keyword[:-3*d] + ['N'] + keyword[-3*d+1:]) 97 | if new_key not in temp_dict: 98 | temp_dict[new_key] = {} 99 | temp_dict[new_key]['x'] = [] 100 | temp_dict[new_key]['y'] = [] 101 | temp_dict[new_key]['x'].append(keyword[-3*d]) 102 | temp_dict[new_key]['y'].append((input_dict[key])) 103 | 104 | maxx = -100 105 | minn = 100 106 | # interpolate 107 | for key in temp_dict.keys(): 108 | x_vec = 
temp_dict[key]['x'] 109 | y_vec = temp_dict[key]['y'] 110 | z = np.polyfit(x_vec, y_vec, 2) 111 | fff = np.poly1d(z) 112 | xp = np.linspace(1,100,100) 113 | yp = fff(xp) 114 | params = key[1:-1].split(',') 115 | for j, i in enumerate(xp): 116 | temp = params[:-3*d] + [i] + params[-3*d+1:] 117 | keyword = list(map(int, temp)) 118 | all_dict[str(keyword)] = yp[j] 119 | maxx = max(maxx, np.max(yp)) 120 | minn = min(minn, np.min(yp)) 121 | print ('max:', maxx) 122 | print ('min:', minn) 123 | return all_dict 124 | 125 | 126 | 127 | def build_reward_function(self): 128 | x, y = np.mgrid[-10:10:0.02, -10:10:0.02] 129 | pos = np.empty(x.shape + (2,)) 130 | pos[:, :, 0] = x 131 | pos[:, :, 1] = y 132 | rv1 = multivariate_normal([5, -5], [[10, 0], [0, 10]]) 133 | rv2 = multivariate_normal([2, -2], [[7, 2], [2, 5]]) 134 | rv3 = multivariate_normal([7, -7], [[1, 0], [0, 1]]) 135 | rv4 = multivariate_normal([3, -3], [[1, 0], [0, 1]]) 136 | rv11 = multivariate_normal([-5, 5], [[3, 1], [1, 2]]) 137 | rv22 = multivariate_normal([-2, 2], [[7, 2], [2, 5]]) 138 | rv33 = multivariate_normal([-7, 7], [[1, 0], [0,1]]) 139 | rv44 = multivariate_normal([-3, 3], [[4, 0], [0, 4]]) 140 | rv = rv1.pdf(pos) + rv2.pdf(pos) + rv3.pdf(pos) + rv4.pdf(pos) + rv11.pdf(pos) + rv22.pdf(pos) + rv33.pdf(pos) + rv44.pdf(pos) 141 | return rv 142 | 143 | 144 | def add_placeholders_op(self): 145 | self.action_placeholder = tf.placeholder(tf.int32, [self.num_layers*(self.num_actions_per_layer-1), self.batch_size]) 146 | self.con_action_placeholder = tf.placeholder(tf.float32, [self.num_layers, self.batch_size]) 147 | self.advantage_placeholder = tf.placeholder(tf.float32, [self.batch_size, self.num_layers*self.num_actions_per_layer]) 148 | 149 | 150 | 151 | def build_policy_network_op(self, scope="policy_network"): 152 | temp_logprob_buffer = [] 153 | with tf.variable_scope(scope): 154 | self.cell = tf.contrib.rnn.NASCell(self.controller_cells) 155 | cell_state = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) 156 | cell_input = tf.zeros([self.batch_size, 1, self.observation_dim]) 157 | for i in range(self.num_layers*self.num_actions_per_layer): 158 | outputs, cell_state = tf.nn.dynamic_rnn(self.cell, cell_input, initial_state=cell_state, dtype=tf.float32) 159 | if i%3 == 0: 160 | temp = tf.layers.dense(outputs[:, -1, :], units=self.observation_dim, name='rnn_fc_pre_1%d' % (i), bias_initializer=tf.constant_initializer(50.0)) 161 | temp = tf.nn.relu(temp) 162 | action_means1 = tf.reduce_mean(temp, [1]) 163 | action_means1 = tf.expand_dims(action_means1, 1) 164 | #action_means1 = tf.layers.dense(temp, units=self.action_dim_1, name='rnn_fc_1%d' % (i)) 165 | log_std1 = tf.get_variable('log_std_1' + str(i), shape=[self.action_dim_1], initializer=tf.constant_initializer(2.0)) 166 | mvn1 = tf.contrib.distributions.MultivariateNormalDiag(action_means1, tf.exp(log_std1)) 167 | logprob = mvn1.log_prob(tf.expand_dims(self.con_action_placeholder[int(i/3)], 1)) 168 | logprob = tf.expand_dims(logprob, 1) 169 | 170 | epsilon = tf.random_normal(shape=[self.action_dim_1], mean=0.0, stddev=1.0) 171 | sampled_action = action_means1 + epsilon * tf.exp(log_std1) 172 | sampled_action = tf.squeeze(sampled_action, axis=1) 173 | 174 | round_action = tf.cast(tf.round(sampled_action), tf.int32) 175 | round_action = tf.minimum(round_action, tf.ones([self.batch_size], dtype=tf.int32)*(self.observation_dim-1)) 176 | round_action = tf.maximum(round_action, tf.zeros([self.batch_size], dtype=tf.int32)) 177 | cell_input = tf.one_hot(round_action, 
self.observation_dim) 178 | cell_input = tf.expand_dims(cell_input, 1) 179 | 180 | 181 | else: 182 | if i%3 == 1: 183 | action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_2, name='rnn_fc_%d' % (i)) 184 | else: 185 | action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_3, name='rnn_fc_%d' % (i)) 186 | index = 2*int(i/3) + i%3 - 1 187 | 188 | sampled_action = tf.squeeze(tf.multinomial(action_logits, 1), axis=1) 189 | cell_input = tf.one_hot(sampled_action, self.observation_dim) 190 | cell_input = tf.expand_dims(cell_input, 1) 191 | logprob = tf.negative(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.action_placeholder[index], logits=action_logits)) 192 | logprob = tf.expand_dims(logprob, 1) 193 | 194 | self.action_buffer.append(sampled_action) #action 195 | #self.state_buffer.append(cell_input) # state 196 | temp_logprob_buffer.append(logprob) #logprob 197 | 198 | self.logprob_buffer = tf.concat(temp_logprob_buffer, 1) # batch x layer 199 | 200 | 201 | 202 | def add_loss_op(self): 203 | self.loss = -tf.reduce_mean(self.logprob_buffer * self.advantage_placeholder) 204 | 205 | 206 | def add_optimizer_op(self): 207 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 208 | 209 | 210 | def build(self): 211 | self.add_placeholders_op() 212 | self.build_policy_network_op() 213 | self.add_loss_op() 214 | self.add_optimizer_op() 215 | 216 | 217 | def initialize(self): 218 | self.sess = tf.Session() 219 | init = tf.global_variables_initializer() 220 | self.sess.run(init) 221 | 222 | 223 | def sample_model_reward_return(self, t): 224 | filter_nums_map = {0:10, 1:50, 2:100, 3:200} 225 | kernel_sizes_map = {0:3, 1:5} 226 | strides_map = {0:1, 1:2} 227 | 228 | action_buffer = np.array(self.sess.run(self.action_buffer)) 229 | returns = np.float32(np.zeros_like(action_buffer)) 230 | losses = np.float32(np.zeros_like(action_buffer)) 231 | 232 | actions = np.zeros([self.num_layers*(self.num_actions_per_layer-1), self.batch_size]) 233 | con_actions = np.zeros([self.num_layers, self.batch_size]) 234 | 235 | 236 | for i in range(self.batch_size): 237 | temp = action_buffer[:, i].copy() 238 | actions[:,i] = np.array([temp[1], temp[2], temp[4],temp[5],temp[7],temp[8]]) 239 | con_actions[:,i] = np.array([temp[0],temp[3],temp[6]]) 240 | flag = 0 241 | for j in [0,3,6]: 242 | temp[j] = np.minimum(temp[j], self.observation_dim) 243 | temp[j] = np.round(temp[j]) 244 | if temp[j] < 1: 245 | flag = 1 246 | temp[j] = np.maximum(temp[j], 1) 247 | 248 | filter1, kernel1, stride1 = temp[0], kernel_sizes_map[temp[1]], strides_map[temp[2]] 249 | filter2, kernel2, stride2 = temp[3], kernel_sizes_map[temp[4]], strides_map[temp[5]] 250 | filter3, kernel3, stride3 = temp[6], kernel_sizes_map[temp[7]], strides_map[temp[8]] 251 | keyword = [filter1, kernel1, stride1, filter2, kernel2, stride2, filter3, kernel3, stride3] 252 | keyword = list(map(int, keyword)) 253 | 254 | returns[:, i] = self._raw_dict[str(keyword)] - flag*10 255 | 256 | if self.hasConstraint: 257 | weights = (filter1 + 1) * (kernel1**2) 258 | weights += (filter2 + 1) * (kernel2**2) 259 | weights += (filter3 + 1) * (kernel3**2) 260 | 261 | t_in, f_in = 99, 40 262 | t1, f1 = np.ceil(t_in/stride1), np.ceil(f_in/stride1) 263 | t2, f2 = np.ceil(t1/stride2), np.ceil(f1/stride2) 264 | t3, f3 = np.ceil(t2/stride3), np.ceil(f2/stride3) 265 | ops = 2*1*t1*f1*stride1**2*filter1 + t1*f1*filter1 266 | ops += (2*filter1*t2*f2*stride2**2 + filter1*t2*f2) + (2*filter1*t2*f2*filter2 + t2*f2*filter2) 267 | ops += 
(2*filter2*t3*f3*stride3**2 + filter2*t3*f3) + (2*filter2*t3*f3*filter3 + t3*f3*filter3) 268 | 269 | self.temp1.append(weights); self.temp2.append(ops) 270 | 271 | if self.hardConstraint: 272 | if weights > self.weight_limit or ops > self.op_limit: 273 | #returns[:, i] = 0 274 | returns[:, i] = -data_mean 275 | losses[:, i] = 0 276 | else: 277 | losses[:, i] = 0 278 | else: 279 | losses[:, i] = self.reg_weight*weights + self.reg_op*ops 280 | 281 | if str(keyword) not in self._used_models: 282 | self._used_models.append(str(keyword)) 283 | if t==self.num_iterations-1 and i>=self.batch_size-5: 284 | print ('converges at:', [temp[0], kernel_sizes_map[temp[1]], strides_map[temp[2]],\ 285 | temp[3], kernel_sizes_map[temp[4]], strides_map[temp[5]],\ 286 | temp[6], kernel_sizes_map[temp[7]], strides_map[temp[8]]]) 287 | #print np.mean(losses), np.mean(returns) 288 | return actions, con_actions, np.transpose(returns), np.transpose(losses) 289 | 290 | 291 | def train(self): 292 | self.baseline = -1000.0 293 | 294 | for t in range(self.num_iterations): 295 | #print ('iterations:', t) 296 | actions, con_actions, returns, losses = self.sample_model_reward_return(t) 297 | returns = returns * 2 298 | #self.baseline = (t*self.baseline + np.mean(returns)) / (t+1) 299 | if self.baseline == -1000.0: 300 | self.baseline = np.mean(returns) 301 | else: 302 | self.baseline = 0.6 * self.baseline + 0.4 * np.mean(returns) 303 | 304 | self.sess.run(self.train_op, feed_dict={ 305 | self.action_placeholder : actions, 306 | self.con_action_placeholder: con_actions, 307 | self.advantage_placeholder : returns - self.baseline}) 308 | 309 | avg_acc = np.mean(returns) 310 | used = len(self._used_models) 311 | self._num_used_models.append(used) 312 | 313 | 314 | self.log_acc.append(avg_acc) 315 | #sigma_reward = np.sqrt(np.var(returns) / len(total_rewards)) 316 | msg = "Average accuracy within a batch: {:04.2f}".format(avg_acc) 317 | self.logger.info(msg) 318 | #print (actions) 319 | 320 | 321 | self.logger.info("- Training done.") 322 | export_plot(self.log_acc, "Score", 'NAS-CNN', "./batch_accuracy.png") 323 | export_plot(self._num_used_models, "Number of distinct models sampled", 'NAS-CNN', "./used_models.png") 324 | 325 | 326 | 327 | def run(self): 328 | self.initialize() 329 | self.train() 330 | 331 | 332 | 333 | if __name__ == '__main__': 334 | model = PG() 335 | #model.analyze_data() 336 | model.run() -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/used_models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/general-CNN/used_models.png -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/general-CNN/utils/__init__.py -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/general-CNN/utils/__init__.pyc 
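A note on `pg_continue.py` above: the three filter counts are treated as continuous actions. Each controller head outputs a Gaussian mean, a sample is drawn as `mean + eps * exp(log_std)`, the log-probability used for REINFORCE is evaluated on that raw sample, and the sample is only rounded and clipped into the valid filter range (1 to `observation_dim` = 100) when the architecture is looked up in the reward table. The snippet below is a minimal NumPy sketch of that sampling/discretisation step, not code from the repo; the example mean and log-std are illustrative.

```python
import numpy as np

def sample_filter_count(mean, log_std, low=1, high=100, rng=None):
    """Draw a continuous action from N(mean, exp(log_std)^2), then discretise it
    the way pg_continue.py does: round to the nearest integer and clip to range."""
    if rng is None:
        rng = np.random.default_rng()
    raw = mean + rng.standard_normal() * np.exp(log_std)   # reparameterised Gaussian sample
    return int(np.clip(np.round(raw), low, high)), raw

# Example: a head proposing roughly 64 filters with a wide (exp(2.0) ~ 7.4) std.
discrete, raw = sample_filter_count(mean=64.0, log_std=2.0)
# `raw` is what the Gaussian log-prob is computed on; `discrete` indexes the reward table.
```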
-------------------------------------------------------------------------------- /real+world+CNN/general-CNN/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/general-CNN/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/utils/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/general-CNN/utils/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/utils/general.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import logging 4 | import numpy as np 5 | from collections import deque 6 | import matplotlib 7 | matplotlib.use('agg') 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | def export_plot(ys, ylabel, title, filename, xs=None, xlabel='Training Batches'): 12 | """ 13 | Export a plot in filename 14 | 15 | Args: 16 | ys: (list) of float / int to plot 17 | filename: (string) directory 18 | """ 19 | if xs==None: 20 | xs = range(len(ys)) 21 | plt.figure() 22 | plt.plot(xs, ys) 23 | plt.xlabel(xlabel) 24 | plt.ylabel(ylabel) 25 | plt.title(title) 26 | plt.savefig(filename) 27 | plt.close() 28 | 29 | 30 | def get_logger(filename): 31 | """ 32 | Return a logger instance to a file 33 | """ 34 | logger = logging.getLogger('logger') 35 | logger.setLevel(logging.DEBUG) 36 | logging.basicConfig(format='%(message)s', level=logging.DEBUG) 37 | handler = logging.FileHandler(filename) 38 | handler.setLevel(logging.DEBUG) 39 | handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s')) 40 | logging.getLogger().addHandler(handler) 41 | return logger 42 | 43 | 44 | class Progbar(object): 45 | """Progbar class copied from keras (https://github.com/fchollet/keras/) 46 | 47 | Displays a progress bar. 48 | Small edit : added strict arg to update 49 | # Arguments 50 | target: Total number of steps expected. 51 | interval: Minimum visual progress update interval (in seconds). 52 | """ 53 | 54 | def __init__(self, target, width=30, verbose=1, discount=0.9): 55 | self.width = width 56 | self.target = target 57 | self.sum_values = {} 58 | self.exp_avg = {} 59 | self.unique_values = [] 60 | self.start = time.time() 61 | self.total_width = 0 62 | self.seen_so_far = 0 63 | self.verbose = verbose 64 | self.discount = discount 65 | 66 | def update(self, current, values=[], exact=[], strict=[], exp_avg=[]): 67 | """ 68 | Updates the progress bar. 69 | # Arguments 70 | current: Index of current step. 71 | values: List of tuples (name, value_for_last_step). 72 | The progress bar will display averages for these values. 73 | exact: List of tuples (name, value_for_last_step). 74 | The progress bar will display these values directly. 
75 | """ 76 | 77 | for k, v in values: 78 | if k not in self.sum_values: 79 | self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far] 80 | self.unique_values.append(k) 81 | else: 82 | self.sum_values[k][0] += v * (current - self.seen_so_far) 83 | self.sum_values[k][1] += (current - self.seen_so_far) 84 | for k, v in exact: 85 | if k not in self.sum_values: 86 | self.unique_values.append(k) 87 | self.sum_values[k] = [v, 1] 88 | for k, v in strict: 89 | if k not in self.sum_values: 90 | self.unique_values.append(k) 91 | self.sum_values[k] = v 92 | for k, v in exp_avg: 93 | if k not in self.exp_avg: 94 | self.exp_avg[k] = v 95 | else: 96 | self.exp_avg[k] *= self.discount 97 | self.exp_avg[k] += (1-self.discount)*v 98 | 99 | self.seen_so_far = current 100 | 101 | now = time.time() 102 | if self.verbose == 1: 103 | prev_total_width = self.total_width 104 | sys.stdout.write("\b" * prev_total_width) 105 | sys.stdout.write("\r") 106 | 107 | numdigits = int(np.floor(np.log10(self.target))) + 1 108 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 109 | bar = barstr % (current, self.target) 110 | prog = float(current)/self.target 111 | prog_width = int(self.width*prog) 112 | if prog_width > 0: 113 | bar += ('='*(prog_width-1)) 114 | if current < self.target: 115 | bar += '>' 116 | else: 117 | bar += '=' 118 | bar += ('.'*(self.width-prog_width)) 119 | bar += ']' 120 | sys.stdout.write(bar) 121 | self.total_width = len(bar) 122 | 123 | if current: 124 | time_per_unit = (now - self.start) / current 125 | else: 126 | time_per_unit = 0 127 | eta = time_per_unit*(self.target - current) 128 | info = '' 129 | if current < self.target: 130 | info += ' - ETA: %ds' % eta 131 | else: 132 | info += ' - %ds' % (now - self.start) 133 | for k in self.unique_values: 134 | if type(self.sum_values[k]) is list: 135 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 136 | else: 137 | info += ' - %s: %s' % (k, self.sum_values[k]) 138 | 139 | for k, v in self.exp_avg.iteritems(): 140 | info += ' - %s: %.4f' % (k, v) 141 | 142 | self.total_width += len(info) 143 | if prev_total_width > self.total_width: 144 | info += ((prev_total_width-self.total_width) * " ") 145 | 146 | sys.stdout.write(info) 147 | sys.stdout.flush() 148 | 149 | if current >= self.target: 150 | sys.stdout.write("\n") 151 | 152 | if self.verbose == 2: 153 | if current >= self.target: 154 | info = '%ds' % (now - self.start) 155 | for k in self.unique_values: 156 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 157 | sys.stdout.write(info + "\n") 158 | 159 | def add(self, n, values=[]): 160 | self.update(self.seen_so_far+n, values) 161 | -------------------------------------------------------------------------------- /real+world+CNN/general-CNN/utils/general.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/general-CNN/utils/general.pyc -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/real+world+CNN/real world CNN/.DS_Store -------------------------------------------------------------------------------- /real+world+CNN/real world 
CNN/config.py: -------------------------------------------------------------------------------- 1 | #LAYER_SIZES = [30, 60, 100, 144] 2 | FILTER_NUMS = [10, 50, 100, 200] 3 | KERNEL_SIZES = [3, 5] 4 | STRIDES = [1, 2] 5 | NUM_LAYERS = 3 6 | NUM_ENUM = 10 7 | FLAGS = None 8 | CLIP_REWARDS = False 9 | 10 | JSON_SCALE = 1e6 11 | 12 | controller_cells = 32 13 | 14 | action_average_reward_dict_name = 'action_average_reward_dict.json' -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pprint 3 | from collections import OrderedDict 4 | 5 | from keras import backend as K 6 | import tensorflow as tf 7 | 8 | import os 9 | if not os.path.exists('weights/'): 10 | os.makedirs('weights/') 11 | 12 | 13 | class StateSpace: 14 | ''' 15 | State Space manager 16 | 17 | Provides utilit functions for holding "states" / "actions" that the controller 18 | must use to train and predict. 19 | 20 | Also provides a more convenient way to define the search space 21 | ''' 22 | def __init__(self): 23 | self.states = OrderedDict() 24 | self.state_count_ = 0 25 | 26 | def add_state(self, name, values): 27 | ''' 28 | Adds a "state" to the state manager, along with some metadata for efficient 29 | packing and unpacking of information required by the RNN Controller. 30 | 31 | Stores metadata such as: 32 | - Global ID 33 | - Name 34 | - Valid Values 35 | - Number of valid values possible 36 | - Map from value ID to state value 37 | - Map from state value to value ID 38 | 39 | Args: 40 | name: name of the state / action 41 | values: valid values that this state can take 42 | 43 | Returns: 44 | Global ID of the state. Can be used to refer to this state later. 
45 | ''' 46 | index_map = {} 47 | for i, val in enumerate(values): 48 | index_map[i] = val 49 | 50 | value_map = {} 51 | for i, val in enumerate(values): 52 | value_map[val] = i 53 | 54 | metadata = { 55 | 'id': self.state_count_, 56 | 'name': name, 57 | 'values': values, 58 | 'size': len(values), 59 | 'index_map_': index_map, 60 | 'value_map_': value_map, 61 | } 62 | self.states[self.state_count_] = metadata 63 | self.state_count_ += 1 64 | 65 | return self.state_count_ - 1 66 | 67 | def one_hot_encode(self, id, value): 68 | ''' 69 | One hot encode the specific state value 70 | 71 | Args: 72 | id: global id of the state 73 | value: state value 74 | 75 | Returns: 76 | one hot encoded representation of the state value 77 | ''' 78 | state = self[id] 79 | size = state['size'] 80 | value_map = state['value_map_'] 81 | value_idx = value_map[value] 82 | 83 | one_hot = np.zeros((1, size), dtype=np.float32) 84 | one_hot[np.arange(1), value_idx] = 1.0 85 | return one_hot 86 | 87 | def get_state_value(self, id, index): 88 | ''' 89 | Retrieves the state value from the state value ID 90 | 91 | Args: 92 | id: global id of the state 93 | index: index of the state value (usually from argmax) 94 | 95 | Returns: 96 | The actual state value at given value index 97 | ''' 98 | state = self[id] 99 | index_map = state['index_map_'] 100 | value = index_map[index] 101 | return value 102 | 103 | def get_random_state_space(self, num_layers): 104 | ''' 105 | Constructs a random initial state space for feeding as an initial value 106 | to the Controller RNN 107 | 108 | Args: 109 | num_layers: number of layers to duplicate the search space 110 | 111 | Returns: 112 | A list of one hot encoded states 113 | ''' 114 | states = [] 115 | 116 | for id in range(self.size * num_layers): 117 | state = self[id] 118 | size = state['size'] 119 | #pdb.set_trace() 120 | 121 | sample = np.random.choice(size, size=1) 122 | sample = state['index_map_'][sample[0]] 123 | state = self.one_hot_encode(id, sample) 124 | #pdb.set_trace() 125 | #state = 1.0 / size * np.ones(size)#np.random.uniform(0, 1, size) 126 | #state /= np.sum(state) 127 | #pdb.set_trace() 128 | states.append(state) 129 | return states 130 | 131 | def parse_state_space_list(self, state_list): 132 | ''' 133 | Parses a list of one hot encoded states to retrieve a list of state values 134 | 135 | Args: 136 | state_list: list of one hot encoded states 137 | 138 | Returns: 139 | list of state values 140 | ''' 141 | state_values = [] 142 | for id, state_one_hot in enumerate(state_list): 143 | state_val_idx = np.argmax(state_one_hot, axis=-1)[0] 144 | value = self.get_state_value(id, state_val_idx) 145 | state_values.append(value) 146 | 147 | return state_values 148 | 149 | def print_state_space(self): 150 | ''' Pretty print the state space ''' 151 | print('*' * 40, 'STATE SPACE', '*' * 40) 152 | 153 | pp = pprint.PrettyPrinter(indent=2, width=100) 154 | for id, state in self.states.items(): 155 | pp.pprint(state) 156 | print() 157 | 158 | def print_actions(self, actions): 159 | ''' Print the action space properly ''' 160 | print('Actions :') 161 | 162 | for id, action in enumerate(actions): 163 | if id % self.size == 0: 164 | print("*" * 20, "Layer %d" % (((id + 1) // self.size) + 1), "*" * 20) 165 | 166 | state = self[id] 167 | name = state['name'] 168 | vals = [(n, p) for n, p in zip(state['values'], *action)] 169 | print("%s : " % name, vals) 170 | print() 171 | 172 | def __getitem__(self, id): 173 | return self.states[id % self.size] 174 | 175 | @property 176 | def 
size(self): 177 | return self.state_count_ 178 | 179 | 180 | class Controller: 181 | ''' 182 | Utility class to manage the RNN Controller 183 | ''' 184 | def __init__(self, policy_session, num_layers, state_space, 185 | reg_param=0.001, 186 | discount_factor=0.99, 187 | exploration=0.8, 188 | controller_cells=32, 189 | restore_controller=False): 190 | self.policy_session = policy_session # type: tf.Session 191 | 192 | self.num_layers = num_layers 193 | self.state_space = state_space # type: StateSpace 194 | self.state_size = self.state_space.size 195 | 196 | self.controller_cells = controller_cells 197 | self.reg_strength = reg_param 198 | self.discount_factor = discount_factor 199 | self.exploration = exploration 200 | self.restore_controller = restore_controller 201 | 202 | self.reward_buffer = [] 203 | self.state_buffer = [] 204 | 205 | self.cell_outputs = [] 206 | self.policy_classifiers = [] 207 | self.policy_actions = [] 208 | self.policy_labels = [] 209 | self.cell_state = None 210 | 211 | self.build_policy_network() 212 | 213 | def get_action(self, state): 214 | ''' 215 | Gets a one hot encoded action list, either from random sampling or from 216 | the Controller RNN 217 | 218 | Args: 219 | state: a list of one hot encoded states, whose first value is used as initial 220 | state for the controller RNN 221 | 222 | Returns: 223 | A one hot encoded action list 224 | ''' 225 | if np.random.random() < self.exploration: 226 | print("Generating random action to explore") 227 | actions = [] 228 | 229 | for i in range(self.state_size * self.num_layers): 230 | state_ = self.state_space[i] 231 | size = state_['size'] 232 | #action = np.random.uniform(0, 1, size).reshape([1, 4]) 233 | #action /= np.sum(action) 234 | 235 | sample = np.random.choice(size, size=1) 236 | sample = state_['index_map_'][sample[0]] 237 | action = self.state_space.one_hot_encode(i, sample) 238 | 239 | actions.append(action) 240 | #pdb.set_trace() 241 | return actions 242 | 243 | else: 244 | print("Prediction action from Controller") 245 | initial_state = self.state_space[0] 246 | size = initial_state['size'] 247 | 248 | if state[0].shape != (1, 1, size): 249 | state = state[0].reshape((1, 1, size)) 250 | else: 251 | state = state[0] 252 | 253 | print("State input to Controller for Action : ", state.flatten()) 254 | 255 | with self.policy_session.as_default(): 256 | K.set_session(self.policy_session) 257 | 258 | with tf.name_scope('action_prediction'): 259 | pred_actions = self.policy_session.run(self.policy_actions, feed_dict={self.state_input: state}) 260 | 261 | return pred_actions 262 | 263 | 264 | def build_policy_network(self): 265 | with self.policy_session.as_default(): 266 | K.set_session(self.policy_session) 267 | 268 | with tf.name_scope('controller'): 269 | with tf.variable_scope('policy_network'): 270 | 271 | # state input is the first input fed into the controller RNN. 
272 | # the rest of the inputs are fed to the RNN internally 273 | with tf.name_scope('state_input'): 274 | #state_input = 1/self.state_space[0]['size'] * tf.ones(self.state_space[0]['size']) 275 | #state_input = tf.expand_dims(state_input, 0) 276 | #state_input = tf.expand_dims(state_input, 0, name='state_input') 277 | state_input = tf.placeholder(dtype=tf.float32, shape=(1, 1, self.state_space[0]['size']), name='state_input') 278 | self.state_input = state_input 279 | 280 | # we can use LSTM as the controller as well 281 | nas_cell = tf.contrib.rnn.NASCell(self.controller_cells) 282 | if self.cell_state == None: 283 | self.cell_state = nas_cell.zero_state(batch_size=1, dtype=tf.float32) 284 | 285 | 286 | #with tf.name_scope('cell_state'): 287 | # input_cell_state = tf.placeholder(dtype=tf.float32, shape=(1, self.controller_cells), name='cell_state') 288 | 289 | # initially, cell input will be 1st state input 290 | cell_input = state_input 291 | #cell_input = 292 | 293 | # we provide a flat list of chained input-output to the RNN 294 | for i in range(self.state_size * self.num_layers): 295 | state_space = self.state_space[i] 296 | size = state_space['size'] 297 | 298 | with tf.name_scope('controller_output_%d' % i): 299 | # feed the ith layer input (i-1 layer output) to the RNN 300 | outputs, final_state = tf.nn.dynamic_rnn(nas_cell, 301 | cell_input, 302 | initial_state=self.cell_state, 303 | dtype=tf.float32) 304 | 305 | # add a new classifier for each layers output 306 | classifier = tf.layers.dense(outputs[:, -1, :], units=size, name='classifier_%d' % (i)) 307 | #reuse=False) 308 | preds = tf.nn.softmax(classifier) 309 | 310 | # feed the previous layer (i-1 layer output) to the next layers input, along with state 311 | cell_input = tf.expand_dims(preds, 0, name='cell_output_%d' % (i)) 312 | self.cell_state = final_state 313 | 314 | # store the tensors for later loss computation 315 | self.cell_outputs.append(cell_input) 316 | self.policy_classifiers.append(classifier) 317 | self.policy_actions.append(preds) 318 | 319 | policy_net_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='policy_network') 320 | 321 | with tf.name_scope('optimizer'): 322 | self.global_step = tf.Variable(0, trainable=False) 323 | starter_learning_rate = 0.1 324 | learning_rate = tf.train.exponential_decay(starter_learning_rate, self.global_step, 325 | 500, 0.95, staircase=True) 326 | 327 | tf.summary.scalar('learning_rate', learning_rate) 328 | 329 | self.optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate) 330 | 331 | with tf.name_scope('losses'): 332 | self.discounted_rewards = tf.placeholder(tf.float32, shape=(None,), name='discounted_rewards') 333 | tf.summary.scalar('discounted_reward', tf.reduce_sum(self.discounted_rewards)) 334 | 335 | # calculate sum of all the individual classifiers 336 | cross_entropy_loss = 0 337 | for i in range(self.state_size * self.num_layers): 338 | classifier = self.policy_classifiers[i] 339 | state_space = self.state_space[i] 340 | size = state_space['size'] 341 | 342 | with tf.name_scope('state_%d' % (i + 1)): 343 | labels = tf.placeholder(dtype=tf.float32, shape=(None, size), name='cell_label_%d' % i) 344 | self.policy_labels.append(labels) 345 | 346 | ce_loss = tf.nn.softmax_cross_entropy_with_logits(logits=classifier, labels=labels) 347 | tf.summary.scalar('state_%d_ce_loss' % (i + 1), tf.reduce_mean(ce_loss)) 348 | 349 | cross_entropy_loss += ce_loss 350 | 351 | policy_gradient_loss = tf.reduce_mean(cross_entropy_loss) 352 | reg_loss = 
tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_net_variables]) # Regularization 353 | 354 | # sum up policy gradient and regularization loss 355 | self.total_loss = policy_gradient_loss + self.reg_strength * reg_loss 356 | tf.summary.scalar('total_loss', self.total_loss) 357 | 358 | self.gradients = self.optimizer.compute_gradients(self.total_loss) 359 | with tf.name_scope('policy_gradients'): 360 | # compute policy gradients 361 | for i, (grad, var) in enumerate(self.gradients): 362 | if grad is not None: 363 | self.gradients[i] = (grad * self.discounted_rewards, var) 364 | 365 | # training update 366 | with tf.name_scope("train_policy_network"): 367 | # apply gradients to update policy network 368 | self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step) 369 | 370 | self.summaries_op = tf.summary.merge_all() 371 | self.summary_writer = tf.summary.FileWriter('logs', graph=self.policy_session.graph) 372 | 373 | self.policy_session.run(tf.global_variables_initializer()) 374 | self.saver = tf.train.Saver(max_to_keep=1) 375 | 376 | if self.restore_controller: 377 | path = tf.train.latest_checkpoint('weights/') 378 | 379 | if path is not None and tf.train.checkpoint_exists(path): 380 | print("Loading Controller Checkpoint !") 381 | self.saver.restore(self.policy_session, path) 382 | 383 | def store_rollout(self, state, reward): 384 | self.reward_buffer.append(reward) 385 | self.state_buffer.append(state) 386 | 387 | # dump buffers to file if it grows larger than 50 items 388 | if len(self.reward_buffer) > 20: 389 | with open('buffers.txt', mode='a+') as f: 390 | for i in range(20): 391 | state_ = self.state_buffer[i] 392 | state_list = self.state_space.parse_state_space_list(state_) 393 | state_list = str(state_list)#','.join(state_list) 394 | 395 | f.write("%0.4f,%s\n" % (self.reward_buffer[i], state_list)) 396 | 397 | print("Saved buffers to file `buffers.txt` !") 398 | 399 | self.reward_buffer = [self.reward_buffer[-1]] 400 | self.state_buffer = [self.state_buffer[-1]] 401 | 402 | def discount_rewards(self): 403 | ''' 404 | Compute discounted rewards over the entire reward buffer 405 | 406 | Returns: 407 | Discounted reward value 408 | ''' 409 | rewards = np.asarray(self.reward_buffer) 410 | discounted_rewards = np.zeros_like(rewards) 411 | running_add = 0 412 | for t in reversed(range(0, rewards.size)): 413 | if rewards[t] != 0: 414 | running_add = 0 415 | running_add = running_add * self.discount_factor + rewards[t] 416 | discounted_rewards[t] = running_add 417 | return discounted_rewards[-1] 418 | 419 | def train_step(self): 420 | ''' 421 | Perform a single train step on the Controller RNN 422 | 423 | Returns: 424 | the training loss 425 | ''' 426 | states = self.state_buffer[-1] 427 | label_list = [] 428 | 429 | # parse the state space to get real value of the states, 430 | # then one hot encode them for comparison with the predictions 431 | state_list = self.state_space.parse_state_space_list(states) 432 | for id, state_value in enumerate(state_list): 433 | state_one_hot = self.state_space.one_hot_encode(id, state_value) 434 | label_list.append(state_one_hot) 435 | 436 | # the initial input to the controller RNN 437 | state_input_size = self.state_space[0]['size'] 438 | state_input = states[0].reshape((1, 1, state_input_size)) 439 | print("State input to Controller for training : ", state_input.flatten()) 440 | 441 | # the discounted reward value 442 | reward = self.discount_rewards() 443 | reward = np.asarray([reward]).astype('float32') 
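# The scalar reward prepared above is fed to the `discounted_rewards` placeholder;
# in build_policy_network() every policy gradient is scaled by it
# (self.gradients[i] = (grad * self.discounted_rewards, var)) before being applied,
# so a positive reward reinforces the sampled decisions and a negative one discourages them.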
444 | 445 | feed_dict = { 446 | self.state_input: state_input, 447 | self.discounted_rewards: reward 448 | } 449 | 450 | # prepare the feed dict with the values of all the policy labels for each 451 | # of the Controller outputs 452 | for i, label in enumerate(label_list): 453 | feed_dict[self.policy_labels[i]] = label 454 | 455 | with self.policy_session.as_default(): 456 | #K.set_session(self.policy_session) 457 | 458 | print("Training RNN (States ip) : ", state_list) 459 | print("Training RNN (Reward ip) : ", reward.flatten()) 460 | _, loss, summary, global_step = self.policy_session.run([self.train_op, self.total_loss, self.summaries_op, 461 | self.global_step], 462 | feed_dict=feed_dict) 463 | 464 | self.summary_writer.add_summary(summary, global_step) 465 | self.saver.save(self.policy_session, save_path='weights/controller.ckpt', global_step=self.global_step) 466 | 467 | # reduce exploration after many train steps 468 | if global_step != 0 and global_step % 20 == 0 and self.exploration > 0.5: 469 | self.exploration *= 0.99 470 | 471 | return loss 472 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/gen_sample.py: -------------------------------------------------------------------------------- 1 | ''' 2 | import itertools 3 | filter_val = [10, 50, 100, 200] 4 | stride_val = [1,2] 5 | kernel_val = [3,5] 6 | filter_space = [v for v in itertools.product(filter_val, repeat=3)] 7 | stride_space = [v for v in itertools.product(stride_val, repeat=3)] 8 | kernel_space = [v for v in itertools.product(kernel_val, repeat=3)] 9 | 10 | f_val = [50, 100, 200] 11 | s_val = [1,2] 12 | k_val = [3] 13 | f_space = [v for v in itertools.product(f_val, repeat=3)] 14 | s_space = [v for v in itertools.product(s_val, repeat=3)] 15 | k_space = [v for v in itertools.product(k_val, repeat=3)] 16 | 17 | main_state = [] 18 | with open('Main_case.txt', 'w') as out: 19 | j = 0 20 | for k in k_space: 21 | for s in s_space: 22 | for f in f_space: 23 | main_state.append([f[0],k[0],s[0], 24 | f[1],k[1],s[1], 25 | f[2],k[2],s[2]]) 26 | sta = [f[0],k[0],s[0], 27 | f[1],k[1],s[1], 28 | f[2],k[2],s[2]] 29 | j += 1 30 | for st in sta: 31 | out.write(str(st)+' ') 32 | out.write('\n') 33 | 34 | print j 35 | 36 | print filter_space,len(filter_space) 37 | print stride_space,len(stride_space) 38 | print kernel_space,len(kernel_space) 39 | with open('All_case.txt', 'w') as outfile: 40 | k = 0 41 | for kernel in kernel_space: 42 | for stride in stride_space: 43 | for filters in filter_space: 44 | state = [filters[0],kernel[0],stride[0], 45 | filters[1],kernel[1],stride[1], 46 | filters[2],kernel[2],stride[2]] 47 | 48 | if state not in main_state: 49 | k +=1 50 | for stat in state: 51 | outfile.write(str(stat)+' ') 52 | outfile.write('\n') 53 | else: 54 | pass 55 | 56 | print k 57 | ''' 58 | 59 | state = [] 60 | with open('All_case.txt', 'r') as fin: 61 | for line in fin: 62 | line = line.strip('\n') 63 | line = line.strip() 64 | line = line.split(' ') 65 | 66 | 67 | 68 | print line 69 | s = [] 70 | for l in line: 71 | s.append(int(l)) 72 | state.append(s) 73 | R2 = state[500:1000] 74 | print R2 75 | print len(R2) 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/get_reward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
import csv 3 | import pdb 4 | import matplotlib.pyplot as plt 5 | 6 | moving_acc = [] 7 | total_acc = [] 8 | moving_reward = [] 9 | total_reward = [] 10 | nn = [] 11 | 12 | n = 0 13 | with open('./train_history.csv', 'r') as f: 14 | reader = csv.reader(f) 15 | for row in reader: 16 | if n==0: 17 | n += 1 18 | continue 19 | nn.append(n) 20 | n += 1 21 | acc, reward, _, _, _ = row 22 | total_acc.append(float(acc)) 23 | total_reward.append(float(reward)*100) 24 | moving_acc.append(np.mean(total_acc[-20:])) 25 | moving_reward.append(np.mean(total_reward[-250:])) 26 | 27 | 28 | 29 | ''' 30 | plt.plot(nn[50:], moving_reward[50:], 'r') 31 | plt.ylabel('Average Reward') 32 | plt.xlabel('Number of Iterations') 33 | plt.title('Average Reward in DNN-KWS') 34 | #plt.show() 35 | plt.savefig('./milestone_reward.jpg') 36 | ''' 37 | 38 | plt.plot(nn[50:], moving_acc[50:], 'r') 39 | plt.ylabel('Average Acc') 40 | plt.xlabel('Number of Iterations') 41 | plt.title('Average Accuracy in DNN-KWS') 42 | plt.show() 43 | #plt.savefig('./milestone_reward.jpg') 44 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Model definitions for simple speech recognition. 16 | 17 | """ 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import hashlib 23 | import math 24 | import os.path 25 | import random 26 | import re 27 | import sys 28 | import tarfile 29 | 30 | import numpy as np 31 | from six.moves import urllib 32 | from six.moves import xrange # pylint: disable=redefined-builtin 33 | import tensorflow as tf 34 | 35 | from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio 36 | from tensorflow.python.ops import io_ops 37 | from tensorflow.python.platform import gfile 38 | from tensorflow.python.util import compat 39 | 40 | MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M 41 | SILENCE_LABEL = '_silence_' 42 | SILENCE_INDEX = 0 43 | UNKNOWN_WORD_LABEL = '_unknown_' 44 | UNKNOWN_WORD_INDEX = 1 45 | BACKGROUND_NOISE_DIR_NAME = '_background_noise_' 46 | RANDOM_SEED = 59185 47 | 48 | 49 | def prepare_words_list(wanted_words): 50 | """Prepends common tokens to the custom word list. 51 | 52 | Args: 53 | wanted_words: List of strings containing the custom words. 54 | 55 | Returns: 56 | List with the standard silence and unknown tokens added. 57 | """ 58 | return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words 59 | 60 | 61 | def which_set(filename, validation_percentage, testing_percentage): 62 | """Determines which data partition the file should belong to. 
63 | 64 | We want to keep files in the same training, validation, or testing sets even 65 | if new ones are added over time. This makes it less likely that testing 66 | samples will accidentally be reused in training when long runs are restarted 67 | for example. To keep this stability, a hash of the filename is taken and used 68 | to determine which set it should belong to. This determination only depends on 69 | the name and the set proportions, so it won't change as other files are added. 70 | 71 | It's also useful to associate particular files as related (for example words 72 | spoken by the same person), so anything after '_nohash_' in a filename is 73 | ignored for set determination. This ensures that 'bobby_nohash_0.wav' and 74 | 'bobby_nohash_1.wav' are always in the same set, for example. 75 | 76 | Args: 77 | filename: File path of the data sample. 78 | validation_percentage: How much of the data set to use for validation. 79 | testing_percentage: How much of the data set to use for testing. 80 | 81 | Returns: 82 | String, one of 'training', 'validation', or 'testing'. 83 | """ 84 | base_name = os.path.basename(filename) 85 | # We want to ignore anything after '_nohash_' in the file name when 86 | # deciding which set to put a wav in, so the data set creator has a way of 87 | # grouping wavs that are close variations of each other. 88 | hash_name = re.sub(r'_nohash_.*$', '', base_name) 89 | # This looks a bit magical, but we need to decide whether this file should 90 | # go into the training, testing, or validation sets, and we want to keep 91 | # existing files in the same set even if more files are subsequently 92 | # added. 93 | # To do that, we need a stable way of deciding based on just the file name 94 | # itself, so we do a hash of that and then use that to generate a 95 | # probability value that we use to assign it. 96 | hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest() 97 | percentage_hash = ((int(hash_name_hashed, 16) % 98 | (MAX_NUM_WAVS_PER_CLASS + 1)) * 99 | (100.0 / MAX_NUM_WAVS_PER_CLASS)) 100 | if percentage_hash < validation_percentage: 101 | result = 'validation' 102 | elif percentage_hash < (testing_percentage + validation_percentage): 103 | result = 'testing' 104 | else: 105 | result = 'training' 106 | return result 107 | 108 | 109 | def load_wav_file(filename): 110 | """Loads an audio file and returns a float PCM-encoded array of samples. 111 | 112 | Args: 113 | filename: Path to the .wav file to load. 114 | 115 | Returns: 116 | Numpy array holding the sample data as floats between -1.0 and 1.0. 117 | """ 118 | with tf.Session(graph=tf.Graph()) as sess: 119 | wav_filename_placeholder = tf.placeholder(tf.string, []) 120 | wav_loader = io_ops.read_file(wav_filename_placeholder) 121 | wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) 122 | return sess.run( 123 | wav_decoder, 124 | feed_dict={wav_filename_placeholder: filename}).audio.flatten() 125 | 126 | 127 | def save_wav_file(filename, wav_data, sample_rate): 128 | """Saves audio sample data to a .wav audio file. 129 | 130 | Args: 131 | filename: Path to save the file to. 132 | wav_data: 2D array of float PCM-encoded audio data. 133 | sample_rate: Samples per second to encode in the file. 
134 | """ 135 | with tf.Session(graph=tf.Graph()) as sess: 136 | wav_filename_placeholder = tf.placeholder(tf.string, []) 137 | sample_rate_placeholder = tf.placeholder(tf.int32, []) 138 | wav_data_placeholder = tf.placeholder(tf.float32, [None, 1]) 139 | wav_encoder = contrib_audio.encode_wav(wav_data_placeholder, 140 | sample_rate_placeholder) 141 | wav_saver = io_ops.write_file(wav_filename_placeholder, wav_encoder) 142 | sess.run( 143 | wav_saver, 144 | feed_dict={ 145 | wav_filename_placeholder: filename, 146 | sample_rate_placeholder: sample_rate, 147 | wav_data_placeholder: np.reshape(wav_data, (-1, 1)) 148 | }) 149 | 150 | 151 | class AudioProcessor(object): 152 | """Handles loading, partitioning, and preparing audio training data.""" 153 | 154 | def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage, 155 | wanted_words, validation_percentage, testing_percentage, 156 | model_settings): 157 | self.data_dir = data_dir 158 | self.maybe_download_and_extract_dataset(data_url, data_dir) 159 | self.prepare_data_index(silence_percentage, unknown_percentage, 160 | wanted_words, validation_percentage, 161 | testing_percentage) 162 | self.prepare_background_data() 163 | self.prepare_processing_graph(model_settings) 164 | 165 | def maybe_download_and_extract_dataset(self, data_url, dest_directory): 166 | """Download and extract data set tar file. 167 | 168 | If the data set we're using doesn't already exist, this function 169 | downloads it from the TensorFlow.org website and unpacks it into a 170 | directory. 171 | If the data_url is none, don't download anything and expect the data 172 | directory to contain the correct files already. 173 | 174 | Args: 175 | data_url: Web location of the tar file containing the data set. 176 | dest_directory: File path to extract data to. 177 | """ 178 | if not data_url: 179 | return 180 | if not os.path.exists(dest_directory): 181 | os.makedirs(dest_directory) 182 | filename = data_url.split('/')[-1] 183 | filepath = os.path.join(dest_directory, filename) 184 | if not os.path.exists(filepath): 185 | 186 | def _progress(count, block_size, total_size): 187 | sys.stdout.write( 188 | '\r>> Downloading %s %.1f%%' % 189 | (filename, float(count * block_size) / float(total_size) * 100.0)) 190 | sys.stdout.flush() 191 | 192 | try: 193 | filepath, _ = urllib.request.urlretrieve(data_url, filepath, _progress) 194 | except: 195 | tf.logging.error('Failed to download URL: %s to folder: %s', data_url, 196 | filepath) 197 | tf.logging.error('Please make sure you have enough free space and' 198 | ' an internet connection') 199 | raise 200 | print() 201 | statinfo = os.stat(filepath) 202 | tf.logging.info('Successfully downloaded %s (%d bytes)', filename, 203 | statinfo.st_size) 204 | tarfile.open(filepath, 'r:gz').extractall(dest_directory) 205 | 206 | def prepare_data_index(self, silence_percentage, unknown_percentage, 207 | wanted_words, validation_percentage, 208 | testing_percentage): 209 | """Prepares a list of the samples organized by set and label. 210 | 211 | The training loop needs a list of all the available data, organized by 212 | which partition it should belong to, and with ground truth labels attached. 213 | This function analyzes the folders below the `data_dir`, figures out the 214 | right 215 | labels for each file based on the name of the subdirectory it belongs to, 216 | and uses a stable hash to assign it to a data set partition. 217 | 218 | Args: 219 | silence_percentage: How much of the resulting data should be background. 
220 | unknown_percentage: How much should be audio outside the wanted classes. 221 | wanted_words: Labels of the classes we want to be able to recognize. 222 | validation_percentage: How much of the data set to use for validation. 223 | testing_percentage: How much of the data set to use for testing. 224 | 225 | Returns: 226 | Dictionary containing a list of file information for each set partition, 227 | and a lookup map for each class to determine its numeric index. 228 | 229 | Raises: 230 | Exception: If expected files are not found. 231 | """ 232 | # Make sure the shuffling and picking of unknowns is deterministic. 233 | random.seed(RANDOM_SEED) 234 | wanted_words_index = {} 235 | for index, wanted_word in enumerate(wanted_words): 236 | wanted_words_index[wanted_word] = index + 2 237 | self.data_index = {'validation': [], 'testing': [], 'training': []} 238 | unknown_index = {'validation': [], 'testing': [], 'training': []} 239 | all_words = {} 240 | # Look through all the subfolders to find audio samples 241 | search_path = os.path.join(self.data_dir, '*', '*.wav') 242 | for wav_path in gfile.Glob(search_path): 243 | _, word = os.path.split(os.path.dirname(wav_path)) 244 | word = word.lower() 245 | # Treat the '_background_noise_' folder as a special case, since we expect 246 | # it to contain long audio samples we mix in to improve training. 247 | if word == BACKGROUND_NOISE_DIR_NAME: 248 | continue 249 | all_words[word] = True 250 | set_index = which_set(wav_path, validation_percentage, testing_percentage) 251 | # If it's a known class, store its detail, otherwise add it to the list 252 | # we'll use to train the unknown label. 253 | if word in wanted_words_index: 254 | self.data_index[set_index].append({'label': word, 'file': wav_path}) 255 | else: 256 | unknown_index[set_index].append({'label': word, 'file': wav_path}) 257 | if not all_words: 258 | raise Exception('No .wavs found at ' + search_path) 259 | for index, wanted_word in enumerate(wanted_words): 260 | if wanted_word not in all_words: 261 | raise Exception('Expected to find ' + wanted_word + 262 | ' in labels but only found ' + 263 | ', '.join(all_words.keys())) 264 | # We need an arbitrary file to load as the input for the silence samples. 265 | # It's multiplied by zero later, so the content doesn't matter. 266 | silence_wav_path = self.data_index['training'][0]['file'] 267 | for set_index in ['validation', 'testing', 'training']: 268 | set_size = len(self.data_index[set_index]) 269 | silence_size = int(math.ceil(set_size * silence_percentage / 100)) 270 | for _ in range(silence_size): 271 | self.data_index[set_index].append({ 272 | 'label': SILENCE_LABEL, 273 | 'file': silence_wav_path 274 | }) 275 | # Pick some unknowns to add to each partition of the data set. 276 | random.shuffle(unknown_index[set_index]) 277 | unknown_size = int(math.ceil(set_size * unknown_percentage / 100)) 278 | self.data_index[set_index].extend(unknown_index[set_index][:unknown_size]) 279 | # Make sure the ordering is random. 280 | for set_index in ['validation', 'testing', 'training']: 281 | random.shuffle(self.data_index[set_index]) 282 | # Prepare the rest of the result data structure. 
283 | self.words_list = prepare_words_list(wanted_words) 284 | self.word_to_index = {} 285 | for word in all_words: 286 | if word in wanted_words_index: 287 | self.word_to_index[word] = wanted_words_index[word] 288 | else: 289 | self.word_to_index[word] = UNKNOWN_WORD_INDEX 290 | self.word_to_index[SILENCE_LABEL] = SILENCE_INDEX 291 | 292 | def prepare_background_data(self): 293 | """Searches a folder for background noise audio, and loads it into memory. 294 | 295 | It's expected that the background audio samples will be in a subdirectory 296 | named '_background_noise_' inside the 'data_dir' folder, as .wavs that match 297 | the sample rate of the training data, but can be much longer in duration. 298 | 299 | If the '_background_noise_' folder doesn't exist at all, this isn't an 300 | error, it's just taken to mean that no background noise augmentation should 301 | be used. If the folder does exist, but it's empty, that's treated as an 302 | error. 303 | 304 | Returns: 305 | List of raw PCM-encoded audio samples of background noise. 306 | 307 | Raises: 308 | Exception: If files aren't found in the folder. 309 | """ 310 | self.background_data = [] 311 | background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME) 312 | if not os.path.exists(background_dir): 313 | return self.background_data 314 | with tf.Session(graph=tf.Graph()) as sess: 315 | wav_filename_placeholder = tf.placeholder(tf.string, []) 316 | wav_loader = io_ops.read_file(wav_filename_placeholder) 317 | wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) 318 | search_path = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME, 319 | '*.wav') 320 | for wav_path in gfile.Glob(search_path): 321 | wav_data = sess.run( 322 | wav_decoder, 323 | feed_dict={wav_filename_placeholder: wav_path}).audio.flatten() 324 | self.background_data.append(wav_data) 325 | if not self.background_data: 326 | raise Exception('No background wav files were found in ' + search_path) 327 | 328 | def prepare_processing_graph(self, model_settings): 329 | """Builds a TensorFlow graph to apply the input distortions. 330 | 331 | Creates a graph that loads a WAVE file, decodes it, scales the volume, 332 | shifts it in time, adds in background noise, calculates a spectrogram, and 333 | then builds an MFCC fingerprint from that. 334 | 335 | This must be called with an active TensorFlow session running, and it 336 | creates multiple placeholder inputs, and one output: 337 | 338 | - wav_filename_placeholder_: Filename of the WAV to load. 339 | - foreground_volume_placeholder_: How loud the main clip should be. 340 | - time_shift_padding_placeholder_: Where to pad the clip. 341 | - time_shift_offset_placeholder_: How much to move the clip in time. 342 | - background_data_placeholder_: PCM sample data for background noise. 343 | - background_volume_placeholder_: Loudness of mixed-in background. 344 | - mfcc_: Output 2D fingerprint of processed audio. 345 | 346 | Args: 347 | model_settings: Information about the current model being trained. 348 | """ 349 | desired_samples = model_settings['desired_samples'] 350 | self.wav_filename_placeholder_ = tf.placeholder(tf.string, []) 351 | wav_loader = io_ops.read_file(self.wav_filename_placeholder_) 352 | wav_decoder = contrib_audio.decode_wav( 353 | wav_loader, desired_channels=1, desired_samples=desired_samples) 354 | # Allow the audio sample's volume to be adjusted. 
355 | self.foreground_volume_placeholder_ = tf.placeholder(tf.float32, []) 356 | scaled_foreground = tf.multiply(wav_decoder.audio, 357 | self.foreground_volume_placeholder_) 358 | # Shift the sample's start position, and pad any gaps with zeros. 359 | self.time_shift_padding_placeholder_ = tf.placeholder(tf.int32, [2, 2]) 360 | self.time_shift_offset_placeholder_ = tf.placeholder(tf.int32, [2]) 361 | padded_foreground = tf.pad( 362 | scaled_foreground, 363 | self.time_shift_padding_placeholder_, 364 | mode='CONSTANT') 365 | sliced_foreground = tf.slice(padded_foreground, 366 | self.time_shift_offset_placeholder_, 367 | [desired_samples, -1]) 368 | # Mix in background noise. 369 | self.background_data_placeholder_ = tf.placeholder(tf.float32, 370 | [desired_samples, 1]) 371 | self.background_volume_placeholder_ = tf.placeholder(tf.float32, []) 372 | background_mul = tf.multiply(self.background_data_placeholder_, 373 | self.background_volume_placeholder_) 374 | background_add = tf.add(background_mul, sliced_foreground) 375 | background_clamp = tf.clip_by_value(background_add, -1.0, 1.0) 376 | # Run the spectrogram and MFCC ops to get a 2D 'fingerprint' of the audio. 377 | spectrogram = contrib_audio.audio_spectrogram( 378 | background_clamp, 379 | window_size=model_settings['window_size_samples'], 380 | stride=model_settings['window_stride_samples'], 381 | magnitude_squared=True) 382 | self.mfcc_ = contrib_audio.mfcc( 383 | spectrogram, 384 | wav_decoder.sample_rate, 385 | dct_coefficient_count=model_settings['dct_coefficient_count']) 386 | 387 | def set_size(self, mode): 388 | """Calculates the number of samples in the dataset partition. 389 | 390 | Args: 391 | mode: Which partition, must be 'training', 'validation', or 'testing'. 392 | 393 | Returns: 394 | Number of samples in the partition. 395 | """ 396 | return len(self.data_index[mode]) 397 | 398 | def get_data(self, how_many, offset, model_settings, background_frequency, 399 | background_volume_range, time_shift, mode, sess): 400 | """Gather samples from the data set, applying transformations as needed. 401 | 402 | When the mode is 'training', a random selection of samples will be returned, 403 | otherwise the first N clips in the partition will be used. This ensures that 404 | validation always uses the same samples, reducing noise in the metrics. 405 | 406 | Args: 407 | how_many: Desired number of samples to return. -1 means the entire 408 | contents of this partition. 409 | offset: Where to start when fetching deterministically. 410 | model_settings: Information about the current model being trained. 411 | background_frequency: How many clips will have background noise, 0.0 to 412 | 1.0. 413 | background_volume_range: How loud the background noise will be. 414 | time_shift: How much to randomly shift the clips by in time. 415 | mode: Which partition to use, must be 'training', 'validation', or 416 | 'testing'. 417 | sess: TensorFlow session that was active when processor was created. 418 | 419 | Returns: 420 | List of sample data for the transformed samples, and list of labels in 421 | one-hot form. 422 | """ 423 | # Pick one of the partitions to choose samples from. 424 | candidates = self.data_index[mode] 425 | if how_many == -1: 426 | sample_count = len(candidates) 427 | else: 428 | sample_count = max(0, min(how_many, len(candidates) - offset)) 429 | # Data and labels will be populated and returned. 
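# Typical invocations, mirroring how manager.py later drives this method (the
# concrete argument values are illustrative, taken from the FLAGS defaults used
# elsewhere in this repo):
#   train_x, train_y = audio_processor.get_data(
#       100, 0, model_settings, 0.8, 0.1, time_shift_samples, 'training', sess)
#   val_x, val_y = audio_processor.get_data(
#       -1, 0, model_settings, 0.0, 0.0, 0, 'validation', sess)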
430 | data = np.zeros((sample_count, model_settings['fingerprint_size'])) 431 | labels = np.zeros((sample_count, model_settings['label_count'])) 432 | desired_samples = model_settings['desired_samples'] 433 | use_background = self.background_data and (mode == 'training') 434 | pick_deterministically = (mode != 'training') 435 | # Use the processing graph we created earlier to repeatedly to generate the 436 | # final output sample data we'll use in training. 437 | for i in xrange(offset, offset + sample_count): 438 | # Pick which audio sample to use. 439 | if how_many == -1 or pick_deterministically: 440 | sample_index = i 441 | else: 442 | sample_index = np.random.randint(len(candidates)) 443 | sample = candidates[sample_index] 444 | # If we're time shifting, set up the offset for this sample. 445 | if time_shift > 0: 446 | time_shift_amount = np.random.randint(-time_shift, time_shift) 447 | else: 448 | time_shift_amount = 0 449 | if time_shift_amount > 0: 450 | time_shift_padding = [[time_shift_amount, 0], [0, 0]] 451 | time_shift_offset = [0, 0] 452 | else: 453 | time_shift_padding = [[0, -time_shift_amount], [0, 0]] 454 | time_shift_offset = [-time_shift_amount, 0] 455 | input_dict = { 456 | self.wav_filename_placeholder_: sample['file'], 457 | self.time_shift_padding_placeholder_: time_shift_padding, 458 | self.time_shift_offset_placeholder_: time_shift_offset, 459 | } 460 | # Choose a section of background noise to mix in. 461 | if use_background: 462 | background_index = np.random.randint(len(self.background_data)) 463 | background_samples = self.background_data[background_index] 464 | background_offset = np.random.randint( 465 | 0, len(background_samples) - model_settings['desired_samples']) 466 | background_clipped = background_samples[background_offset:( 467 | background_offset + desired_samples)] 468 | background_reshaped = background_clipped.reshape([desired_samples, 1]) 469 | if np.random.uniform(0, 1) < background_frequency: 470 | background_volume = np.random.uniform(0, background_volume_range) 471 | else: 472 | background_volume = 0 473 | else: 474 | background_reshaped = np.zeros([desired_samples, 1]) 475 | background_volume = 0 476 | input_dict[self.background_data_placeholder_] = background_reshaped 477 | input_dict[self.background_volume_placeholder_] = background_volume 478 | # If we want silence, mute out the main sample but leave the background. 479 | if sample['label'] == SILENCE_LABEL: 480 | input_dict[self.foreground_volume_placeholder_] = 0 481 | else: 482 | input_dict[self.foreground_volume_placeholder_] = 1 483 | # Run the graph to produce the output audio. 484 | data[i - offset, :] = sess.run(self.mfcc_, feed_dict=input_dict).flatten() 485 | label_index = self.word_to_index[sample['label']] 486 | labels[i - offset, label_index] = 1 487 | return data, labels 488 | 489 | def get_unprocessed_data(self, how_many, model_settings, mode): 490 | """Retrieve sample data for the given partition, with no transformations. 491 | 492 | Args: 493 | how_many: Desired number of samples to return. -1 means the entire 494 | contents of this partition. 495 | model_settings: Information about the current model being trained. 496 | mode: Which partition to use, must be 'training', 'validation', or 497 | 'testing'. 498 | 499 | Returns: 500 | List of sample data for the samples, and list of labels in one-hot form. 
501 | """ 502 | candidates = self.data_index[mode] 503 | if how_many == -1: 504 | sample_count = len(candidates) 505 | else: 506 | sample_count = how_many 507 | desired_samples = model_settings['desired_samples'] 508 | words_list = self.words_list 509 | data = np.zeros((sample_count, desired_samples)) 510 | labels = [] 511 | with tf.Session(graph=tf.Graph()) as sess: 512 | wav_filename_placeholder = tf.placeholder(tf.string, []) 513 | wav_loader = io_ops.read_file(wav_filename_placeholder) 514 | wav_decoder = contrib_audio.decode_wav( 515 | wav_loader, desired_channels=1, desired_samples=desired_samples) 516 | foreground_volume_placeholder = tf.placeholder(tf.float32, []) 517 | scaled_foreground = tf.multiply(wav_decoder.audio, 518 | foreground_volume_placeholder) 519 | for i in range(sample_count): 520 | if how_many == -1: 521 | sample_index = i 522 | else: 523 | sample_index = np.random.randint(len(candidates)) 524 | sample = candidates[sample_index] 525 | input_dict = {wav_filename_placeholder: sample['file']} 526 | if sample['label'] == SILENCE_LABEL: 527 | input_dict[foreground_volume_placeholder] = 0 528 | else: 529 | input_dict[foreground_volume_placeholder] = 1 530 | data[i, :] = sess.run(scaled_foreground, feed_dict=input_dict).flatten() 531 | label_index = self.word_to_index[sample['label']] 532 | labels.append(words_list[label_index]) 533 | return data, labels 534 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/main_cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import numpy as np 5 | import csv 6 | from manager import NetworkManager 7 | import tensorflow as tf 8 | from model import model_fn 9 | from random import choice 10 | import itertools 11 | import tensorflow as tf 12 | from collections import defaultdict 13 | 14 | from controller import Controller, StateSpace 15 | from manager import NetworkManager 16 | from model import model_fn, model_fn_cnn 17 | import json 18 | import argparse 19 | import os.path 20 | import sys 21 | from six.moves import xrange # pylint: disable=redefined-builtin 22 | import random 23 | import input_data 24 | import models 25 | from tensorflow.python.platform import gfile 26 | from tensorflow.contrib import slim as slim 27 | FLAGS = None 28 | states = [] 29 | ''' 30 | filter_val = [50, 100, 200] 31 | stride_val = [1,2] 32 | kernel_val = [3] 33 | filter_space = [v for v in itertools.product(filter_val, repeat=3)] 34 | stride_space = [v for v in itertools.product(stride_val, repeat=3)] 35 | kernal_space = [v for v in itertools.product(kernel_val, repeat=3)] 36 | for itr in xrange(3): 37 | states.append([random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val), 38 | random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val), 39 | random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val)]) 40 | ''' 41 | states = [] 42 | with open('Main_case.txt', 'r') as fin: 43 | for line in fin: 44 | line = line.strip('\n') 45 | line = line.strip() 46 | line = line.split(' ') 47 | s = [] 48 | for l in line: 49 | s.append(int(l)) 50 | states.append(s) 51 | 52 | def main(_): 53 | CLIP_REWARDS = False 54 | data = defaultdict(list) 55 | with open('main_result.txt', 'w') as out: 56 | for ite in xrange(3): 57 | ite += 1 58 | print ('outter iteration:',ite) 59 | 
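# Each of the three outer passes re-trains every architecture listed in
# Main_case.txt from scratch, so every candidate accumulates three independently
# trained accuracy samples (stored scaled by 1e5) in main_result.txt and
# main_sample.json.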
iteration = 0 60 | for state in states: 61 | iteration +=1 62 | print (iteration,state) 63 | manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS) 64 | reward, previous_acc = manager.get_rewards(model_fn_cnn, state) 65 | previous_acc = round(previous_acc*100000,2) 66 | print (previous_acc) 67 | data[str(state)].append(previous_acc) 68 | out.write("{} {}\n".format(state, previous_acc)) 69 | with open('main_sample.json', 'w') as outfile: 70 | json.dump(data, outfile) 71 | 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument( 76 | '--data_url', 77 | type=str, 78 | # pylint: disable=line-too-long 79 | default='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz', 80 | # pylint: enable=line-too-long 81 | help='Location of speech training data archive on the web.') 82 | parser.add_argument( 83 | '--data_dir', 84 | type=str, 85 | default='/tmp/speech_dataset/', 86 | help="""\ 87 | Where to download the speech training data to. 88 | """) 89 | parser.add_argument( 90 | '--background_volume', 91 | type=float, 92 | default=0.1, 93 | help="""\ 94 | How loud the background noise should be, between 0 and 1. 95 | """) 96 | parser.add_argument( 97 | '--background_frequency', 98 | type=float, 99 | default=0.8, 100 | help="""\ 101 | How many of the training samples have background noise mixed in. 102 | """) 103 | parser.add_argument( 104 | '--silence_percentage', 105 | type=float, 106 | default=10.0, 107 | help="""\ 108 | How much of the training data should be silence. 109 | """) 110 | parser.add_argument( 111 | '--unknown_percentage', 112 | type=float, 113 | default=10.0, 114 | help="""\ 115 | How much of the training data should be unknown words. 116 | """) 117 | parser.add_argument( 118 | '--time_shift_ms', 119 | type=float, 120 | default=100.0, 121 | help="""\ 122 | Range to randomly shift the training audio by in time. 
123 | """) 124 | parser.add_argument( 125 | '--testing_percentage', 126 | type=int, 127 | default=10, 128 | help='What percentage of wavs to use as a test set.') 129 | parser.add_argument( 130 | '--validation_percentage', 131 | type=int, 132 | default=10, 133 | help='What percentage of wavs to use as a validation set.') 134 | parser.add_argument( 135 | '--sample_rate', 136 | type=int, 137 | default=16000, 138 | help='Expected sample rate of the wavs',) 139 | parser.add_argument( 140 | '--clip_duration_ms', 141 | type=int, 142 | default=1000, 143 | help='Expected duration in milliseconds of the wavs',) 144 | parser.add_argument( 145 | '--window_size_ms', 146 | type=float, 147 | default=40.0, 148 | help='How long each spectrogram timeslice is',) 149 | parser.add_argument( 150 | '--window_stride_ms', 151 | type=float, 152 | default=20.0, 153 | help='How long each spectrogram timeslice is',) 154 | parser.add_argument( 155 | '--dct_coefficient_count', 156 | type=int, 157 | default=10, 158 | help='How many bins to use for the MFCC fingerprint',) 159 | parser.add_argument( 160 | '--how_many_training_steps', 161 | type=str, 162 | default='1000', 163 | help='How many training loops to run',) 164 | parser.add_argument( 165 | '--eval_step_interval', 166 | type=int, 167 | default=100, 168 | help='How often to evaluate the training results.') 169 | parser.add_argument( 170 | '--learning_rate', 171 | type=str, 172 | default='0.001', 173 | help='How large a learning rate to use when training.') 174 | parser.add_argument( 175 | '--batch_size', 176 | type=int, 177 | default=100, 178 | help='How many items to train with at once',) 179 | parser.add_argument( 180 | '--summaries_dir', 181 | type=str, 182 | default='/tmp/retrain_logs', 183 | help='Where to save summary logs for TensorBoard.') 184 | parser.add_argument( 185 | '--wanted_words', 186 | type=str, 187 | default='yes,no', #default='yes,no,up,down,left,right,on,off,stop,go' 188 | help='Words to use (others will be added to an unknown label)',) 189 | parser.add_argument( 190 | '--train_dir', 191 | type=str, 192 | default='/tmp/speech_commands_train', 193 | help='Directory to write event logs and checkpoint.') 194 | parser.add_argument( 195 | '--save_step_interval', 196 | type=int, 197 | default=100, 198 | help='Save model checkpoint every save_steps.') 199 | parser.add_argument( 200 | '--start_checkpoint', 201 | type=str, 202 | default='', 203 | help='If specified, restore this pretrained model before any training.') 204 | parser.add_argument( 205 | '--model_architecture', 206 | type=str, 207 | default='dnn', 208 | help='What model architecture to use') 209 | parser.add_argument( 210 | '--model_size_info', 211 | type=int, 212 | nargs="+", 213 | default=[128,128,128], 214 | help='Model dimensions - different for various models') 215 | parser.add_argument( 216 | '--check_nans', 217 | type=bool, 218 | default=False, 219 | help='Whether to check for invalid numbers during processing') 220 | 221 | FLAGS, unparsed = parser.parse_known_args() 222 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 223 | 224 | 225 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import numpy as np 5 | import tensorflow as tf 6 | from six.moves import xrange # pylint: 
disable=redefined-builtin 7 | import argparse 8 | import os.path 9 | import sys 10 | import input_data 11 | import models 12 | from tensorflow.python.platform import gfile 13 | from tensorflow.contrib import slim as slim 14 | 15 | 16 | def KWS_data_loader(FLAGS, sess): 17 | #sess = tf.Session() 18 | model_settings = models.prepare_model_settings( 19 | len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))), 20 | FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms, 21 | FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) 22 | audio_processor = input_data.AudioProcessor( 23 | FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage, 24 | FLAGS.unknown_percentage, 25 | FLAGS.wanted_words.split(','), FLAGS.validation_percentage, 26 | FLAGS.testing_percentage, model_settings) 27 | fingerprint_size = model_settings['fingerprint_size'] 28 | label_count = model_settings['label_count'] 29 | time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) 30 | fingerprint_size = model_settings['fingerprint_size'] 31 | label_count = model_settings['label_count'] 32 | time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) 33 | training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(','))) 34 | learning_rates_list = list(map(float, FLAGS.learning_rate.split(','))) 35 | if len(training_steps_list) != len(learning_rates_list): 36 | raise Exception( 37 | '--how_many_training_steps and --learning_rate must be equal length ' 38 | 'lists, but are %d and %d long instead' % (len(training_steps_list), 39 | len(learning_rates_list))) 40 | 41 | validation_fingerprints, validation_ground_truth = ( 42 | audio_processor.get_data(-1, 0, model_settings, 0.0, 43 | 0.0, 0, 'validation', sess)) 44 | input_frequency_size = model_settings['dct_coefficient_count'] 45 | input_time_size = model_settings['spectrogram_length'] 46 | return audio_processor, training_steps_list, learning_rates_list, model_settings, time_shift_samples, validation_fingerprints, validation_ground_truth 47 | 48 | class NetworkManager: 49 | ''' 50 | Helper class to manage the generation of subnetwork training given a dataset 51 | ''' 52 | def __init__(self, FLAGS, acc_beta=0.8, clip_rewards=False): 53 | ''' 54 | Manager which is tasked with creating subnetworks, training them on a dataset, and retrieving 55 | rewards in the term of accuracy, which is passed to the controller RNN. 56 | 57 | Args: 58 | dataset: a tuple of 4 arrays (X_train, y_train, X_val, y_val) 59 | epochs: number of epochs to train the subnetworks 60 | batchsize: batchsize of training the subnetworks 61 | acc_beta: exponential weight for the accuracy 62 | clip_rewards: whether to clip rewards in [-0.05, 0.05] range to prevent 63 | large weight updates. Use when training is highly unstable. 64 | ''' 65 | self.FLAGS = FLAGS 66 | self.clip_rewards = clip_rewards 67 | 68 | self.beta = acc_beta 69 | self.beta_bias = acc_beta 70 | self.moving_acc = 0.0 71 | 72 | 73 | def get_rewards(self, model_fn, actions): 74 | ''' 75 | Creates a subnetwork given the actions predicted by the controller RNN, 76 | trains it on the provided dataset, and then returns a reward. 77 | 78 | Args: 79 | model_fn: a function which accepts one argument, a list of 80 | parsed actions, obtained via an inverse mapping from the 81 | StateSpace. 82 | actions: a list of parsed actions obtained via an inverse mapping 83 | from the StateSpace. 
It is in a specific order as given below: 84 | 85 | Consider 4 states were added to the StateSpace via the `add_state` 86 | method. Then the `actions` array will be of length 4, with the 87 | values of those states in the order that they were added. 88 | 89 | If number of layers is greater than one, then the `actions` array 90 | will be of length `4 * number of layers` (in the above scenario). 91 | The index from [0:4] will be for layer 0, from [4:8] for layer 1, 92 | etc for the number of layers. 93 | 94 | These action values are for direct use in the construction of models. 95 | 96 | Returns: 97 | a reward for training a model with the given actions 98 | ''' 99 | 100 | with tf.Session(graph=tf.Graph()) as network_sess: 101 | tf.logging.set_verbosity(tf.logging.INFO) 102 | audio_processor, training_steps_list, learning_rates_list, model_settings, time_shift_samples, X_val, y_val = KWS_data_loader( 103 | self.FLAGS, network_sess) 104 | 105 | # generate a submodel given predicted actions 106 | logits, fingerprint_input, is_training = model_fn(actions, model_settings) 107 | ground_truth_input = tf.placeholder(tf.float32, [None, model_settings['label_count']], name='groundtruth_input') 108 | learning_rate = 0.001 109 | 110 | # Optionally we can add runtime checks to spot when NaNs or other symptoms of 111 | # numerical errors start occurring during training. 112 | control_dependencies = [] 113 | if self.FLAGS.check_nans: 114 | checks = tf.add_check_numerics_ops() 115 | control_dependencies = [checks] 116 | 117 | # Create the back propagation and training evaluation machinery in the graph. 118 | with tf.name_scope('cross_entropy'): 119 | cross_entropy_mean = tf.reduce_mean( 120 | tf.nn.softmax_cross_entropy_with_logits( 121 | labels=ground_truth_input, logits=logits)) 122 | 123 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 124 | with tf.name_scope('train'), tf.control_dependencies(update_ops), tf.control_dependencies(control_dependencies): 125 | learning_rate_input = tf.placeholder(tf.float32, [], name='learning_rate_input') 126 | train_op = tf.train.AdamOptimizer(learning_rate_input) 127 | train_step = slim.learning.create_train_op(cross_entropy_mean, train_op) 128 | predicted_indices = tf.argmax(logits, 1) 129 | expected_indices = tf.argmax(ground_truth_input, 1) 130 | correct_prediction = tf.equal(predicted_indices, expected_indices) 131 | evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 132 | 133 | 134 | # Training loop. 
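# The loop below trains the sampled child network for the configured number of
# steps, evaluates it on the held-out validation set every eval_step_interval
# steps, and halves the learning rate whenever validation accuracy fails to
# improve. The best validation accuracy seen becomes `acc`, and the reward
# returned to the controller is the scaled improvement of `acc` over an
# exponential moving average of previous accuracies:
#   reward = (acc - moving_acc) * 10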
135 | best_accuracy = 0 136 | training_steps_max = np.sum(training_steps_list) 137 | start_step = 1 # use for checkpoint, fixed here 138 | tf.global_variables_initializer().run() 139 | 140 | for training_step in xrange(start_step, training_steps_max + 1): 141 | X_train, y_train = audio_processor.get_data( 142 | self.FLAGS.batch_size, 0, model_settings, self.FLAGS.background_frequency, 143 | self.FLAGS.background_volume, time_shift_samples, 'training', network_sess) 144 | train_accuracy, _ = network_sess.run( 145 | [ 146 | evaluation_step , train_step 147 | ], 148 | feed_dict={ 149 | fingerprint_input: X_train, 150 | ground_truth_input: y_train, 151 | learning_rate_input: learning_rate, 152 | is_training: True 153 | }) 154 | #tf.logging.info('Step #%d: accuracy %.2f%%' % (training_step, train_accuracy * 100)) 155 | 156 | is_last_step = (training_step == training_steps_max) 157 | if (training_step % self.FLAGS.eval_step_interval) == 0 or is_last_step: 158 | validation_accuracy = network_sess.run( 159 | evaluation_step, 160 | feed_dict={ 161 | fingerprint_input: X_val, 162 | ground_truth_input: y_val, 163 | is_training: False 164 | }) 165 | tf.logging.info('Step #%d: Validation accuracy %.2f%%' % (training_step, validation_accuracy * 100)) 166 | if validation_accuracy > best_accuracy: 167 | best_accuracy = validation_accuracy 168 | else: 169 | learning_rate = learning_rate / 2.0 170 | 171 | # compute the reward 172 | acc = best_accuracy 173 | reward = (acc - self.moving_acc) * 10 174 | if self.moving_acc == 0.0: 175 | reward = 0 176 | 177 | # if rewards are clipped, clip them in the range -0.05 to 0.05 178 | if self.clip_rewards: 179 | reward = np.clip(reward, -0.05, 0.05) 180 | 181 | # update moving accuracy with bias correction for 1st update 182 | self.moving_acc = self.beta * self.moving_acc + (1 - self.beta) * acc 183 | self.moving_acc = self.moving_acc / (1 - self.beta_bias) 184 | self.beta_bias = 0 185 | 186 | 187 | 188 | #print() 189 | #print("Manager: EWA Accuracy = ", self.moving_acc) 190 | 191 | # clean up resources and GPU memory 192 | network_sess.close() 193 | 194 | return reward, acc 195 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import sys 7 | import os 8 | import math 9 | 10 | import pdb 11 | import tensorflow as tf 12 | import tensorflow.contrib.slim as slim 13 | from tensorflow.python.ops import control_flow_ops 14 | 15 | 16 | def model_fn(actions, model_settings): 17 | fingerprint_input = tf.placeholder(tf.float32, [None, model_settings['fingerprint_size']], name='fingerprint_input') 18 | is_training = tf.placeholder(tf.bool, []) 19 | filters_1, filters_2, filters_3 = actions 20 | net = tf.layers.dense(fingerprint_input, filters_1, activation=tf.nn.relu) 21 | net = tf.layers.dense(net, filters_2, activation=tf.nn.relu) 22 | net = tf.layers.dense(net, filters_3, activation=tf.nn.relu) 23 | net = tf.layers.dense(net, model_settings['label_count']) 24 | return net, fingerprint_input, is_training 25 | 26 | 27 | 28 | 29 | def model_fn_cnn(actions, model_settings): 30 | def ds_cnn_arg_scope(weight_decay=0): 31 | with slim.arg_scope( 32 | [slim.convolution2d, slim.separable_convolution2d], 33 | weights_initializer=slim.initializers.xavier_initializer(), 34 | 
biases_initializer=slim.init_ops.zeros_initializer(), 35 | weights_regularizer=slim.l2_regularizer(weight_decay)) as sc: 36 | return sc 37 | 38 | def _depthwise_separable_conv(inputs, 39 | num_pwc_filters, 40 | sc, 41 | kernel_size, 42 | stride): 43 | depthwise_conv = slim.separable_convolution2d(inputs, 44 | num_outputs=None, 45 | stride=stride, 46 | depth_multiplier=1, 47 | kernel_size=kernel_size, 48 | scope=sc+'/depthwise_conv') 49 | 50 | bn = slim.batch_norm(depthwise_conv, scope=sc+'/dw_batch_norm') 51 | pointwise_conv = slim.convolution2d(bn, 52 | num_pwc_filters, 53 | kernel_size=[1, 1], 54 | scope=sc+'/pointwise_conv') 55 | bn = slim.batch_norm(pointwise_conv, scope=sc+'/pw_batch_norm') 56 | return bn 57 | 58 | 59 | fingerprint_input = tf.placeholder(tf.float32, [None, model_settings['fingerprint_size']], name='fingerprint_input') 60 | is_training = tf.placeholder(tf.bool, []) 61 | filters_1, kernel_1, stride_1, filters_2, kernel_2, stride_2, filters_3, kernel_3, stride_3 = actions 62 | 63 | label_count = model_settings['label_count'] 64 | input_frequency_size = model_settings['dct_coefficient_count'] 65 | input_time_size = model_settings['spectrogram_length'] 66 | fingerprint_4d = tf.reshape(fingerprint_input, 67 | [-1, input_time_size, input_frequency_size, 1]) 68 | 69 | t_dim = math.ceil(input_time_size / float(2 * stride_1 * stride_2 * stride_3)) 70 | f_dim = math.ceil(input_frequency_size / float(1 * stride_1 * stride_2 * stride_3)) 71 | 72 | 73 | scope = 'DS-CNN' 74 | with tf.variable_scope(scope) as sc: 75 | end_points_collection = sc.name + '_end_points' 76 | with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d], 77 | activation_fn=None, 78 | weights_initializer=slim.initializers.xavier_initializer(), 79 | biases_initializer=slim.init_ops.zeros_initializer(), 80 | outputs_collections=[end_points_collection]): 81 | with slim.arg_scope([slim.batch_norm], 82 | is_training=is_training, 83 | decay=0.96, 84 | updates_collections=None, 85 | activation_fn=tf.nn.relu): 86 | #pdb.set_trace() 87 | net = slim.convolution2d(fingerprint_4d, 64, [10, 4], stride=[2, 1], padding='SAME', scope='conv_1') 88 | net = slim.batch_norm(net, scope='conv_1/batch_norm') 89 | net = _depthwise_separable_conv(net, filters_1, kernel_size = [kernel_1, kernel_1], stride = [stride_1, stride_1], sc='conv_ds_1') 90 | net = _depthwise_separable_conv(net, filters_2, kernel_size = [kernel_2, kernel_2], stride = [stride_2, stride_2], sc='conv_ds_2') 91 | net = _depthwise_separable_conv(net, filters_3, kernel_size = [kernel_3, kernel_3], stride = [stride_3, stride_3], sc='conv_ds_3') 92 | net = slim.avg_pool2d(net, [t_dim, f_dim], scope='avg_pool') 93 | 94 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 95 | logits = slim.fully_connected(net, label_count, activation_fn=None, scope='fc1') 96 | return logits, fingerprint_input, is_training 97 | 98 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/random_cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import numpy as np 5 | import csv 6 | from manager import NetworkManager 7 | import tensorflow as tf 8 | from random import choice 9 | import itertools 10 | import tensorflow as tf 11 | from collections import defaultdict 12 | from model import model_fn, model_fn_cnn 13 | import json 14 | import argparse 
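# random_cnn.py evaluates a pre-generated slice of architectures as a
# random-search baseline. Each state is a 9-integer action list that
# model_fn_cnn unpacks as [filters_1, kernel_1, stride_1, filters_2, kernel_2,
# stride_2, filters_3, kernel_3, stride_3]; an illustrative entry would be
# [100, 3, 2, 50, 5, 1, 200, 3, 1].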
15 | import os.path 16 | import sys 17 | from six.moves import xrange # pylint: disable=redefined-builtin 18 | import random 19 | import input_data 20 | import models 21 | from tensorflow.python.platform import gfile 22 | from tensorflow.contrib import slim as slim 23 | FLAGS = None 24 | ''' 25 | states = [] 26 | filter_val = [10, 50, 100, 200] 27 | stride_val = [1,2] 28 | kernel_val = [3,5] 29 | for itr in xrange(500): 30 | states.append([random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val), 31 | random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val), 32 | random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val)]) 33 | ''' 34 | states = [] 35 | with open('All_case.txt', 'r') as fin: 36 | for line in fin: 37 | line = line.strip('\n') 38 | line = line.strip() 39 | line = line.split(' ') 40 | s = [] 41 | for l in line: 42 | s.append(int(l)) 43 | states.append(s) 44 | Rand2 = states[500:1000] 45 | def main(_): 46 | CLIP_REWARDS = False 47 | 48 | #filter_space = [v for v in itertools.product(filter_val, repeat=3)] 49 | #stride_space = [v for v in itertools.product(stride_val, repeat=3)] 50 | #kernal_space = [v for v in itertools.product(kernel_val, repeat=3)] 51 | data = defaultdict(list) 52 | with open('rand_result2.txt', 'w') as out: 53 | iteration = 0 54 | for state in Rand2: 55 | iteration +=1 56 | print (iteration,state) 57 | manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS) 58 | reward, previous_acc = manager.get_rewards(model_fn_cnn, state) 59 | previous_acc = round(previous_acc*100000,2) 60 | print(previous_acc) 61 | data[str(state)].append(previous_acc) 62 | out.write("{} {}\n".format(state, previous_acc)) 63 | with open('random_sample2.json', 'w') as outfile: 64 | json.dump(data, outfile) 65 | 66 | 67 | if __name__ == '__main__': 68 | parser = argparse.ArgumentParser() 69 | parser.add_argument( 70 | '--data_url', 71 | type=str, 72 | # pylint: disable=line-too-long 73 | default='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz', 74 | # pylint: enable=line-too-long 75 | help='Location of speech training data archive on the web.') 76 | parser.add_argument( 77 | '--data_dir', 78 | type=str, 79 | default='/tmp/speech_dataset/', 80 | help="""\ 81 | Where to download the speech training data to. 82 | """) 83 | parser.add_argument( 84 | '--background_volume', 85 | type=float, 86 | default=0.1, 87 | help="""\ 88 | How loud the background noise should be, between 0 and 1. 89 | """) 90 | parser.add_argument( 91 | '--background_frequency', 92 | type=float, 93 | default=0.8, 94 | help="""\ 95 | How many of the training samples have background noise mixed in. 96 | """) 97 | parser.add_argument( 98 | '--silence_percentage', 99 | type=float, 100 | default=10.0, 101 | help="""\ 102 | How much of the training data should be silence. 103 | """) 104 | parser.add_argument( 105 | '--unknown_percentage', 106 | type=float, 107 | default=10.0, 108 | help="""\ 109 | How much of the training data should be unknown words. 110 | """) 111 | parser.add_argument( 112 | '--time_shift_ms', 113 | type=float, 114 | default=100.0, 115 | help="""\ 116 | Range to randomly shift the training audio by in time. 
117 | """) 118 | parser.add_argument( 119 | '--testing_percentage', 120 | type=int, 121 | default=10, 122 | help='What percentage of wavs to use as a test set.') 123 | parser.add_argument( 124 | '--validation_percentage', 125 | type=int, 126 | default=10, 127 | help='What percentage of wavs to use as a validation set.') 128 | parser.add_argument( 129 | '--sample_rate', 130 | type=int, 131 | default=16000, 132 | help='Expected sample rate of the wavs',) 133 | parser.add_argument( 134 | '--clip_duration_ms', 135 | type=int, 136 | default=1000, 137 | help='Expected duration in milliseconds of the wavs',) 138 | parser.add_argument( 139 | '--window_size_ms', 140 | type=float, 141 | default=40.0, 142 | help='How long each spectrogram timeslice is',) 143 | parser.add_argument( 144 | '--window_stride_ms', 145 | type=float, 146 | default=20.0, 147 | help='How long each spectrogram timeslice is',) 148 | parser.add_argument( 149 | '--dct_coefficient_count', 150 | type=int, 151 | default=10, 152 | help='How many bins to use for the MFCC fingerprint',) 153 | parser.add_argument( 154 | '--how_many_training_steps', 155 | type=str, 156 | default='1000', 157 | help='How many training loops to run',) 158 | parser.add_argument( 159 | '--eval_step_interval', 160 | type=int, 161 | default=100, 162 | help='How often to evaluate the training results.') 163 | parser.add_argument( 164 | '--learning_rate', 165 | type=str, 166 | default='0.001', 167 | help='How large a learning rate to use when training.') 168 | parser.add_argument( 169 | '--batch_size', 170 | type=int, 171 | default=100, 172 | help='How many items to train with at once',) 173 | parser.add_argument( 174 | '--summaries_dir', 175 | type=str, 176 | default='/tmp/retrain_logs', 177 | help='Where to save summary logs for TensorBoard.') 178 | parser.add_argument( 179 | '--wanted_words', 180 | type=str, 181 | default='yes,no', #default='yes,no,up,down,left,right,on,off,stop,go' 182 | help='Words to use (others will be added to an unknown label)',) 183 | parser.add_argument( 184 | '--train_dir', 185 | type=str, 186 | default='/tmp/speech_commands_train', 187 | help='Directory to write event logs and checkpoint.') 188 | parser.add_argument( 189 | '--save_step_interval', 190 | type=int, 191 | default=100, 192 | help='Save model checkpoint every save_steps.') 193 | parser.add_argument( 194 | '--start_checkpoint', 195 | type=str, 196 | default='', 197 | help='If specified, restore this pretrained model before any training.') 198 | parser.add_argument( 199 | '--model_architecture', 200 | type=str, 201 | default='dnn', 202 | help='What model architecture to use') 203 | parser.add_argument( 204 | '--model_size_info', 205 | type=int, 206 | nargs="+", 207 | default=[128,128,128], 208 | help='Model dimensions - different for various models') 209 | parser.add_argument( 210 | '--check_nans', 211 | type=bool, 212 | default=False, 213 | help='Whether to check for invalid numbers during processing') 214 | 215 | FLAGS, unparsed = parser.parse_known_args() 216 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 217 | 218 | 219 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import numpy as np 5 | import csv 6 | from manager import NetworkManager 7 | import tensorflow as tf 8 | 
from model import model_fn 9 | from random import choice 10 | import itertools 11 | import tensorflow as tf 12 | from collections import defaultdict 13 | 14 | from controller import Controller, StateSpace 15 | from manager import NetworkManager 16 | from model import model_fn 17 | import json 18 | import argparse 19 | import os.path 20 | import sys 21 | from six.moves import xrange # pylint: disable=redefined-builtin 22 | 23 | import input_data 24 | import models 25 | from tensorflow.python.platform import gfile 26 | from tensorflow.contrib import slim as slim 27 | FLAGS = None 28 | 29 | 30 | def main(_): 31 | CLIP_REWARDS = False 32 | value = [30,60,100,144] 33 | state_space = [v for v in itertools.product(value, repeat=3)] 34 | data = defaultdict(list) 35 | for itr in xrange(500): 36 | for state in state_space: 37 | states = list(state) 38 | manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS) 39 | reward, previous_acc = manager.get_rewards(model_fn, states) 40 | previous_acc = round(previous_acc*100000,2) 41 | data[str(state)].append(previous_acc) 42 | with open('data.json', 'w') as outfile: 43 | json.dump(data, outfile) 44 | 45 | 46 | 47 | if __name__ == '__main__': 48 | parser = argparse.ArgumentParser() 49 | parser.add_argument( 50 | '--data_url', 51 | type=str, 52 | # pylint: disable=line-too-long 53 | default='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz', 54 | # pylint: enable=line-too-long 55 | help='Location of speech training data archive on the web.') 56 | parser.add_argument( 57 | '--data_dir', 58 | type=str, 59 | default='/tmp/speech_dataset/', 60 | help="""\ 61 | Where to download the speech training data to. 62 | """) 63 | parser.add_argument( 64 | '--background_volume', 65 | type=float, 66 | default=0.1, 67 | help="""\ 68 | How loud the background noise should be, between 0 and 1. 69 | """) 70 | parser.add_argument( 71 | '--background_frequency', 72 | type=float, 73 | default=0.8, 74 | help="""\ 75 | How many of the training samples have background noise mixed in. 76 | """) 77 | parser.add_argument( 78 | '--silence_percentage', 79 | type=float, 80 | default=10.0, 81 | help="""\ 82 | How much of the training data should be silence. 83 | """) 84 | parser.add_argument( 85 | '--unknown_percentage', 86 | type=float, 87 | default=10.0, 88 | help="""\ 89 | How much of the training data should be unknown words. 90 | """) 91 | parser.add_argument( 92 | '--time_shift_ms', 93 | type=float, 94 | default=100.0, 95 | help="""\ 96 | Range to randomly shift the training audio by in time. 
97 | """) 98 | parser.add_argument( 99 | '--testing_percentage', 100 | type=int, 101 | default=10, 102 | help='What percentage of wavs to use as a test set.') 103 | parser.add_argument( 104 | '--validation_percentage', 105 | type=int, 106 | default=10, 107 | help='What percentage of wavs to use as a validation set.') 108 | parser.add_argument( 109 | '--sample_rate', 110 | type=int, 111 | default=16000, 112 | help='Expected sample rate of the wavs',) 113 | parser.add_argument( 114 | '--clip_duration_ms', 115 | type=int, 116 | default=1000, 117 | help='Expected duration in milliseconds of the wavs',) 118 | parser.add_argument( 119 | '--window_size_ms', 120 | type=float, 121 | default=40.0, 122 | help='How long each spectrogram timeslice is',) 123 | parser.add_argument( 124 | '--window_stride_ms', 125 | type=float, 126 | default=20.0, 127 | help='How long each spectrogram timeslice is',) 128 | parser.add_argument( 129 | '--dct_coefficient_count', 130 | type=int, 131 | default=10, 132 | help='How many bins to use for the MFCC fingerprint',) 133 | parser.add_argument( 134 | '--how_many_training_steps', 135 | type=str, 136 | default='400', 137 | help='How many training loops to run',) 138 | parser.add_argument( 139 | '--eval_step_interval', 140 | type=int, 141 | default=200, 142 | help='How often to evaluate the training results.') 143 | parser.add_argument( 144 | '--learning_rate', 145 | type=str, 146 | default='0.001', 147 | help='How large a learning rate to use when training.') 148 | parser.add_argument( 149 | '--batch_size', 150 | type=int, 151 | default=100, 152 | help='How many items to train with at once',) 153 | parser.add_argument( 154 | '--summaries_dir', 155 | type=str, 156 | default='/tmp/retrain_logs', 157 | help='Where to save summary logs for TensorBoard.') 158 | parser.add_argument( 159 | '--wanted_words', 160 | type=str, 161 | default='yes', #default='yes,no,up,down,left,right,on,off,stop,go' 162 | help='Words to use (others will be added to an unknown label)',) 163 | parser.add_argument( 164 | '--train_dir', 165 | type=str, 166 | default='/tmp/speech_commands_train', 167 | help='Directory to write event logs and checkpoint.') 168 | parser.add_argument( 169 | '--save_step_interval', 170 | type=int, 171 | default=100, 172 | help='Save model checkpoint every save_steps.') 173 | parser.add_argument( 174 | '--start_checkpoint', 175 | type=str, 176 | default='', 177 | help='If specified, restore this pretrained model before any training.') 178 | parser.add_argument( 179 | '--model_architecture', 180 | type=str, 181 | default='dnn', 182 | help='What model architecture to use') 183 | parser.add_argument( 184 | '--model_size_info', 185 | type=int, 186 | nargs="+", 187 | default=[128,128,128], 188 | help='Model dimensions - different for various models') 189 | parser.add_argument( 190 | '--check_nans', 191 | type=bool, 192 | default=False, 193 | help='Whether to check for invalid numbers during processing') 194 | 195 | FLAGS, unparsed = parser.parse_known_args() 196 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 197 | 198 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import numpy as np 5 | import csv 6 | from manager import NetworkManager 7 | import tensorflow as tf 8 | from model 
import model_fn 9 | from random import choice 10 | import itertools 11 | import tensorflow as tf 12 | from collections import defaultdict 13 | 14 | from controller import Controller, StateSpace 15 | from manager import NetworkManager 16 | from model import model_fn, model_fn_cnn 17 | import json 18 | import argparse 19 | import os.path 20 | import sys 21 | from six.moves import xrange # pylint: disable=redefined-builtin 22 | import random 23 | import input_data 24 | import models 25 | from tensorflow.python.platform import gfile 26 | from tensorflow.contrib import slim as slim 27 | FLAGS = None 28 | states = [] 29 | filter_val = [10, 50, 100, 200] 30 | stride_val = [1,2] 31 | kernel_val = [3,5] 32 | 33 | for itr in xrange(10): 34 | ''' 35 | states.append([random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val), 36 | random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val), 37 | random.choice(filter_val), random.choice(kernel_val), random.choice(stride_val)]) 38 | ''' 39 | states.append([200,5,1,200,5,1,200,5,1]) 40 | 41 | def main(_): 42 | CLIP_REWARDS = False 43 | 44 | #filter_space = [v for v in itertools.product(filter_val, repeat=3)] 45 | #stride_space = [v for v in itertools.product(stride_val, repeat=3)] 46 | #kernal_space = [v for v in itertools.product(kernel_val, repeat=3)] 47 | data = defaultdict(list) 48 | with open('result.txt', 'w') as out: 49 | for state in states: 50 | print (state) 51 | manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS) 52 | reward, previous_acc = manager.get_rewards(model_fn_cnn, state) 53 | previous_acc = round(previous_acc*100000,2) 54 | print(previous_acc) 55 | data[str(state)].append(previous_acc) 56 | 57 | out.write("{} {}\n".format(state, previous_acc)) 58 | with open('data.json', 'w') as outfile: 59 | json.dump(data, outfile) 60 | 61 | 62 | if __name__ == '__main__': 63 | parser = argparse.ArgumentParser() 64 | parser.add_argument( 65 | '--data_url', 66 | type=str, 67 | # pylint: disable=line-too-long 68 | default='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz', 69 | # pylint: enable=line-too-long 70 | help='Location of speech training data archive on the web.') 71 | parser.add_argument( 72 | '--data_dir', 73 | type=str, 74 | default='/tmp/speech_dataset/', 75 | help="""\ 76 | Where to download the speech training data to. 77 | """) 78 | parser.add_argument( 79 | '--background_volume', 80 | type=float, 81 | default=0.1, 82 | help="""\ 83 | How loud the background noise should be, between 0 and 1. 84 | """) 85 | parser.add_argument( 86 | '--background_frequency', 87 | type=float, 88 | default=0.8, 89 | help="""\ 90 | How many of the training samples have background noise mixed in. 91 | """) 92 | parser.add_argument( 93 | '--silence_percentage', 94 | type=float, 95 | default=10.0, 96 | help="""\ 97 | How much of the training data should be silence. 98 | """) 99 | parser.add_argument( 100 | '--unknown_percentage', 101 | type=float, 102 | default=10.0, 103 | help="""\ 104 | How much of the training data should be unknown words. 105 | """) 106 | parser.add_argument( 107 | '--time_shift_ms', 108 | type=float, 109 | default=100.0, 110 | help="""\ 111 | Range to randomly shift the training audio by in time. 
112 | """) 113 | parser.add_argument( 114 | '--testing_percentage', 115 | type=int, 116 | default=10, 117 | help='What percentage of wavs to use as a test set.') 118 | parser.add_argument( 119 | '--validation_percentage', 120 | type=int, 121 | default=10, 122 | help='What percentage of wavs to use as a validation set.') 123 | parser.add_argument( 124 | '--sample_rate', 125 | type=int, 126 | default=16000, 127 | help='Expected sample rate of the wavs',) 128 | parser.add_argument( 129 | '--clip_duration_ms', 130 | type=int, 131 | default=1000, 132 | help='Expected duration in milliseconds of the wavs',) 133 | parser.add_argument( 134 | '--window_size_ms', 135 | type=float, 136 | default=40.0, 137 | help='How long each spectrogram timeslice is',) 138 | parser.add_argument( 139 | '--window_stride_ms', 140 | type=float, 141 | default=20.0, 142 | help='How long each spectrogram timeslice is',) 143 | parser.add_argument( 144 | '--dct_coefficient_count', 145 | type=int, 146 | default=10, 147 | help='How many bins to use for the MFCC fingerprint',) 148 | parser.add_argument( 149 | '--how_many_training_steps', 150 | type=str, 151 | default='1000', 152 | help='How many training loops to run',) 153 | parser.add_argument( 154 | '--eval_step_interval', 155 | type=int, 156 | default=100, 157 | help='How often to evaluate the training results.') 158 | parser.add_argument( 159 | '--learning_rate', 160 | type=str, 161 | default='0.001', 162 | help='How large a learning rate to use when training.') 163 | parser.add_argument( 164 | '--batch_size', 165 | type=int, 166 | default=100, 167 | help='How many items to train with at once',) 168 | parser.add_argument( 169 | '--summaries_dir', 170 | type=str, 171 | default='/tmp/retrain_logs', 172 | help='Where to save summary logs for TensorBoard.') 173 | parser.add_argument( 174 | '--wanted_words', 175 | type=str, 176 | default='yes,no', #default='yes,no,up,down,left,right,on,off,stop,go' 177 | help='Words to use (others will be added to an unknown label)',) 178 | parser.add_argument( 179 | '--train_dir', 180 | type=str, 181 | default='/tmp/speech_commands_train', 182 | help='Directory to write event logs and checkpoint.') 183 | parser.add_argument( 184 | '--save_step_interval', 185 | type=int, 186 | default=100, 187 | help='Save model checkpoint every save_steps.') 188 | parser.add_argument( 189 | '--start_checkpoint', 190 | type=str, 191 | default='', 192 | help='If specified, restore this pretrained model before any training.') 193 | parser.add_argument( 194 | '--model_architecture', 195 | type=str, 196 | default='dnn', 197 | help='What model architecture to use') 198 | parser.add_argument( 199 | '--model_size_info', 200 | type=int, 201 | nargs="+", 202 | default=[128,128,128], 203 | help='Model dimensions - different for various models') 204 | parser.add_argument( 205 | '--check_nans', 206 | type=bool, 207 | default=False, 208 | help='Whether to check for invalid numbers during processing') 209 | 210 | FLAGS, unparsed = parser.parse_known_args() 211 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 212 | 213 | 214 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/generate buffer/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import numpy as np 5 | import csv 6 | 7 | import tensorflow as tf 8 | from keras import backend as K 9 | 
from keras.datasets import cifar10 10 | from keras.utils import to_categorical 11 | 12 | from controller import Controller, StateSpace 13 | from manager import NetworkManager 14 | from model import model_fn, model_fn_cnn 15 | 16 | import argparse 17 | import os.path 18 | import sys 19 | from six.moves import xrange # pylint: disable=redefined-builtin 20 | 21 | import input_data 22 | import models 23 | from tensorflow.python.platform import gfile 24 | from tensorflow.contrib import slim as slim 25 | 26 | FLAGS = None 27 | 28 | 29 | def main(_): 30 | # create a shared session between Keras and Tensorflow 31 | policy_sess = tf.Session() 32 | K.set_session(policy_sess) 33 | 34 | NUM_LAYERS = 3 # number of layers of the state space 35 | MAX_TRIALS = 250 # maximum number of models generated 36 | 37 | MAX_EPOCHS = 60 # maximum number of epochs to train 38 | BATCHSIZE = 100 # batchsize 39 | EXPLORATION = 0.5 # high exploration for the first 1000 steps 40 | REGULARIZATION = 1e-3 # regularization strength 41 | CONTROLLER_CELLS = 32 # number of cells in RNN controller 42 | CLIP_REWARDS = False # clip rewards in the [-0.05, 0.05] range 43 | RESTORE_CONTROLLER = True # restore controller to continue training 44 | 45 | # construct a state space 46 | state_space = StateSpace() 47 | 48 | # add states 49 | #state_space.add_state(name='kernel', values=[3]) 50 | state_space.add_state(name='filters', values=[30, 60, 100, 144]) 51 | #state_space.add_state(name='stride', values=[1]) 52 | 53 | # print the state space being searched 54 | state_space.print_state_space() 55 | 56 | previous_acc = 0.0 57 | total_reward = 0.0 58 | 59 | with policy_sess.as_default(): 60 | # create the Controller and build the internal policy network 61 | controller = Controller(policy_sess, NUM_LAYERS, state_space, 62 | reg_param=REGULARIZATION, 63 | exploration=EXPLORATION, 64 | controller_cells=CONTROLLER_CELLS, 65 | restore_controller=RESTORE_CONTROLLER) 66 | print ('done') 67 | # create the Network Manager 68 | manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS) 69 | 70 | # get an initial random state space if controller needs to predict an 71 | # action from the initial state 72 | state = state_space.get_random_state_space(NUM_LAYERS) 73 | print("Initial Random State : ", state_space.parse_state_space_list(state)) 74 | #print() 75 | 76 | # train for number of trails 77 | for trial in range(MAX_TRIALS): 78 | with policy_sess.as_default(): 79 | actions = controller.get_action(state) # get an action for the previous state 80 | 81 | # print the action probabilities 82 | state_space.print_actions(actions) 83 | print("Predicted actions : ", state_space.parse_state_space_list(actions)) 84 | 85 | # build a model, train and get reward and accuracy from the network manager 86 | reward, previous_acc = manager.get_rewards(model_fn_cnn, state_space.parse_state_space_list(actions)) 87 | print("Rewards : ", reward, "Accuracy : ", previous_acc) 88 | 89 | with policy_sess.as_default(): 90 | 91 | total_reward += reward 92 | print("Total reward : ", total_reward) 93 | 94 | # actions and states are equivalent, save the state and reward 95 | state = actions 96 | controller.store_rollout(state, reward) 97 | 98 | # train the controller on the saved state and the discounted rewards 99 | loss = controller.train_step() 100 | print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss)) 101 | 102 | # write the results of this trial into a file 103 | with open('train_history.csv', mode='a+') as f: 104 | data = [previous_acc, reward] 105 | 
data.extend(state_space.parse_state_space_list(state)) 106 | writer = csv.writer(f) 107 | writer.writerow(data) 108 | print() 109 | 110 | print("Total Reward : ", total_reward) 111 | 112 | 113 | if __name__ == '__main__': 114 | parser = argparse.ArgumentParser() 115 | parser.add_argument( 116 | '--data_url', 117 | type=str, 118 | # pylint: disable=line-too-long 119 | default='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz', 120 | # pylint: enable=line-too-long 121 | help='Location of speech training data archive on the web.') 122 | parser.add_argument( 123 | '--data_dir', 124 | type=str, 125 | default='/tmp/speech_dataset/', 126 | help="""\ 127 | Where to download the speech training data to. 128 | """) 129 | parser.add_argument( 130 | '--background_volume', 131 | type=float, 132 | default=0.1, 133 | help="""\ 134 | How loud the background noise should be, between 0 and 1. 135 | """) 136 | parser.add_argument( 137 | '--background_frequency', 138 | type=float, 139 | default=0.8, 140 | help="""\ 141 | How many of the training samples have background noise mixed in. 142 | """) 143 | parser.add_argument( 144 | '--silence_percentage', 145 | type=float, 146 | default=10.0, 147 | help="""\ 148 | How much of the training data should be silence. 149 | """) 150 | parser.add_argument( 151 | '--unknown_percentage', 152 | type=float, 153 | default=10.0, 154 | help="""\ 155 | How much of the training data should be unknown words. 156 | """) 157 | parser.add_argument( 158 | '--time_shift_ms', 159 | type=float, 160 | default=100.0, 161 | help="""\ 162 | Range to randomly shift the training audio by in time. 163 | """) 164 | parser.add_argument( 165 | '--testing_percentage', 166 | type=int, 167 | default=10, 168 | help='What percentage of wavs to use as a test set.') 169 | parser.add_argument( 170 | '--validation_percentage', 171 | type=int, 172 | default=10, 173 | help='What percentage of wavs to use as a validation set.') 174 | parser.add_argument( 175 | '--sample_rate', 176 | type=int, 177 | default=16000, 178 | help='Expected sample rate of the wavs',) 179 | parser.add_argument( 180 | '--clip_duration_ms', 181 | type=int, 182 | default=1000, 183 | help='Expected duration in milliseconds of the wavs',) 184 | parser.add_argument( 185 | '--window_size_ms', 186 | type=float, 187 | default=40.0, 188 | help='How long each spectrogram timeslice is',) 189 | parser.add_argument( 190 | '--window_stride_ms', 191 | type=float, 192 | default=20.0, 193 | help='How long each spectrogram timeslice is',) 194 | parser.add_argument( 195 | '--dct_coefficient_count', 196 | type=int, 197 | default=10, 198 | help='How many bins to use for the MFCC fingerprint',) 199 | parser.add_argument( 200 | '--how_many_training_steps', 201 | type=str, 202 | default='200', 203 | help='How many training loops to run',) 204 | parser.add_argument( 205 | '--eval_step_interval', 206 | type=int, 207 | default=200, 208 | help='How often to evaluate the training results.') 209 | parser.add_argument( 210 | '--learning_rate', 211 | type=str, 212 | default='0.001', 213 | help='How large a learning rate to use when training.') 214 | parser.add_argument( 215 | '--batch_size', 216 | type=int, 217 | default=100, 218 | help='How many items to train with at once',) 219 | parser.add_argument( 220 | '--summaries_dir', 221 | type=str, 222 | default='/tmp/retrain_logs', 223 | help='Where to save summary logs for TensorBoard.') 224 | parser.add_argument( 225 | '--wanted_words', 226 | type=str, 227 | default='yes', 
#default='yes,no,up,down,left,right,on,off,stop,go' 228 | help='Words to use (others will be added to an unknown label)',) 229 | parser.add_argument( 230 | '--train_dir', 231 | type=str, 232 | default='/tmp/speech_commands_train', 233 | help='Directory to write event logs and checkpoint.') 234 | parser.add_argument( 235 | '--save_step_interval', 236 | type=int, 237 | default=100, 238 | help='Save model checkpoint every save_steps.') 239 | parser.add_argument( 240 | '--start_checkpoint', 241 | type=str, 242 | default='', 243 | help='If specified, restore this pretrained model before any training.') 244 | parser.add_argument( 245 | '--model_architecture', 246 | type=str, 247 | default='dnn', 248 | help='What model architecture to use') 249 | parser.add_argument( 250 | '--model_size_info', 251 | type=int, 252 | nargs="+", 253 | default=[128,128,128], 254 | help='Model dimensions - different for various models') 255 | parser.add_argument( 256 | '--check_nans', 257 | type=bool, 258 | default=False, 259 | help='Whether to check for invalid numbers during processing') 260 | 261 | FLAGS, unparsed = parser.parse_known_args() 262 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 263 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/pg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | 3 | import os 4 | import sys 5 | import logging 6 | import time 7 | import numpy as np 8 | import tensorflow as tf 9 | import json 10 | import scipy.signal 11 | import os 12 | import time 13 | import inspect 14 | from utils.general import get_logger, Progbar, export_plot 15 | from pg_config import pg_config 16 | #import logz 17 | from config import * 18 | import pdb 19 | from scipy.stats import multivariate_normal 20 | 21 | data_mean = 95942.2077661 22 | data_std = 1317.44860489 23 | 24 | class PG(object): 25 | 26 | def __init__(self): 27 | self.lr = 10e-2 28 | self.batch_size = 64 29 | self.controller_cells = 64 30 | self.num_iterations = 100 31 | self.observation_dim = 4 32 | self.action_dim_1 = 4 33 | self.action_dim_2 = 2 34 | self.action_dim_3 = 2 35 | self.num_layers = 3 36 | self.num_actions_per_layer = 3 37 | 38 | self.hasConstraint = True 39 | self.hardConstraint = True 40 | self.reg_weight = 1e-5 41 | self.reg_op = 1e-8 42 | self.weight_limit = 8000 43 | self.op_limit = 1e8 44 | 45 | self.temp1 = [] 46 | self.temp2 = [] 47 | 48 | self.action_buffer = [] 49 | self.state_buffer = [] 50 | self.logprob_buffer = [] 51 | self._dict = {} 52 | self._used_dict = {} 53 | self.log_acc = [] 54 | self.logger = get_logger('./log.txt') 55 | 56 | self._num_used_models = [] 57 | 58 | self._initial_baseline = 0.05 59 | 60 | #with open('./unormdata.json', 'r') as f: 61 | with open('./normalizedata.json', 'r') as f: 62 | self._raw_dict = json.load(f) 63 | f.close() 64 | filter_nums_map = {10:0, 50:1, 100:2, 200:3} 65 | kernel_sizes_map = {3:0, 5:1} 66 | strides_map = {1:0, 2:1} 67 | for key in self._raw_dict.keys(): 68 | params = key[1:-1].split(',') 69 | temp = [] 70 | for i in range(9): 71 | if i%3 == 0: temp.append(filter_nums_map[int(params[i])]) 72 | elif i%3 == 1: temp.append(kernel_sizes_map[int(params[i])]) 73 | else: temp.append(strides_map[int(params[i])]) 74 | 75 | self._dict[str(temp)] = np.mean(self._raw_dict[key]) 76 | self._used_dict[str(temp)] = 0 77 | self.build() 78 | 79 | 80 | 81 | def add_placeholders_op(self): 82 | self.observation_placeholder = tf.placeholder(tf.float32, 
[self.batch_size, 1, self.observation_dim]) 83 | self.action_placeholder = tf.placeholder(tf.int32, [self.num_layers*self.num_actions_per_layer, self.batch_size]) 84 | self.advantage_placeholder = tf.placeholder(tf.float32, [self.batch_size, self.num_layers*self.num_actions_per_layer]) 85 | 86 | 87 | 88 | def build_policy_network_op(self, scope="policy_network"): 89 | temp_logprob_buffer = [] 90 | with tf.variable_scope(scope): 91 | self.cell = tf.contrib.rnn.NASCell(self.controller_cells) 92 | cell_state = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) 93 | cell_input = tf.zeros([self.batch_size, 1, self.observation_dim]) 94 | for i in range(self.num_layers*self.num_actions_per_layer): 95 | outputs, cell_state = tf.nn.dynamic_rnn(self.cell, cell_input, initial_state=cell_state, dtype=tf.float32) 96 | # outputs[:, -1, :].shape = (batch_size, controller_cells) 97 | #action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_1, name='rnn_fc', reuse=tf.AUTO_REUSE) 98 | if i%3 == 0: action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_1, name='rnn_fc_%d' % (i)) 99 | elif i%3 == 1: action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_2, name='rnn_fc_%d' % (i)) 100 | else: action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_3, name='rnn_fc_%d' % (i)) 101 | # if i%3 == 0: action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_1, name='rnn_fc_1', reuse=tf.AUTO_REUSE) 102 | # elif i%3 == 1: action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_2, name='rnn_fc_2', reuse=tf.AUTO_REUSE) 103 | # else: action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim_3, name='rnn_fc_3', reuse=tf.AUTO_REUSE) 104 | 105 | sampled_action = tf.squeeze(tf.multinomial(action_logits, 1), axis=1) 106 | cell_input = tf.one_hot(sampled_action, self.observation_dim) 107 | cell_input = tf.expand_dims(cell_input, 1) 108 | logprob = tf.negative(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.action_placeholder[i], logits=action_logits)) 109 | logprob = tf.expand_dims(logprob, 1) 110 | 111 | self.action_buffer.append(sampled_action) #action 112 | #self.state_buffer.append(cell_input) # state 113 | temp_logprob_buffer.append(logprob) #logprob 114 | 115 | self.logprob_buffer = tf.concat(temp_logprob_buffer, 1) # batch x layer 116 | 117 | 118 | 119 | def add_loss_op(self): 120 | self.loss = -tf.reduce_mean(self.logprob_buffer * self.advantage_placeholder) 121 | 122 | 123 | def add_optimizer_op(self): 124 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 125 | 126 | 127 | def build(self): 128 | self.add_placeholders_op() 129 | self.build_policy_network_op() 130 | self.add_loss_op() 131 | self.add_optimizer_op() 132 | 133 | 134 | def initialize(self): 135 | self.sess = tf.Session() 136 | init = tf.global_variables_initializer() 137 | self.sess.run(init) 138 | 139 | 140 | def sample_model_reward_return(self, t): 141 | filter_nums_map = {0:10, 1:50, 2:100, 3:200} 142 | kernel_sizes_map = {0:3, 1:5} 143 | strides_map = {0:1, 1:2} 144 | 145 | action_buffer = np.array(self.sess.run(self.action_buffer)) 146 | # action_buffer.shape = (num_layers, N) 147 | returns = np.float32(np.zeros_like(action_buffer)) 148 | losses = np.float32(np.zeros_like(action_buffer)) 149 | #pdb.set_trace() 150 | for i in range(self.batch_size): 151 | #returns[:,i] = self._dict[str(action_buffer[:,i])] - self._initial_baseline 152 | #returns[:, i] = self._dict[action_buffer[:, i][0], 
action_buffer[:, i][1]] 153 | #self._used_dict[action_buffer[:, i][0], action_buffer[:, i][1]] = 1 154 | temp = action_buffer[:, i] 155 | #print temp 156 | # temp[1] = 0 if temp[1]<=1 else 1 157 | # temp[2] = 0 if temp[2]<=1 else 1 158 | # temp[4] = 0 if temp[4]<=1 else 1 159 | # temp[5] = 0 if temp[5]<=1 else 1 160 | # temp[7] = 0 if temp[7]<=1 else 1 161 | # temp[8] = 0 if temp[8]<=1 else 1 162 | 163 | filter1, kernel1, stride1 = filter_nums_map[temp[0]], kernel_sizes_map[temp[1]], strides_map[temp[2]] 164 | filter2, kernel2, stride2 = filter_nums_map[temp[3]], kernel_sizes_map[temp[4]], strides_map[temp[5]] 165 | filter3, kernel3, stride3 = filter_nums_map[temp[6]], kernel_sizes_map[temp[7]], strides_map[temp[8]] 166 | 167 | if str(temp) not in self._dict.keys(): 168 | # print 'not in buffer', [filter1, kernel1, stride1, filter2, kernel2, stride2, filter3, kernel3, stride3] 169 | s = str([filter1, kernel1, stride1, filter2, kernel2, stride2, filter3, kernel3, stride3]) 170 | #print self._raw_dict[s] 171 | #self._dict[str(temp)] = np.mean(self._raw_dict[s]) / 100000. 172 | self._dict[str(temp)] = np.mean(self._raw_dict[s]) 173 | 174 | returns[:, i] = self._dict[str(temp)] #- self._initial_baseline 175 | 176 | if self.hasConstraint: 177 | weights = (filter1 + 1) * (kernel1**2) 178 | weights += (filter2 + 1) * (kernel2**2) 179 | weights += (filter3 + 1) * (kernel3**2) 180 | 181 | t_in, f_in = 49, 10 182 | t1, f1 = np.ceil(t_in/stride1), np.ceil(f_in/stride1) 183 | t2, f2 = np.ceil(t1/stride2), np.ceil(f1/stride2) 184 | t3, f3 = np.ceil(t2/stride3), np.ceil(f2/stride3) 185 | ops = 2*1*t1*f1*stride1**2*filter1 + t1*f1*filter1 186 | ops += (2*filter1*t2*f2*stride2**2 + filter1*t2*f2) + (2*filter1*t2*f2*filter2 + t2*f2*filter2) 187 | ops += (2*filter2*t3*f3*stride3**2 + filter2*t3*f3) + (2*filter2*t3*f3*filter3 + t3*f3*filter3) 188 | 189 | self.temp1.append(weights); self.temp2.append(ops) 190 | 191 | if self.hardConstraint: 192 | if weights > self.weight_limit or ops > self.op_limit: 193 | #returns[:, i] = 0 194 | returns[:, i] = -data_mean 195 | losses[:, i] = 0 196 | else: 197 | losses[:, i] = 0 198 | else: 199 | losses[:, i] = self.reg_weight*weights + self.reg_op*ops 200 | 201 | self._used_dict[str(temp)] = 1 202 | if t==self.num_iterations-1 and i>=self.batch_size-5: 203 | print 'converges at:', [filter_nums_map[temp[0]], kernel_sizes_map[temp[1]], strides_map[temp[2]],\ 204 | filter_nums_map[temp[3]], kernel_sizes_map[temp[4]], strides_map[temp[5]],\ 205 | filter_nums_map[temp[6]], kernel_sizes_map[temp[7]], strides_map[temp[8]]] 206 | #print np.mean(losses), np.mean(returns) 207 | return action_buffer, np.transpose(returns), np.transpose(losses) 208 | 209 | 210 | def train(self): 211 | 212 | for t in range(self.num_iterations): 213 | actions, returns, losses = self.sample_model_reward_return(t) 214 | 215 | self.sess.run(self.train_op, feed_dict={ 216 | self.action_placeholder : actions, 217 | self.advantage_placeholder : returns-losses}) 218 | 219 | #avg_acc = np.mean(returns) 220 | avg_acc = (np.mean(returns)*data_std + data_mean) / 100000. 
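            # `returns` hold normalized rewards: normalizedata.json appears to store
            # accuracies on a 1e5 scale that were z-scored with data_mean / data_std above,
            # so this line undoes the normalization and rescales to a [0, 1] accuracy.
            # For example, a mean return of 0.0 would map back to
            # (0.0 * 1317.45 + 95942.21) / 100000. ~= 0.9594, i.e. roughly 95.9% accuracy.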
221 | 222 | #calculate number of used models: 223 | used = 0 224 | for key in self._used_dict.keys(): 225 | used += self._used_dict[key] 226 | #used = np.sum(self._used_dict) 227 | self._num_used_models.append(used) 228 | 229 | 230 | self.log_acc.append(avg_acc) 231 | #sigma_reward = np.sqrt(np.var(returns) / len(total_rewards)) 232 | msg = "Average accuracy within a batch: {:04.2f}".format(avg_acc*100) 233 | self.logger.info(msg) 234 | #print (actions) 235 | 236 | 237 | self.logger.info("- Training done.") 238 | #export_plot(self.log_acc, "Batch_Accuracy", 'NAS-DNN', "./batch_accuracy.png", self._num_used_models, "Sampled Model") 239 | export_plot(self.log_acc, "Score", 'NAS-DNN', "./batch_accuracy.png") 240 | export_plot(self._num_used_models, "Models Sampled", 'NAS-DNN', "./used_models.png") 241 | 242 | print 'log_acc'; print self.log_acc 243 | print '_num_used_models'; print self._num_used_models 244 | # print 'weights', np.mean(self.temp1), np.var(self.temp1) 245 | # print 'ops', np.mean(self.temp2), np.var(self.temp2) 246 | 247 | 248 | def run(self): 249 | self.initialize() 250 | self.train() 251 | 252 | 253 | 254 | if __name__ == '__main__': 255 | model = PG() 256 | model.run() 257 | -------------------------------------------------------------------------------- /real+world+CNN/real world CNN/pg_config.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class pg_config(): 4 | 5 | record = False 6 | 7 | # output config 8 | output_path = "PG_results/" 9 | model_output = output_path + "model.weights/" 10 | #log_path = output_path + "log.txt" 11 | plot_output = output_path + "scores.png" 12 | record_path = output_path 13 | record_freq = 5 14 | summary_freq = 1 15 | 16 | 17 | # model and training config 18 | num_batches = 10 # number of batches trained on 19 | batch_size = 50 # number of steps used to compute each policy update 20 | learning_rate = 3e-2 21 | #use_baseline = False 22 | normalize_advantage=True 23 | activation=tf.nn.relu 24 | -------------------------------------------------------------------------------- /toy-model/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /toy-model/__pycache__/pg_config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/__pycache__/pg_config.cpython-36.pyc -------------------------------------------------------------------------------- /toy-model/action_average_reward_dict.json: -------------------------------------------------------------------------------- 1 | {"(30, 30, 30)": 917460.32, "(30, 30, 60)": 936507.94, "(30, 30, 100)": 933333.34, "(30, 30, 144)": 920634.93, "(30, 60, 30)": 942857.15, "(30, 60, 60)": 933333.34, "(30, 60, 100)": 926984.13, "(30, 60, 144)": 933333.34, "(30, 100, 30)": 936507.94, "(30, 100, 60)": 942857.15, "(30, 100, 100)": 933333.34, "(30, 100, 144)": 936507.94, "(30, 144, 30)": 936507.94, "(30, 144, 60)": 936507.94, "(30, 144, 100)": 936507.94, "(30, 144, 144)": 939682.54, "(60, 30, 30)": 911111.12, "(60, 30, 60)": 949206.35, "(60, 30, 100)": 946031.75, "(60, 30, 144)": 
939682.54, "(60, 60, 30)": 936507.94, "(60, 60, 60)": 942857.15, "(60, 60, 100)": 936507.94, "(60, 60, 144)": 930158.73, "(60, 100, 30)": 917460.32, "(60, 100, 60)": 930158.73, "(60, 100, 100)": 933333.34, "(60, 100, 144)": 939682.54, "(60, 144, 30)": 933333.34, "(60, 144, 60)": 946031.75, "(60, 144, 100)": 942857.15, "(60, 144, 144)": 936507.94, "(100, 30, 30)": 949206.35, "(100, 30, 60)": 939682.54, "(100, 30, 100)": 939682.54, "(100, 30, 144)": 939682.54, "(100, 60, 30)": 942857.15, "(100, 60, 60)": 926984.13, "(100, 60, 100)": 936507.94, "(100, 60, 144)": 936507.94, "(100, 100, 30)": 933333.34, "(100, 100, 60)": 923809.53, "(100, 100, 100)": 942857.15, "(100, 100, 144)": 911111.12, "(100, 144, 30)": 946031.75, "(100, 144, 60)": 926984.13, "(100, 144, 100)": 942857.15, "(100, 144, 144)": 939682.54, "(144, 30, 30)": 933333.34, "(144, 30, 60)": 946031.75, "(144, 30, 100)": 946031.75, "(144, 30, 144)": 936507.94, "(144, 60, 30)": 911111.12, "(144, 60, 60)": 920634.93, "(144, 60, 100)": 946031.75, "(144, 60, 144)": 917460.32, "(144, 100, 30)": 898412.7, "(144, 100, 60)": 946031.75, "(144, 100, 100)": 907936.51, "(144, 100, 144)": 939682.54, "(144, 144, 30)": 946031.75, "(144, 144, 60)": 895238.1, "(144, 144, 100)": 936507.94, "(144, 144, 144)": 930158.73} 2 | -------------------------------------------------------------------------------- /toy-model/action_reward_dict.json: -------------------------------------------------------------------------------- 1 | {"(30, 30, 30)": [917460.32], "(30, 30, 60)": [936507.94], "(30, 30, 100)": [933333.34], "(30, 30, 144)": [920634.93], "(30, 60, 30)": [942857.15], "(30, 60, 60)": [933333.34], "(30, 60, 100)": [926984.13], "(30, 60, 144)": [933333.34], "(30, 100, 30)": [936507.94], "(30, 100, 60)": [942857.15], "(30, 100, 100)": [933333.34], "(30, 100, 144)": [936507.94], "(30, 144, 30)": [936507.94], "(30, 144, 60)": [936507.94], "(30, 144, 100)": [936507.94], "(30, 144, 144)": [939682.54], "(60, 30, 30)": [911111.12], "(60, 30, 60)": [949206.35], "(60, 30, 100)": [946031.75], "(60, 30, 144)": [939682.54], "(60, 60, 30)": [936507.94], "(60, 60, 60)": [942857.15], "(60, 60, 100)": [936507.94], "(60, 60, 144)": [930158.73], "(60, 100, 30)": [917460.32], "(60, 100, 60)": [930158.73], "(60, 100, 100)": [933333.34], "(60, 100, 144)": [939682.54], "(60, 144, 30)": [933333.34], "(60, 144, 60)": [946031.75], "(60, 144, 100)": [942857.15], "(60, 144, 144)": [936507.94], "(100, 30, 30)": [949206.35], "(100, 30, 60)": [939682.54], "(100, 30, 100)": [939682.54], "(100, 30, 144)": [939682.54], "(100, 60, 30)": [942857.15], "(100, 60, 60)": [926984.13], "(100, 60, 100)": [936507.94], "(100, 60, 144)": [936507.94], "(100, 100, 30)": [933333.34], "(100, 100, 60)": [923809.53], "(100, 100, 100)": [942857.15], "(100, 100, 144)": [911111.12], "(100, 144, 30)": [946031.75], "(100, 144, 60)": [926984.13], "(100, 144, 100)": [942857.15], "(100, 144, 144)": [939682.54], "(144, 30, 30)": [933333.34], "(144, 30, 60)": [946031.75], "(144, 30, 100)": [946031.75], "(144, 30, 144)": [936507.94], "(144, 60, 30)": [911111.12], "(144, 60, 60)": [920634.93], "(144, 60, 100)": [946031.75], "(144, 60, 144)": [917460.32], "(144, 100, 30)": [898412.7], "(144, 100, 60)": [946031.75], "(144, 100, 100)": [907936.51], "(144, 100, 144)": [939682.54], "(144, 144, 30)": [946031.75], "(144, 144, 60)": [895238.1], "(144, 144, 100)": [936507.94], "(144, 144, 144)": [930158.73]} -------------------------------------------------------------------------------- /toy-model/backup/batch_accuracy.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/backup/batch_accuracy.png -------------------------------------------------------------------------------- /toy-model/backup/reward_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/backup/reward_function.png -------------------------------------------------------------------------------- /toy-model/backup/used_models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/backup/used_models.png -------------------------------------------------------------------------------- /toy-model/batch_accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/batch_accuracy.png -------------------------------------------------------------------------------- /toy-model/config.py: -------------------------------------------------------------------------------- 1 | LAYER_SIZES = [30, 60, 100, 144] 2 | NUM_LAYERS = 3 3 | NUM_ENUM = 10 4 | FLAGS = None 5 | CLIP_REWARDS = False 6 | 7 | JSON_SCALE = 1e6 8 | 9 | controller_cells = 32 10 | 11 | action_average_reward_dict_name = 'action_average_reward_dict.json' -------------------------------------------------------------------------------- /toy-model/create_polynomial.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from mpl_toolkits.mplot3d import Axes3D 4 | from scipy.stats import multivariate_normal 5 | 6 | import pdb 7 | 8 | 9 | x, y = np.mgrid[-10:10:0.02, -10:10:0.02] 10 | pos = np.empty(x.shape + (2,)) 11 | pos[:, :, 0] = x 12 | pos[:, :, 1] = y 13 | rv1 = multivariate_normal([5, -5], [[10, 0], [0, 10]]) 14 | rv2 = multivariate_normal([2, -2], [[7, 2], [2, 5]]) 15 | rv3 = multivariate_normal([7, -7], [[1, 0], [0, 1]]) 16 | rv4 = multivariate_normal([3, -3], [[1, 0], [0, 1]]) 17 | rv11 = multivariate_normal([-5, 5], [[3, 1], [1, 2]]) 18 | rv22 = multivariate_normal([-2, 2], [[7, 2], [2, 5]]) 19 | rv33 = multivariate_normal([-7, 7], [[1, 0], [0,1]]) 20 | rv44 = multivariate_normal([-3, 3], [[4, 0], [0, 4]]) 21 | rv = rv1.pdf(pos) + rv2.pdf(pos) + rv3.pdf(pos) + rv4.pdf(pos) + rv11.pdf(pos) + rv22.pdf(pos) + rv33.pdf(pos) + rv44.pdf(pos) 22 | 23 | 24 | 25 | #z = np.polynomial.polynomial.polyval2d(X, Y, coeff) 26 | fig = plt.figure() 27 | #plt.contourf(x, y, rv) 28 | #plt.show() 29 | 30 | ax = Axes3D(fig) #fig.add_subplot(111, projection='3d') 31 | ax.plot_surface(x, y, rv) 32 | plt.show() 33 | 34 | pdb.set_trace() 35 | a = 1 -------------------------------------------------------------------------------- /toy-model/generateAR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | #from keras import backend as K 4 | import argparse 5 | #import csv 6 | import os 7 | import sys 8 | import json 9 | 10 | import input_data 11 | from model import model_fn 12 | from manager import NetworkManager 13 | 
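# Overview, based on config.py above: with LAYER_SIZES = [30, 60, 100, 144] and
# NUM_LAYERS = 3, the loop below enumerates all 4**3 = 64 fully-connected child
# architectures, calls NetworkManager.get_rewards() on each (NUM_ENUM times; set to
# 1 here), and records the returned accuracy scaled by JSON_SCALE (1e6) under keys
# such as str((30, 60, 100)) -> "(30, 60, 100)" -- the key format used in
# action_reward_dict.json and action_average_reward_dict.json in toy-model/.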
14 | from config import * 15 | # LAYER_SIZES = [30, 60, 100, 144] 16 | # NUM_LAYERS = 3 17 | # layer_sizes = [30, 60] 18 | # num_layers = 3 19 | # NUM_ENUM = 2 20 | # FLAGS = None 21 | # CLIP_REWARDS = False 22 | 23 | NUM_ENUM = 1 24 | def main(_): 25 | action_reward_dict = {} 26 | policy_sess = tf.Session() 27 | #K.set_session(policy_sess) 28 | manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS) 29 | 30 | size = [len(LAYER_SIZES)]*NUM_LAYERS 31 | reward_space = np.zeros((size)) 32 | #print(reward_space.shape) 33 | for i in range(NUM_ENUM): 34 | for idx,val in np.ndenumerate(reward_space): 35 | action = [LAYER_SIZES[i] for i in idx] 36 | #print(action) 37 | with policy_sess.as_default(): 38 | _, acc = manager.get_rewards(model_fn, action) 39 | print(action, acc) 40 | acc = round(acc*JSON_SCALE, 2) 41 | action = str(tuple(action)) 42 | if action not in action_reward_dict: 43 | action_reward_dict[action] = [acc] 44 | else: 45 | action_reward_dict[action].append(acc) 46 | 47 | action_average_reward_dict = {} 48 | for k in action_reward_dict.keys(): 49 | action_average_reward_dict[k] = round(np.mean(action_reward_dict[k]), 2) 50 | 51 | with open('action_reward_dict.json', 'w') as f: 52 | json.dump(action_reward_dict, f) 53 | f.close() 54 | with open('action_average_reward_dict.json', 'w') as f: 55 | json.dump(action_average_reward_dict, f) 56 | f.close() 57 | 58 | 59 | if __name__ == '__main__': 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument( 62 | '--data_url', 63 | type=str, 64 | # pylint: disable=line-too-long 65 | default='http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz', 66 | # pylint: enable=line-too-long 67 | help='Location of speech training data archive on the web.') 68 | parser.add_argument( 69 | '--data_dir', 70 | type=str, 71 | default='/tmp/speech_dataset/', 72 | help="""\ 73 | Where to download the speech training data to. 74 | """) 75 | parser.add_argument( 76 | '--background_volume', 77 | type=float, 78 | default=0.1, 79 | help="""\ 80 | How loud the background noise should be, between 0 and 1. 81 | """) 82 | parser.add_argument( 83 | '--background_frequency', 84 | type=float, 85 | default=0.8, 86 | help="""\ 87 | How many of the training samples have background noise mixed in. 88 | """) 89 | parser.add_argument( 90 | '--silence_percentage', 91 | type=float, 92 | default=10.0, 93 | help="""\ 94 | How much of the training data should be silence. 95 | """) 96 | parser.add_argument( 97 | '--unknown_percentage', 98 | type=float, 99 | default=10.0, 100 | help="""\ 101 | How much of the training data should be unknown words. 102 | """) 103 | parser.add_argument( 104 | '--time_shift_ms', 105 | type=float, 106 | default=100.0, 107 | help="""\ 108 | Range to randomly shift the training audio by in time. 
109 | """) 110 | parser.add_argument( 111 | '--testing_percentage', 112 | type=int, 113 | default=10, 114 | help='What percentage of wavs to use as a test set.') 115 | parser.add_argument( 116 | '--validation_percentage', 117 | type=int, 118 | default=10, 119 | help='What percentage of wavs to use as a validation set.') 120 | parser.add_argument( 121 | '--sample_rate', 122 | type=int, 123 | default=16000, 124 | help='Expected sample rate of the wavs',) 125 | parser.add_argument( 126 | '--clip_duration_ms', 127 | type=int, 128 | default=1000, 129 | help='Expected duration in milliseconds of the wavs',) 130 | parser.add_argument( 131 | '--window_size_ms', 132 | type=float, 133 | default=40.0, 134 | help='How long each spectrogram timeslice is',) 135 | parser.add_argument( 136 | '--window_stride_ms', 137 | type=float, 138 | default=20.0, 139 | help='How long each spectrogram timeslice is',) 140 | parser.add_argument( 141 | '--dct_coefficient_count', 142 | type=int, 143 | default=10, 144 | help='How many bins to use for the MFCC fingerprint',) 145 | parser.add_argument( 146 | '--how_many_training_steps', 147 | type=str, 148 | default='200', 149 | help='How many training loops to run',) 150 | parser.add_argument( 151 | '--eval_step_interval', 152 | type=int, 153 | default=200, 154 | help='How often to evaluate the training results.') 155 | parser.add_argument( 156 | '--learning_rate', 157 | type=str, 158 | default='0.001', 159 | help='How large a learning rate to use when training.') 160 | parser.add_argument( 161 | '--batch_size', 162 | type=int, 163 | default=100, 164 | help='How many items to train with at once',) 165 | parser.add_argument( 166 | '--summaries_dir', 167 | type=str, 168 | default='/tmp/retrain_logs', 169 | help='Where to save summary logs for TensorBoard.') 170 | parser.add_argument( 171 | '--wanted_words', 172 | type=str, 173 | default='yes', #default='yes,no,up,down,left,right,on,off,stop,go' 174 | help='Words to use (others will be added to an unknown label)',) 175 | parser.add_argument( 176 | '--train_dir', 177 | type=str, 178 | default='/tmp/speech_commands_train', 179 | help='Directory to write event logs and checkpoint.') 180 | parser.add_argument( 181 | '--save_step_interval', 182 | type=int, 183 | default=100, 184 | help='Save model checkpoint every save_steps.') 185 | parser.add_argument( 186 | '--start_checkpoint', 187 | type=str, 188 | default='', 189 | help='If specified, restore this pretrained model before any training.') 190 | parser.add_argument( 191 | '--model_architecture', 192 | type=str, 193 | default='dnn', 194 | help='What model architecture to use') 195 | parser.add_argument( 196 | '--model_size_info', 197 | type=int, 198 | nargs="+", 199 | default=[128,128,128], 200 | help='Model dimensions - different for various models') 201 | parser.add_argument( 202 | '--check_nans', 203 | type=bool, 204 | default=False, 205 | help='Whether to check for invalid numbers during processing') 206 | 207 | FLAGS, unparsed = parser.parse_known_args() 208 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 209 | 210 | -------------------------------------------------------------------------------- /toy-model/input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Model definitions for simple speech recognition. 16 | 17 | """ 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import hashlib 23 | import math 24 | import os.path 25 | import random 26 | import re 27 | import sys 28 | import tarfile 29 | 30 | import numpy as np 31 | from six.moves import urllib 32 | from six.moves import xrange # pylint: disable=redefined-builtin 33 | import tensorflow as tf 34 | 35 | from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio 36 | from tensorflow.python.ops import io_ops 37 | from tensorflow.python.platform import gfile 38 | from tensorflow.python.util import compat 39 | 40 | MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M 41 | SILENCE_LABEL = '_silence_' 42 | SILENCE_INDEX = 0 43 | UNKNOWN_WORD_LABEL = '_unknown_' 44 | UNKNOWN_WORD_INDEX = 1 45 | BACKGROUND_NOISE_DIR_NAME = '_background_noise_' 46 | RANDOM_SEED = 59185 47 | 48 | 49 | def prepare_words_list(wanted_words): 50 | """Prepends common tokens to the custom word list. 51 | 52 | Args: 53 | wanted_words: List of strings containing the custom words. 54 | 55 | Returns: 56 | List with the standard silence and unknown tokens added. 57 | """ 58 | return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words 59 | 60 | 61 | def which_set(filename, validation_percentage, testing_percentage): 62 | """Determines which data partition the file should belong to. 63 | 64 | We want to keep files in the same training, validation, or testing sets even 65 | if new ones are added over time. This makes it less likely that testing 66 | samples will accidentally be reused in training when long runs are restarted 67 | for example. To keep this stability, a hash of the filename is taken and used 68 | to determine which set it should belong to. This determination only depends on 69 | the name and the set proportions, so it won't change as other files are added. 70 | 71 | It's also useful to associate particular files as related (for example words 72 | spoken by the same person), so anything after '_nohash_' in a filename is 73 | ignored for set determination. This ensures that 'bobby_nohash_0.wav' and 74 | 'bobby_nohash_1.wav' are always in the same set, for example. 75 | 76 | Args: 77 | filename: File path of the data sample. 78 | validation_percentage: How much of the data set to use for validation. 79 | testing_percentage: How much of the data set to use for testing. 80 | 81 | Returns: 82 | String, one of 'training', 'validation', or 'testing'. 83 | """ 84 | base_name = os.path.basename(filename) 85 | # We want to ignore anything after '_nohash_' in the file name when 86 | # deciding which set to put a wav in, so the data set creator has a way of 87 | # grouping wavs that are close variations of each other. 
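  # For example, 'bobby_nohash_0.wav' and 'bobby_nohash_1.wav' both reduce to the hash
  # key 'bobby', so clips from the same speaker always land in the same partition.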
88 | hash_name = re.sub(r'_nohash_.*$', '', base_name) 89 | # This looks a bit magical, but we need to decide whether this file should 90 | # go into the training, testing, or validation sets, and we want to keep 91 | # existing files in the same set even if more files are subsequently 92 | # added. 93 | # To do that, we need a stable way of deciding based on just the file name 94 | # itself, so we do a hash of that and then use that to generate a 95 | # probability value that we use to assign it. 96 | hash_name_hashed = hashlib.sha1(compat.as_bytes(hash_name)).hexdigest() 97 | percentage_hash = ((int(hash_name_hashed, 16) % 98 | (MAX_NUM_WAVS_PER_CLASS + 1)) * 99 | (100.0 / MAX_NUM_WAVS_PER_CLASS)) 100 | if percentage_hash < validation_percentage: 101 | result = 'validation' 102 | elif percentage_hash < (testing_percentage + validation_percentage): 103 | result = 'testing' 104 | else: 105 | result = 'training' 106 | return result 107 | 108 | 109 | def load_wav_file(filename): 110 | """Loads an audio file and returns a float PCM-encoded array of samples. 111 | 112 | Args: 113 | filename: Path to the .wav file to load. 114 | 115 | Returns: 116 | Numpy array holding the sample data as floats between -1.0 and 1.0. 117 | """ 118 | with tf.Session(graph=tf.Graph()) as sess: 119 | wav_filename_placeholder = tf.placeholder(tf.string, []) 120 | wav_loader = io_ops.read_file(wav_filename_placeholder) 121 | wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) 122 | return sess.run( 123 | wav_decoder, 124 | feed_dict={wav_filename_placeholder: filename}).audio.flatten() 125 | 126 | 127 | def save_wav_file(filename, wav_data, sample_rate): 128 | """Saves audio sample data to a .wav audio file. 129 | 130 | Args: 131 | filename: Path to save the file to. 132 | wav_data: 2D array of float PCM-encoded audio data. 133 | sample_rate: Samples per second to encode in the file. 134 | """ 135 | with tf.Session(graph=tf.Graph()) as sess: 136 | wav_filename_placeholder = tf.placeholder(tf.string, []) 137 | sample_rate_placeholder = tf.placeholder(tf.int32, []) 138 | wav_data_placeholder = tf.placeholder(tf.float32, [None, 1]) 139 | wav_encoder = contrib_audio.encode_wav(wav_data_placeholder, 140 | sample_rate_placeholder) 141 | wav_saver = io_ops.write_file(wav_filename_placeholder, wav_encoder) 142 | sess.run( 143 | wav_saver, 144 | feed_dict={ 145 | wav_filename_placeholder: filename, 146 | sample_rate_placeholder: sample_rate, 147 | wav_data_placeholder: np.reshape(wav_data, (-1, 1)) 148 | }) 149 | 150 | 151 | class AudioProcessor(object): 152 | """Handles loading, partitioning, and preparing audio training data.""" 153 | 154 | def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage, 155 | wanted_words, validation_percentage, testing_percentage, 156 | model_settings): 157 | self.data_dir = data_dir 158 | self.maybe_download_and_extract_dataset(data_url, data_dir) 159 | self.prepare_data_index(silence_percentage, unknown_percentage, 160 | wanted_words, validation_percentage, 161 | testing_percentage) 162 | self.prepare_background_data() 163 | self.prepare_processing_graph(model_settings) 164 | 165 | def maybe_download_and_extract_dataset(self, data_url, dest_directory): 166 | """Download and extract data set tar file. 167 | 168 | If the data set we're using doesn't already exist, this function 169 | downloads it from the TensorFlow.org website and unpacks it into a 170 | directory. 
171 | If the data_url is none, don't download anything and expect the data 172 | directory to contain the correct files already. 173 | 174 | Args: 175 | data_url: Web location of the tar file containing the data set. 176 | dest_directory: File path to extract data to. 177 | """ 178 | if not data_url: 179 | return 180 | if not os.path.exists(dest_directory): 181 | os.makedirs(dest_directory) 182 | filename = data_url.split('/')[-1] 183 | filepath = os.path.join(dest_directory, filename) 184 | if not os.path.exists(filepath): 185 | 186 | def _progress(count, block_size, total_size): 187 | sys.stdout.write( 188 | '\r>> Downloading %s %.1f%%' % 189 | (filename, float(count * block_size) / float(total_size) * 100.0)) 190 | sys.stdout.flush() 191 | 192 | try: 193 | filepath, _ = urllib.request.urlretrieve(data_url, filepath, _progress) 194 | except: 195 | tf.logging.error('Failed to download URL: %s to folder: %s', data_url, 196 | filepath) 197 | tf.logging.error('Please make sure you have enough free space and' 198 | ' an internet connection') 199 | raise 200 | print() 201 | statinfo = os.stat(filepath) 202 | tf.logging.info('Successfully downloaded %s (%d bytes)', filename, 203 | statinfo.st_size) 204 | tarfile.open(filepath, 'r:gz').extractall(dest_directory) 205 | 206 | def prepare_data_index(self, silence_percentage, unknown_percentage, 207 | wanted_words, validation_percentage, 208 | testing_percentage): 209 | """Prepares a list of the samples organized by set and label. 210 | 211 | The training loop needs a list of all the available data, organized by 212 | which partition it should belong to, and with ground truth labels attached. 213 | This function analyzes the folders below the `data_dir`, figures out the 214 | right 215 | labels for each file based on the name of the subdirectory it belongs to, 216 | and uses a stable hash to assign it to a data set partition. 217 | 218 | Args: 219 | silence_percentage: How much of the resulting data should be background. 220 | unknown_percentage: How much should be audio outside the wanted classes. 221 | wanted_words: Labels of the classes we want to be able to recognize. 222 | validation_percentage: How much of the data set to use for validation. 223 | testing_percentage: How much of the data set to use for testing. 224 | 225 | Returns: 226 | Dictionary containing a list of file information for each set partition, 227 | and a lookup map for each class to determine its numeric index. 228 | 229 | Raises: 230 | Exception: If expected files are not found. 231 | """ 232 | # Make sure the shuffling and picking of unknowns is deterministic. 233 | random.seed(RANDOM_SEED) 234 | wanted_words_index = {} 235 | for index, wanted_word in enumerate(wanted_words): 236 | wanted_words_index[wanted_word] = index + 2 237 | self.data_index = {'validation': [], 'testing': [], 'training': []} 238 | unknown_index = {'validation': [], 'testing': [], 'training': []} 239 | all_words = {} 240 | # Look through all the subfolders to find audio samples 241 | search_path = os.path.join(self.data_dir, '*', '*.wav') 242 | for wav_path in gfile.Glob(search_path): 243 | _, word = os.path.split(os.path.dirname(wav_path)) 244 | word = word.lower() 245 | # Treat the '_background_noise_' folder as a special case, since we expect 246 | # it to contain long audio samples we mix in to improve training. 
247 | if word == BACKGROUND_NOISE_DIR_NAME: 248 | continue 249 | all_words[word] = True 250 | set_index = which_set(wav_path, validation_percentage, testing_percentage) 251 | # If it's a known class, store its detail, otherwise add it to the list 252 | # we'll use to train the unknown label. 253 | if word in wanted_words_index: 254 | self.data_index[set_index].append({'label': word, 'file': wav_path}) 255 | else: 256 | unknown_index[set_index].append({'label': word, 'file': wav_path}) 257 | if not all_words: 258 | raise Exception('No .wavs found at ' + search_path) 259 | for index, wanted_word in enumerate(wanted_words): 260 | if wanted_word not in all_words: 261 | raise Exception('Expected to find ' + wanted_word + 262 | ' in labels but only found ' + 263 | ', '.join(all_words.keys())) 264 | # We need an arbitrary file to load as the input for the silence samples. 265 | # It's multiplied by zero later, so the content doesn't matter. 266 | silence_wav_path = self.data_index['training'][0]['file'] 267 | for set_index in ['validation', 'testing', 'training']: 268 | set_size = len(self.data_index[set_index]) 269 | silence_size = int(math.ceil(set_size * silence_percentage / 100)) 270 | for _ in range(silence_size): 271 | self.data_index[set_index].append({ 272 | 'label': SILENCE_LABEL, 273 | 'file': silence_wav_path 274 | }) 275 | # Pick some unknowns to add to each partition of the data set. 276 | random.shuffle(unknown_index[set_index]) 277 | unknown_size = int(math.ceil(set_size * unknown_percentage / 100)) 278 | self.data_index[set_index].extend(unknown_index[set_index][:unknown_size]) 279 | # Make sure the ordering is random. 280 | for set_index in ['validation', 'testing', 'training']: 281 | random.shuffle(self.data_index[set_index]) 282 | # Prepare the rest of the result data structure. 283 | self.words_list = prepare_words_list(wanted_words) 284 | self.word_to_index = {} 285 | for word in all_words: 286 | if word in wanted_words_index: 287 | self.word_to_index[word] = wanted_words_index[word] 288 | else: 289 | self.word_to_index[word] = UNKNOWN_WORD_INDEX 290 | self.word_to_index[SILENCE_LABEL] = SILENCE_INDEX 291 | 292 | def prepare_background_data(self): 293 | """Searches a folder for background noise audio, and loads it into memory. 294 | 295 | It's expected that the background audio samples will be in a subdirectory 296 | named '_background_noise_' inside the 'data_dir' folder, as .wavs that match 297 | the sample rate of the training data, but can be much longer in duration. 298 | 299 | If the '_background_noise_' folder doesn't exist at all, this isn't an 300 | error, it's just taken to mean that no background noise augmentation should 301 | be used. If the folder does exist, but it's empty, that's treated as an 302 | error. 303 | 304 | Returns: 305 | List of raw PCM-encoded audio samples of background noise. 306 | 307 | Raises: 308 | Exception: If files aren't found in the folder. 
309 | """ 310 | self.background_data = [] 311 | background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME) 312 | if not os.path.exists(background_dir): 313 | return self.background_data 314 | with tf.Session(graph=tf.Graph()) as sess: 315 | wav_filename_placeholder = tf.placeholder(tf.string, []) 316 | wav_loader = io_ops.read_file(wav_filename_placeholder) 317 | wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1) 318 | search_path = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME, 319 | '*.wav') 320 | for wav_path in gfile.Glob(search_path): 321 | wav_data = sess.run( 322 | wav_decoder, 323 | feed_dict={wav_filename_placeholder: wav_path}).audio.flatten() 324 | self.background_data.append(wav_data) 325 | if not self.background_data: 326 | raise Exception('No background wav files were found in ' + search_path) 327 | 328 | def prepare_processing_graph(self, model_settings): 329 | """Builds a TensorFlow graph to apply the input distortions. 330 | 331 | Creates a graph that loads a WAVE file, decodes it, scales the volume, 332 | shifts it in time, adds in background noise, calculates a spectrogram, and 333 | then builds an MFCC fingerprint from that. 334 | 335 | This must be called with an active TensorFlow session running, and it 336 | creates multiple placeholder inputs, and one output: 337 | 338 | - wav_filename_placeholder_: Filename of the WAV to load. 339 | - foreground_volume_placeholder_: How loud the main clip should be. 340 | - time_shift_padding_placeholder_: Where to pad the clip. 341 | - time_shift_offset_placeholder_: How much to move the clip in time. 342 | - background_data_placeholder_: PCM sample data for background noise. 343 | - background_volume_placeholder_: Loudness of mixed-in background. 344 | - mfcc_: Output 2D fingerprint of processed audio. 345 | 346 | Args: 347 | model_settings: Information about the current model being trained. 348 | """ 349 | desired_samples = model_settings['desired_samples'] 350 | self.wav_filename_placeholder_ = tf.placeholder(tf.string, []) 351 | wav_loader = io_ops.read_file(self.wav_filename_placeholder_) 352 | wav_decoder = contrib_audio.decode_wav( 353 | wav_loader, desired_channels=1, desired_samples=desired_samples) 354 | # Allow the audio sample's volume to be adjusted. 355 | self.foreground_volume_placeholder_ = tf.placeholder(tf.float32, []) 356 | scaled_foreground = tf.multiply(wav_decoder.audio, 357 | self.foreground_volume_placeholder_) 358 | # Shift the sample's start position, and pad any gaps with zeros. 359 | self.time_shift_padding_placeholder_ = tf.placeholder(tf.int32, [2, 2]) 360 | self.time_shift_offset_placeholder_ = tf.placeholder(tf.int32, [2]) 361 | padded_foreground = tf.pad( 362 | scaled_foreground, 363 | self.time_shift_padding_placeholder_, 364 | mode='CONSTANT') 365 | sliced_foreground = tf.slice(padded_foreground, 366 | self.time_shift_offset_placeholder_, 367 | [desired_samples, -1]) 368 | # Mix in background noise. 369 | self.background_data_placeholder_ = tf.placeholder(tf.float32, 370 | [desired_samples, 1]) 371 | self.background_volume_placeholder_ = tf.placeholder(tf.float32, []) 372 | background_mul = tf.multiply(self.background_data_placeholder_, 373 | self.background_volume_placeholder_) 374 | background_add = tf.add(background_mul, sliced_foreground) 375 | background_clamp = tf.clip_by_value(background_add, -1.0, 1.0) 376 | # Run the spectrogram and MFCC ops to get a 2D 'fingerprint' of the audio. 
377 | spectrogram = contrib_audio.audio_spectrogram( 378 | background_clamp, 379 | window_size=model_settings['window_size_samples'], 380 | stride=model_settings['window_stride_samples'], 381 | magnitude_squared=True) 382 | self.mfcc_ = contrib_audio.mfcc( 383 | spectrogram, 384 | wav_decoder.sample_rate, 385 | dct_coefficient_count=model_settings['dct_coefficient_count']) 386 | 387 | def set_size(self, mode): 388 | """Calculates the number of samples in the dataset partition. 389 | 390 | Args: 391 | mode: Which partition, must be 'training', 'validation', or 'testing'. 392 | 393 | Returns: 394 | Number of samples in the partition. 395 | """ 396 | return len(self.data_index[mode]) 397 | 398 | def get_data(self, how_many, offset, model_settings, background_frequency, 399 | background_volume_range, time_shift, mode, sess): 400 | """Gather samples from the data set, applying transformations as needed. 401 | 402 | When the mode is 'training', a random selection of samples will be returned, 403 | otherwise the first N clips in the partition will be used. This ensures that 404 | validation always uses the same samples, reducing noise in the metrics. 405 | 406 | Args: 407 | how_many: Desired number of samples to return. -1 means the entire 408 | contents of this partition. 409 | offset: Where to start when fetching deterministically. 410 | model_settings: Information about the current model being trained. 411 | background_frequency: How many clips will have background noise, 0.0 to 412 | 1.0. 413 | background_volume_range: How loud the background noise will be. 414 | time_shift: How much to randomly shift the clips by in time. 415 | mode: Which partition to use, must be 'training', 'validation', or 416 | 'testing'. 417 | sess: TensorFlow session that was active when processor was created. 418 | 419 | Returns: 420 | List of sample data for the transformed samples, and list of labels in 421 | one-hot form. 422 | """ 423 | # Pick one of the partitions to choose samples from. 424 | candidates = self.data_index[mode] 425 | if how_many == -1: 426 | sample_count = len(candidates) 427 | else: 428 | sample_count = max(0, min(how_many, len(candidates) - offset)) 429 | # Data and labels will be populated and returned. 430 | data = np.zeros((sample_count, model_settings['fingerprint_size'])) 431 | labels = np.zeros((sample_count, model_settings['label_count'])) 432 | desired_samples = model_settings['desired_samples'] 433 | use_background = self.background_data and (mode == 'training') 434 | pick_deterministically = (mode != 'training') 435 | # Use the processing graph we created earlier to repeatedly to generate the 436 | # final output sample data we'll use in training. 437 | for i in xrange(offset, offset + sample_count): 438 | # Pick which audio sample to use. 439 | if how_many == -1 or pick_deterministically: 440 | sample_index = i 441 | else: 442 | sample_index = np.random.randint(len(candidates)) 443 | sample = candidates[sample_index] 444 | # If we're time shifting, set up the offset for this sample. 
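      # For example, a sampled time_shift_amount of +300 pads 300 zeros before the clip
      # (audio shifted later in time), while -300 pads 300 zeros after it and offsets the
      # slice by 300 samples (audio shifted earlier); the output stays desired_samples long.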
445 | if time_shift > 0: 446 | time_shift_amount = np.random.randint(-time_shift, time_shift) 447 | else: 448 | time_shift_amount = 0 449 | if time_shift_amount > 0: 450 | time_shift_padding = [[time_shift_amount, 0], [0, 0]] 451 | time_shift_offset = [0, 0] 452 | else: 453 | time_shift_padding = [[0, -time_shift_amount], [0, 0]] 454 | time_shift_offset = [-time_shift_amount, 0] 455 | input_dict = { 456 | self.wav_filename_placeholder_: sample['file'], 457 | self.time_shift_padding_placeholder_: time_shift_padding, 458 | self.time_shift_offset_placeholder_: time_shift_offset, 459 | } 460 | # Choose a section of background noise to mix in. 461 | if use_background: 462 | background_index = np.random.randint(len(self.background_data)) 463 | background_samples = self.background_data[background_index] 464 | background_offset = np.random.randint( 465 | 0, len(background_samples) - model_settings['desired_samples']) 466 | background_clipped = background_samples[background_offset:( 467 | background_offset + desired_samples)] 468 | background_reshaped = background_clipped.reshape([desired_samples, 1]) 469 | if np.random.uniform(0, 1) < background_frequency: 470 | background_volume = np.random.uniform(0, background_volume_range) 471 | else: 472 | background_volume = 0 473 | else: 474 | background_reshaped = np.zeros([desired_samples, 1]) 475 | background_volume = 0 476 | input_dict[self.background_data_placeholder_] = background_reshaped 477 | input_dict[self.background_volume_placeholder_] = background_volume 478 | # If we want silence, mute out the main sample but leave the background. 479 | if sample['label'] == SILENCE_LABEL: 480 | input_dict[self.foreground_volume_placeholder_] = 0 481 | else: 482 | input_dict[self.foreground_volume_placeholder_] = 1 483 | # Run the graph to produce the output audio. 484 | data[i - offset, :] = sess.run(self.mfcc_, feed_dict=input_dict).flatten() 485 | label_index = self.word_to_index[sample['label']] 486 | labels[i - offset, label_index] = 1 487 | return data, labels 488 | 489 | def get_unprocessed_data(self, how_many, model_settings, mode): 490 | """Retrieve sample data for the given partition, with no transformations. 491 | 492 | Args: 493 | how_many: Desired number of samples to return. -1 means the entire 494 | contents of this partition. 495 | model_settings: Information about the current model being trained. 496 | mode: Which partition to use, must be 'training', 'validation', or 497 | 'testing'. 498 | 499 | Returns: 500 | List of sample data for the samples, and list of labels in one-hot form. 
501 | """ 502 | candidates = self.data_index[mode] 503 | if how_many == -1: 504 | sample_count = len(candidates) 505 | else: 506 | sample_count = how_many 507 | desired_samples = model_settings['desired_samples'] 508 | words_list = self.words_list 509 | data = np.zeros((sample_count, desired_samples)) 510 | labels = [] 511 | with tf.Session(graph=tf.Graph()) as sess: 512 | wav_filename_placeholder = tf.placeholder(tf.string, []) 513 | wav_loader = io_ops.read_file(wav_filename_placeholder) 514 | wav_decoder = contrib_audio.decode_wav( 515 | wav_loader, desired_channels=1, desired_samples=desired_samples) 516 | foreground_volume_placeholder = tf.placeholder(tf.float32, []) 517 | scaled_foreground = tf.multiply(wav_decoder.audio, 518 | foreground_volume_placeholder) 519 | for i in range(sample_count): 520 | if how_many == -1: 521 | sample_index = i 522 | else: 523 | sample_index = np.random.randint(len(candidates)) 524 | sample = candidates[sample_index] 525 | input_dict = {wav_filename_placeholder: sample['file']} 526 | if sample['label'] == SILENCE_LABEL: 527 | input_dict[foreground_volume_placeholder] = 0 528 | else: 529 | input_dict[foreground_volume_placeholder] = 1 530 | data[i, :] = sess.run(scaled_foreground, feed_dict=input_dict).flatten() 531 | label_index = self.word_to_index[sample['label']] 532 | labels.append(words_list[label_index]) 533 | return data, labels 534 | -------------------------------------------------------------------------------- /toy-model/manager.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import numpy as np 5 | import tensorflow as tf 6 | from six.moves import xrange # pylint: disable=redefined-builtin 7 | import argparse 8 | import os.path 9 | import sys 10 | import input_data 11 | import models 12 | from tensorflow.python.platform import gfile 13 | from tensorflow.contrib import slim as slim 14 | 15 | 16 | def KWS_data_loader(FLAGS, sess): 17 | #sess = tf.Session() 18 | model_settings = models.prepare_model_settings( 19 | len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))), 20 | FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms, 21 | FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) 22 | audio_processor = input_data.AudioProcessor( 23 | FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage, 24 | FLAGS.unknown_percentage, 25 | FLAGS.wanted_words.split(','), FLAGS.validation_percentage, 26 | FLAGS.testing_percentage, model_settings) 27 | fingerprint_size = model_settings['fingerprint_size'] 28 | label_count = model_settings['label_count'] 29 | time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) 30 | fingerprint_size = model_settings['fingerprint_size'] 31 | label_count = model_settings['label_count'] 32 | time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) 33 | training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(','))) 34 | learning_rates_list = list(map(float, FLAGS.learning_rate.split(','))) 35 | if len(training_steps_list) != len(learning_rates_list): 36 | raise Exception( 37 | '--how_many_training_steps and --learning_rate must be equal length ' 38 | 'lists, but are %d and %d long instead' % (len(training_steps_list), 39 | len(learning_rates_list))) 40 | 41 | validation_fingerprints, validation_ground_truth = ( 42 | audio_processor.get_data(-1, 0, model_settings, 0.0, 43 | 0.0, 0, 'validation', 
sess)) 44 | input_frequency_size = model_settings['dct_coefficient_count'] 45 | input_time_size = model_settings['spectrogram_length'] 46 | return audio_processor, training_steps_list, learning_rates_list, model_settings, time_shift_samples, validation_fingerprints, validation_ground_truth 47 | 48 | class NetworkManager: 49 | ''' 50 | Helper class to manage the generation of subnetwork training given a dataset 51 | ''' 52 | def __init__(self, FLAGS, acc_beta=0.8, clip_rewards=False): 53 | ''' 54 | Manager which is tasked with creating subnetworks, training them on a dataset, and retrieving 55 | rewards in the term of accuracy, which is passed to the controller RNN. 56 | 57 | Args: 58 | dataset: a tuple of 4 arrays (X_train, y_train, X_val, y_val) 59 | epochs: number of epochs to train the subnetworks 60 | batchsize: batchsize of training the subnetworks 61 | acc_beta: exponential weight for the accuracy 62 | clip_rewards: whether to clip rewards in [-0.05, 0.05] range to prevent 63 | large weight updates. Use when training is highly unstable. 64 | ''' 65 | self.FLAGS = FLAGS 66 | self.clip_rewards = clip_rewards 67 | 68 | self.beta = acc_beta 69 | self.beta_bias = acc_beta 70 | self.moving_acc = 0.0 71 | 72 | 73 | def get_rewards(self, model_fn, actions): 74 | ''' 75 | Creates a subnetwork given the actions predicted by the controller RNN, 76 | trains it on the provided dataset, and then returns a reward. 77 | 78 | Args: 79 | model_fn: a function which accepts one argument, a list of 80 | parsed actions, obtained via an inverse mapping from the 81 | StateSpace. 82 | actions: a list of parsed actions obtained via an inverse mapping 83 | from the StateSpace. It is in a specific order as given below: 84 | 85 | Consider 4 states were added to the StateSpace via the `add_state` 86 | method. Then the `actions` array will be of length 4, with the 87 | values of those states in the order that they were added. 88 | 89 | If number of layers is greater than one, then the `actions` array 90 | will be of length `4 * number of layers` (in the above scenario). 91 | The index from [0:4] will be for layer 0, from [4:8] for layer 1, 92 | etc for the number of layers. 93 | 94 | These action values are for direct use in the construction of models. 95 | 96 | Returns: 97 | a reward for training a model with the given actions 98 | ''' 99 | 100 | with tf.Session(graph=tf.Graph()) as network_sess: 101 | tf.logging.set_verbosity(tf.logging.INFO) 102 | audio_processor, training_steps_list, learning_rates_list, model_settings, time_shift_samples, X_val, y_val = KWS_data_loader( 103 | self.FLAGS, network_sess) 104 | 105 | # generate a submodel given predicted actions 106 | logits, fingerprint_input, is_training = model_fn(actions, model_settings) 107 | ground_truth_input = tf.placeholder(tf.float32, [None, model_settings['label_count']], name='groundtruth_input') 108 | learning_rate = 0.001 109 | 110 | # Optionally we can add runtime checks to spot when NaNs or other symptoms of 111 | # numerical errors start occurring during training. 112 | control_dependencies = [] 113 | if self.FLAGS.check_nans: 114 | checks = tf.add_check_numerics_ops() 115 | control_dependencies = [checks] 116 | 117 | # Create the back propagation and training evaluation machinery in the graph. 
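            # The remainder of get_rewards(): build a softmax cross-entropy loss and an
            # Adam train op, run FLAGS.how_many_training_steps training steps (halving the
            # learning rate whenever validation accuracy fails to improve at an eval step),
            # and convert the best validation accuracy into the controller's reward via the
            # exponential-moving-average baseline at the end of this method.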
118 | with tf.name_scope('cross_entropy'): 119 | cross_entropy_mean = tf.reduce_mean( 120 | tf.nn.softmax_cross_entropy_with_logits( 121 | labels=ground_truth_input, logits=logits)) 122 | 123 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 124 | with tf.name_scope('train'), tf.control_dependencies(update_ops), tf.control_dependencies(control_dependencies): 125 | learning_rate_input = tf.placeholder(tf.float32, [], name='learning_rate_input') 126 | train_op = tf.train.AdamOptimizer(learning_rate_input) 127 | train_step = slim.learning.create_train_op(cross_entropy_mean, train_op) 128 | predicted_indices = tf.argmax(logits, 1) 129 | expected_indices = tf.argmax(ground_truth_input, 1) 130 | correct_prediction = tf.equal(predicted_indices, expected_indices) 131 | evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 132 | 133 | 134 | # Training loop. 135 | best_accuracy = 0 136 | training_steps_max = np.sum(training_steps_list) 137 | start_step = 1 # use for checkpoint, fixed here 138 | tf.global_variables_initializer().run() 139 | 140 | for training_step in xrange(start_step, training_steps_max + 1): 141 | X_train, y_train = audio_processor.get_data( 142 | self.FLAGS.batch_size, 0, model_settings, self.FLAGS.background_frequency, 143 | self.FLAGS.background_volume, time_shift_samples, 'training', network_sess) 144 | train_accuracy, _ = network_sess.run( 145 | [ 146 | evaluation_step , train_step 147 | ], 148 | feed_dict={ 149 | fingerprint_input: X_train, 150 | ground_truth_input: y_train, 151 | learning_rate_input: learning_rate, 152 | is_training: True 153 | }) 154 | #tf.logging.info('Step #%d: accuracy %.2f%%' % (training_step, train_accuracy * 100)) 155 | 156 | is_last_step = (training_step == training_steps_max) 157 | if (training_step % self.FLAGS.eval_step_interval) == 0 or is_last_step: 158 | validation_accuracy = network_sess.run( 159 | evaluation_step, 160 | feed_dict={ 161 | fingerprint_input: X_val, 162 | ground_truth_input: y_val, 163 | is_training: False 164 | }) 165 | tf.logging.info('Step #%d: Validation accuracy %.2f%%' % (training_step, validation_accuracy * 100)) 166 | if validation_accuracy > best_accuracy: 167 | best_accuracy = validation_accuracy 168 | else: 169 | learning_rate = learning_rate / 2.0 170 | 171 | # compute the reward 172 | acc = best_accuracy 173 | reward = (acc - self.moving_acc) * 10 174 | if self.moving_acc == 0.0: 175 | reward = 0 176 | 177 | # if rewards are clipped, clip them in the range -0.05 to 0.05 178 | if self.clip_rewards: 179 | reward = np.clip(reward, -0.05, 0.05) 180 | 181 | # update moving accuracy with bias correction for 1st update 182 | self.moving_acc = self.beta * self.moving_acc + (1 - self.beta) * acc 183 | self.moving_acc = self.moving_acc / (1 - self.beta_bias) 184 | self.beta_bias = 0 185 | 186 | 187 | 188 | #print() 189 | #print("Manager: EWA Accuracy = ", self.moving_acc) 190 | 191 | # clean up resources and GPU memory 192 | network_sess.close() 193 | 194 | return reward, acc -------------------------------------------------------------------------------- /toy-model/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import sys 7 | import os 8 | import math 9 | 10 | import pdb 11 | import tensorflow as tf 12 | import tensorflow.contrib.slim as slim 13 | from tensorflow.python.ops import control_flow_ops 14 | 15 | 
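# model_fn builds the fully-connected child network trained by manager.get_rewards():
# `actions` is a list of three hidden-layer widths drawn from LAYER_SIZES in config.py,
# e.g. actions = [30, 60, 100] gives Dense(30) -> Dense(60) -> Dense(100) ->
# Dense(label_count), and it returns the logits together with the fingerprint_input and
# is_training placeholders (is_training is only actually consumed by the batch-norm
# layers in model_fn_cnn below, which instead takes 9 actions: filters, kernel size and
# stride for each of its three depthwise-separable conv layers).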
16 | def model_fn(actions, model_settings): 17 | fingerprint_input = tf.placeholder(tf.float32, [None, model_settings['fingerprint_size']], name='fingerprint_input') 18 | is_training = tf.placeholder(tf.bool, []) 19 | filters_1, filters_2, filters_3 = actions 20 | net = tf.layers.dense(fingerprint_input, filters_1, activation=tf.nn.relu) 21 | net = tf.layers.dense(net, filters_2, activation=tf.nn.relu) 22 | net = tf.layers.dense(net, filters_3, activation=tf.nn.relu) 23 | net = tf.layers.dense(net, model_settings['label_count']) 24 | return net, fingerprint_input, is_training 25 | 26 | 27 | 28 | 29 | def model_fn_cnn(actions, model_settings): 30 | def ds_cnn_arg_scope(weight_decay=0): 31 | with slim.arg_scope( 32 | [slim.convolution2d, slim.separable_convolution2d], 33 | weights_initializer=slim.initializers.xavier_initializer(), 34 | biases_initializer=slim.init_ops.zeros_initializer(), 35 | weights_regularizer=slim.l2_regularizer(weight_decay)) as sc: 36 | return sc 37 | 38 | def _depthwise_separable_conv(inputs, 39 | num_pwc_filters, 40 | sc, 41 | kernel_size, 42 | stride): 43 | depthwise_conv = slim.separable_convolution2d(inputs, 44 | num_outputs=None, 45 | stride=stride, 46 | depth_multiplier=1, 47 | kernel_size=kernel_size, 48 | scope=sc+'/depthwise_conv') 49 | 50 | bn = slim.batch_norm(depthwise_conv, scope=sc+'/dw_batch_norm') 51 | pointwise_conv = slim.convolution2d(bn, 52 | num_pwc_filters, 53 | kernel_size=[1, 1], 54 | scope=sc+'/pointwise_conv') 55 | bn = slim.batch_norm(pointwise_conv, scope=sc+'/pw_batch_norm') 56 | return bn 57 | 58 | 59 | fingerprint_input = tf.placeholder(tf.float32, [None, model_settings['fingerprint_size']], name='fingerprint_input') 60 | is_training = tf.placeholder(tf.bool, []) 61 | filters_1, kernel_1, stride_1, filters_2, kernel_2, stride_2, filters_3, kernel_3, stride_3 = actions 62 | 63 | label_count = model_settings['label_count'] 64 | input_frequency_size = model_settings['dct_coefficient_count'] 65 | input_time_size = model_settings['spectrogram_length'] 66 | fingerprint_4d = tf.reshape(fingerprint_input, 67 | [-1, input_time_size, input_frequency_size, 1]) 68 | 69 | t_dim = math.ceil(input_time_size / float(2 * stride_1 * stride_2 * stride_3)) 70 | f_dim = math.ceil(input_frequency_size / float(1 * stride_1 * stride_2 * stride_3)) 71 | 72 | 73 | scope = 'DS-CNN' 74 | with tf.variable_scope(scope) as sc: 75 | end_points_collection = sc.name + '_end_points' 76 | with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d], 77 | activation_fn=None, 78 | weights_initializer=slim.initializers.xavier_initializer(), 79 | biases_initializer=slim.init_ops.zeros_initializer(), 80 | outputs_collections=[end_points_collection]): 81 | with slim.arg_scope([slim.batch_norm], 82 | is_training=is_training, 83 | decay=0.96, 84 | updates_collections=None, 85 | activation_fn=tf.nn.relu): 86 | #pdb.set_trace() 87 | net = slim.convolution2d(fingerprint_4d, 64, [10, 4], stride=[2, 1], padding='SAME', scope='conv_1') 88 | net = slim.batch_norm(net, scope='conv_1/batch_norm') 89 | net = _depthwise_separable_conv(net, filters_1, kernel_size = [kernel_1, kernel_1], stride = [stride_1, stride_1], sc='conv_ds_1') 90 | net = _depthwise_separable_conv(net, filters_2, kernel_size = [kernel_2, kernel_2], stride = [stride_2, stride_2], sc='conv_ds_2') 91 | net = _depthwise_separable_conv(net, filters_3, kernel_size = [kernel_3, kernel_3], stride = [stride_3, stride_3], sc='conv_ds_3') 92 | net = slim.avg_pool2d(net, [t_dim, f_dim], scope='avg_pool') 93 | 94 | net = 
tf.squeeze(net, [1, 2], name='SpatialSqueeze') 95 | logits = slim.fully_connected(net, label_count, activation_fn=None, scope='fc1') 96 | return logits, fingerprint_input, is_training 97 | 98 | -------------------------------------------------------------------------------- /toy-model/pg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | 3 | import os 4 | import sys 5 | import logging 6 | import time 7 | import numpy as np 8 | import tensorflow as tf 9 | import json 10 | import scipy.signal 11 | import os 12 | import time 13 | import inspect 14 | from utils.general import get_logger, Progbar, export_plot 15 | from pg_config import pg_config 16 | #import logz 17 | from config import * 18 | import pdb 19 | from scipy.stats import multivariate_normal 20 | 21 | 22 | class PG(object): 23 | 24 | def __init__(self): 25 | self.lr = 5e-2 26 | self.controller_cells = 64 27 | self.batch_size = 500 28 | self.num_batches = 500 29 | self.observation_dim = 1000 30 | self.action_dim = 1000 31 | self.num_layers = 2 32 | 33 | self.action_buffer = [] 34 | self.state_buffer = [] 35 | self.logprob_buffer = [] 36 | self._dict = {} 37 | self._used_dict = {} 38 | self.log_acc = [] 39 | self.logger = get_logger('./log.txt') 40 | self.baseline = -1000.0 41 | 42 | self._num_used_models = [] 43 | 44 | #self._initial_baseline = 45 | 46 | ''' 47 | with open('./action_average_reward_dict.json', 'r') as f: 48 | self._raw_dict = json.load(f) 49 | temp_map = {30:0, 60:1, 100:2, 144:3} 50 | for key in self._raw_dict.keys(): 51 | actions = [temp_map[int(a)] for a in key[1:-1].split(',')] 52 | temp = str(actions).replace(",","") 53 | accuracy = float(self._raw_dict[key]) / 10000 54 | self._dict[temp] = accuracy 55 | self._used_dict[temp] = 0 56 | ''' 57 | self._dict = self.build_reward_function() 58 | self._used_dict = np.zeros_like(self._dict) 59 | self.build() 60 | 61 | 62 | 63 | def build_reward_function(self): 64 | x, y = np.mgrid[-10:10:0.02, -10:10:0.02] 65 | pos = np.empty(x.shape + (2,)) 66 | pos[:, :, 0] = x 67 | pos[:, :, 1] = y 68 | rv1 = multivariate_normal([5, -5], [[10, 0], [0, 10]]) 69 | rv2 = multivariate_normal([2, -2], [[7, 2], [2, 5]]) 70 | rv3 = multivariate_normal([7, -7], [[1, 0], [0, 1]]) 71 | rv4 = multivariate_normal([3, -3], [[1, 0], [0, 1]]) 72 | rv11 = multivariate_normal([-5, 5], [[3, 1], [1, 2]]) 73 | rv22 = multivariate_normal([-2, 2], [[7, 2], [2, 5]]) 74 | rv33 = multivariate_normal([-7, 7], [[1, 0], [0,1]]) 75 | rv44 = multivariate_normal([-3, 3], [[4, 0], [0, 4]]) 76 | rv = rv1.pdf(pos) + rv2.pdf(pos) + rv3.pdf(pos) + rv4.pdf(pos) + rv11.pdf(pos) + rv22.pdf(pos) + rv33.pdf(pos) + rv44.pdf(pos) 77 | return rv 78 | 79 | 80 | def add_placeholders_op(self): 81 | self.observation_placeholder = tf.placeholder(tf.float32, [self.batch_size, 1, self.observation_dim]) 82 | self.action_placeholder = tf.placeholder(tf.int32, [self.num_layers, self.batch_size]) 83 | self.advantage_placeholder = tf.placeholder(tf.float32, [self.batch_size, self.num_layers]) 84 | 85 | 86 | def build_policy_network_op(self, scope="policy_network"): 87 | temp_logprob_buffer = [] 88 | with tf.variable_scope(scope): 89 | self.cell = tf.contrib.rnn.NASCell(self.controller_cells) 90 | cell_state = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) 91 | cell_input = tf.zeros([self.batch_size, 1, self.observation_dim]) 92 | for i in range(self.num_layers): 93 | outputs, cell_state = tf.nn.dynamic_rnn(self.cell, cell_input, initial_state=cell_state, 
dtype=tf.float32) 94 | action_logits = tf.layers.dense(outputs[:, -1, :], units=self.action_dim, name='rnn_fc_%d' % (i)) 95 | 96 | sampled_action = tf.squeeze(tf.multinomial(action_logits, 1), axis=1) 97 | cell_input = tf.one_hot(sampled_action, self.observation_dim) 98 | cell_input = tf.expand_dims(cell_input, 1) 99 | logprob = tf.negative(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.action_placeholder[i], logits=action_logits)) 100 | logprob = tf.expand_dims(logprob, 1) 101 | 102 | self.action_buffer.append(sampled_action) #action 103 | #self.state_buffer.append(cell_input) # state 104 | temp_logprob_buffer.append(logprob) #logprob 105 | 106 | self.logprob_buffer = tf.concat(temp_logprob_buffer, 1) # batch x layer 107 | 108 | 109 | 110 | def add_loss_op(self): 111 | self.loss = -tf.reduce_mean(self.logprob_buffer * self.advantage_placeholder) 112 | 113 | 114 | def add_optimizer_op(self): 115 | self.train_op = tf.train.AdamOptimizer(self.lr, beta1=0.94).minimize(self.loss) 116 | 117 | 118 | def build(self): 119 | self.add_placeholders_op() 120 | self.build_policy_network_op() 121 | self.add_loss_op() 122 | self.add_optimizer_op() 123 | 124 | 125 | def initialize(self): 126 | self.sess = tf.Session() 127 | init = tf.global_variables_initializer() 128 | self.sess.run(init) 129 | 130 | 131 | def sample_model_reward_return(self): 132 | action_buffer = np.array(self.sess.run(self.action_buffer)) 133 | returns = np.float32(np.zeros_like(action_buffer)) 134 | #pdb.set_trace() 135 | for i in range(self.batch_size): 136 | #returns[:,i] = self._dict[str(action_buffer[:,i])] - self._initial_baseline 137 | returns[:, i] = self._dict[action_buffer[:, i][0], action_buffer[:, i][1]] 138 | self._used_dict[action_buffer[:, i][0], action_buffer[:, i][1]] = 1 139 | 140 | return action_buffer, np.transpose(returns) 141 | 142 | 143 | def train(self): 144 | 145 | for t in range(self.num_batches): 146 | actions, returns = self.sample_model_reward_return() 147 | #self.baseline = (t*self.baseline + np.mean(returns)) / (t+1) 148 | if self.baseline == -1000.0: 149 | self.baseline = np.mean(returns) 150 | else: 151 | self.baseline = 0.6 * self.baseline + 0.4 * np.mean(returns) 152 | 153 | self.sess.run(self.train_op, feed_dict={ 154 | self.action_placeholder : actions, 155 | self.advantage_placeholder : returns}) # not using baseline here 156 | 157 | 158 | avg_acc = np.mean(returns) 159 | 160 | #calculate number of used models: 161 | used = 0 162 | #for key in self._used_dict.keys(): 163 | #used += self._used_dict[key] 164 | used = np.sum(self._used_dict) 165 | self._num_used_models.append(used) 166 | 167 | 168 | self.log_acc.append(avg_acc) 169 | #sigma_reward = np.sqrt(np.var(returns) / len(total_rewards)) 170 | msg = "Average accuracy within a batch: {:04.2f}".format(avg_acc) 171 | self.logger.info(msg) 172 | print (actions) 173 | 174 | 175 | self.logger.info("- Training done.") 176 | #export_plot(self.log_acc, "Batch_Accuracy", 'NAS-DNN', "./batch_accuracy.png", self._num_used_models, "Sampled Model") 177 | export_plot(self.log_acc, "Score", 'NAS-DNN', "./batch_accuracy.png") 178 | export_plot(self._num_used_models, "Models Sampled", 'NAS-DNN', "./used_models.png") 179 | 180 | 181 | def run(self): 182 | self.initialize() 183 | self.train() 184 | 185 | 186 | 187 | if __name__ == '__main__': 188 | model = PG() 189 | model.run() 190 | -------------------------------------------------------------------------------- /toy-model/pg_config.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class pg_config(): 4 | 5 | record = False 6 | 7 | # output config 8 | output_path = "PG_results/" 9 | model_output = output_path + "model.weights/" 10 | #log_path = output_path + "log.txt" 11 | plot_output = output_path + "scores.png" 12 | record_path = output_path 13 | record_freq = 5 14 | summary_freq = 1 15 | 16 | 17 | # model and training config 18 | num_batches = 10 # number of batches trained on 19 | batch_size = 50 # number of steps used to compute each policy update 20 | learning_rate = 3e-2 21 | #use_baseline = False 22 | normalize_advantage=True 23 | activation=tf.nn.relu 24 | -------------------------------------------------------------------------------- /toy-model/reward_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/reward_function.png -------------------------------------------------------------------------------- /toy-model/used_models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/used_models.png -------------------------------------------------------------------------------- /toy-model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/utils/__init__.py -------------------------------------------------------------------------------- /toy-model/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /toy-model/utils/__pycache__/general.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydzhang12345/Neural-Architecture-Search/5193a8365a1d922c6bfd68a7b1d18962ead09ead/toy-model/utils/__pycache__/general.cpython-36.pyc -------------------------------------------------------------------------------- /toy-model/utils/general.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import logging 4 | import numpy as np 5 | from collections import deque 6 | import matplotlib 7 | matplotlib.use('agg') 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | def export_plot(ys, ylabel, title, filename, xs=None, xlabel='Training Batches'): 12 | """ 13 | Export a plot in filename 14 | 15 | Args: 16 | ys: (list) of float / int to plot 17 | filename: (string) directory 18 | """ 19 | if xs is None: 20 | xs = range(len(ys)) 21 | plt.figure() 22 | plt.plot(xs, ys) 23 | plt.xlabel(xlabel) 24 | plt.ylabel(ylabel) 25 | plt.title(title) 26 | plt.savefig(filename) 27 | plt.close() 28 | 29 | 30 | def get_logger(filename): 31 | """ 32 | Return a logger instance to a file 33 | """ 34 | logger = logging.getLogger('logger') 35 | logger.setLevel(logging.DEBUG) 36 | logging.basicConfig(format='%(message)s', level=logging.DEBUG) 37 | handler = 
logging.FileHandler(filename) 38 | handler.setLevel(logging.DEBUG) 39 | handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s')) 40 | logging.getLogger().addHandler(handler) 41 | return logger 42 | 43 | 44 | class Progbar(object): 45 | """Progbar class copied from keras (https://github.com/fchollet/keras/) 46 | 47 | Displays a progress bar. 48 | Small edit : added strict arg to update 49 | # Arguments 50 | target: Total number of steps expected. 51 | interval: Minimum visual progress update interval (in seconds). 52 | """ 53 | 54 | def __init__(self, target, width=30, verbose=1, discount=0.9): 55 | self.width = width 56 | self.target = target 57 | self.sum_values = {} 58 | self.exp_avg = {} 59 | self.unique_values = [] 60 | self.start = time.time() 61 | self.total_width = 0 62 | self.seen_so_far = 0 63 | self.verbose = verbose 64 | self.discount = discount 65 | 66 | def update(self, current, values=[], exact=[], strict=[], exp_avg=[]): 67 | """ 68 | Updates the progress bar. 69 | # Arguments 70 | current: Index of current step. 71 | values: List of tuples (name, value_for_last_step). 72 | The progress bar will display averages for these values. 73 | exact: List of tuples (name, value_for_last_step). 74 | The progress bar will display these values directly. 75 | """ 76 | 77 | for k, v in values: 78 | if k not in self.sum_values: 79 | self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far] 80 | self.unique_values.append(k) 81 | else: 82 | self.sum_values[k][0] += v * (current - self.seen_so_far) 83 | self.sum_values[k][1] += (current - self.seen_so_far) 84 | for k, v in exact: 85 | if k not in self.sum_values: 86 | self.unique_values.append(k) 87 | self.sum_values[k] = [v, 1] 88 | for k, v in strict: 89 | if k not in self.sum_values: 90 | self.unique_values.append(k) 91 | self.sum_values[k] = v 92 | for k, v in exp_avg: 93 | if k not in self.exp_avg: 94 | self.exp_avg[k] = v 95 | else: 96 | self.exp_avg[k] *= self.discount 97 | self.exp_avg[k] += (1-self.discount)*v 98 | 99 | self.seen_so_far = current 100 | 101 | now = time.time() 102 | if self.verbose == 1: 103 | prev_total_width = self.total_width 104 | sys.stdout.write("\b" * prev_total_width) 105 | sys.stdout.write("\r") 106 | 107 | numdigits = int(np.floor(np.log10(self.target))) + 1 108 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 109 | bar = barstr % (current, self.target) 110 | prog = float(current)/self.target 111 | prog_width = int(self.width*prog) 112 | if prog_width > 0: 113 | bar += ('='*(prog_width-1)) 114 | if current < self.target: 115 | bar += '>' 116 | else: 117 | bar += '=' 118 | bar += ('.'*(self.width-prog_width)) 119 | bar += ']' 120 | sys.stdout.write(bar) 121 | self.total_width = len(bar) 122 | 123 | if current: 124 | time_per_unit = (now - self.start) / current 125 | else: 126 | time_per_unit = 0 127 | eta = time_per_unit*(self.target - current) 128 | info = '' 129 | if current < self.target: 130 | info += ' - ETA: %ds' % eta 131 | else: 132 | info += ' - %ds' % (now - self.start) 133 | for k in self.unique_values: 134 | if type(self.sum_values[k]) is list: 135 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 136 | else: 137 | info += ' - %s: %s' % (k, self.sum_values[k]) 138 | 139 | for k, v in self.exp_avg.items(): 140 | info += ' - %s: %.4f' % (k, v) 141 | 142 | self.total_width += len(info) 143 | if prev_total_width > self.total_width: 144 | info += ((prev_total_width-self.total_width) * " ") 145 | 146 | 
sys.stdout.write(info) 147 | sys.stdout.flush() 148 | 149 | if current >= self.target: 150 | sys.stdout.write("\n") 151 | 152 | if self.verbose == 2: 153 | if current >= self.target: 154 | info = '%ds' % (now - self.start) 155 | for k in self.unique_values: 156 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 157 | sys.stdout.write(info + "\n") 158 | 159 | def add(self, n, values=[]): 160 | self.update(self.seen_so_far+n, values) 161 | --------------------------------------------------------------------------------
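The training loop in toy-model/pg.py above amounts to REINFORCE on a fixed multi-modal Gaussian reward surface: sample a batch of action pairs from the controller, look their rewards up on the grid built by build_reward_function(), and minimize -mean(logprob * advantage). The stand-alone sketch below mirrors those mechanics with NumPy/SciPy only; the per-layer softmax logits, learning rate, iteration count, seed, and batch-mean baseline are illustrative simplifications assumed here (the repo uses a NASCell controller trained with Adam and feeds raw returns as the advantage), not the original implementation.

import numpy as np
from scipy.stats import multivariate_normal

# Reward surface on a 1000 x 1000 grid, mirroring build_reward_function() in pg.py.
x, y = np.mgrid[-10:10:0.02, -10:10:0.02]
pos = np.dstack((x, y))
components = [
    ([5, -5], [[10, 0], [0, 10]]), ([2, -2], [[7, 2], [2, 5]]),
    ([7, -7], [[1, 0], [0, 1]]),   ([3, -3], [[1, 0], [0, 1]]),
    ([-5, 5], [[3, 1], [1, 2]]),   ([-2, 2], [[7, 2], [2, 5]]),
    ([-7, 7], [[1, 0], [0, 1]]),   ([-3, 3], [[4, 0], [0, 4]]),
]
reward = sum(multivariate_normal(m, c).pdf(pos) for m, c in components)

n = reward.shape[0]                  # 1000 discrete actions per "layer", as in pg.py
logits = [np.zeros(n), np.zeros(n)]  # toy stand-in for the NASCell controller
lr, batch_size = 50.0, 500           # illustrative values, not the repo's hyperparameters

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

rng = np.random.default_rng(0)
for t in range(200):
    probs = [softmax(l) for l in logits]
    a1 = rng.choice(n, size=batch_size, p=probs[0])
    a2 = rng.choice(n, size=batch_size, p=probs[1])
    r = reward[a1, a2]
    adv = (r - r.mean()) / (r.std() + 1e-8)   # batch-mean baseline, normalized
    # Gradient ascent on mean(logprob * advantage); for a categorical policy,
    # d log pi(a) / d logit_j = 1[j == a] - p_j.
    for logit, prob, a in ((logits[0], probs[0], a1), (logits[1], probs[1], a2)):
        grad = np.bincount(a, weights=adv, minlength=n) / batch_size - prob * adv.mean()
        logit += lr * grad
    if t % 20 == 0:
        # Mean reward should drift upward as probability mass concentrates near the modes.
        print("iter %3d  mean reward %.4f" % (t, r.mean()))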