├── .DS_Store
├── LICENSE
├── MNIST_data
│   ├── t10k-images-idx3-ubyte.gz
│   ├── t10k-labels-idx1-ubyte.gz
│   ├── train-images-idx3-ubyte.gz
│   └── train-labels-idx1-ubyte.gz
├── README.md
├── cnn.py
├── experiments
│   ├── .DS_Store
│   ├── README.md
│   └── train.py
├── net_manager.py
├── reinforce.py
└── train.py

--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Wallarm

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MNIST_data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/MNIST_data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/MNIST_data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/MNIST_data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# nascell-automl
This code accompanies the "Simple implementation of Neural Architecture Search with Reinforcement Learning" blog post.

The original blog post with all the details (a step-by-step guide):
https://lab.wallarm.com/the-first-step-by-step-guide-for-implementing-neural-architecture-search-with-reinforcement-99ade71b3d28

# Requirements
- Python 3
- TensorFlow > 1.4

# Training
Print the available parameters:
```
python3 train.py --help
```
```
optional arguments:
  -h, --help            show this help message and exit
  --max_layers MAX_LAYERS
```
Train:
```
python3 train.py
```

# Evaluating an architecture
To train and evaluate a specific architecture:
```
$ cd experiments/
$ python3 train.py --architecture "61, 24, 60, 5, 57, 55, 59, 3"
```
--------------------------------------------------------------------------------
/cnn.py:
--------------------------------------------------------------------------------
import tensorflow as tf

class CNN():
    """Builds a 1-D convolutional network from a list of
    [kernel_size, num_filters, pool_size, dropout] descriptions, one entry per layer."""

    def __init__(self, num_input, num_classes, cnn_config):
        cnn = [c[0] for c in cnn_config]
        cnn_num_filters = [c[1] for c in cnn_config]
        max_pool_ksize = [c[2] for c in cnn_config]

        self.X = tf.placeholder(tf.float32,
                                [None, num_input],
                                name="input_X")
        self.Y = tf.placeholder(tf.int32, [None, num_classes], name="input_Y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, [], name="dense_dropout_keep_prob")
        self.cnn_dropout_rates = tf.placeholder(tf.float32, [len(cnn), ], name="cnn_dropout_keep_prob")

        Y = self.Y
        X = tf.expand_dims(self.X, -1)
        pool_out = X
        with tf.name_scope("Conv_part"):
            for idd, filter_size in enumerate(cnn):
                with tf.name_scope("L"+str(idd)):
                    conv_out = tf.layers.conv1d(
                        pool_out,
                        filters=cnn_num_filters[idd],
                        kernel_size=(int(filter_size)),
                        strides=1,
                        padding="SAME",
                        name="conv_out_"+str(idd),
                        activation=tf.nn.relu,
                        kernel_initializer=tf.contrib.layers.xavier_initializer(),
                        bias_initializer=tf.zeros_initializer
                    )
                    pool_out = tf.layers.max_pooling1d(
                        conv_out,
                        pool_size=(int(max_pool_ksize[idd])),
                        strides=1,
                        padding='SAME',
                        name="max_pool_"+str(idd)
                    )
                    pool_out = tf.nn.dropout(pool_out, self.cnn_dropout_rates[idd])

        flatten_pred_out = tf.contrib.layers.flatten(pool_out)
        self.logits = tf.layers.dense(flatten_pred_out, num_classes)

        self.prediction = tf.nn.softmax(self.logits, name="prediction")
        self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=Y, name="loss")
        correct_pred = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name="accuracy")
--------------------------------------------------------------------------------
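For reference, a minimal sketch (not part of the original repository) of how this class is consumed, mirroring experiments/train.py: each candidate architecture is a list of four-number layer descriptions, and the dropout placeholders are fed per batch. The two-layer configuration below is only an illustrative example.
```
import tensorflow as tf
from cnn import CNN

# Illustrative architecture: two conv layers, each described as
# [kernel_size, num_filters, pool_size, dropout].
action = [[5, 32, 2, 5], [3, 64, 2, 3]]
cnn_drop_rate = [layer[3] for layer in action]

model = CNN(num_input=784, num_classes=10, cnn_config=action)
train_op = tf.train.AdamOptimizer(1e-4).minimize(tf.reduce_mean(model.loss))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # A per-batch feed would look like:
    # {model.X: batch_x, model.Y: batch_y,
    #  model.dropout_keep_prob: 0.85,
    #  model.cnn_dropout_rates: cnn_drop_rate}
```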
/experiments/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/experiments/.DS_Store
--------------------------------------------------------------------------------
/experiments/README.md:
--------------------------------------------------------------------------------
If you want to use the following architecture for MNIST:
- input layer: 784 nodes (MNIST image size)
- first convolution layer: 5x32
- first max-pooling layer: 2
- second convolution layer: 5x64
- second max-pooling layer: 2
- output layer: 10 nodes (number of classes for MNIST)

you can do it with the following command:
```
python3 train.py --architecture "5, 32, 2, 5, 3, 64, 2, 3"
```
Every group of four numbers describes one layer: kernel size, number of filters, max-pooling size, and dropout.
--------------------------------------------------------------------------------
/experiments/train.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import argparse
import sys
sys.path.append('../')
from cnn import CNN
from tensorflow.examples.tutorials.mnist import input_data

def main(action, name):
    mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)
    action = [int(x) for x in action.split(",")]
    training_epochs = 10
    batch_size = 100

    # Group the flat list into one [kernel, filters, pool, dropout] entry per layer.
    action = [action[x:x+4] for x in range(0, len(action), 4)]
    cnn_drop_rate = [c[3] for c in action]

    model = CNN(784, 10, action)
    loss_op = tf.reduce_mean(model.loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
    train_op = optimizer.minimize(loss_op)

    tf.summary.scalar('acc', model.accuracy)
    tf.summary.scalar('loss', tf.reduce_mean(model.loss))
    merged_summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(name, graph=tf.get_default_graph())

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    for epoch in range(training_epochs):
        for step in range(int(mnist.train.num_examples/batch_size)):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            feed = {model.X: batch_x,
                    model.Y: batch_y,
                    model.dropout_keep_prob: 0.85,
                    model.cnn_dropout_rates: cnn_drop_rate}
            _, summary = sess.run([train_op, merged_summary_op], feed_dict=feed)
            summary_writer.add_summary(summary, step+(epoch+1)*int(mnist.train.num_examples/batch_size))

        print("epoch: ", epoch+1, " of ", training_epochs)

    batch_x, batch_y = mnist.test.next_batch(mnist.test.num_examples)
    loss, acc = sess.run(
        [loss_op, model.accuracy],
        feed_dict={model.X: batch_x,
                   model.Y: batch_y,
                   model.dropout_keep_prob: 1.0,
                   model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})

    print("Network accuracy =", acc, " loss =", loss)
    print("Final accuracy for", name, " =", acc)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--architecture', default="5, 32, 2, 5, 3, 64, 2, 3")
    parser.add_argument('--name', default="model")
    args = parser.parse_args()

    main(args.architecture, args.name)
--------------------------------------------------------------------------------
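To make the encoding concrete, here is the chunking that the script above applies to the --architecture string, shown as a small standalone sketch (the string is just the script's default value):
```
architecture = "5, 32, 2, 5, 3, 64, 2, 3"       # default value from experiments/train.py
numbers = [int(x) for x in architecture.split(",")]

# One [kernel_size, num_filters, pool_size, dropout] group per layer.
layers = [numbers[i:i+4] for i in range(0, len(numbers), 4)]
print(layers)                                   # [[5, 32, 2, 5], [3, 64, 2, 3]]

cnn_drop_rate = [layer[3] for layer in layers]  # fed to model.cnn_dropout_rates
```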
/net_manager.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from cnn import CNN

class NetManager():
    def __init__(self, num_input, num_classes, learning_rate, mnist,
                 max_step_per_action=5500*3,
                 batch_size=100,
                 dropout_rate=0.85):

        self.num_input = num_input
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.mnist = mnist

        self.max_step_per_action = max_step_per_action
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate

    def get_reward(self, action, step, pre_acc):
        # Decode the flat action into one [kernel, filters, pool, dropout] group per layer.
        action = [action[0][0][x:x+4] for x in range(0, len(action[0][0]), 4)]
        cnn_drop_rate = [c[3] for c in action]
        with tf.Graph().as_default() as g:
            with g.container('experiment'+str(step)):
                model = CNN(self.num_input, self.num_classes, action)
                loss_op = tf.reduce_mean(model.loss)
                optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                train_op = optimizer.minimize(loss_op)

                with tf.Session() as train_sess:
                    init = tf.global_variables_initializer()
                    train_sess.run(init)

                    for step in range(self.max_step_per_action):
                        batch_x, batch_y = self.mnist.train.next_batch(self.batch_size)
                        feed = {model.X: batch_x,
                                model.Y: batch_y,
                                model.dropout_keep_prob: self.dropout_rate,
                                model.cnn_dropout_rates: cnn_drop_rate}
                        _ = train_sess.run(train_op, feed_dict=feed)

                        if step % 100 == 0:
                            # Calculate batch loss and accuracy
                            loss, acc = train_sess.run(
                                [loss_op, model.accuracy],
                                feed_dict={model.X: batch_x,
                                           model.Y: batch_y,
                                           model.dropout_keep_prob: 1.0,
                                           model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})
                            print("Step " + str(step) +
                                  ", Minibatch Loss= " + "{:.4f}".format(loss) +
                                  ", Current accuracy= " + "{:.3f}".format(acc))
                    batch_x, batch_y = self.mnist.test.next_batch(10000)
                    loss, acc = train_sess.run(
                        [loss_op, model.accuracy],
                        feed_dict={model.X: batch_x,
                                   model.Y: batch_y,
                                   model.dropout_keep_prob: 1.0,
                                   model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})
                    print("accuracy:", acc, "previous accuracy:", pre_acc)
                    # Reward rule: the test accuracy itself is returned as the reward
                    # unless it improved on the previous architecture by more than 0.01,
                    # in which case a small fixed reward of 0.01 is returned instead.
                    if acc - pre_acc <= 0.01:
                        return acc, acc
                    else:
                        return 0.01, acc
--------------------------------------------------------------------------------
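A condensed sketch of how train.py drives the manager (the concrete action values are illustrative): the controller's output arrives as an integer array of shape [1, 1, 4*max_layers], which get_reward indexes as action[0][0].
```
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from net_manager import NetManager

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
net_manager = NetManager(num_input=784, num_classes=10,
                         learning_rate=0.001, mnist=mnist, batch_size=100)

# Illustrative controller output for max_layers=2:
# [kernel, filters, pool, dropout] * max_layers, wrapped as shape [1, 1, 8].
action = np.array([[[5, 32, 2, 5, 3, 64, 2, 3]]])
reward, accuracy = net_manager.get_reward(action, step=0, pre_acc=0.0)
print(reward, accuracy)
```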
/reinforce.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import random
import numpy as np

class Reinforce():
    def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
                 division_rate=100.0,
                 reg_param=0.001,
                 discount_factor=0.99,
                 exploration=0.3):
        self.sess = sess
        self.optimizer = optimizer
        self.policy_network = policy_network
        self.division_rate = division_rate
        self.reg_param = reg_param
        self.discount_factor = discount_factor
        self.exploration = exploration
        self.max_layers = max_layers
        self.global_step = global_step

        self.reward_buffer = []
        self.state_buffer = []

        self.create_variables()
        var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.sess.run(tf.variables_initializer(var_lists))

    def get_action(self, state):
        return self.sess.run(self.predicted_action, {self.states: state})
        # NOTE: the early return above disables the epsilon-greedy exploration
        # branch below; it is kept for reference but is never reached as written.
        if random.random() < self.exploration:
            return np.array([[random.sample(range(1, 35), 4*self.max_layers)]])
        else:
            return self.sess.run(self.predicted_action, {self.states: state})

    def create_variables(self):
        with tf.name_scope("model_inputs"):
            # raw state representation
            self.states = tf.placeholder(tf.float32, [None, self.max_layers*4], name="states")

        with tf.name_scope("predict_actions"):
            # initialize policy network
            with tf.variable_scope("policy_network"):
                self.policy_outputs = self.policy_network(self.states, self.max_layers)

            self.action_scores = tf.identity(self.policy_outputs, name="action_scores")

            self.predicted_action = tf.cast(tf.scalar_mul(self.division_rate, self.action_scores), tf.int32, name="predicted_action")

        # regularization loss
        policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")

        # compute loss and gradients
        with tf.name_scope("compute_gradients"):
            # gradients for selecting action from policy network
            self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")

            with tf.variable_scope("policy_network", reuse=True):
                self.logprobs = self.policy_network(self.states, self.max_layers)
                print("self.logprobs", self.logprobs)

            # compute policy loss and regularization loss
            self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logprobs[:, -1, :], labels=self.states)
            self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
            self.reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_network_variables])  # Regularization
            self.loss = self.pg_loss + self.reg_param * self.reg_loss

            # compute gradients
            self.gradients = self.optimizer.compute_gradients(self.loss)

            # compute policy gradients: scale each gradient by the reward signal
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (grad * self.discounted_rewards, var)

        # training update
        with tf.name_scope("train_policy_network"):
            # apply gradients to update policy network
            self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)

    def storeRollout(self, state, reward):
        self.reward_buffer.append(reward)
        self.state_buffer.append(state[0])

    def train_step(self, steps_count):
        states = np.array(self.state_buffer[-steps_count:])/self.division_rate
        rewards = self.reward_buffer[-steps_count:]
        _, ls = self.sess.run([self.train_op, self.loss],
                              {self.states: states,
                               self.discounted_rewards: rewards})
        return ls
--------------------------------------------------------------------------------
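One detail worth spelling out is the division_rate scaling: the policy network emits small real-valued action_scores, predicted_action multiplies them by division_rate (100.0 by default) and casts to int32 to obtain a concrete layer description, and train_step divides the stored states by the same factor on the way back in. A small numeric illustration with made-up values:
```
import numpy as np

division_rate = 100.0

# Hypothetical raw policy-network outputs, shape [1, 1, 4*max_layers] for max_layers=2.
action_scores = np.array([[[0.05, 0.32, 0.02, 0.05, 0.03, 0.64, 0.02, 0.03]]], dtype=np.float32)

# What predicted_action computes: scale and truncate to integers.
predicted_action = (division_rate * action_scores).astype(np.int32)
# -> roughly [[[5, 32, 2, 5, 3, 64, 2, 3]]]

# What train_step does with stored states: rescale before feeding the network again.
states = predicted_action[0] / division_rate
```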
/train.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import argparse
import datetime

from cnn import CNN
from net_manager import NetManager
from reinforce import Reinforce

from tensorflow.examples.tutorials.mnist import input_data

def parse_args():
    desc = "TensorFlow implementation of 'Neural Architecture Search with Reinforcement Learning'"
    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('--max_layers', default=2)

    args = parser.parse_args()
    args.max_layers = int(args.max_layers)
    return args


def policy_network(state, max_layers):
    '''
    The policy network is the main network used to search for an optimal architecture.
    It uses the NAS (Neural Architecture Search) recurrent network cell:
    https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1363

    Args:
        state: current state of the required topology
        max_layers: maximum number of layers
    Returns:
        3-D tensor with the new state (new topology)
    '''
    with tf.name_scope("policy_network"):
        nas_cell = tf.contrib.rnn.NASCell(4*max_layers)
        outputs, state = tf.nn.dynamic_rnn(
            nas_cell,
            tf.expand_dims(state, -1),
            dtype=tf.float32
        )
        bias = tf.Variable([0.05]*4*max_layers)
        outputs = tf.nn.bias_add(outputs, bias)
        print("outputs: ", outputs, outputs[:, -1:, :], tf.slice(outputs, [0, 4*max_layers-1, 0], [1, 1, 4*max_layers]))
        #return tf.slice(outputs, [0, 4*max_layers-1, 0], [1, 1, 4*max_layers]) # Returned last output of rnn
        return outputs[:, -1:, :]

def train(mnist):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    # NOTE: starter_learning_rate is currently unused; the decay below starts from 0.99.
    learning_rate = tf.train.exponential_decay(0.99, global_step,
                                               500, 0.96, staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    net_manager = NetManager(num_input=784,
                             num_classes=10,
                             learning_rate=0.001,
                             mnist=mnist,
                             batch_size=100)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0]*args.max_layers], dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("current action:", action)
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In our sample the action is equal to the state
        state = action[0]
        reinforce.storeRollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = "current time: "+str(datetime.datetime.now().time())+" episode: "+str(i_episode)+" loss: "+str(ls)+" last_state: "+str(state)+" last_reward: "+str(reward)+"\n"
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)

def main():
    global args
    args = parse_args()

    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
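As a closing note on the search entry point: the controller's state is a flat vector of four numbers per layer, matching the encoding used everywhere else in the repository. A minimal sketch of the initial state constructed in train() above:
```
import numpy as np

max_layers = 2   # default value of --max_layers
# Four numbers per layer; following the repository's encoding this reads as
# kernel size 10, 128 filters, pool size 1, dropout 1 for each layer.
state = np.array([[10.0, 128.0, 1.0, 1.0] * max_layers], dtype=np.float32)
print(state.shape)   # (1, 8), matching the controller's [None, 4*max_layers] placeholder
```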