├── .DS_Store
├── LICENSE
├── MNIST_data
│   ├── t10k-images-idx3-ubyte.gz
│   ├── t10k-labels-idx1-ubyte.gz
│   ├── train-images-idx3-ubyte.gz
│   └── train-labels-idx1-ubyte.gz
├── README.md
├── cnn.py
├── experiments
│   ├── .DS_Store
│   ├── README.md
│   └── train.py
├── net_manager.py
├── reinforce.py
└── train.py

--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Wallarm

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MNIST_data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/MNIST_data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/MNIST_data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/MNIST_data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/MNIST_data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# nascell-automl
This code accompanies the "Simple implementation of Neural Architecture Search with Reinforcement Learning" blog post.

The original blog post with all the details (a step-by-step guide):
https://lab.wallarm.com/the-first-step-by-step-guide-for-implementing-neural-architecture-search-with-reinforcement-99ade71b3d28

# Requirements
- Python 3
- TensorFlow > 1.4

# Training
Print the available parameters:
```
python3 train.py --help
```
```
optional arguments:
  -h, --help            show this help message and exit
  --max_layers MAX_LAYERS
```
Train:
```
python3 train.py
```

# Evaluating an architecture
To train and evaluate a specific architecture:
```
$ cd experiments/
$ python3 train.py --architecture "61, 24, 60, 5, 57, 55, 59, 3"
```
--------------------------------------------------------------------------------
/cnn.py:
--------------------------------------------------------------------------------
import tensorflow as tf

class CNN():
    """Builds a 1-D convolutional network from a list of
    [kernel_size, num_filters, pool_size, dropout] descriptions, one entry per layer."""

    def __init__(self, num_input, num_classes, cnn_config):
        cnn = [c[0] for c in cnn_config]
        cnn_num_filters = [c[1] for c in cnn_config]
        max_pool_ksize = [c[2] for c in cnn_config]

        self.X = tf.placeholder(tf.float32,
                                [None, num_input],
                                name="input_X")
        self.Y = tf.placeholder(tf.int32, [None, num_classes], name="input_Y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, [], name="dense_dropout_keep_prob")
        self.cnn_dropout_rates = tf.placeholder(tf.float32, [len(cnn), ], name="cnn_dropout_keep_prob")

        Y = self.Y
        X = tf.expand_dims(self.X, -1)
        pool_out = X
        with tf.name_scope("Conv_part"):
            for idd, filter_size in enumerate(cnn):
                with tf.name_scope("L"+str(idd)):
                    conv_out = tf.layers.conv1d(
                        pool_out,
                        filters=cnn_num_filters[idd],
                        kernel_size=(int(filter_size)),
                        strides=1,
                        padding="SAME",
                        name="conv_out_"+str(idd),
                        activation=tf.nn.relu,
                        kernel_initializer=tf.contrib.layers.xavier_initializer(),
                        bias_initializer=tf.zeros_initializer
                    )
                    pool_out = tf.layers.max_pooling1d(
                        conv_out,
                        pool_size=(int(max_pool_ksize[idd])),
                        strides=1,
                        padding='SAME',
                        name="max_pool_"+str(idd)
                    )
                    pool_out = tf.nn.dropout(pool_out, self.cnn_dropout_rates[idd])

        flatten_pred_out = tf.contrib.layers.flatten(pool_out)
        self.logits = tf.layers.dense(flatten_pred_out, num_classes)

        self.prediction = tf.nn.softmax(self.logits, name="prediction")
        self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=Y, name="loss")
        correct_pred = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name="accuracy")
--------------------------------------------------------------------------------
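For reference, a minimal sketch (not part of the original repository) of how this class is consumed, mirroring experiments/train.py: each candidate architecture is a list of four-number layer descriptions, and the dropout placeholders are fed per batch. The two-layer configuration below is only an illustrative example.
```
import tensorflow as tf
from cnn import CNN

# Illustrative architecture: two conv layers, each described as
# [kernel_size, num_filters, pool_size, dropout].
action = [[5, 32, 2, 5], [3, 64, 2, 3]]
cnn_drop_rate = [layer[3] for layer in action]

model = CNN(num_input=784, num_classes=10, cnn_config=action)
train_op = tf.train.AdamOptimizer(1e-4).minimize(tf.reduce_mean(model.loss))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # A per-batch feed would look like:
    # {model.X: batch_x, model.Y: batch_y,
    #  model.dropout_keep_prob: 0.85,
    #  model.cnn_dropout_rates: cnn_drop_rate}
```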
/experiments/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wallarm/nascell-automl/18e5e59b94d56590f044be28c97cfa84886ec295/experiments/.DS_Store
--------------------------------------------------------------------------------
/experiments/README.md:
--------------------------------------------------------------------------------
If you want to use the following architecture for MNIST:
- input layer: 784 nodes (MNIST image size)
- first convolution layer: 5x32
- first max-pooling layer: 2
- second convolution layer: 5x64
- second max-pooling layer: 2
- output layer: 10 nodes (number of classes for MNIST)

you can do it with the following command:
```
python3 train.py --architecture "5, 32, 2, 5, 3, 64, 2, 3"
```
Every group of four numbers describes one layer: kernel size, number of filters, max-pooling size, and dropout.
--------------------------------------------------------------------------------
/experiments/train.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import argparse
import sys
sys.path.append('../')
from cnn import CNN
from tensorflow.examples.tutorials.mnist import input_data

def main(action, name):
    mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)
    action = [int(x) for x in action.split(",")]
    training_epochs = 10
    batch_size = 100

    # Group the flat list into one [kernel, filters, pool, dropout] entry per layer.
    action = [action[x:x+4] for x in range(0, len(action), 4)]
    cnn_drop_rate = [c[3] for c in action]

    model = CNN(784, 10, action)
    loss_op = tf.reduce_mean(model.loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
    train_op = optimizer.minimize(loss_op)

    tf.summary.scalar('acc', model.accuracy)
    tf.summary.scalar('loss', tf.reduce_mean(model.loss))
    merged_summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(name, graph=tf.get_default_graph())

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    for epoch in range(training_epochs):
        for step in range(int(mnist.train.num_examples/batch_size)):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            feed = {model.X: batch_x,
                    model.Y: batch_y,
                    model.dropout_keep_prob: 0.85,
                    model.cnn_dropout_rates: cnn_drop_rate}
            _, summary = sess.run([train_op, merged_summary_op], feed_dict=feed)
            summary_writer.add_summary(summary, step+(epoch+1)*int(mnist.train.num_examples/batch_size))

        print("epoch: ", epoch+1, " of ", training_epochs)

    batch_x, batch_y = mnist.test.next_batch(mnist.test.num_examples)
    loss, acc = sess.run(
        [loss_op, model.accuracy],
        feed_dict={model.X: batch_x,
                   model.Y: batch_y,
                   model.dropout_keep_prob: 1.0,
                   model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})

    print("Network accuracy =", acc, " loss =", loss)
    print("Final accuracy for", name, " =", acc)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--architecture', default="5, 32, 2, 5, 3, 64, 2, 3")
    parser.add_argument('--name', default="model")
    args = parser.parse_args()

    main(args.architecture, args.name)
--------------------------------------------------------------------------------
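To make the encoding concrete, here is the chunking that the script above applies to the --architecture string, shown as a small standalone sketch (the string is just the script's default value):
```
architecture = "5, 32, 2, 5, 3, 64, 2, 3"       # default value from experiments/train.py
numbers = [int(x) for x in architecture.split(",")]

# One [kernel_size, num_filters, pool_size, dropout] group per layer.
layers = [numbers[i:i+4] for i in range(0, len(numbers), 4)]
print(layers)                                   # [[5, 32, 2, 5], [3, 64, 2, 3]]

cnn_drop_rate = [layer[3] for layer in layers]  # fed to model.cnn_dropout_rates
```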
/net_manager.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from cnn import CNN

class NetManager():
    def __init__(self, num_input, num_classes, learning_rate, mnist,
                 max_step_per_action=5500*3,
                 batch_size=100,
                 dropout_rate=0.85):

        self.num_input = num_input
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.mnist = mnist

        self.max_step_per_action = max_step_per_action
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate

    def get_reward(self, action, step, pre_acc):
        # Decode the flat action into one [kernel, filters, pool, dropout] group per layer.
        action = [action[0][0][x:x+4] for x in range(0, len(action[0][0]), 4)]
        cnn_drop_rate = [c[3] for c in action]
        with tf.Graph().as_default() as g:
            with g.container('experiment'+str(step)):
                model = CNN(self.num_input, self.num_classes, action)
                loss_op = tf.reduce_mean(model.loss)
                optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                train_op = optimizer.minimize(loss_op)

                with tf.Session() as train_sess:
                    init = tf.global_variables_initializer()
                    train_sess.run(init)

                    for step in range(self.max_step_per_action):
                        batch_x, batch_y = self.mnist.train.next_batch(self.batch_size)
                        feed = {model.X: batch_x,
                                model.Y: batch_y,
                                model.dropout_keep_prob: self.dropout_rate,
                                model.cnn_dropout_rates: cnn_drop_rate}
                        _ = train_sess.run(train_op, feed_dict=feed)

                        if step % 100 == 0:
                            # Calculate batch loss and accuracy
                            loss, acc = train_sess.run(
                                [loss_op, model.accuracy],
                                feed_dict={model.X: batch_x,
                                           model.Y: batch_y,
                                           model.dropout_keep_prob: 1.0,
                                           model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})
                            print("Step " + str(step) +
                                  ", Minibatch Loss= " + "{:.4f}".format(loss) +
                                  ", Current accuracy= " + "{:.3f}".format(acc))
                    batch_x, batch_y = self.mnist.test.next_batch(10000)
                    loss, acc = train_sess.run(
                        [loss_op, model.accuracy],
                        feed_dict={model.X: batch_x,
                                   model.Y: batch_y,
                                   model.dropout_keep_prob: 1.0,
                                   model.cnn_dropout_rates: [1.0]*len(cnn_drop_rate)})
                    print("accuracy:", acc, "previous accuracy:", pre_acc)
                    # Reward rule: the test accuracy itself is returned as the reward
                    # unless it improved on the previous architecture by more than 0.01,
                    # in which case a small fixed reward of 0.01 is returned instead.
                    if acc - pre_acc <= 0.01:
                        return acc, acc
                    else:
                        return 0.01, acc
--------------------------------------------------------------------------------
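A condensed sketch of how train.py drives the manager (the concrete action values are illustrative): the controller's output arrives as an integer array of shape [1, 1, 4*max_layers], which get_reward indexes as action[0][0].
```
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
from net_manager import NetManager

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
net_manager = NetManager(num_input=784, num_classes=10,
                         learning_rate=0.001, mnist=mnist, batch_size=100)

# Illustrative controller output for max_layers=2:
# [kernel, filters, pool, dropout] * max_layers, wrapped as shape [1, 1, 8].
action = np.array([[[5, 32, 2, 5, 3, 64, 2, 3]]])
reward, accuracy = net_manager.get_reward(action, step=0, pre_acc=0.0)
print(reward, accuracy)
```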
/reinforce.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import random
import numpy as np

class Reinforce():
    def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
                 division_rate=100.0,
                 reg_param=0.001,
                 discount_factor=0.99,
                 exploration=0.3):
        self.sess = sess
        self.optimizer = optimizer
        self.policy_network = policy_network
        self.division_rate = division_rate
        self.reg_param = reg_param
        self.discount_factor = discount_factor
        self.exploration = exploration
        self.max_layers = max_layers
        self.global_step = global_step

        self.reward_buffer = []
        self.state_buffer = []

        self.create_variables()
        var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.sess.run(tf.variables_initializer(var_lists))

    def get_action(self, state):
        return self.sess.run(self.predicted_action, {self.states: state})
        # NOTE: the early return above disables the epsilon-greedy exploration
        # branch below; it is kept for reference but is never reached as written.
        if random.random() < self.exploration:
            return np.array([[random.sample(range(1, 35), 4*self.max_layers)]])
        else:
            return self.sess.run(self.predicted_action, {self.states: state})

    def create_variables(self):
        with tf.name_scope("model_inputs"):
            # raw state representation
            self.states = tf.placeholder(tf.float32, [None, self.max_layers*4], name="states")

        with tf.name_scope("predict_actions"):
            # initialize policy network
            with tf.variable_scope("policy_network"):
                self.policy_outputs = self.policy_network(self.states, self.max_layers)

            self.action_scores = tf.identity(self.policy_outputs, name="action_scores")

            self.predicted_action = tf.cast(tf.scalar_mul(self.division_rate, self.action_scores), tf.int32, name="predicted_action")

        # regularization loss
        policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")

        # compute loss and gradients
        with tf.name_scope("compute_gradients"):
            # gradients for selecting action from policy network
            self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")

            with tf.variable_scope("policy_network", reuse=True):
                self.logprobs = self.policy_network(self.states, self.max_layers)
                print("self.logprobs", self.logprobs)

            # compute policy loss and regularization loss
            self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logprobs[:, -1, :], labels=self.states)
            self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
            self.reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_network_variables])  # Regularization
            self.loss = self.pg_loss + self.reg_param * self.reg_loss

            # compute gradients
            self.gradients = self.optimizer.compute_gradients(self.loss)

            # compute policy gradients: scale each gradient by the reward signal
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (grad * self.discounted_rewards, var)

        # training update
        with tf.name_scope("train_policy_network"):
            # apply gradients to update policy network
            self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)

    def storeRollout(self, state, reward):
        self.reward_buffer.append(reward)
        self.state_buffer.append(state[0])

    def train_step(self, steps_count):
        states = np.array(self.state_buffer[-steps_count:])/self.division_rate
        rewards = self.reward_buffer[-steps_count:]
        _, ls = self.sess.run([self.train_op, self.loss],
                              {self.states: states,
                               self.discounted_rewards: rewards})
        return ls
--------------------------------------------------------------------------------
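One detail worth spelling out is the division_rate scaling: the policy network emits small real-valued action_scores, predicted_action multiplies them by division_rate (100.0 by default) and casts to int32 to obtain a concrete layer description, and train_step divides the stored states by the same factor on the way back in. A small numeric illustration with made-up values:
```
import numpy as np

division_rate = 100.0

# Hypothetical raw policy-network outputs, shape [1, 1, 4*max_layers] for max_layers=2.
action_scores = np.array([[[0.05, 0.32, 0.02, 0.05, 0.03, 0.64, 0.02, 0.03]]], dtype=np.float32)

# What predicted_action computes: scale and truncate to integers.
predicted_action = (division_rate * action_scores).astype(np.int32)
# -> roughly [[[5, 32, 2, 5, 3, 64, 2, 3]]]

# What train_step does with stored states: rescale before feeding the network again.
states = predicted_action[0] / division_rate
```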
/train.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
import argparse
import datetime

from cnn import CNN
from net_manager import NetManager
from reinforce import Reinforce

from tensorflow.examples.tutorials.mnist import input_data

def parse_args():
    desc = "TensorFlow implementation of 'Neural Architecture Search with Reinforcement Learning'"
    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('--max_layers', default=2)

    args = parser.parse_args()
    args.max_layers = int(args.max_layers)
    return args


def policy_network(state, max_layers):
    '''
    The policy network is the main network used to search for an optimal architecture.
    It uses the NAS (Neural Architecture Search) recurrent network cell:
    https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1363

    Args:
        state: current state of the required topology
        max_layers: maximum number of layers
    Returns:
        3-D tensor with the new state (new topology)
    '''
    with tf.name_scope("policy_network"):
        nas_cell = tf.contrib.rnn.NASCell(4*max_layers)
        outputs, state = tf.nn.dynamic_rnn(
            nas_cell,
            tf.expand_dims(state, -1),
            dtype=tf.float32
        )
        bias = tf.Variable([0.05]*4*max_layers)
        outputs = tf.nn.bias_add(outputs, bias)
        print("outputs: ", outputs, outputs[:, -1:, :], tf.slice(outputs, [0, 4*max_layers-1, 0], [1, 1, 4*max_layers]))
        #return tf.slice(outputs, [0, 4*max_layers-1, 0], [1, 1, 4*max_layers]) # Returned last output of rnn
        return outputs[:, -1:, :]

def train(mnist):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    # NOTE: starter_learning_rate is currently unused; the decay below starts from 0.99.
    learning_rate = tf.train.exponential_decay(0.99, global_step,
                                               500, 0.96, staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    net_manager = NetManager(num_input=784,
                             num_classes=10,
                             learning_rate=0.001,
                             mnist=mnist,
                             batch_size=100)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0]*args.max_layers], dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("current action:", action)
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In our sample the action is equal to the state
        state = action[0]
        reinforce.storeRollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = "current time: "+str(datetime.datetime.now().time())+" episode: "+str(i_episode)+" loss: "+str(ls)+" last_state: "+str(state)+" last_reward: "+str(reward)+"\n"
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)

def main():
    global args
    args = parse_args()

    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
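As a closing note on the search entry point: the controller's state is a flat vector of four numbers per layer, matching the encoding used everywhere else in the repository. A minimal sketch of the initial state constructed in train() above:
```
import numpy as np

max_layers = 2   # default value of --max_layers
# Four numbers per layer; following the repository's encoding this reads as
# kernel size 10, 128 filters, pool size 1, dropout 1 for each layer.
state = np.array([[10.0, 128.0, 1.0, 1.0] * max_layers], dtype=np.float32)
print(state.shape)   # (1, 8), matching the controller's [None, 4*max_layers] placeholder
```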