├── Utils
    ├── constants.py
    ├── configs.py
    ├── cifar10_processor.py
    └── child_network.py
├── Reference paper
    ├── NAS with RL.pdf
    └── Efficient Neural Architecture via Parameter Sharing.pdf
├── train.py
├── README.md
└── Controller.py
/Utils/constants.py:
--------------------------------------------------------------------------------
class PATHS:
    DATA_DIR = "data"
    SAVE_DIR = "saves"
--------------------------------------------------------------------------------
/Reference paper/NAS with RL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ajayn1997/Neural-Architecture-Search-using-Reinforcement-Learning/HEAD/Reference paper/NAS with RL.pdf
--------------------------------------------------------------------------------
/Reference paper/Efficient Neural Architecture via Parameter Sharing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ajayn1997/Neural-Architecture-Search-using-Reinforcement-Learning/HEAD/Reference paper/Efficient Neural Architecture via Parameter Sharing.pdf
--------------------------------------------------------------------------------
/Utils/configs.py:
--------------------------------------------------------------------------------
child_network_params = {
    "learning_rate": 3e-5,
    "max_epochs": 100,
    "beta": 1e-3,
    "batch_size": 20
}

controller_params = {
    "max_layers": 3,
    "components_per_layer": 4,
    "beta": 1e-4,
    "max_episodes": 2000,
    "num_children_per_episode": 10
}
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import logging
import sys

from Controller import Controller

if __name__ == '__main__':
    # Configure the logger
    logging.basicConfig(stream=sys.stdout,
                        level=logging.DEBUG,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
    controller = Controller()
    controller.train_controller()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural-Architecture-Search-using-Reinforcement-Learning
An implementation of neural architecture search using the REINFORCE algorithm. We use a recurrent network to generate model descriptions of neural networks and train this RNN with reinforcement learning to maximize the expected accuracy of the generated architectures on a validation set. The algorithm is tested on the CIFAR-10 dataset. The project is inspired by the work presented in the paper "Neural Architecture Search with Reinforcement Learning" by Barret Zoph and Quoc V. Le from Google Brain.
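
At a high level, the controller RNN repeatedly samples child architectures, each child CNN is trained and scored on held-out data, and its validation accuracy is used as the REINFORCE reward (minus a moving-average baseline). Below is a minimal, self-contained sketch of that loop with stand-in functions; the names are illustrative only, and the actual implementation lives in `Controller.py` and `Utils/child_network.py`.

```python
import random

def sample_architecture():
    # Stand-in for the controller RNN: one (kernel, stride, filters, pool) tuple per layer.
    return [[random.choice([1, 3, 5]), 1, random.choice([16, 32, 64]), 2] for _ in range(3)]

def train_and_evaluate(architecture):
    # Stand-in for training a child CNN and measuring its validation accuracy.
    return random.uniform(0.1, 0.9)

baseline = 0.0
for episode in range(5):
    rewards = [train_and_evaluate(sample_architecture()) for _ in range(2)]
    mean_reward = sum(rewards) / len(rewards)
    advantage = mean_reward - baseline          # REINFORCE scales the policy gradient by this
    baseline = 0.9 * baseline + 0.1 * mean_reward
    print(episode, mean_reward, advantage)
    # The real code updates the controller weights with this advantage
    # (see Controller.build_controller and Controller.train_controller).
```
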
# Architecture
![alt text](https://miro.medium.com/max/656/1*hIif88uJ7Te8MJEhm40rbw.png)
![alt text](https://i.ytimg.com/vi/CYUpDogeIL0/maxresdefault.jpg)
--------------------------------------------------------------------------------
/Utils/cifar10_processor.py:
--------------------------------------------------------------------------------
import logging

import numpy as np
import tensorflow as tf
from keras.datasets import cifar10
from keras.utils import np_utils

logger = logging.getLogger(__name__)


def _create_tf_dataset(x, y, batch_size):
    return tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(x),
                                tf.data.Dataset.from_tensor_slices(y))).shuffle(500).repeat().batch(batch_size)


def get_tf_datasets_from_numpy(batch_size, validation_split=0.1):
    """
    Main function for building tf.data.Dataset objects for training, validation, and testing.
    Args:
        batch_size (int): Batch size
        validation_split (float): Fraction of the training data held out for validation. Between 0.0 and 1.0.
    """
    # Load data from the keras datasets API
    (X, y), (X_test, y_test) = cifar10.load_data()

    logger.info("Dividing pixels by 255")
    X = X / 255.
    X_test = X_test / 255.

    X = X.astype(np.float32)
    X_test = X_test.astype(np.float32)
    y = y.astype(np.float32)
    y_test = y_test.astype(np.float32)

    # Turn labels into one-hot encodings
    if y.shape[1] != 10:
        y = np_utils.to_categorical(y, num_classes=10)
        y_test = np_utils.to_categorical(y_test, num_classes=10)

    logger.info("Loaded data from keras")

    split_idx = int((1.0 - validation_split) * len(X))
    X_train, y_train = X[:split_idx], y[:split_idx]
    X_valid, y_valid = X[split_idx:], y[split_idx:]

    train_dataset = _create_tf_dataset(X_train, y_train, batch_size)
    valid_dataset = _create_tf_dataset(X_valid, y_valid, batch_size)
    test_dataset = _create_tf_dataset(X_test, y_test, batch_size)

    # Number of batches per epoch for the train, valid, and test datasets
    num_train_batches = int(X_train.shape[0] // batch_size)
    num_valid_batches = int(X_valid.shape[0] // batch_size)
    num_test_batches = int(X_test.shape[0] // batch_size)

    return train_dataset, valid_dataset, test_dataset, num_train_batches, num_valid_batches, num_test_batches
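
# Example usage (assumed, mirroring how Controller.train_child_network consumes the return values):
#
#   train_ds, valid_ds, test_ds, n_train, n_valid, n_test = get_tf_datasets_from_numpy(batch_size=20)
#   iterator = tf.data.Iterator.from_structure(train_ds.output_types, train_ds.output_shapes)
#   images, labels = iterator.get_next()
#
# With CIFAR-10's 50,000 training / 10,000 test images and the default validation_split=0.1,
# this gives n_train == 2250, n_valid == 250 and n_test == 500.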
--------------------------------------------------------------------------------
/Utils/child_network.py:
--------------------------------------------------------------------------------
import logging

import tensorflow as tf

logger = logging.getLogger(__name__)


class ChildCNN(object):

    def __init__(self, cnn_dna, child_id, beta=1e-4, drop_rate=0.2, **kwargs):
        self.cnn_dna = self.process_raw_controller_output(cnn_dna)
        self.child_id = child_id
        self.beta = beta
        self.drop_rate = drop_rate
        self.is_training = tf.placeholder_with_default(True, shape=None, name="is_training_{}".format(self.child_id))
        self.num_classes = 10

    def process_raw_controller_output(self, output):
        '''
        A helper function for preprocessing the output of the NASCell
        Args:
            output (np.ndarray): The output of the NASCell
        Returns:
            (list) The child network's architecture
        '''
        output = output.ravel()
        cnn_dna = [list(output[x:x + 4]) for x in range(0, len(output), 4)]
        return cnn_dna

    def build(self, input_tensor):
        '''
        Method for creating the child network
        Args:
            input_tensor: The tensor which represents the input
        Returns:
            The tensor which represents the output logits (pre-softmax activations)
        '''
        logger.info("DNA is: {}".format(self.cnn_dna))
        output = input_tensor
        for idx in range(len(self.cnn_dna)):
            # Get the configuration for the layer
            kernel_size, stride, num_filters, max_pool_size = self.cnn_dna[idx]
            output = tf.layers.conv2d(output,
                                      # Number of filters the convolutional layer will output
                                      filters=num_filters,
                                      # Size (height, width) of the convolutional kernel
                                      kernel_size=(kernel_size, kernel_size),
                                      # Stride of the kernel
                                      strides=(stride, stride),
                                      # Pad the image so the spatial size is preserved
                                      padding="SAME",
                                      # It is good practice to name your layers
                                      name="conv_layer_{}".format(idx),
                                      activation=tf.nn.relu,
                                      kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                      bias_initializer=tf.zeros_initializer(),
                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.beta))
            # Apply 2D max pooling to the output of the conv layer
            output = tf.layers.max_pooling2d(
                output, pool_size=(max_pool_size, max_pool_size), strides=1,
                padding="SAME", name="pool_out_{}".format(idx)
            )
            # Dropout to regularize the network further
            output = tf.layers.dropout(output, rate=self.drop_rate, training=self.is_training)

        # Flatten the CNN output and add a fully connected layer
        with tf.name_scope("child_{}_fully_connected".format(self.child_id)):
            output = tf.layers.flatten(output, name="flatten")
            logits = tf.layers.dense(output, self.num_classes)

        return logits
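
# Illustrative example (not part of the original module): the controller emits a flat
# vector with four numbers per layer. With max_layers=3, a raw output such as
#   np.array([3, 1, 32, 2,  5, 1, 64, 2,  3, 1, 128, 2])
# is chunked by process_raw_controller_output into
#   [[3, 1, 32, 2], [5, 1, 64, 2], [3, 1, 128, 2]],
# which build() reads as (kernel_size, stride, num_filters, max_pool_size) per layer.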
--------------------------------------------------------------------------------
/Controller.py:
--------------------------------------------------------------------------------
import logging

import numpy as np
import tensorflow as tf

from Utils.child_network import ChildCNN
from Utils.cifar10_processor import get_tf_datasets_from_numpy
from Utils.configs import child_network_params, controller_params

logger = logging.getLogger(__name__)


def ema(values):
    """
    Helper function for keeping track of an exponential moving average of a list of values.
    For this module, we use it to maintain an exponential moving average of rewards.

    Args:
        values (list): A list of rewards
    Returns:
        (float) The last value of the exponential moving average
    """
    weights = np.exp(np.linspace(-1., 0., len(values)))
    weights /= weights.sum()
    a = np.convolve(values, weights, mode="full")[:len(values)]
    return a[-1]
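
# Illustrative check (not part of the original module): the weights are
# exp(linspace(-1, 0, n)) normalised to sum to 1, so for two rewards
# ema([0.0, 1.0]) returns roughly 0.269 rather than the plain mean of 0.5.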

class Controller(object):

    def __init__(self):
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.num_cell_outputs = controller_params['components_per_layer'] * controller_params['max_layers']
        self.reward_history = []
        self.architecture_history = []
        self.division_rate = 100
        with self.graph.as_default():
            self.build_controller()

    def network_generator(self, nas_cell_hidden_state):
        # num_cell_outputs is the number of output units we expect from the NAS cell
        with tf.name_scope('network_generator'):
            nas = tf.contrib.rnn.NASCell(self.num_cell_outputs)
            network_architecture, nas_cell_hidden_state = tf.nn.dynamic_rnn(nas, tf.expand_dims(
                nas_cell_hidden_state, -1), dtype=tf.float32)
            bias_variable = tf.Variable([0.01] * self.num_cell_outputs)
            network_architecture = tf.nn.bias_add(network_architecture, bias_variable)
            return network_architecture[:, -1:, :]

    def generate_child_network(self, child_network_architecture):
        with self.graph.as_default():
            return self.sess.run(self.cnn_dna_output, {self.child_network_architectures: child_network_architecture})

    def build_controller(self):
        logger.info('Building controller network')
        # Build inputs and placeholders
        with tf.name_scope('controller_inputs'):
            # Input to the NASCell
            self.child_network_architectures = tf.placeholder(tf.float32, [None, self.num_cell_outputs],
                                                              name='controller_input')
            # Discounted rewards
            self.discounted_rewards = tf.placeholder(tf.float32, (None, ), name='discounted_rewards')

        # Build controller
        with tf.name_scope('network_generation'):
            with tf.variable_scope('controller'):
                self.controller_output = tf.identity(self.network_generator(self.child_network_architectures),
                                                     name='policy_scores')
                self.cnn_dna_output = tf.cast(tf.scalar_mul(self.division_rate, self.controller_output), tf.int32,
                                              name='controller_prediction')

        # Set up optimizer
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.train.exponential_decay(0.99, self.global_step, 500, 0.96, staircase=True)
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)

        # Gradient and loss computation
        with tf.name_scope('gradient_and_loss'):
            # Define the policy gradient loss for the controller
            self.policy_gradient_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=self.controller_output[:, -1, :],
                labels=self.child_network_architectures))
            # L2 weight decay for the controller weights
            self.l2_loss = tf.reduce_sum(tf.add_n([tf.nn.l2_loss(v) for v in
                                                   tf.trainable_variables(scope="controller")]))
            # Add the above two losses to define the total loss
            self.total_loss = self.policy_gradient_loss + self.l2_loss * controller_params["beta"]
            # Compute the gradients
            self.gradients = self.optimizer.compute_gradients(self.total_loss)

            # Gradients calculated using REINFORCE: scale each gradient by the baseline-subtracted reward
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (grad * self.discounted_rewards, var)

        with tf.name_scope('train_controller'):
            # The main training operation. This applies REINFORCE on the weights of the controller
            self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)

        logger.info('Successfully built controller')

    def train_child_network(self, cnn_dna, child_id):
        """
        Trains a child network and returns the reward, i.e. the validation accuracy
        Args:
            cnn_dna (list): List of tuples representing the child network's DNA
            child_id (str): Name of the child network
        Returns:
            (float) validation accuracy
        """
        logger.info("Training with dna: {}".format(cnn_dna))
        child_graph = tf.Graph()
        with child_graph.as_default():
            sess = tf.Session()

            child_network = ChildCNN(cnn_dna=cnn_dna, child_id=child_id, **child_network_params)

            # Create the input pipeline
            train_dataset, valid_dataset, test_dataset, num_train_batches, num_valid_batches, num_test_batches = \
                get_tf_datasets_from_numpy(batch_size=child_network_params["batch_size"])

            # Generic iterator
            iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
            next_tensor_batch = iterator.get_next()

            # Separate train and validation set init ops
            train_init_ops = iterator.make_initializer(train_dataset)
            valid_init_ops = iterator.make_initializer(valid_dataset)

            # Build the graph
            input_tensor, labels = next_tensor_batch

            # Build the child network, which returns the pre-softmax logits of the child network
            logits = child_network.build(input_tensor)

            # Define the loss function for the child network
            loss_ops = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits, name="loss")

            # Define the training operation for the child network
            train_ops = tf.train.AdamOptimizer(learning_rate=child_network_params["learning_rate"]).minimize(loss_ops)

            # The following operations are for calculating the accuracy of the child network
            pred_ops = tf.nn.softmax(logits, name="preds")
            correct = tf.equal(tf.argmax(pred_ops, 1), tf.argmax(labels, 1), name="correct")
            accuracy_ops = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

            initializer = tf.global_variables_initializer()

            # Training
            sess.run(initializer)
            sess.run(train_init_ops)

            logger.info("Training child CNN {} for {} epochs".format(child_id, child_network_params["max_epochs"]))
            for epoch_idx in range(child_network_params["max_epochs"]):
                avg_loss, avg_acc = [], []

                for batch_idx in range(num_train_batches):
                    loss, _, accuracy = sess.run([loss_ops, train_ops, accuracy_ops])
                    avg_loss.append(loss)
                    avg_acc.append(accuracy)

                logger.info("\tEpoch {}:\tloss - {:.6f}\taccuracy - {:.3f}".format(epoch_idx,
                                                                                   np.mean(avg_loss), np.mean(avg_acc)))

            # Validate and return the reward
            logger.info("Finished training, now calculating validation accuracy")
            sess.run(valid_init_ops)
            avg_val_loss, avg_val_acc = [], []
            for batch_idx in range(num_valid_batches):
                valid_loss, valid_accuracy = sess.run([loss_ops, accuracy_ops])
                avg_val_loss.append(valid_loss)
                avg_val_acc.append(valid_accuracy)
            logger.info("Valid loss - {:.6f}\tValid accuracy - {:.3f}".format(np.mean(avg_val_loss),
                                                                              np.mean(avg_val_acc)))

        return np.mean(avg_val_acc)
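
    # Illustrative REINFORCE bookkeeping (assumed numbers): if the ema baseline over
    # reward_history is 0.42 and the latest mean validation accuracy is 0.47, then
    # train_controller feeds discounted_rewards = [0.05], so every controller gradient
    # is scaled by +0.05 before being applied (see build_controller above).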

    def train_controller(self):
        with self.graph.as_default():
            self.sess.run(tf.global_variables_initializer())

        step = 0
        total_rewards = 0
        child_network_architecture = np.array([[10.0, 128.0, 1.0, 1.0] *
                                               controller_params['max_layers']], dtype=np.float32)

        for episode in range(controller_params['max_episodes']):
            logger.info('=============> Episode {} for Controller'.format(episode))
            step += 1
            episode_reward_buffer = []

            for sub_child in range(controller_params["num_children_per_episode"]):
                # Generate a child network architecture
                child_network_architecture = self.generate_child_network(child_network_architecture)[0]

                if np.any(np.less_equal(child_network_architecture, 0.0)):
                    reward = -1.0
                else:
                    reward = self.train_child_network(cnn_dna=child_network_architecture,
                                                      child_id='child/{}'.format("{}_{}".format(episode, sub_child)))
                episode_reward_buffer.append(reward)

            mean_reward = np.mean(episode_reward_buffer)

            self.reward_history.append(mean_reward)
            self.architecture_history.append(child_network_architecture)
            total_rewards += mean_reward

            child_network_architecture = np.array(self.architecture_history[-step:]).ravel() / self.division_rate
            child_network_architecture = child_network_architecture.reshape((-1, self.num_cell_outputs))
            baseline = ema(self.reward_history)
            last_reward = self.reward_history[-1]
            rewards = [last_reward - baseline]
            logger.info("Buffers before loss calculation")
            logger.info("States: {}".format(child_network_architecture))
            logger.info("Rewards: {}".format(rewards))

            with self.graph.as_default():
                _, loss = self.sess.run([self.train_op, self.total_loss],
                                        {self.child_network_architectures: child_network_architecture,
                                         self.discounted_rewards: rewards})

            logger.info('Episode: {} | Loss: {} | DNA: {} | Reward : {}'.format(
                episode, loss, child_network_architecture.ravel(), mean_reward))
--------------------------------------------------------------------------------
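Note on compute: with the defaults in Utils/configs.py (max_episodes=2000, num_children_per_episode=10, max_epochs=100), running `python train.py` can train up to 20,000 child CNNs for up to 100 epochs each, so a full search is expensive; lowering max_episodes or max_epochs in Utils/configs.py gives a quicker smoke test.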