├── Utils
    ├── constants.py
    ├── configs.py
    ├── cifar10_processor.py
    └── child_network.py
├── Reference paper
    ├── NAS with RL.pdf
    └── Efficient Neural Architecture via Parameter Sharing.pdf
├── train.py
├── README.md
└── Controller.py
/Utils/constants.py:
--------------------------------------------------------------------------------
class PATHS:
    DATA_DIR = "data"
    SAVE_DIR = "saves"
--------------------------------------------------------------------------------
/Reference paper/NAS with RL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ajayn1997/Neural-Architecture-Search-using-Reinforcement-Learning/HEAD/Reference paper/NAS with RL.pdf
--------------------------------------------------------------------------------
/Reference paper/Efficient Neural Architecture via Parameter Sharing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ajayn1997/Neural-Architecture-Search-using-Reinforcement-Learning/HEAD/Reference paper/Efficient Neural Architecture via Parameter Sharing.pdf
--------------------------------------------------------------------------------
/Utils/configs.py:
--------------------------------------------------------------------------------
child_network_params = {
    "learning_rate": 3e-5,
    "max_epochs": 100,
    "beta": 1e-3,
    "batch_size": 20
}

controller_params = {
    "max_layers": 3,
    "components_per_layer": 4,
    "beta": 1e-4,
    "max_episodes": 2000,
    "num_children_per_episode": 10
}
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import logging
import sys

from Controller import Controller

if __name__ == '__main__':
    # Configure the logger
    logging.basicConfig(stream=sys.stdout,
                        level=logging.DEBUG,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
    controller = Controller()
    controller.train_controller()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Neural-Architecture-Search-using-Reinforcement-Learning
An implementation of neural architecture search using the REINFORCE algorithm. We use a recurrent network to generate model descriptions of neural networks and train this RNN with reinforcement learning to maximize the expected accuracy of the generated architectures on a validation set. The algorithm is tested on the CIFAR-10 dataset. The project is inspired by the work presented in the paper "Neural Architecture Search with Reinforcement Learning" by Barret Zoph and Quoc V. Le from Google Brain.
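
At a high level, the controller RNN repeatedly samples child architectures, each child CNN is trained and scored on held-out data, and its validation accuracy is used as the REINFORCE reward (minus a moving-average baseline). Below is a minimal, self-contained sketch of that loop with stand-in functions; the names are illustrative only, and the actual implementation lives in `Controller.py` and `Utils/child_network.py`.

```python
import random

def sample_architecture():
    # Stand-in for the controller RNN: one (kernel, stride, filters, pool) tuple per layer.
    return [[random.choice([1, 3, 5]), 1, random.choice([16, 32, 64]), 2] for _ in range(3)]

def train_and_evaluate(architecture):
    # Stand-in for training a child CNN and measuring its validation accuracy.
    return random.uniform(0.1, 0.9)

baseline = 0.0
for episode in range(5):
    rewards = [train_and_evaluate(sample_architecture()) for _ in range(2)]
    mean_reward = sum(rewards) / len(rewards)
    advantage = mean_reward - baseline          # REINFORCE scales the policy gradient by this
    baseline = 0.9 * baseline + 0.1 * mean_reward
    print(episode, mean_reward, advantage)
    # The real code updates the controller weights with this advantage
    # (see Controller.build_controller and Controller.train_controller).
```
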
# Architecture
![alt text](https://miro.medium.com/max/656/1*hIif88uJ7Te8MJEhm40rbw.png)
![alt text](https://i.ytimg.com/vi/CYUpDogeIL0/maxresdefault.jpg)
--------------------------------------------------------------------------------
/Utils/cifar10_processor.py:
--------------------------------------------------------------------------------
import logging

import numpy as np
import tensorflow as tf
from keras.datasets import cifar10
from keras.utils import np_utils

logger = logging.getLogger(__name__)


def _create_tf_dataset(x, y, batch_size):
    return tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(x),
                                tf.data.Dataset.from_tensor_slices(y))).shuffle(500).repeat().batch(batch_size)


def get_tf_datasets_from_numpy(batch_size, validation_split=0.1):
    """
    Main function for building tf.data.Dataset objects for training, validation, and testing.
    Args:
        batch_size (int): Batch size
        validation_split (float): Fraction of the training data held out for validation. Between 0.0 and 1.0.
    """
    # Load data from the keras datasets API
    (X, y), (X_test, y_test) = cifar10.load_data()

    logger.info("Dividing pixels by 255")
    X = X / 255.
    X_test = X_test / 255.

    X = X.astype(np.float32)
    X_test = X_test.astype(np.float32)
    y = y.astype(np.float32)
    y_test = y_test.astype(np.float32)

    # Turn labels into one-hot encodings
    if y.shape[1] != 10:
        y = np_utils.to_categorical(y, num_classes=10)
        y_test = np_utils.to_categorical(y_test, num_classes=10)

    logger.info("Loaded data from keras")

    split_idx = int((1.0 - validation_split) * len(X))
    X_train, y_train = X[:split_idx], y[:split_idx]
    X_valid, y_valid = X[split_idx:], y[split_idx:]

    train_dataset = _create_tf_dataset(X_train, y_train, batch_size)
    valid_dataset = _create_tf_dataset(X_valid, y_valid, batch_size)
    test_dataset = _create_tf_dataset(X_test, y_test, batch_size)

    # Number of batches per epoch for the train, valid, and test datasets
    num_train_batches = int(X_train.shape[0] // batch_size)
    num_valid_batches = int(X_valid.shape[0] // batch_size)
    num_test_batches = int(X_test.shape[0] // batch_size)

    return train_dataset, valid_dataset, test_dataset, num_train_batches, num_valid_batches, num_test_batches
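
# Example usage (assumed, mirroring how Controller.train_child_network consumes the return values):
#
#   train_ds, valid_ds, test_ds, n_train, n_valid, n_test = get_tf_datasets_from_numpy(batch_size=20)
#   iterator = tf.data.Iterator.from_structure(train_ds.output_types, train_ds.output_shapes)
#   images, labels = iterator.get_next()
#
# With CIFAR-10's 50,000 training / 10,000 test images and the default validation_split=0.1,
# this gives n_train == 2250, n_valid == 250 and n_test == 500.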
--------------------------------------------------------------------------------
/Utils/child_network.py:
--------------------------------------------------------------------------------
import logging

import tensorflow as tf

logger = logging.getLogger(__name__)


class ChildCNN(object):

    def __init__(self, cnn_dna, child_id, beta=1e-4, drop_rate=0.2, **kwargs):
        self.cnn_dna = self.process_raw_controller_output(cnn_dna)
        self.child_id = child_id
        self.beta = beta
        self.drop_rate = drop_rate
        self.is_training = tf.placeholder_with_default(True, shape=None, name="is_training_{}".format(self.child_id))
        self.num_classes = 10

    def process_raw_controller_output(self, output):
        '''
        A helper function for preprocessing the output of the NASCell
        Args:
            output (np.ndarray): The output of the NASCell
        Returns:
            (list) The child network's architecture
        '''
        output = output.ravel()
        cnn_dna = [list(output[x:x + 4]) for x in range(0, len(output), 4)]
        return cnn_dna

    def build(self, input_tensor):
        '''
        Method for creating the child network
        Args:
            input_tensor: The tensor which represents the input
        Returns:
            The tensor which represents the output logits (pre-softmax activations)
        '''
        logger.info("DNA is: {}".format(self.cnn_dna))
        output = input_tensor
        for idx in range(len(self.cnn_dna)):
            # Get the configuration for the layer
            kernel_size, stride, num_filters, max_pool_size = self.cnn_dna[idx]
            output = tf.layers.conv2d(output,
                                      # Number of filters the convolutional layer will output
                                      filters=num_filters,
                                      # Size (height, width) of the convolutional kernel
                                      kernel_size=(kernel_size, kernel_size),
                                      # Stride of the kernel
                                      strides=(stride, stride),
                                      # Pad the image so the spatial size is preserved
                                      padding="SAME",
                                      # It is good practice to name your layers
                                      name="conv_layer_{}".format(idx),
                                      activation=tf.nn.relu,
                                      kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                      bias_initializer=tf.zeros_initializer(),
                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.beta))
            # Apply 2D max pooling to the output of the conv layer
            output = tf.layers.max_pooling2d(
                output, pool_size=(max_pool_size, max_pool_size), strides=1,
                padding="SAME", name="pool_out_{}".format(idx)
            )
            # Dropout to regularize the network further
            output = tf.layers.dropout(output, rate=self.drop_rate, training=self.is_training)

        # Flatten the CNN output and add a fully connected layer
        with tf.name_scope("child_{}_fully_connected".format(self.child_id)):
            output = tf.layers.flatten(output, name="flatten")
            logits = tf.layers.dense(output, self.num_classes)

        return logits
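
# Illustrative example (not part of the original module): the controller emits a flat
# vector with four numbers per layer. With max_layers=3, a raw output such as
#   np.array([3, 1, 32, 2,  5, 1, 64, 2,  3, 1, 128, 2])
# is chunked by process_raw_controller_output into
#   [[3, 1, 32, 2], [5, 1, 64, 2], [3, 1, 128, 2]],
# which build() reads as (kernel_size, stride, num_filters, max_pool_size) per layer.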
--------------------------------------------------------------------------------
/Controller.py:
--------------------------------------------------------------------------------
import logging

import numpy as np
import tensorflow as tf

from Utils.child_network import ChildCNN
from Utils.cifar10_processor import get_tf_datasets_from_numpy
from Utils.configs import child_network_params, controller_params

logger = logging.getLogger(__name__)


def ema(values):
    """
    Helper function for keeping track of an exponential moving average of a list of values.
    For this module, we use it to maintain an exponential moving average of rewards.

    Args:
        values (list): A list of rewards
    Returns:
        (float) The last value of the exponential moving average
    """
    weights = np.exp(np.linspace(-1., 0., len(values)))
    weights /= weights.sum()
    a = np.convolve(values, weights, mode="full")[:len(values)]
    return a[-1]
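
# Illustrative check (not part of the original module): the weights are
# exp(linspace(-1, 0, n)) normalised to sum to 1, so for two rewards
# ema([0.0, 1.0]) returns roughly 0.269 rather than the plain mean of 0.5.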

class Controller(object):

    def __init__(self):
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)
        self.num_cell_outputs = controller_params['components_per_layer'] * controller_params['max_layers']
        self.reward_history = []
        self.architecture_history = []
        self.division_rate = 100
        with self.graph.as_default():
            self.build_controller()

    def network_generator(self, nas_cell_hidden_state):
        # num_cell_outputs is the number of output units we expect from the NAS cell
        with tf.name_scope('network_generator'):
            nas = tf.contrib.rnn.NASCell(self.num_cell_outputs)
            network_architecture, nas_cell_hidden_state = tf.nn.dynamic_rnn(nas, tf.expand_dims(
                nas_cell_hidden_state, -1), dtype=tf.float32)
            bias_variable = tf.Variable([0.01] * self.num_cell_outputs)
            network_architecture = tf.nn.bias_add(network_architecture, bias_variable)
            return network_architecture[:, -1:, :]

    def generate_child_network(self, child_network_architecture):
        with self.graph.as_default():
            return self.sess.run(self.cnn_dna_output, {self.child_network_architectures: child_network_architecture})

    def build_controller(self):
        logger.info('Building controller network')
        # Build inputs and placeholders
        with tf.name_scope('controller_inputs'):
            # Input to the NASCell
            self.child_network_architectures = tf.placeholder(tf.float32, [None, self.num_cell_outputs],
                                                              name='controller_input')
            # Discounted rewards
            self.discounted_rewards = tf.placeholder(tf.float32, (None, ), name='discounted_rewards')

        # Build controller
        with tf.name_scope('network_generation'):
            with tf.variable_scope('controller'):
                self.controller_output = tf.identity(self.network_generator(self.child_network_architectures),
                                                     name='policy_scores')
                self.cnn_dna_output = tf.cast(tf.scalar_mul(self.division_rate, self.controller_output), tf.int32,
                                              name='controller_prediction')

        # Set up optimizer
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.train.exponential_decay(0.99, self.global_step, 500, 0.96, staircase=True)
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)

        # Gradient and loss computation
        with tf.name_scope('gradient_and_loss'):
            # Define the policy gradient loss for the controller
            self.policy_gradient_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=self.controller_output[:, -1, :],
                labels=self.child_network_architectures))
            # L2 weight decay for the controller weights
            self.l2_loss = tf.reduce_sum(tf.add_n([tf.nn.l2_loss(v) for v in
                                                   tf.trainable_variables(scope="controller")]))
            # Add the above two losses to define the total loss
            self.total_loss = self.policy_gradient_loss + self.l2_loss * controller_params["beta"]
            # Compute the gradients
            self.gradients = self.optimizer.compute_gradients(self.total_loss)

            # Gradients calculated using REINFORCE: scale each gradient by the baseline-subtracted reward
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (grad * self.discounted_rewards, var)

        with tf.name_scope('train_controller'):
            # The main training operation. This applies REINFORCE on the weights of the controller
            self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)

        logger.info('Successfully built controller')

    def train_child_network(self, cnn_dna, child_id):
        """
        Trains a child network and returns the reward, i.e. the validation accuracy
        Args:
            cnn_dna (list): List of tuples representing the child network's DNA
            child_id (str): Name of the child network
        Returns:
            (float) validation accuracy
        """
        logger.info("Training with dna: {}".format(cnn_dna))
        child_graph = tf.Graph()
        with child_graph.as_default():
            sess = tf.Session()

            child_network = ChildCNN(cnn_dna=cnn_dna, child_id=child_id, **child_network_params)

            # Create the input pipeline
            train_dataset, valid_dataset, test_dataset, num_train_batches, num_valid_batches, num_test_batches = \
                get_tf_datasets_from_numpy(batch_size=child_network_params["batch_size"])

            # Generic iterator
            iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
            next_tensor_batch = iterator.get_next()

            # Separate train and validation set init ops
            train_init_ops = iterator.make_initializer(train_dataset)
            valid_init_ops = iterator.make_initializer(valid_dataset)

            # Build the graph
            input_tensor, labels = next_tensor_batch

            # Build the child network, which returns the pre-softmax logits of the child network
            logits = child_network.build(input_tensor)

            # Define the loss function for the child network
            loss_ops = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits, name="loss")

            # Define the training operation for the child network
            train_ops = tf.train.AdamOptimizer(learning_rate=child_network_params["learning_rate"]).minimize(loss_ops)

            # The following operations are for calculating the accuracy of the child network
            pred_ops = tf.nn.softmax(logits, name="preds")
            correct = tf.equal(tf.argmax(pred_ops, 1), tf.argmax(labels, 1), name="correct")
            accuracy_ops = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

            initializer = tf.global_variables_initializer()

            # Training
            sess.run(initializer)
            sess.run(train_init_ops)

            logger.info("Training child CNN {} for {} epochs".format(child_id, child_network_params["max_epochs"]))
            for epoch_idx in range(child_network_params["max_epochs"]):
                avg_loss, avg_acc = [], []

                for batch_idx in range(num_train_batches):
                    loss, _, accuracy = sess.run([loss_ops, train_ops, accuracy_ops])
                    avg_loss.append(loss)
                    avg_acc.append(accuracy)

                logger.info("\tEpoch {}:\tloss - {:.6f}\taccuracy - {:.3f}".format(epoch_idx,
                                                                                   np.mean(avg_loss), np.mean(avg_acc)))

            # Validate and return the reward
            logger.info("Finished training, now calculating validation accuracy")
            sess.run(valid_init_ops)
            avg_val_loss, avg_val_acc = [], []
            for batch_idx in range(num_valid_batches):
                valid_loss, valid_accuracy = sess.run([loss_ops, accuracy_ops])
                avg_val_loss.append(valid_loss)
                avg_val_acc.append(valid_accuracy)
            logger.info("Valid loss - {:.6f}\tValid accuracy - {:.3f}".format(np.mean(avg_val_loss),
                                                                              np.mean(avg_val_acc)))

        return np.mean(avg_val_acc)
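
    # Illustrative REINFORCE bookkeeping (assumed numbers): if the ema baseline over
    # reward_history is 0.42 and the latest mean validation accuracy is 0.47, then
    # train_controller feeds discounted_rewards = [0.05], so every controller gradient
    # is scaled by +0.05 before being applied (see build_controller above).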

    def train_controller(self):
        with self.graph.as_default():
            self.sess.run(tf.global_variables_initializer())

        step = 0
        total_rewards = 0
        child_network_architecture = np.array([[10.0, 128.0, 1.0, 1.0] *
                                               controller_params['max_layers']], dtype=np.float32)

        for episode in range(controller_params['max_episodes']):
            logger.info('=============> Episode {} for Controller'.format(episode))
            step += 1
            episode_reward_buffer = []

            for sub_child in range(controller_params["num_children_per_episode"]):
                # Generate a child network architecture
                child_network_architecture = self.generate_child_network(child_network_architecture)[0]

                if np.any(np.less_equal(child_network_architecture, 0.0)):
                    reward = -1.0
                else:
                    reward = self.train_child_network(cnn_dna=child_network_architecture,
                                                      child_id='child/{}'.format("{}_{}".format(episode, sub_child)))
                episode_reward_buffer.append(reward)

            mean_reward = np.mean(episode_reward_buffer)

            self.reward_history.append(mean_reward)
            self.architecture_history.append(child_network_architecture)
            total_rewards += mean_reward

            child_network_architecture = np.array(self.architecture_history[-step:]).ravel() / self.division_rate
            child_network_architecture = child_network_architecture.reshape((-1, self.num_cell_outputs))
            baseline = ema(self.reward_history)
            last_reward = self.reward_history[-1]
            rewards = [last_reward - baseline]
            logger.info("Buffers before loss calculation")
            logger.info("States: {}".format(child_network_architecture))
            logger.info("Rewards: {}".format(rewards))

            with self.graph.as_default():
                _, loss = self.sess.run([self.train_op, self.total_loss],
                                        {self.child_network_architectures: child_network_architecture,
                                         self.discounted_rewards: rewards})

            logger.info('Episode: {} | Loss: {} | DNA: {} | Reward : {}'.format(
                episode, loss, child_network_architecture.ravel(), mean_reward))
--------------------------------------------------------------------------------
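Note on compute: with the defaults in Utils/configs.py (max_episodes=2000, num_children_per_episode=10, max_epochs=100), running `python train.py` can train up to 20,000 child CNNs for up to 100 epochs each, so a full search is expensive; lowering max_episodes or max_epochs in Utils/configs.py gives a quicker smoke test.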