├── .gitignore ├── README.md ├── main.py ├── net2net.py └── slim ├── __init__.py ├── losses.py ├── ops.py ├── scopes.py └── variables.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.meta 3 | *model* 4 | checkpoint 5 | MNIST_data/ 6 | logs* 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Net2Net : Accelerating Learning via Knowledge Transfer 2 | 3 | - Numpy-based Net2Net module 4 | - Net2Wider 5 | - Net2Deeper 6 | 7 | - Net2Net using Tensorflow 8 | - Test in MNIST dataset 9 | 10 | ## Dependencies 11 | 12 | - Net2Net core module 13 | - Numpy 14 | - Scipy 15 | 16 | - Tensorflow examples 17 | - Tensorflow 18 | - Slim 19 | 20 | ## Results 21 | 22 | - Baseline architecture 23 | 24 | ``` 25 | 5x5x32(conv1)-pool1-5x5x64(conv2)-pool2-1024(fc1)-10(fc2) 26 | ``` 27 | 28 | - [EXP 1] Train a teacher network 29 | 30 | ``` 31 | [Iter: 100] Validation Accuracy : 0.8732 32 | [Iter: 200] Validation Accuracy : 0.9025 33 | [Iter: 300] Validation Accuracy : 0.9313 34 | [Iter: 400] Validation Accuracy : 0.9408 35 | [Iter: 500] Validation Accuracy : 0.9363 36 | [Iter: 600] Validation Accuracy : 0.9466 37 | [Iter: 700] Validation Accuracy : 0.9379 38 | [Iter: 800] Validation Accuracy : 0.9582 39 | [Iter: 900] Validation Accuracy : 0.9583 40 | ``` 41 | 42 | - [EXP 2] Train a student network (Net2Wider) 43 | - # of filters in 'conv1' layer [32->128] 44 | 45 | ``` 46 | [Iter: 100] Validation Accuracy : 0.9136 47 | [Iter: 200] Validation Accuracy : 0.9689 48 | [Iter: 300] Validation Accuracy : 0.9645 49 | [Iter: 400] Validation Accuracy : 0.9757 50 | [Iter: 500] Validation Accuracy : 0.9762 51 | [Iter: 600] Validation Accuracy : 0.9757 52 | [Iter: 700] Validation Accuracy : 0.9752 53 | [Iter: 800] Validation Accuracy : 0.9765 54 | [Iter: 900] Validation Accuracy : 0.9777 55 | ``` 56 | 57 | - [EXP 3] Net2Wider baseline (Random pad) 58 | 59 | ``` 60 | [Iter: 100] Validation Accuracy : 0.9255 61 | [Iter: 200] Validation Accuracy : 0.9361 62 | [Iter: 300] Validation Accuracy : 0.9418 63 | [Iter: 400] Validation Accuracy : 0.9551 64 | [Iter: 500] Validation Accuracy : 0.9608 65 | [Iter: 600] Validation Accuracy : 0.9653 66 | [Iter: 700] Validation Accuracy : 0.9677 67 | [Iter: 800] Validation Accuracy : 0.9659 68 | [Iter: 900] Validation Accuracy : 0.9690 69 | ``` 70 | 71 | - [EXP 4] Train a student network (Net2Deeper) 72 | - Insert a new layer after 'conv1' layer 73 | 74 | ``` 75 | [Iter: 100] Validation Accuracy : 0.9673 76 | [Iter: 200] Validation Accuracy : 0.9646 77 | [Iter: 300] Validation Accuracy : 0.9718 78 | [Iter: 400] Validation Accuracy : 0.9731 79 | [Iter: 500] Validation Accuracy : 0.9765 80 | [Iter: 600] Validation Accuracy : 0.9612 81 | [Iter: 700] Validation Accuracy : 0.9783 82 | [Iter: 800] Validation Accuracy : 0.9812 83 | [Iter: 900] Validation Accuracy : 0.9785 84 | ``` 85 | 86 | - [EXP 5] Net2Deeper baseline (Random initialization) 87 | 88 | ``` 89 | [Iter: 100] Validation Accuracy : 0.9057 90 | [Iter: 200] Validation Accuracy : 0.9059 91 | [Iter: 300] Validation Accuracy : 0.9446 92 | [Iter: 400] Validation Accuracy : 0.9489 93 | [Iter: 500] Validation Accuracy : 0.9541 94 | [Iter: 600] Validation Accuracy : 0.9581 95 | [Iter: 700] Validation Accuracy : 0.9607 96 | [Iter: 800] Validation Accuracy : 0.9499 97 | [Iter: 900] Validation Accuracy : 0.9663 98 | ``` 99 | 100 | ## Notes 101 | - All 
parameters are fixed except new weights from Net2Net.
102 | - The Net2Net core module (net2net.py) can be used with various deep learning libraries (Theano, Caffe, etc.) because its only dependency is NumPy.
103 | 
104 | 
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | Net2Net using Tensorflow
3 | 
4 | @ Test in MNIST dataset
5 | 1. Train a teacher network
6 | 2. Train a student network (Net2Wider)
7 |    - # of filters in 'conv1' layer [32->128]
8 | 3. Random pad (Net2Wider baseline)
9 | 4. Train a student network (Net2Deeper)
10 |    - Insert a new layer after 'conv1' layer
11 | 5. Random initialization (Net2Deeper baseline)
12 | 
13 | NOTE: All parameters are fixed except new weights from Net2Net.
14 | 
15 | Author: Kyunghyun Paeng
16 | 
17 | """
18 | import numpy as np
19 | import scipy.signal
20 | import tensorflow as tf
21 | from slim import ops
22 | from slim import scopes
23 | from slim import variables
24 | from net2net import Net2Net
25 | 
26 | from tensorflow.examples.tutorials.mnist import input_data
27 | 
28 | MODEL='./my-model-500.meta'
29 | WEIGHT='./my-model-500'
30 | BATCH_SIZE = 50
31 | MAX_ITER = 1000
32 | TEST_ITER = 500
33 | mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
34 | 
35 | def train_a_student_network_deeper():
36 |     new_w1, new_b1 = tf_net2deeper(MODEL, WEIGHT, 'conv1')
37 |     with tf.Graph().as_default():
38 |         with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
39 |             x = tf.placeholder(tf.float32, shape=[None, 784])
40 |             y_ = tf.placeholder(tf.float32, shape=[None, 10])
41 |             x_image = tf.reshape(x, [-1,28,28,1])
42 |             net = ops.conv2d(x_image, 32, [5, 5], scope='conv1')
43 |             net = ops.conv2d(net, 32, [5, 5], scope='conv1_new', initializer='constant', weights=new_w1, bias=new_b1, restore=False)
44 |             net = ops.max_pool(net, [2, 2], scope='pool1')
45 |             net = ops.conv2d(net, 64, [5, 5], scope='conv2')
46 |             net = ops.max_pool(net, [2, 2], scope='pool2')
47 |             net = ops.flatten(net, scope='pool2_flat')
48 |             net = ops.fc(net, 1024, scope='fc1')
49 |             net = ops.fc(net, 10, activation=None, scope='fc2')
50 |             y_conv = tf.nn.softmax(net)
51 |             cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), axis=[1]))
52 |             model = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
53 |             correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
54 |             accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
55 |             tf.summary.scalar('loss', cross_entropy)
56 |             tf.summary.scalar('acc', accuracy)
57 |             merged = tf.summary.merge_all()
58 |             saver = tf.train.Saver()
59 |             writer = tf.summary.FileWriter('./logs-deeper', sess.graph)
60 |             sess.run(tf.global_variables_initializer())
61 |             variables_to_restore = tf.get_collection(variables.VARIABLES_TO_RESTORE)
62 |             saver = tf.train.Saver(variables_to_restore)
63 |             saver.restore(sess, WEIGHT)
64 |             print('Net2Deeper...')
65 |             for i in range(MAX_ITER):
66 |                 batch = mnist.train.next_batch(BATCH_SIZE)
67 |                 sess.run(model, feed_dict={x: batch[0], y_: batch[1]})
68 |                 if i % 100 == 0:
69 |                     summary_str, acc = sess.run([merged, accuracy], feed_dict={x: mnist.test.images, y_: mnist.test.labels})
70 |                     writer.add_summary(summary_str, i)
71 |                     print('[Iter: {}] Validation Accuracy : {:.4f}'.format(i,acc))
72 | 
73 | def train_a_student_network_deeper_rand_init():
74 |     with tf.Graph().as_default():
75 |         with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
76 |             x = 
tf.placeholder(tf.float32, shape=[None, 784]) 77 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 78 | x_image = tf.reshape(x, [-1,28,28,1]) 79 | net = ops.conv2d(x_image, 32, [5, 5], scope='conv1', stddev=0.1, bias=0.1) 80 | net = ops.conv2d(net, 32, [5, 5], scope='conv1_new', stddev=0.1, bias=0.1, restore=False) 81 | net = ops.max_pool(net, [2, 2], scope='pool1') 82 | net = ops.conv2d(net, 64, [5, 5], scope='conv2', stddev=0.1, bias=0.1) 83 | net = ops.max_pool(net, [2, 2], scope='pool2') 84 | net = ops.flatten(net, scope='pool2_flat') 85 | net = ops.fc(net, 1024, scope='fc1', stddev=0.1, bias=0.1) 86 | net = ops.fc(net, 10, activation=None, scope='fc2', stddev=0.1, bias=0.1) 87 | y_conv = tf.nn.softmax(net) 88 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), axis=[1])) 89 | model = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 90 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1)) 91 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 92 | tf.summary.scalar('loss', cross_entropy) 93 | tf.summary.scalar('acc', accuracy) 94 | merged = tf.summary.merge_all() 95 | saver = tf.train.Saver() 96 | writer = tf.summary.FileWriter('./logs-deeper-rand', sess.graph) 97 | sess.run(tf.global_variables_initializer()) 98 | variables_to_restore = tf.get_collection(variables.VARIABLES_TO_RESTORE) 99 | saver = tf.train.Saver(variables_to_restore) 100 | saver.restore(sess, WEIGHT) 101 | print('Net2Deeper Baseline (Rand init)...') 102 | for i in range(MAX_ITER): 103 | batch = mnist.train.next_batch(BATCH_SIZE) 104 | sess.run(model, feed_dict={x: batch[0], y_: batch[1]}) 105 | if i % 100 == 0: 106 | summary_str, acc = sess.run([merged, accuracy], feed_dict={x: mnist.test.images, y_: mnist.test.labels}) 107 | writer.add_summary(summary_str, i) 108 | print('[Iter: {}] Validation Accuracy : {:.4f}'.format(i,acc)) 109 | 110 | def train_a_student_network_wider(): 111 | new_width_conv = 128 112 | new_w1, new_b1, new_w2, new_b2 = tf_net2wider(MODEL, WEIGHT, 'conv1', 'conv2', new_width_conv) 113 | with tf.Graph().as_default(): 114 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 115 | x = tf.placeholder(tf.float32, shape=[None, 784]) 116 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 117 | x_image = tf.reshape(x, [-1,28,28,1]) 118 | net = ops.conv2d(x_image, new_width_conv, [5, 5], scope='conv1', initializer='constant', weights=new_w1, bias=new_b1, restore=False) 119 | net = ops.max_pool(net, [2, 2], scope='pool1') 120 | net = ops.conv2d(net, 64, [5, 5], scope='conv2', initializer='constant', weights=new_w2, bias=new_b2, restore=False) 121 | net = ops.max_pool(net, [2, 2], scope='pool2') 122 | net = ops.flatten(net, scope='pool2_flat') 123 | net = ops.fc(net, 1024, scope='fc1') 124 | net = ops.fc(net, 10, activation=None, scope='fc2') 125 | y_conv = tf.nn.softmax(net) 126 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), axis=[1])) 127 | model = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 128 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1)) 129 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 130 | tf.summary.scalar('loss', cross_entropy) 131 | tf.summary.scalar('acc', accuracy) 132 | merged = tf.summary.merge_all() 133 | saver = tf.train.Saver() 134 | writer = tf.summary.FileWriter('./logs-wider', sess.graph) 135 | sess.run(tf.global_variables_initializer()) 136 | variables_to_restore = tf.get_collection(variables.VARIABLES_TO_RESTORE) 137 | saver = 
tf.train.Saver(variables_to_restore) 138 | saver.restore(sess, WEIGHT) 139 | print('Net2Wider...') 140 | for i in range(MAX_ITER): 141 | batch = mnist.train.next_batch(BATCH_SIZE) 142 | sess.run(model, feed_dict={x: batch[0], y_: batch[1]}) 143 | if i % 100 == 0: 144 | summary_str, acc = sess.run([merged, accuracy], feed_dict={x: mnist.test.images, y_: mnist.test.labels}) 145 | writer.add_summary(summary_str, i) 146 | print('[Iter: {}] Validation Accuracy : {:.4f}'.format(i,acc)) 147 | 148 | def train_a_student_network_wider_rand_pad(): 149 | new_width_conv = 128 150 | new_w1, new_b1, new_w2, new_b2 = tf_net2wider_rand(MODEL, WEIGHT, 'conv1', 'conv2', new_width_conv) 151 | with tf.Graph().as_default(): 152 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 153 | x = tf.placeholder(tf.float32, shape=[None, 784]) 154 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 155 | x_image = tf.reshape(x, [-1,28,28,1]) 156 | net = ops.conv2d(x_image, new_width_conv, [5, 5], scope='conv1', initializer='constant', weights=new_w1, bias=new_b1, restore=False) 157 | net = ops.max_pool(net, [2, 2], scope='pool1') 158 | net = ops.conv2d(net, 64, [5, 5], scope='conv2', initializer='constant', weights=new_w2, bias=new_b2, restore=False) 159 | net = ops.max_pool(net, [2, 2], scope='pool2') 160 | net = ops.flatten(net, scope='pool2_flat') 161 | net = ops.fc(net, 1024, scope='fc1', stddev=0.1, bias=0.1) 162 | net = ops.fc(net, 10, activation=None, scope='fc2', stddev=0.1, bias=0.1) 163 | y_conv = tf.nn.softmax(net) 164 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), axis=[1])) 165 | model = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 166 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1)) 167 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 168 | tf.summary.scalar('loss', cross_entropy) 169 | tf.summary.scalar('acc', accuracy) 170 | merged = tf.summary.merge_all() 171 | saver = tf.train.Saver() 172 | writer = tf.summary.FileWriter('./logs-wider-rand', sess.graph) 173 | sess.run(tf.global_variables_initializer()) 174 | variables_to_restore = tf.get_collection(variables.VARIABLES_TO_RESTORE) 175 | saver = tf.train.Saver(variables_to_restore) 176 | saver.restore(sess, WEIGHT) 177 | print('Net2Wider Baseline (Rand pad)...') 178 | for i in range(MAX_ITER): 179 | batch = mnist.train.next_batch(BATCH_SIZE) 180 | sess.run(model, feed_dict={x: batch[0], y_: batch[1]}) 181 | if i % 100 == 0: 182 | summary_str, acc = sess.run([merged, accuracy], feed_dict={x: mnist.test.images, y_: mnist.test.labels}) 183 | writer.add_summary(summary_str, i) 184 | print('[Iter: {}] Validation Accuracy : {:.4f}'.format(i,acc)) 185 | 186 | def train_a_teacher_network(): 187 | x = tf.placeholder(tf.float32, shape=[None, 784]) 188 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 189 | x_image = tf.reshape(x, [-1,28,28,1]) 190 | net = ops.conv2d(x_image, 32, [5, 5], scope='conv1', stddev=0.1, bias=0.1) 191 | net = ops.max_pool(net, [2, 2], scope='pool1') 192 | net = ops.conv2d(net, 64, [5, 5], scope='conv2', stddev=0.1, bias=0.1) 193 | net = ops.max_pool(net, [2, 2], scope='pool2') 194 | net = ops.flatten(net, scope='pool2_flat') 195 | net = ops.fc(net, 1024, scope='fc1', stddev=0.1, bias=0.1) 196 | net = ops.fc(net, 10, activation=None, scope='fc2', stddev=0.1, bias=0.1) 197 | y_conv = tf.nn.softmax(net) 198 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), axis=[1])) 199 | model = 
tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 200 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1)) 201 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 202 | tf.summary.scalar('loss', cross_entropy) 203 | tf.summary.scalar('acc', accuracy) 204 | merged = tf.summary.merge_all() 205 | saver = tf.train.Saver() 206 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 207 | writer = tf.summary.FileWriter('./logs', sess.graph) 208 | sess.run(tf.global_variables_initializer()) 209 | print('Teacher Network...') 210 | for i in range(MAX_ITER): 211 | batch = mnist.train.next_batch(BATCH_SIZE) 212 | sess.run(model, feed_dict={x: batch[0], y_: batch[1]}) 213 | # saver.save(sess, './my-model', global_step=TEST_ITER) 214 | if i % 100 == 0: 215 | summary_str, acc = sess.run([merged, accuracy], feed_dict={x: mnist.test.images, y_: mnist.test.labels}) 216 | writer.add_summary(summary_str, i) 217 | print('[Iter: {}] Validation Accuracy : {:.4f}'.format(i,acc)) 218 | saver.save(sess, './my-model', global_step=TEST_ITER) 219 | 220 | def load_teacher_net(sess, model, weights): 221 | saver = tf.train.import_meta_graph(model) 222 | saver.restore(sess, weights) 223 | return sess.graph 224 | 225 | def get_weight_bias_of_layer(net, layer_name, numpy=True): 226 | layer_name = [ op.name for op in net.get_operations() 227 | if layer_name+'/weights'==op.name 228 | or layer_name+'/biases'==op.name ] 229 | assert len(layer_name) == 2, 'Check layer name' 230 | weights = net.get_tensor_by_name(layer_name[0]+':0') 231 | biases = net.get_tensor_by_name(layer_name[1]+':0') 232 | if numpy: 233 | return weights.eval(), biases.eval() 234 | else: 235 | return weights, biases 236 | 237 | def tf_net2wider(model, weight, target_layer, next_layer, new_width): 238 | n2n = Net2Net() 239 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 240 | net = load_teacher_net(sess, model, weight) 241 | w1, b1 = get_weight_bias_of_layer(net, target_layer) 242 | w2, b2 = get_weight_bias_of_layer(net, next_layer) 243 | nw1, nb1, nw2 = n2n.wider(w1, b1, w2, new_width, True) 244 | return nw1, nb1, nw2, b2 245 | 246 | def tf_net2wider_rand(model, weight, target_layer, next_layer, new_width): 247 | n2n = Net2Net() 248 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 249 | net = load_teacher_net(sess, model, weight) 250 | w1, b1 = get_weight_bias_of_layer(net, target_layer) 251 | w2, b2 = get_weight_bias_of_layer(net, next_layer) 252 | nw1, nb1, nw2 = n2n.wider_rand(w1, b1, w2, new_width) 253 | return nw1, nb1, nw2, b2 254 | 255 | def tf_net2deeper(model, weight, target_layer): 256 | n2n = Net2Net() 257 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: 258 | net = load_teacher_net(sess, model, weight) 259 | w1, b1 = get_weight_bias_of_layer(net, target_layer) 260 | new_w, new_b = n2n.deeper(w1, True) 261 | return new_w, new_b 262 | 263 | if __name__ == '__main__': 264 | # 1. Train a teacher network 265 | train_a_teacher_network() 266 | # 2. Train a student network (Net2Wider) 267 | train_a_student_network_wider() 268 | # 3. Random pad (Net2Wider baseline) 269 | train_a_student_network_wider_rand_pad() 270 | # 4. Train a student network (Net2Deeper) 271 | train_a_student_network_deeper() 272 | # 5. 
Random initialization (Net2Deeper baseline) 273 | train_a_student_network_deeper_rand_init() 274 | -------------------------------------------------------------------------------- /net2net.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of Net2Net (http://arxiv.org/abs/1511.05641) 3 | Numpy modules for Net2Net 4 | - Net2Wider 5 | - Net2Deeper 6 | 7 | Written by Kyunghyun Paeng 8 | 9 | """ 10 | import numpy as np 11 | 12 | class Net2Net(object): 13 | def __init__(self, error=1e-4): 14 | self._error_th = error 15 | print('Net2Net module initialize...') 16 | 17 | def deeper(self, weight, verification=True): 18 | """ Net2Deeper operation 19 | 20 | All weights & biases should be 'numpy' array. 21 | If it is 'conv' type, weight.ndim = 4 (kH, kW, InChannel, OutChannel) 22 | If it is 'fc' type, weight.ndim = 2 (In, Out) 23 | 24 | Args: 25 | weight: weight matrix where the layer to be deepened 26 | 27 | Returns: 28 | Identity matrix & bias fitted to input weight 29 | """ 30 | assert weight.ndim == 4 or weight.ndim == 2, 'Check weight.ndim' 31 | if weight.ndim == 2: 32 | deeper_w = np.eye(weight.shape[1]) 33 | deeper_b = np.zeros(weight.shape[1]) 34 | if verification: 35 | err = np.abs(np.sum(np.dot(weight, deeper_w)-weight)) 36 | assert err < 1e-5, 'Verification failed: [ERROR] {}'.format(err) 37 | else: 38 | deeper_w = np.zeros((weight.shape[0], weight.shape[1], weight.shape[3], weight.shape[3])) 39 | assert weight.shape[0] % 2 == 1 and weight.shape[1] % 2 == 1, 'Kernel size should be odd' 40 | center_h = (weight.shape[0]-1)//2 41 | center_w = (weight.shape[1]-1)//2 42 | for i in range(weight.shape[3]): 43 | tmp = np.zeros((weight.shape[0], weight.shape[1], weight.shape[3])) 44 | tmp[center_h, center_w, i] = 1 45 | deeper_w[:, :, :, i] = tmp 46 | deeper_b = np.zeros(weight.shape[3]) 47 | if verification: 48 | import scipy.signal 49 | inputs = np.random.rand(weight.shape[0]*4, weight.shape[1]*4, weight.shape[2]) 50 | ori = np.zeros((weight.shape[0]*4, weight.shape[1]*4, weight.shape[3])) 51 | new = np.zeros((weight.shape[0]*4, weight.shape[1]*4, weight.shape[3])) 52 | for i in range(weight.shape[3]): 53 | for j in range(inputs.shape[2]): 54 | if j==0: tmp = scipy.signal.convolve2d(inputs[:,:,j], weight[:,:,j,i], mode='same') 55 | else: tmp += scipy.signal.convolve2d(inputs[:,:,j], weight[:,:,j,i], mode='same') 56 | ori[:,:,i] = tmp 57 | for i in range(deeper_w.shape[3]): 58 | for j in range(ori.shape[2]): 59 | if j==0: tmp = scipy.signal.convolve2d(ori[:,:,j], deeper_w[:,:,j,i], mode='same') 60 | else: tmp += scipy.signal.convolve2d(ori[:,:,j], deeper_w[:,:,j,i], mode='same') 61 | new[:,:,i] = tmp 62 | err = np.abs(np.sum(ori-new)) 63 | assert err < self._error_th, 'Verification failed: [ERROR] {}'.format(err) 64 | return deeper_w, deeper_b 65 | 66 | def wider(self, weight1, bias1, weight2, new_width, verification=True): 67 | """ Net2Wider operation 68 | 69 | All weights & biases should be 'numpy' array. 70 | If it is 'conv' type, weight.ndim = 4 (kH, kW, InChannel, OutChannel) 71 | If it is 'fc' type, weight.ndim = 2 (In, Out) 72 | 73 | Args: 74 | weight1: weight matrix of a target layer 75 | bias1: biases of a target layer, bias1.ndim = 1 76 | weight2: weight matrix of a next layer 77 | new_width: It should be larger than old width. 
78 | (i.e., 'conv': weight1.OutChannel < new_width, 79 | 'fc' : weight1.Out < new_width ) 80 | Returns: 81 | Transformed weights & biases (w1, b1, w2) 82 | """ 83 | # Check dimensions 84 | assert bias1.squeeze().ndim==1, 'Check bias.ndim' 85 | assert weight1.ndim == 4 or weight1.ndim == 2, 'Check weight1.ndim' 86 | assert weight2.ndim == 4 or weight2.ndim == 2, 'Check weight2.ndim' 87 | bias1 = bias1.squeeze() 88 | if weight1.ndim == 2: 89 | assert weight1.shape[1] == weight2.shape[0], 'Check shape of weight' 90 | assert weight1.shape[1] == len(bias1), 'Check shape of bias' 91 | assert weight1.shape[1] < new_width, 'new_width should be larger than old width' 92 | return self._wider_fc(weight1, bias1, weight2, new_width, verification) 93 | else: 94 | assert weight1.shape[3] == weight2.shape[2], 'Check shape of weight' 95 | assert weight1.shape[3] == len(bias1), 'Check shape of bias' 96 | assert weight1.shape[3] < new_width, 'new_width should be larger than old width' 97 | return self._wider_conv(weight1, bias1, weight2, new_width, verification) 98 | 99 | def wider_rand(self, weight1, bias1, weight2, new_width): 100 | """ Net2Wider operation with random pad (baseline) 101 | 102 | All weights & biases should be 'numpy' array. 103 | If it is 'conv' type, weight.ndim = 4 (kH, kW, InChannel, OutChannel) 104 | If it is 'fc' type, weight.ndim = 2 (In, Out) 105 | 106 | Args: 107 | weight1: weight matrix of a target layer 108 | bias1: biases of a target layer, bias1.ndim = 1 109 | weight2: weight matrix of a next layer 110 | new_width: It should be larger than old width. 111 | (i.e., 'conv': weight1.OutChannel < new_width, 112 | 'fc' : weight1.Out < new_width ) 113 | Returns: 114 | Transformed weights & biases (w1, b1, w2) 115 | """ 116 | # Check dimensions 117 | assert bias1.squeeze().ndim==1, 'Check bias.ndim' 118 | assert weight1.ndim == 4 or weight1.ndim == 2, 'Check weight1.ndim' 119 | assert weight2.ndim == 4 or weight2.ndim == 2, 'Check weight2.ndim' 120 | bias1 = bias1.squeeze() 121 | if weight1.ndim == 2: 122 | assert weight1.shape[1] == weight2.shape[0], 'Check shape of weight' 123 | assert weight1.shape[1] == len(bias1), 'Check shape of bias' 124 | assert weight1.shape[1] < new_width, 'new_width should be larger than old width' 125 | return self._wider_fc_rand(weight1, bias1, weight2, new_width) 126 | else: 127 | assert weight1.shape[3] == weight2.shape[2], 'Check shape of weight' 128 | assert weight1.shape[3] == len(bias1), 'Check shape of bias' 129 | assert weight1.shape[3] < new_width, 'new_width should be larger than old width' 130 | return self._wider_conv_rand(weight1, bias1, weight2, new_width) 131 | 132 | def _wider_conv(self, teacher_w1, teacher_b1, teacher_w2, new_width, verification): 133 | rand = np.random.randint(teacher_w1.shape[3], size=(new_width-teacher_w1.shape[3])) 134 | replication_factor = np.bincount(rand) 135 | student_w1 = teacher_w1.copy() 136 | student_w2 = teacher_w2.copy() 137 | student_b1 = teacher_b1.copy() 138 | # target layer update (i) 139 | for i in range(len(rand)): 140 | teacher_index = rand[i] 141 | new_weight = teacher_w1[:, :, :, teacher_index] 142 | new_weight = new_weight[:, :, :, np.newaxis] 143 | student_w1 = np.concatenate((student_w1, new_weight), axis=3) 144 | student_b1 = np.append(student_b1, teacher_b1[teacher_index]) 145 | # next layer update (i+1) 146 | for i in range(len(rand)): 147 | teacher_index = rand[i] 148 | factor = replication_factor[teacher_index] + 1 149 | assert factor > 1, 'Error in Net2Wider' 150 | new_weight = teacher_w2[:, 
:, teacher_index, :]*(1./factor) 151 | new_weight_re = new_weight[:, :, np.newaxis, :] 152 | student_w2 = np.concatenate((student_w2, new_weight_re), axis=2) 153 | student_w2[:, :, teacher_index, :] = new_weight 154 | if verification: 155 | import scipy.signal 156 | inputs = np.random.rand(teacher_w1.shape[0]*4, teacher_w1.shape[1]*4, teacher_w1.shape[2]) 157 | ori1 = np.zeros((teacher_w1.shape[0]*4, teacher_w1.shape[1]*4, teacher_w1.shape[3])) 158 | ori2 = np.zeros((teacher_w1.shape[0]*4, teacher_w1.shape[1]*4, teacher_w2.shape[3])) 159 | new1 = np.zeros((teacher_w1.shape[0]*4, teacher_w1.shape[1]*4, student_w1.shape[3])) 160 | new2 = np.zeros((teacher_w1.shape[0]*4, teacher_w1.shape[1]*4, student_w2.shape[3])) 161 | for i in range(teacher_w1.shape[3]): 162 | for j in range(inputs.shape[2]): 163 | if j==0: tmp = scipy.signal.convolve2d(inputs[:,:,j], teacher_w1[:,:,j,i], mode='same') 164 | else: tmp += scipy.signal.convolve2d(inputs[:,:,j], teacher_w1[:,:,j,i], mode='same') 165 | ori1[:,:,i] = tmp + teacher_b1[i] 166 | for i in range(teacher_w2.shape[3]): 167 | for j in range(ori1.shape[2]): 168 | if j==0: tmp = scipy.signal.convolve2d(ori1[:,:,j], teacher_w2[:,:,j,i], mode='same') 169 | else: tmp += scipy.signal.convolve2d(ori1[:,:,j], teacher_w2[:,:,j,i], mode='same') 170 | ori2[:,:,i] = tmp 171 | for i in range(student_w1.shape[3]): 172 | for j in range(inputs.shape[2]): 173 | if j==0: tmp = scipy.signal.convolve2d(inputs[:,:,j], student_w1[:,:,j,i], mode='same') 174 | else: tmp += scipy.signal.convolve2d(inputs[:,:,j], student_w1[:,:,j,i], mode='same') 175 | new1[:,:,i] = tmp + student_b1[i] 176 | for i in range(student_w2.shape[3]): 177 | for j in range(new1.shape[2]): 178 | if j==0: tmp = scipy.signal.convolve2d(new1[:,:,j], student_w2[:,:,j,i], mode='same') 179 | else: tmp += scipy.signal.convolve2d(new1[:,:,j], student_w2[:,:,j,i], mode='same') 180 | new2[:,:,i] = tmp 181 | err = np.abs(np.sum(ori2-new2)) 182 | assert err < self._error_th, 'Verification failed: [ERROR] {}'.format(err) 183 | return student_w1, student_b1, student_w2 184 | 185 | def _wider_conv_rand(self, teacher_w1, teacher_b1, teacher_w2, new_width): 186 | size = new_width-teacher_w1.shape[3] 187 | student_w1 = teacher_w1.copy() 188 | student_w2 = teacher_w2.copy() 189 | student_b1 = teacher_b1.copy() 190 | # target layer update (i) 191 | for i in range(size): 192 | shape = teacher_w1[:,:,:,0].shape 193 | new_weight = np.random.normal(0, 0.1, size=shape) 194 | new_weight = new_weight[:, :, :, np.newaxis] 195 | student_w1 = np.concatenate((student_w1, new_weight), axis=3) 196 | student_b1 = np.append(student_b1, 0.1) 197 | # next layer update (i+1) 198 | for i in range(size): 199 | shape = teacher_w2[:,:,0,:].shape 200 | new_weight = np.random.normal(0, 0.1, size=shape) 201 | new_weight_re = new_weight[:, :, np.newaxis, :] 202 | student_w2 = np.concatenate((student_w2, new_weight_re), axis=2) 203 | return student_w1, student_b1, student_w2 204 | 205 | def _wider_fc(self, teacher_w1, teacher_b1, teacher_w2, new_width, verification): 206 | rand = np.random.randint(teacher_w1.shape[1], size=(new_width-teacher_w1.shape[1])) 207 | replication_factor = np.bincount(rand) 208 | student_w1 = teacher_w1.copy() 209 | student_w2 = teacher_w2.copy() 210 | student_b1 = teacher_b1.copy() 211 | # target layer update (i) 212 | for i in range(len(rand)): 213 | teacher_index = rand[i] 214 | new_weight = teacher_w1[:, teacher_index] 215 | new_weight = new_weight[:, np.newaxis] 216 | student_w1 = np.concatenate((student_w1, 
new_weight), axis=1) 217 | student_b1 = np.append(student_b1, teacher_b1[teacher_index]) 218 | # next layer update (i+1) 219 | for i in range(len(rand)): 220 | teacher_index = rand[i] 221 | factor = replication_factor[teacher_index] + 1 222 | assert factor > 1, 'Error in Net2Wider' 223 | new_weight = teacher_w2[teacher_index,:]*(1./factor) 224 | new_weight = new_weight[np.newaxis, :] 225 | student_w2 = np.concatenate((student_w2, new_weight), axis=0) 226 | student_w2[teacher_index,:] = new_weight 227 | if verification: 228 | inputs = np.random.rand(1, teacher_w1.shape[0]) 229 | ori1 = np.dot(inputs, teacher_w1) + teacher_b1 230 | ori2 = np.dot(ori1, teacher_w2) 231 | new1 = np.dot(inputs, student_w1) + student_b1 232 | new2 = np.dot(new1, student_w2) 233 | err = np.abs(np.sum(ori2-new2)) 234 | assert err < self._error_th, 'Verification failed: [ERROR] {}'.format(err) 235 | return student_w1, student_b1, student_w2 236 | 237 | def _wider_fc_rand(self, teacher_w1, teacher_b1, teacher_w2, new_width): 238 | size = new_width-teacher_w1.shape[1] 239 | student_w1 = teacher_w1.copy() 240 | student_w2 = teacher_w2.copy() 241 | student_b1 = teacher_b1.copy() 242 | # target layer update (i) 243 | for i in range(size): 244 | shape = teacher_w1[:,0].shape 245 | new_weight = np.random.normal(0, 0.1, size=shape) 246 | new_weight = new_weight[:, np.newaxis] 247 | student_w1 = np.concatenate((student_w1, new_weight), axis=1) 248 | student_b1 = np.append(student_b1, 0.1) 249 | # next layer update (i+1) 250 | for i in range(size): 251 | shape = teacher_w2[0,:].shape 252 | new_weight = np.random.normal(0, 0.1, size=shape) 253 | new_weight = new_weight[np.newaxis, :] 254 | student_w2 = np.concatenate((student_w2, new_weight), axis=0) 255 | return student_w1, student_b1, student_w2 256 | 257 | if __name__ == '__main__': 258 | """ Net2Net Class Test """ 259 | obj = Net2Net() 260 | 261 | w1 = np.random.rand(100, 50) 262 | obj.deeper(w1) 263 | print('Succeed: Net2Deeper (fc)') 264 | 265 | w1 = np.random.rand(3,3,16,32) 266 | obj.deeper(w1) 267 | print('Succeed: Net2Deeper (conv)') 268 | 269 | w1 = np.random.rand(100, 50) 270 | b1 = np.random.rand(50,1) 271 | w2 = np.random.rand(50, 10) 272 | obj.wider(w1, b1, w2, 70) 273 | print('Succeed: Net2Wider (fc)') 274 | 275 | w1 = np.random.rand(3,3,16,32) 276 | b1 = np.random.rand(32) 277 | w2 = np.random.rand(3,3,32,64) 278 | obj.wider(w1, b1, w2, 48) 279 | print('Succeed: Net2Wider (conv)') 280 | -------------------------------------------------------------------------------- /slim/__init__.py: -------------------------------------------------------------------------------- 1 | from slim import losses 2 | from slim import ops 3 | from slim import scopes 4 | from slim import variables 5 | from slim.scopes import arg_scope 6 | -------------------------------------------------------------------------------- /slim/losses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains convenience wrappers for various Neural Network TensorFlow losses.
16 | 
17 | All the losses defined here add themselves to the LOSSES_COLLECTION
18 | collection.
19 | 
20 | l1_loss: Define a L1 Loss, useful for regularization, i.e. lasso.
21 | l2_loss: Define a L2 Loss, useful for regularization, i.e. weight decay.
22 | cross_entropy_loss: Define a cross entropy loss using
23 | softmax_cross_entropy_with_logits. Useful for classification.
24 | """
25 | from __future__ import absolute_import
26 | from __future__ import division
27 | from __future__ import print_function
28 | 
29 | 
30 | import tensorflow as tf
31 | 
32 | # In order to gather all losses in a network, the user should use this
33 | # key for get_collection, i.e:
34 | # losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
35 | LOSSES_COLLECTION = '_losses'
36 | 
37 | 
38 | def l1_loss(tensor, weight=1.0, scope=None):
39 |   """Define a L1Loss, useful for regularization, i.e. lasso.
40 | 
41 |   Args:
42 |     tensor: tensor to regularize.
43 |     weight: scale the loss by this factor.
44 |     scope: Optional scope for op_scope.
45 | 
46 |   Returns:
47 |     the L1 loss op.
48 |   """
49 |   with tf.op_scope([tensor], scope, 'L1Loss'):
50 |     weight = tf.convert_to_tensor(weight,
51 |                                   dtype=tensor.dtype.base_dtype,
52 |                                   name='loss_weight')
53 |     loss = tf.multiply(weight, tf.reduce_sum(tf.abs(tensor)), name='value')
54 |     tf.add_to_collection(LOSSES_COLLECTION, loss)
55 |     return loss
56 | 
57 | 
58 | def l2_loss(tensor, weight=1.0, scope=None):
59 |   """Define a L2Loss, useful for regularization, i.e. weight decay.
60 | 
61 |   Args:
62 |     tensor: tensor to regularize.
63 |     weight: an optional weight to modulate the loss.
64 |     scope: Optional scope for op_scope.
65 | 
66 |   Returns:
67 |     the L2 loss op.
68 |   """
69 |   with tf.op_scope([tensor], scope, 'L2Loss'):
70 |     weight = tf.convert_to_tensor(weight,
71 |                                   dtype=tensor.dtype.base_dtype,
72 |                                   name='loss_weight')
73 |     loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value')
74 |     tf.add_to_collection(LOSSES_COLLECTION, loss)
75 |     return loss
76 | 
77 | 
78 | def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0,
79 |                        weight=1.0, scope=None):
80 |   """Define a Cross Entropy loss using softmax_cross_entropy_with_logits.
81 | 
82 |   It can scale the loss by weight factor, and smooth the labels.
83 | 
84 |   Args:
85 |     logits: [batch_size, num_classes] logits outputs of the network.
86 |     one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels.
87 |     label_smoothing: if greater than 0 then smooth the labels.
88 |     weight: scale the loss by this factor.
89 |     scope: Optional scope for op_scope.
90 | 
91 |   Returns:
92 |     A tensor with the softmax_cross_entropy loss. 
93 | """ 94 | logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape()) 95 | with tf.op_scope([logits, one_hot_labels], scope, 'CrossEntropyLoss'): 96 | num_classes = one_hot_labels.get_shape()[-1].value 97 | one_hot_labels = tf.cast(one_hot_labels, logits.dtype) 98 | if label_smoothing > 0: 99 | smooth_positives = 1.0 - label_smoothing 100 | smooth_negatives = label_smoothing / num_classes 101 | one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives 102 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, 103 | one_hot_labels, 104 | name='xentropy') 105 | weight = tf.convert_to_tensor(weight, 106 | dtype=logits.dtype.base_dtype, 107 | name='loss_weight') 108 | loss = tf.mul(weight, tf.reduce_mean(cross_entropy), name='value') 109 | tf.add_to_collection(LOSSES_COLLECTION, loss) 110 | return loss 111 | -------------------------------------------------------------------------------- /slim/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains convenience wrappers for typical Neural Network TensorFlow layers. 16 | 17 | Additionally it maintains a collection with update_ops that need to be 18 | updated after the ops have been computed, for exmaple to update moving means 19 | and moving variances of batch_norm. 20 | 21 | Ops that have different behavior during training or eval have an is_training 22 | parameter. Additionally Ops that contain variables.variable have a trainable 23 | parameter, which control if the ops variables are trainable or not. 24 | """ 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | 30 | 31 | import tensorflow as tf 32 | 33 | from tensorflow.python.training import moving_averages 34 | 35 | from slim import losses 36 | from slim import scopes 37 | from slim import variables 38 | 39 | # Used to keep the update ops done by batch_norm. 40 | UPDATE_OPS_COLLECTION = '_update_ops_' 41 | 42 | 43 | @scopes.add_arg_scope 44 | def batch_norm(inputs, 45 | decay=0.999, 46 | scale=False, 47 | epsilon=0.001, 48 | moving_vars='moving_vars', 49 | activation=None, 50 | is_training=True, 51 | trainable=True, 52 | restore=True, 53 | scope=None): 54 | """Adds a Batch Normalization layer. 55 | 56 | Args: 57 | inputs: a tensor of size [batch_size, height, width, channels] 58 | or [batch_size, channels]. 59 | decay: decay for the moving average. 60 | scale: If True, multiply by gamma. If False, gamma is 61 | not used. When the next layer is linear (also e.g. ReLU), this can be 62 | disabled since the scaling can be done by the next layer. 63 | epsilon: small float added to variance to avoid dividing by zero. 64 | moving_vars: collection to store the moving_mean and moving_variance. 
65 | activation: activation function. 66 | is_training: whether or not the model is in training mode. 67 | trainable: whether or not the variables should be trainable or not. 68 | restore: whether or not the variables should be marked for restore. 69 | scope: Optional scope for variable_op_scope. 70 | 71 | Returns: 72 | a tensor representing the output of the operation. 73 | 74 | """ 75 | inputs_shape = inputs.get_shape() 76 | with tf.variable_op_scope([inputs], scope, 'BatchNorm'): 77 | axis = range(len(inputs_shape) - 1) 78 | params_shape = inputs_shape[-1:] 79 | with scopes.arg_scope([variables.variable], restore=restore): 80 | # Allocate parameters for the beta and gamma of the normalization. 81 | beta = variables.variable('beta', 82 | params_shape, 83 | initializer=tf.zeros_initializer, 84 | trainable=trainable) 85 | if scale: 86 | gamma = variables.variable('gamma', 87 | params_shape, 88 | initializer=tf.ones, 89 | trainable=trainable) 90 | else: 91 | gamma = None 92 | # Create moving_mean and moving_variance add them to moving_vars and 93 | # GraphKeys.MOVING_AVERAGE_VARIABLES collections. 94 | with scopes.arg_scope([variables.variable], trainable=False, 95 | collections=[ 96 | moving_vars, 97 | tf.GraphKeys.MOVING_AVERAGE_VARIABLES]): 98 | moving_mean = variables.variable('moving_mean', 99 | params_shape, 100 | initializer=tf.zeros_initializer) 101 | moving_variance = variables.variable('moving_variance', 102 | params_shape, 103 | initializer=tf.ones) 104 | if is_training: 105 | # Calculate the moments based on the individual batch. 106 | mean, variance = tf.nn.moments(inputs, axis) 107 | 108 | update_moving_mean = moving_averages.assign_moving_average( 109 | moving_mean, mean, decay) 110 | tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean) 111 | update_moving_variance = moving_averages.assign_moving_average( 112 | moving_variance, variance, decay) 113 | tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance) 114 | else: 115 | # Just use the moving_mean and moving_variance. 116 | mean = moving_mean 117 | variance = moving_variance 118 | # Normalize the activations. 119 | outputs = tf.nn.batch_normalization( 120 | inputs, mean, variance, beta, gamma, epsilon) 121 | outputs.set_shape(inputs.get_shape()) 122 | if activation: 123 | outputs = activation(outputs) 124 | return outputs 125 | 126 | 127 | @scopes.add_arg_scope 128 | def conv2d(inputs, 129 | num_filters_out, 130 | kernel_size, 131 | stride=1, 132 | padding='SAME', 133 | activation=tf.nn.relu, 134 | initializer='normal', 135 | stddev=0.01, 136 | weights=0.0, 137 | bias=0.0, 138 | weight_decay=0, 139 | batch_norm_params=None, 140 | is_training=True, 141 | trainable=True, 142 | restore=True, 143 | scope=None): 144 | """Adds a 2D convolution followed by an optional batch_norm layer. 145 | 146 | conv2d creates a variable called 'weights', representing the convolutional 147 | kernel, that is convolved with the input. If `batch_norm_params` is None, a 148 | second variable called 'biases' is added to the result of the convolution 149 | operation. 150 | 151 | Args: 152 | inputs: a tensor of size [batch_size, height, width, channels]. 153 | num_filters_out: the number of output filters. 154 | kernel_size: a 2-D list comprising of the height and width of the filters. 155 | stride: the stride in height and width of the convolution. 156 | padding: one of 'VALID' or 'SAME'. 157 | activation: activation function. 158 | stddev: standard deviation of the truncated guassian weight distribution. 
159 | bias: the initial value of the biases. 160 | weight_decay: the weight decay. 161 | batch_norm_params: parameters for the batch_norm. If is None don't use it. 162 | is_training: whether or not the model is in training mode. 163 | trainable: whether or not the variables should be trainable or not. 164 | restore: whether or not the variables should be marked for restore. 165 | scope: Optional scope for variable_op_scope. 166 | 167 | Returns: 168 | a tensor representing the output of the operation. 169 | 170 | Raises: 171 | ValueError: if 'kernel_size' is not a 2-D list. 172 | """ 173 | if len(kernel_size) != 2: 174 | raise ValueError('kernel_size must be a 2-D list.') 175 | with tf.variable_op_scope([inputs], scope, 'Conv'): 176 | num_filters_in = inputs.get_shape()[-1] 177 | weights_shape = [kernel_size[0], kernel_size[1], 178 | num_filters_in, num_filters_out] 179 | if initializer == 'normal': 180 | weights_initializer = tf.truncated_normal_initializer(stddev=stddev) 181 | elif initializer == 'constant': 182 | weights_initializer = tf.constant_initializer(weights) 183 | l2_regularizer = lambda t: losses.l2_loss(t, weight_decay) 184 | weights = variables.variable('weights', 185 | shape=weights_shape, 186 | initializer=weights_initializer, 187 | regularizer=l2_regularizer, 188 | trainable=trainable, 189 | restore=restore) 190 | conv = tf.nn.conv2d(inputs, weights, [1, stride, stride, 1], 191 | padding=padding) 192 | if batch_norm_params is not None: 193 | with scopes.arg_scope([batch_norm], is_training=is_training, 194 | trainable=trainable, restore=restore): 195 | outputs = batch_norm(conv, **batch_norm_params) 196 | else: 197 | bias_shape = [num_filters_out,] 198 | bias_initializer = tf.constant_initializer(bias) 199 | biases = variables.variable('biases', 200 | shape=bias_shape, 201 | initializer=bias_initializer, 202 | trainable=trainable, 203 | restore=restore) 204 | outputs = tf.nn.bias_add(conv, biases) 205 | if activation: 206 | outputs = activation(outputs) 207 | return outputs 208 | 209 | 210 | @scopes.add_arg_scope 211 | def fc(inputs, 212 | num_units_out, 213 | activation=tf.nn.relu, 214 | initializer='normal', 215 | stddev=0.01, 216 | weights=0.0, 217 | bias=0.0, 218 | weight_decay=0, 219 | batch_norm_params=None, 220 | is_training=True, 221 | trainable=True, 222 | restore=True, 223 | scope=None): 224 | """Adds a fully connected layer followed by an optional batch_norm layer. 225 | 226 | FC creates a variable called 'weights', representing the fully connected 227 | weight matrix, that is multiplied by the input. If `batch_norm` is None, a 228 | second variable called 'biases' is added to the result of the initial 229 | vector-matrix multiplication. 230 | 231 | Args: 232 | inputs: a [B x N] tensor where B is the batch size and N is the number of 233 | input units in the layer. 234 | num_units_out: the number of output units in the layer. 235 | activation: activation function. 236 | stddev: the standard deviation for the weights. 237 | bias: the initial value of the biases. 238 | weight_decay: the weight decay. 239 | batch_norm_params: parameters for the batch_norm. If is None don't use it. 240 | is_training: whether or not the model is in training mode. 241 | trainable: whether or not the variables should be trainable or not. 242 | restore: whether or not the variables should be marked for restore. 243 | scope: Optional scope for variable_op_scope. 244 | 245 | Returns: 246 | the tensor variable representing the result of the series of operations. 
247 | """ 248 | with tf.variable_op_scope([inputs], scope, 'FC'): 249 | num_units_in = inputs.get_shape()[1] 250 | weights_shape = [num_units_in, num_units_out] 251 | if initializer == 'normal': 252 | weights_initializer = tf.truncated_normal_initializer(stddev=stddev) 253 | elif initializer == 'constant': 254 | weights_initializer = tf.constant_initializer(weights) 255 | l2_regularizer = lambda t: losses.l2_loss(t, weight_decay) 256 | weights = variables.variable('weights', 257 | shape=weights_shape, 258 | initializer=weights_initializer, 259 | regularizer=l2_regularizer, 260 | trainable=trainable, 261 | restore=restore) 262 | if batch_norm_params is not None: 263 | outputs = tf.matmul(inputs, weights) 264 | with scopes.arg_scope([batch_norm], is_training=is_training, 265 | trainable=trainable, restore=restore): 266 | outputs = batch_norm(outputs, **batch_norm_params) 267 | else: 268 | bias_shape = [num_units_out,] 269 | bias_initializer = tf.constant_initializer(bias) 270 | biases = variables.variable('biases', 271 | shape=bias_shape, 272 | initializer=bias_initializer, 273 | trainable=trainable, 274 | restore=restore) 275 | outputs = tf.nn.xw_plus_b(inputs, weights, biases) 276 | if activation: 277 | outputs = activation(outputs) 278 | return outputs 279 | 280 | 281 | def one_hot_encoding(labels, num_classes, scope=None): 282 | """Transform numeric labels into onehot_labels. 283 | 284 | Args: 285 | labels: [batch_size] target labels. 286 | num_classes: total number of classes. 287 | scope: Optional scope for op_scope. 288 | Returns: 289 | one hot encoding of the labels. 290 | """ 291 | with tf.op_scope([labels], scope, 'OneHotEncoding'): 292 | batch_size = labels.get_shape()[0] 293 | indices = tf.expand_dims(tf.range(0, batch_size), 1) 294 | labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype) 295 | concated = tf.concat(1, [indices, labels]) 296 | onehot_labels = tf.sparse_to_dense( 297 | concated, tf.pack([batch_size, num_classes]), 1.0, 0.0) 298 | onehot_labels.set_shape([batch_size, num_classes]) 299 | return onehot_labels 300 | 301 | 302 | @scopes.add_arg_scope 303 | def max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): 304 | """Adds a Max Pooling layer. 305 | 306 | It is assumed by the wrapper that the pooling is only done per image and not 307 | in depth or batch. 308 | 309 | Args: 310 | inputs: a tensor of size [batch_size, height, width, depth]. 311 | kernel_size: the size of the pooling kernel over which the op is computed. 312 | stride: the stride in height and width of the convolution. 313 | padding: the padding method, either 'VALID' or 'SAME'. 314 | scope: Optional scope for op_scope. 315 | 316 | Returns: 317 | a tensor representing the results of the pooling operation. 318 | Raises: 319 | ValueError: if 'kernel_size' is not a 2-D list 320 | """ 321 | if len(kernel_size) != 2: 322 | raise ValueError('kernel_size must be a 2-D list.') 323 | with tf.op_scope([inputs], scope, 'MaxPool'): 324 | return tf.nn.max_pool(inputs, 325 | ksize=[1, kernel_size[0], kernel_size[1], 1], 326 | strides=[1, stride, stride, 1], 327 | padding=padding) 328 | 329 | 330 | @scopes.add_arg_scope 331 | def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): 332 | """Adds a Avg Pooling layer. 333 | 334 | It is assumed by the wrapper that the pooling is only done per image and not 335 | in depth or batch. 336 | 337 | Args: 338 | inputs: a tensor of size [batch_size, height, width, depth]. 
339 |     kernel_size: the size of the pooling kernel over which the op is computed.
340 |     stride: the stride in height and width of the convolution.
341 |     padding: the padding method, either 'VALID' or 'SAME'.
342 |     scope: Optional scope for op_scope.
343 | 
344 |   Returns:
345 |     a tensor representing the results of the pooling operation.
346 |   Raises:
347 |     ValueError: if 'kernel_size' is not a 2-D list
348 |   """
349 |   if len(kernel_size) != 2:
350 |     raise ValueError('kernel_size must be a 2-D list.')
351 |   with tf.op_scope([inputs], scope, 'AvgPool'):
352 |     return tf.nn.avg_pool(inputs,
353 |                           ksize=[1, kernel_size[0], kernel_size[1], 1],
354 |                           strides=[1, stride, stride, 1],
355 |                           padding=padding)
356 | 
357 | 
358 | @scopes.add_arg_scope
359 | def dropout(inputs, keep_prob=0.5, is_training=True, scope=None):
360 |   """Returns a dropout layer applied to the input.
361 | 
362 |   Args:
363 |     inputs: the tensor to pass to the Dropout layer.
364 |     keep_prob: the probability that each input unit is kept.
365 |     is_training: whether or not the model is in training mode. If so, dropout is
366 |       applied and values scaled. Otherwise, inputs is returned.
367 |     scope: Optional scope for op_scope.
368 | 
369 |   Returns:
370 |     a tensor representing the output of the operation.
371 |   """
372 |   if is_training and keep_prob > 0:
373 |     with tf.op_scope([inputs], scope, 'Dropout'):
374 |       return tf.nn.dropout(inputs, keep_prob)
375 |   else:
376 |     return inputs
377 | 
378 | 
379 | def flatten(inputs, scope=None):
380 |   """Flattens the input while maintaining the batch_size.
381 | 
382 |   Assumes that the first dimension represents the batch.
383 | 
384 |   Args:
385 |     inputs: a tensor of size [batch_size, ...].
386 |     scope: Optional scope for op_scope.
387 | 
388 |   Returns:
389 |     a flattened tensor with shape [batch_size, k].
390 |   Raises:
391 |     ValueError: if inputs.shape is wrong.
392 |   """
393 |   if len(inputs.get_shape()) < 2:
394 |     raise ValueError('Inputs must have at least 2 dimensions')
395 |   dims = inputs.get_shape()[1:]
396 |   k = dims.num_elements()
397 |   with tf.op_scope([inputs], scope, 'Flatten'):
398 |     return tf.reshape(inputs, [-1, k])
399 | 
400 | 
401 | def repeat_op(repetitions, inputs, op, *args, **kwargs):
402 |   """Build a sequential Tower starting from inputs by using an op repeatedly.
403 | 
404 |   It creates new scopes for each operation by increasing the counter.
405 |   Example: given repeat_op(3, _, ops.conv2d, 64, [3, 3], scope='conv1')
406 |   it will repeat the given op under the following variable_scopes:
407 |     conv1/Conv
408 |     conv1/Conv_1
409 |     conv1/Conv_2
410 | 
411 |   Args:
412 |     repetitions: number of repetitions.
413 |     inputs: a tensor of size [batch_size, height, width, channels].
414 |     op: an operation.
415 |     *args: args for the op.
416 |     **kwargs: kwargs for the op.
417 | 
418 |   Returns:
419 |     a tensor result of applying the operation op, repetitions times.
420 |   Raises:
421 |     ValueError: if the op is unknown or wrong.
422 |   """
423 |   scope = kwargs.pop('scope', None)
424 |   with tf.variable_op_scope([inputs], scope, 'RepeatOp'):
425 |     tower = inputs
426 |     for _ in range(repetitions):
427 |       tower = op(tower, *args, **kwargs)
428 |     return tower
429 | 
--------------------------------------------------------------------------------
/slim/scopes.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the new arg_scope used for TF-Slim ops. 16 | 17 | Allows one to define models much more compactly by eliminating boilerplate 18 | code. This is accomplished through the use of argument scoping (arg_scope). 19 | 20 | Example of how to use scopes.arg_scope: 21 | 22 | with slim.arg_scope(ops.conv2d, padding='SAME', 23 | stddev=0.01, weight_decay=0.0005): 24 | net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') 25 | net = ops.conv2d(net, 256, [5, 5], scope='conv2') 26 | 27 | The first call to conv2d will use predefined args: 28 | ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 29 | stddev=0.01, weight_decay=0.0005, scope='conv1') 30 | 31 | The second call to Conv will overwrite padding: 32 | ops.conv2d(inputs, 256, [5, 5], padding='SAME', 33 | stddev=0.01, weight_decay=0.0005, scope='conv2') 34 | 35 | Example of how to use scopes.add_arg_scope: 36 | 37 | @scopes.add_arg_scope 38 | def conv2d(*args, **kwargs) 39 | """ 40 | from __future__ import absolute_import 41 | from __future__ import division 42 | from __future__ import print_function 43 | 44 | import contextlib 45 | import functools 46 | 47 | 48 | from tensorflow.python.framework import ops 49 | 50 | _ARGSTACK_KEY = ("__arg_stack",) 51 | 52 | _DECORATED_OPS = set() 53 | 54 | 55 | def _get_arg_stack(): 56 | stack = ops.get_collection(_ARGSTACK_KEY) 57 | if stack: 58 | return stack[0] 59 | else: 60 | stack = [{}] 61 | ops.add_to_collection(_ARGSTACK_KEY, stack) 62 | return stack 63 | 64 | 65 | def _current_arg_scope(): 66 | stack = _get_arg_stack() 67 | return stack[-1] 68 | 69 | 70 | def _add_op(op): 71 | key_op = (op.__module__, op.__name__) 72 | if key_op not in _DECORATED_OPS: 73 | _DECORATED_OPS.add(key_op) 74 | 75 | 76 | @contextlib.contextmanager 77 | def arg_scope(list_ops, **kwargs): 78 | """Stores the default arguments for the given set of list_ops. 79 | 80 | Args: 81 | list_ops: List or tuple of operations to set argument scope for. Every op in 82 | list_ops need to be decorated with @add_arg_scope to work. 83 | **kwargs: keyword=value that will define the defaults for each op in 84 | list_ops. All the ops need to accept the given set of arguments. 85 | 86 | Yields: 87 | the current_scope, which is a dictionary of {op: {arg: value}} 88 | Raises: 89 | TypeError: if list_ops is not a list or a tuple. 90 | ValueError: if any op in list_ops has not be decorated with @add_arg_scope. 
91 | """ 92 | if not isinstance(list_ops, (list, tuple)): 93 | raise TypeError("list_ops is not a list or a tuple") 94 | try: 95 | current_scope = _current_arg_scope().copy() 96 | for op in list_ops: 97 | key_op = (op.__module__, op.__name__) 98 | if not has_arg_scope(op): 99 | raise ValueError("%s is not decorated with @add_arg_scope", key_op) 100 | if key_op in current_scope: 101 | current_kwargs = current_scope[key_op].copy() 102 | current_kwargs.update(kwargs) 103 | current_scope[key_op] = current_kwargs 104 | else: 105 | current_scope[key_op] = kwargs.copy() 106 | _get_arg_stack().append(current_scope) 107 | yield current_scope 108 | finally: 109 | _get_arg_stack().pop() 110 | 111 | 112 | def add_arg_scope(func): 113 | """Decorates a function with args so it can be used within an arg_scope. 114 | 115 | Args: 116 | func: function to decorate. 117 | 118 | Returns: 119 | A tuple with the decorated function func_with_args(). 120 | """ 121 | @functools.wraps(func) 122 | def func_with_args(*args, **kwargs): 123 | current_scope = _current_arg_scope() 124 | current_args = kwargs 125 | key_func = (func.__module__, func.__name__) 126 | if key_func in current_scope: 127 | current_args = current_scope[key_func].copy() 128 | current_args.update(kwargs) 129 | return func(*args, **current_args) 130 | _add_op(func) 131 | return func_with_args 132 | 133 | 134 | def has_arg_scope(func): 135 | """Checks whether a func has been decorated with @add_arg_scope or not. 136 | 137 | Args: 138 | func: function to check. 139 | 140 | Returns: 141 | a boolean. 142 | """ 143 | key_op = (func.__module__, func.__name__) 144 | return key_op in _DECORATED_OPS 145 | -------------------------------------------------------------------------------- /slim/variables.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains convenience wrappers for creating Variables in TensorFlow. 16 | 17 | Usage: 18 | weights_initializer = tf.truncated_normal_initializer(stddev=0.01) 19 | l2_regularizer = lambda t: losses.l2_loss(t, weight=0.0005) 20 | weights = variables.variable('weights', 21 | shape=[100, 100], 22 | initializer=weights_initializer, 23 | regularizer=l2_regularizer, 24 | device='/cpu:0') 25 | 26 | biases = variables.variable('biases', 27 | shape=[100], 28 | initializer=tf.zeros_initializer, 29 | device='/cpu:0') 30 | 31 | # More complex example. 32 | 33 | net = slim.ops.conv2d(input, 32, [3, 3], scope='conv1') 34 | net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2') 35 | with slim.arg_scope(variables.Variables, restore=False): 36 | net = slim.ops.conv2d(net, 64, [3, 3], scope='conv3') 37 | 38 | # Get all model variables from all the layers. 
39 |   model_variables = slim.variables.get_variables()
40 | 
41 |   # Get all model variables from a specific layer, i.e. 'conv1'.
42 |   conv1_variables = slim.variables.get_variables('conv1')
43 | 
44 |   # Get all weights from all the layers.
45 |   weights = slim.variables.get_variables_by_name('weights')
46 | 
47 |   # Get all biases from all the layers.
48 |   biases = slim.variables.get_variables_by_name('biases')
49 | 
50 |   # Get all variables in the VARIABLES_TO_RESTORE collection
51 |   # (i.e. only those created by 'conv1' and 'conv2').
52 |   variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
53 | 
54 | ************************************************
55 | * Initializing model variables from a checkpoint
56 | ************************************************
57 | 
58 | # Create some variables.
59 | v1 = slim.variables.variable(name="v1", ..., restore=False)
60 | v2 = slim.variables.variable(name="v2", ...)  # By default restore=True
61 | ...
62 | # The list of variables to restore should only contain 'v2'.
63 | variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
64 | restorer = tf.train.Saver(variables_to_restore)
65 | with tf.Session() as sess:
66 |   # Restore variables from disk.
67 |   restorer.restore(sess, "/tmp/model.ckpt")
68 |   print("Model restored.")
69 |   # Do some work with the model
70 |   ...
71 | 
72 | """
73 | from __future__ import absolute_import
74 | from __future__ import division
75 | from __future__ import print_function
76 | 
77 | 
78 | import tensorflow as tf
79 | 
80 | from slim import scopes
81 | 
82 | # Collection containing all the variables created using slim.variables
83 | VARIABLES_COLLECTION = '_variables_'
84 | 
85 | # Collection containing all the slim.variables that are marked to_restore
86 | VARIABLES_TO_RESTORE = '_variables_to_restore_'
87 | 
88 | 
89 | def get_variable_given_name(var):
90 |   """Gets the given name of a variable, i.e. its name without the scope.
91 | 
92 |   Args:
93 |     var: a variable.
94 | 
95 |   Returns:
96 |     the given name of the variable without the scope.
97 |   """
98 |   name = var.op.name
99 |   if '/' in name:
100 |     name = name.split('/')[-1]
101 |   return name
102 | 
103 | 
104 | def default_collections(given_name, restore):
105 |   """Defines the set of default collections to which variables should be added.
106 | 
107 |   Args:
108 |     given_name: the given name of the variable.
109 |     restore: whether the variable should be added to the VARIABLES_TO_RESTORE
110 |       collection.
111 | 
112 |   Returns:
113 |     a list of default collections.
114 |   """
115 |   defaults = [tf.GraphKeys.VARIABLES, VARIABLES_COLLECTION]
116 |   defaults += [VARIABLES_COLLECTION + given_name]
117 |   if restore:
118 |     defaults += [VARIABLES_TO_RESTORE]
119 |   return defaults
120 | 
121 | 
122 | def add_variable(var, restore=True):
123 |   """Adds a variable to the default set of collections.
124 | 
125 |   Args:
126 |     var: a variable.
127 |     restore: whether the variable should be added to the
128 |       VARIABLES_TO_RESTORE collection.
129 |   """
130 |   given_name = get_variable_given_name(var)
131 |   for collection in default_collections(given_name, restore):
132 |     if var not in tf.get_collection(collection):
133 |       tf.add_to_collection(collection, var)
134 | 
135 | 
136 | def get_variables(prefix=None, suffix=None):
137 |   """Gets the list of variables, filtered by prefix and/or suffix.
138 | 
139 |   Args:
140 |     prefix: an optional prefix for filtering the variables to return.
141 |     suffix: an optional suffix for filtering the variables to return.
142 | 
143 |   Returns:
144 |     a list of variables with prefix and suffix.
145 |   """
146 |   candidates = tf.get_collection(VARIABLES_COLLECTION, prefix)
147 |   if suffix is not None:
148 |     candidates = [var for var in candidates if var.op.name.endswith(suffix)]
149 |   return candidates
150 | 
151 | 
152 | def get_variables_by_name(given_name, prefix=None):
153 |   """Gets the list of variables that were given that name.
154 | 
155 |   Args:
156 |     given_name: name given to the variable without scope.
157 |     prefix: an optional prefix for filtering the variables to return.
158 | 
159 |   Returns:
160 |     a list of variables with the given name and prefix.
161 |   """
162 |   return tf.get_collection(VARIABLES_COLLECTION + given_name, prefix)
163 | 
164 | 
165 | def get_unique_variable(name):
166 |   """Gets the variable uniquely identified by that name.
167 | 
168 |   Args:
169 |     name: a name that uniquely identifies the variable.
170 | 
171 |   Returns:
172 |     a tensorflow variable.
173 | 
174 |   Raises:
175 |     ValueError: if no variable uniquely identified by the name exists.
176 |   """
177 |   candidates = tf.get_collection(tf.GraphKeys.VARIABLES, name)
178 |   if not candidates:
179 |     raise ValueError("Couldn't find variable %s" % name)
180 | 
181 |   for candidate in candidates:
182 |     if candidate.op.name == name:
183 |       return candidate
184 |   raise ValueError('Variable %s does not uniquely identify a variable' % name)
185 | 
186 | 
187 | @scopes.add_arg_scope
188 | def variable(name, shape=None, dtype=tf.float32, initializer=None,
189 |              regularizer=None, trainable=True, collections=None, device='',
190 |              restore=True):
191 |   """Gets an existing variable with these parameters or creates a new one.
192 | 
193 |   It also adds the variable to a collection named after its given name.
194 | 
195 |   Args:
196 |     name: the name of the new or existing variable.
197 |     shape: shape of the new or existing variable.
198 |     dtype: type of the new or existing variable (defaults to `DT_FLOAT`).
199 |     initializer: initializer for the variable if one is created.
200 |     regularizer: a (Tensor -> Tensor or None) function; the result of
201 |       applying it on a newly created variable will be added to the collection
202 |       GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
203 |     trainable: If `True` also add the variable to the graph collection
204 |       `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
205 |     collections: A list of collection names to which the Variable will be added.
206 |       Note that the variable is always also added to the tf.GraphKeys.VARIABLES
207 |       collection.
208 |     device: Optional device to place the variable. It can be a string or a
209 |       function that is called to get the device for the variable.
210 |     restore: whether the variable should be added to the
211 |       VARIABLES_TO_RESTORE collection.
212 | 
213 |   Returns:
214 |     The created or existing variable.
215 |   """
216 |   # Instantiate the device for this variable if it is passed as a function.
217 |   if device and callable(device):
218 |     device = device()
219 |   collections = set(list(collections or []) + default_collections(name,
220 |                                                                   restore))
221 |   with tf.device(device):
222 |     return tf.get_variable(name, shape=shape, dtype=dtype,
223 |                            initializer=initializer, regularizer=regularizer,
224 |                            trainable=trainable, collections=collections)
225 | 
--------------------------------------------------------------------------------
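
The sketch below is an illustrative addendum, not a file from the repository: it shows how the two modules above compose. Because `variables.variable` is decorated with `@scopes.add_arg_scope`, wrapping calls in `scopes.arg_scope([variables.variable], restore=False)` keeps the variables created inside that scope out of the `VARIABLES_TO_RESTORE` collection, which is the list typically handed to `tf.train.Saver` when initializing from a checkpoint. The variable names (`w_old`, `w_new`) are hypothetical and chosen only for the example.

```
# Illustrative only: interaction of slim/scopes.py and slim/variables.py above.
# Variable names are hypothetical.
import tensorflow as tf

from slim import scopes
from slim import variables

with tf.Graph().as_default():
    init = tf.truncated_normal_initializer(stddev=0.01)

    # Default restore=True: the variable lands in VARIABLES_TO_RESTORE.
    w_old = variables.variable('w_old', shape=[5, 5, 1, 32], initializer=init)

    # Every variables.variable(...) call inside this scope receives
    # restore=False, so the new weights stay out of the restore collection.
    with scopes.arg_scope([variables.variable], restore=False):
        w_new = variables.variable('w_new', shape=[5, 5, 32, 32],
                                   initializer=init)

    to_restore = tf.get_collection(variables.VARIABLES_TO_RESTORE)
    assert w_old in to_restore
    assert w_new not in to_restore
```

Marking freshly created weights with `restore=False` in this way keeps them out of the restore list while the rest of the graph can still be initialized from an existing checkpoint, as described in the variables.py docstring.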