├── README.md ├── convert_to_tfrecords.py ├── convert_to_tfrecords.sh ├── input_data.py ├── model.py ├── png └── README.md └── train_multi_gpus.py /README.md: -------------------------------------------------------------------------------- 1 | # MobileNets-Tensorflow 2 | 3 | Google MobileNets Implementation using Tensorflow 4 | 5 | Code for paper "Orientation Estimation Network" : [Orientation Estimation Network](https://github.com/sunjieee/Orientation_Estimation_Network) 6 | 7 | **NOTE :** Work in progress. 8 | 9 | ## Library Versions 10 | 11 | - Tensorflow 1.0+ 12 | 13 | ## References 14 | 15 | - [Google MobileNet Paper](https://arxiv.org/pdf/1704.04861.pdf) 16 | -------------------------------------------------------------------------------- /convert_to_tfrecords.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import random 4 | 5 | __author__ = "Sun Jie" 6 | 7 | tf.app.flags.DEFINE_string('jpeg_file_path', './*', 8 | 'Jpeg file path') 9 | 10 | tf.app.flags.DEFINE_string('tfrecord_name', './Records/data.tfrecords', 11 | 'TFRecord name') 12 | 13 | FLAGS = tf.app.flags.FLAGS 14 | 15 | 16 | def _int64_feature(value): 17 | 18 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 19 | 20 | 21 | def _bytes_feature(value): 22 | 23 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 24 | 25 | 26 | def _process_image_files(filenames, labels): 27 | 28 | writer = tf.python_io.TFRecordWriter(FLAGS.tfrecord_name) 29 | 30 | for i in range(len(filenames)): 31 | 32 | with tf.gfile.FastGFile(filenames[i], 'rb') as f: 33 | 34 | image_data = f.read() 35 | 36 | example = tf.train.Example(features=tf.train.Features(feature={ 37 | 'image': _bytes_feature(image_data), 38 | 'label': _int64_feature(labels[i]), 39 | 'filename':_bytes_feature(os.path.basename(filenames[i])) 40 | })) 41 | 42 | writer.write(example.SerializeToString()) 43 | 44 | writer.close() 45 | 46 | 47 | def _find_image_files(): 48 | 49 | labels = [] 50 | 51 | filenames = tf.gfile.Glob(FLAGS.jpeg_file_path) 52 | 53 | for filename in filenames: 54 | 55 | label = 1 if 'cat' in filename else 0 56 | 57 | labels.append(label) 58 | 59 | shuffled_index = list(range(len(filenames))) 60 | 61 | random.seed(12345) 62 | 63 | random.shuffle(shuffled_index) 64 | 65 | filenames = [filenames[i] for i in shuffled_index] 66 | 67 | labels = [labels[i] for i in shuffled_index] 68 | 69 | return filenames, labels 70 | 71 | 72 | def _process_dataset(): 73 | 74 | filenames, labels = _find_image_files() 75 | 76 | _process_image_files(filenames, labels) 77 | 78 | 79 | def main(_): 80 | 81 | _process_dataset() 82 | 83 | 84 | if __name__ == '__main__': 85 | 86 | tf.app.run() 87 | -------------------------------------------------------------------------------- /convert_to_tfrecords.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python convert_to_tfrecords.py --jpeg_file_path=../train/*/* --tfrecord_name=../records/train.tfrecords 4 | 5 | python convert_to_tfrecords.py --jpeg_file_path=../valid/*/* --tfrecord_name=../records/valid.tfrecords 6 | -------------------------------------------------------------------------------- /input_data.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | __author__ = 'Sun Jie' 7 | ''' 8 | Tensorflow Implementation of MobileNets 9 | More detail, please refer to Google's paper(https://arxiv.org/abs/1704.04861). 10 | ''' 11 | 12 | 13 | tf.app.flags.DEFINE_string('data_path', '/home/sunjieeee/new_project/valid.*', 14 | 'Data directory') 15 | 16 | tf.app.flags.DEFINE_integer('batch_size', 256, 17 | """Number of images to process in a batch.""") 18 | 19 | tf.app.flags.DEFINE_integer('image_size', 224, 20 | """Provide square images of this size.""") 21 | 22 | tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, 23 | """Number of preprocessing threads per tower. """) 24 | 25 | tf.app.flags.DEFINE_bool('is_training', True, 26 | '''Is trainning''') 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | def decode_jpeg(image): 31 | 32 | image = tf.image.decode_jpeg(image, channels=3) 33 | 34 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 35 | 36 | return image 37 | 38 | 39 | def distort_color(image, color_ordering=0): 40 | 41 | if color_ordering == 0: 42 | 43 | image = tf.image.random_brightness(image, max_delta=32./255.) 44 | 45 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 46 | 47 | image = tf.image.random_hue(image, max_delta=0.2) 48 | 49 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 50 | 51 | else: 52 | 53 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 54 | 55 | image = tf.image.random_brightness(image, max_delta=32./255.) 56 | 57 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 58 | 59 | image = tf.image.random_hue(image, max_delta=0.2) 60 | 61 | return tf.clip_by_value(image, 0.0, 1.0) 62 | 63 | def preprocess_for_train(image, height, width, bbox): 64 | 65 | if bbox is None: 66 | 67 | bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 68 | 69 | bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box( 70 | tf.shape(image), bounding_boxes=bbox, min_object_covered=0.8) 71 | 72 | distorted_image = tf.slice(image, bbox_begin, bbox_size) 73 | 74 | distorted_image = tf.image.resize_images(distorted_image, [height, width], method=np.random.randint(4)) 75 | 76 | distorted_image = tf.image.random_flip_left_right(distorted_image) 77 | 78 | distorted_image = distort_color(distorted_image, np.random.randint(2)) 79 | 80 | return distorted_image 81 | 82 | 83 | def _read_input(filename_queue): 84 | 85 | examples_per_shard = 1024 86 | 87 | min_queue_examples = examples_per_shard * 16; 88 | 89 | if is_training: 90 | 91 | examples_queue = tf.RandomShuffleQueue( 92 | capacity=min_queue_examples + 3 * batch_size, 93 | min_after_dequeue=min_queue_examples, 94 | dtypes=[tf.string]) 95 | 96 | else: 97 | 98 | examples_queue = tf.FIFOQueue( 99 | capacity=examples_per_shard + 3 * batch_size, 100 | dtypes=[tf.string]) 101 | 102 | enqueue_ops = [] 103 | 104 | for _ in range(num_readers): 105 | 106 | reader = tf.TFRecordReader() 107 | 108 | _, value = reader.read(filename_queue) 109 | 110 | enqueue_ops.append(examples_queue.enqueue([value])) 111 | 112 | tf.train.queue_runner.add_queue_runner( 113 | tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)) 114 | 115 | serialized_example = examples_queue.dequeue() 116 | 117 | images_and_labels = [] 118 | 119 | for thread_id in range(FLAGS.num_preprocess_threads): 120 | 121 | features = tf.parse_single_example( 122 | serialized_example, 123 | features={ 124 | 'image':tf.FixedLenFeature([],tf.string), 125 | 'label':tf.FixedLenFeature([],tf.int64) 126 | }) 127 | 128 | decoded_image = decode_jpeg(features['image']) 129 | 130 | if FLAGS.is_training: 131 | 132 | distorted_image = preprocess_for_train(decoded_image, FLAGS.image_size, FLAGS.image_size, None) 133 | 134 | else: 135 | 136 | distorted_image = tf.image.resize_images(decoded_image, [FLAGS.image_size, FLAGSimage_size], 137 | method=np.random.randint(4)) 138 | 139 | distorted_image = tf.subtract(distorted_image, 0.5) 140 | 141 | distorted_image = tf.multiply(distorted_image, 2.0) 142 | 143 | label = tf.cast(features['label'], tf.int32) 144 | 145 | images_and_labels.append([distorted_image, label]) 146 | 147 | return images_and_labels 148 | 149 | 150 | 151 | def _read_input_queue(): 152 | 153 | with tf.name_scope('input_data'): 154 | 155 | files = tf.train.match_filenames_once(FLAGS.data_path) 156 | 157 | filename_queue = tf.train.string_input_producer(files, shuffle=False) 158 | 159 | images_and_labels = _read_input(filename_queue) 160 | 161 | capacity = 2 * FLAGS.num_preprocess_threads * FLAGS.batch_size 162 | 163 | image_batch, label_batch = tf.train.batch_join(images_and_labels, 164 | batch_size=FLAGS.batch_size, 165 | capacity=capacity) 166 | 167 | return image_batch, label_batch 168 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | __author__ = 'Sun Jie' 5 | ''' 6 | Tensorflow Implementation of MobileNets 7 | More detail, please refer to Google's paper(https://arxiv.org/abs/1704.04861). 8 | ''' 9 | 10 | class MobileNets(object): 11 | 12 | def __init__(self, images, num_classes=1000, is_training=True, spatial_squeeze=True): 13 | self.images = images 14 | self.num_classes=num_classes 15 | self.is_training = is_training 16 | self.spatial_squeeze = spatial_squeeze 17 | self.end_points = {} 18 | 19 | def get_tensor_name(self, tensor): 20 | return tensor.op.name 21 | 22 | def get_tensor_size(self, tensor): 23 | return tensor.get_shape().as_list() 24 | 25 | def weight_variable_xavier_initialized(self, shape, constant=1, name=None): 26 | stddev = constant * np.sqrt(2.0 / (shape[2] + shape[3])) 27 | return self.weight_variable(shape, stddev=stddev, name=name) 28 | 29 | def variance_scaling_initializer(self, shape, constant=1, name=None): 30 | stddev = constant * np.sqrt(2.0 / shape[2]) 31 | return self.weight_variable(shape, stddev=stddev, name=name) 32 | 33 | def weight_variable(self, shape, stddev=0.02, name=None): 34 | initial = tf.truncated_normal(shape, stddev=stddev) 35 | if name is None: 36 | return tf.Variable(initial) 37 | else: 38 | return tf.get_variable(name, initializer=initial) 39 | 40 | def bias_variable(self, shape, name=None): 41 | initial = tf.constant(0.0, shape=shape) 42 | if name is None: 43 | return tf.Variable(initial) 44 | else: 45 | return tf.get_variable(name, initializer=initial) 46 | 47 | def conv2d_strided(self, x, W, b, s): 48 | conv = tf.nn.conv2d(x, W, strides=[1, s, s, 1], padding="SAME") 49 | return tf.nn.bias_add(conv, b) 50 | 51 | def depthwise_conv2d_strided(self, x, W, b, s): 52 | conv = tf.nn.depthwise_conv2d(x, W, strides=[1, s, s, 1], padding="SAME") 53 | return tf.nn.bias_add(conv, b) 54 | 55 | def add_to_regularization_and_summary(self, var): 56 | if var is not None: 57 | tf.summary.histogram(var.op.name, var) 58 | tf.add_to_collection("reg_loss", tf.nn.l2_loss(var)) 59 | 60 | def add_image_summary(self, images): 61 | tf.summary.image(var.op.name, images, 10) 62 | 63 | def add_activation_summary(self, var): 64 | tf.summary.histogram(var.op.name + "/activation", var) 65 | tf.summary.scalar(var.op.name + "/sparsity", tf.nn.zero_fraction(var)) 66 | 67 | 68 | def add_gradient_summary(self, grad, var): 69 | if grad is not None: 70 | tf.summary.histogram(var.op.name + "/gradient", grad) 71 | 72 | 73 | def conv(self, input_tensor, depth, filter, stride, scope, bn=tf.layers.batch_normalization, act=tf.nn.relu): 74 | 75 | with tf.variable_scope(scope): 76 | 77 | dim = self.get_tensor_size(input_tensor)[3] 78 | 79 | W = self.variance_scaling_initializer([filter[0], filter[1], dim, depth], name='weight') 80 | b = self.bias_variable([depth], name='bias') 81 | h_conv = self.conv2d_strided(input_tensor, W, b, stride) 82 | 83 | add_to_regularization_and_summary(W) 84 | 85 | if bn is not None: 86 | h_conv = bn(h_conv, training=self.is_training) 87 | 88 | if act is not None: 89 | h_conv = act(h_conv) 90 | 91 | self.end_points[scope] = h_conv 92 | self.add_activation_summary(h_conv) 93 | 94 | return h_conv 95 | 96 | 97 | def conv_dw(self, input_tensor, filter, stride, scope, bn=tf.layers.batch_normalization, act=tf.nn.relu): 98 | 99 | with tf.variable_scope(scope): 100 | 101 | dim = self.get_tensor_size(input_tensor)[3] 102 | 103 | W = self.variance_scaling_initializer([filter[0], filter[1], dim, 1], name='weight') 104 | b = self.bias_variable([dim], name='bias') 105 | h_conv = self.depthwise_conv2d_strided(input_tensor, W, b, stride) 106 | 107 | add_to_regularization_and_summary(W) 108 | 109 | if bn is not None: 110 | h_conv = bn(h_conv, training=self.is_training) 111 | if act is not None: 112 | h_conv = act(h_conv) 113 | 114 | self.end_points[scope] = h_conv 115 | self.add_activation_summary(h_conv) 116 | 117 | return h_conv 118 | 119 | 120 | def global_avg_pool(self, x, scope): 121 | 122 | k = self.get_tensor_size(x) 123 | return tf.nn.avg_pool(x, ksize=[1, k[1], k[2], 1], strides=[1, 1, 1, 1], padding="VALID") 124 | 125 | def squeeze(self, x): 126 | if self.spatial_squeeze: 127 | return tf.squeeze(x, [1, 2], name='spatial_squeeze') 128 | return x 129 | 130 | def inference(self): 131 | net = self.conv(self.images, 32, [3, 3], 2, scope='conv1') 132 | 133 | net = self.conv_dw(net, [3, 3], 1, scope='conv2_dw') 134 | net = self.conv(net, 64, [1, 1], 1, scope='conv2_pw') 135 | 136 | net = self.conv_dw(net, [3, 3], 2, scope='conv3_dw') 137 | net = self.conv(net, 128, [1, 1], 1, scope='conv3_pw') 138 | 139 | net = self.conv_dw(net, [3, 3], 1, scope='conv4_dw') 140 | net = self.conv(net, 128, [1, 1], 1, scope='conv4_pw') 141 | 142 | net = self.conv_dw(net, [3, 3], 2, scope='conv5_dw') 143 | net = self.conv(net, 256, [1, 1], 1, scope='conv5_pw') 144 | 145 | net = self.conv_dw(net, [3, 3], 1, scope='conv6_dw') 146 | net = self.conv(net, 256, [1, 1], 1, scope='conv6_pw') 147 | 148 | net = self.conv_dw(net, [3, 3], 2, scope='conv7_dw') 149 | net = self.conv(net, 512, [1, 1], 1, scope='conv7_pw') 150 | 151 | net = self.conv_dw(net, [3, 3], 1, scope='conv8_dw') 152 | net = self.conv(net, 512, [1, 1], 1, scope='conv8_pw') 153 | 154 | net = self.conv_dw(net, [3, 3], 1, scope='conv9_dw') 155 | net = self.conv(net, 512, [1, 1], 1, scope='conv9_pw') 156 | 157 | net = self.conv_dw(net, [3, 3], 1, scope='conv10_dw') 158 | net = self.conv(net, 512, [1, 1], 1, scope='conv10_pw') 159 | 160 | net = self.conv_dw(net, [3, 3], 1, scope='conv11_dw') 161 | net = self.conv(net, 512, [1, 1], 1, scope='conv11_pw') 162 | 163 | net = self.conv_dw(net, [3, 3], 1, scope='conv12_dw') 164 | net = self.conv(net, 512, [1, 1], 1, scope='conv12_pw') 165 | 166 | net = self.conv_dw(net, [3, 3], 2, scope='conv13_dw') 167 | net = self.conv(net, 1024, [1, 1], 1, scope='conv13_pw') 168 | 169 | net = self.conv_dw(net, [3, 3], 1, scope='conv14_dw') 170 | net = self.conv(net, 1024, [1, 1], 1, scope='conv14_pw') 171 | 172 | net = self.global_avg_pool(net, scope='avg_pool15') 173 | 174 | net = self.conv(net, self.num_classes, [1, 1], 1, bn=None, act=None, scope='fc16') 175 | #print self.get_tensor_name(net) ,self.get_tensor_size(net) 176 | self.logits = self.squeeze(net) 177 | 178 | self.end_points['logits'] = self.logits 179 | self.end_points['predictions'] = tf.nn.softmax(self.logits) 180 | 181 | return self.logits, self.end_points 182 | -------------------------------------------------------------------------------- /png/README.md: -------------------------------------------------------------------------------- 1 | # Some images for model 2 | -------------------------------------------------------------------------------- /train_multi_gpus.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import numpy as np 4 | from model import * 5 | from input_data import * 6 | 7 | __author__ = 'Sun Jie' 8 | ''' 9 | Tensorflow Implementation of MobileNets 10 | More detail, please refer to Google's paper(https://arxiv.org/abs/1704.04861). 11 | ''' 12 | 13 | tf.app.flags.DEFINE_string('data_path', '', 14 | """Directory where is data""") 15 | 16 | tf.app.flags.DEFINE_integer('num_gpus', 4, 17 | 'Num gpus') 18 | 19 | tf.app.flags.DEFINE_float('moving_average_decay', 0.99, 20 | 'Moving average decay') 21 | 22 | tf.app.flags.DEFINE_float('learning_rate_decay_factor', 0.16, 23 | 'Learning rate decay factor') 24 | 25 | tf.app.flags.DEFINE_float('initial_learning_rate', 0.01, 26 | 'Initial learning rate') 27 | 28 | tf.app.flags.DEFINE_string('model_save_path', '', 29 | 'Model save path') 30 | 31 | tf.app.flags.DEFINE_string('model_name', 'model.ckpt', 32 | 'Model name') 33 | 34 | FLAGS = tf.app.flags.FLAGS 35 | 36 | 37 | 38 | def get_loss(logits, labels): 39 | 40 | models = MobileNets(images) 41 | 42 | logits, end_points = models.inference() 43 | 44 | labels_onehot = tf.one_hot(labels, depth=1000) 45 | 46 | cross_entropy = tf.losses.softmax_cross_entropy(logits, labels_onehot) 47 | 48 | regularization_loss = tf.add_n(tf.get_collection('losses', scope)) 49 | 50 | loss = cross_entropy + regularization_loss 51 | 52 | tf.summary.scalar('loss', loss) 53 | 54 | return loss 55 | 56 | def average_gradients(tower_grads): 57 | 58 | average_grads = [] 59 | 60 | for grad_and_vars in zip(*tower_grads): 61 | 62 | grads = [] 63 | 64 | for g, _ in grad_and_vars: 65 | 66 | expanded_g = tf.expand_dims(g, 0) 67 | 68 | grads.append(expanded_g) 69 | 70 | grad = tf.concat(grads, 0) 71 | 72 | grad = tf.reduce_mean(grad, 0) 73 | 74 | v = grad_and_vars[0][1] 75 | 76 | grad_and_var = (grad, v) 77 | 78 | average_grads.append(grad_and_var) 79 | 80 | return average_grads 81 | 82 | def training(iamges, labels): 83 | 84 | global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), 85 | trainable=False) 86 | 87 | learning_rate = tf.train.exponential_decay(FLAGS.initial_learning_rate, 88 | global_step, 89 | decay_steps, 90 | FLAGS.learning_rate_decay_factor, 91 | staircase=True) 92 | 93 | opt = tf.train.RMSPropOptimizer(learning_rate) 94 | 95 | images_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=images) 96 | 97 | labels_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=labels) 98 | 99 | tower_grads = [] 100 | 101 | reuse_variables = False 102 | 103 | for i in range(N_GPU): 104 | 105 | with tf.device('/gpu:%d' % i): 106 | 107 | with tf.name_scope('GPU_%d' % i) as scope: 108 | 109 | cur_loss = get_loss(x, y_, scope, reuse_variables) 110 | 111 | reuse_variables = True 112 | 113 | grads = opt.compute_gradients(cur_loss) 114 | 115 | tower_grads.append(grads) 116 | 117 | grads = average_gradients(tower_grads) 118 | 119 | for grad, var in grads: 120 | 121 | if grad is not None: 122 | 123 | tf.histogram_summary('gradients_on_average/%s' % var.op.name, grad) 124 | 125 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 126 | 127 | for var in tf.trainable_variables(): 128 | 129 | tf.histogram_summary(var.op.name, var) 130 | 131 | variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step) 132 | 133 | variables_to_average = (tf.trainable_variables() +tf.moving_average_variables()) 134 | 135 | variables_averages_op = variable_averages.apply(variables_to_average) 136 | 137 | train_op = tf.group(apply_gradient_op, variables_averages_op) 138 | 139 | return train_op 140 | 141 | 142 | def main(): 143 | 144 | with tf.Graph().as_default(), tf.device('/cpu:0'): 145 | 146 | images, labels = _read_input_queue() 147 | 148 | train_op = training(loss) 149 | 150 | summary_op = tf.summary.merge_all() 151 | 152 | saver = tf.train.Saver() 153 | 154 | init = tf.global_variables_initializer() 155 | 156 | with tf.Session(config=tf.ConfigProto( 157 | allow_soft_placement=True, 158 | log_device_placement=False)) as sess: 159 | 160 | sess.run(init) 161 | 162 | coord = tf.train.Coordinator() 163 | 164 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 165 | 166 | summary_writer = tf.summary.FileWriter(FLAGS.model_save_path, sess.graph) 167 | 168 | for step in range(TRAINING_STEPS): 169 | 170 | _, loss_value = sess.run([train_op, loss]) 171 | 172 | if step % 10 == 0: 173 | 174 | print('setp %d: loss = %.4f' % (step, loss_value)) 175 | 176 | summary = sess.run(summary_op) 177 | 178 | summary_writer.add_summary(summary, step) 179 | 180 | if step % 1000 == 0: 181 | 182 | checkpoint_path = os.path.join( 183 | FLAGS.model_save_path, FLAGS.model_name) 184 | 185 | saver.save(sess, checkpoint_path, global_step=step) 186 | 187 | coord.request_stop() 188 | 189 | coord.join(threads) 190 | 191 | 192 | if __name__ == '__main__': 193 | 194 | tf.app.run() 195 | --------------------------------------------------------------------------------