├── .gitignore
├── README.md
├── data
│   └── readme.md
├── dataset.py
├── images
│   ├── input.png
│   ├── network.png
│   └── output.png
├── model.py
├── model_part.py
├── prepare_data.py
├── task.py
└── train_operation.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# cnn_depth_tensorflow
cnn_depth_tensorflow is a TensorFlow implementation of depth map estimation from a single RGB image.

It is based on the paper "Depth Map Prediction from a Single Image using a Multi-Scale Deep Network" (Eigen et al., 2014):
https://arxiv.org/abs/1406.2283

![network](images/network.png)

# Requirements
- TensorFlow 1.x
- NumPy
- h5py
- Pillow (PIL)
- wget

# How to train
- Download the training data. Please see readme.md in the data directory.
- Convert the .mat file to png images (this also writes a `train.csv` index; see the example after this list):
```
python prepare_data.py
```

- Let's train:
```
python task.py
```

- Predicted depth images are written to the data directory as training progresses.
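
`prepare_data.py` pairs each RGB image with its depth map in `train.csv`, one pair per line, and `task.py` reads that file through `dataset.py`. The rows are shuffled, but the layout looks like this:

```
data/nyu_datasets/00000.jpg,data/nyu_datasets/00000.png
data/nyu_datasets/00001.jpg,data/nyu_datasets/00001.png
```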

# Example
- input

![input](images/input.png)

- output

![output](images/output.png)

---

Copyright (c) 2016 Masahiro Imai
Released under the MIT license

--------------------------------------------------------------------------------
/data/readme.md:
--------------------------------------------------------------------------------
NYU Depth Dataset V2
http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html

prepare_data.py downloads nyu_depth_v2_labeled.mat into this directory automatically if it is not already present.

--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.platform import gfile
import numpy as np
from PIL import Image

IMAGE_HEIGHT = 228
IMAGE_WIDTH = 304
TARGET_HEIGHT = 55
TARGET_WIDTH = 74


class DataSet:
    def __init__(self, batch_size):
        self.batch_size = batch_size

    def csv_inputs(self, csv_file_path):
        filename_queue = tf.train.string_input_producer([csv_file_path], shuffle=True)
        reader = tf.TextLineReader()
        _, serialized_example = reader.read(filename_queue)
        filename, depth_filename = tf.decode_csv(serialized_example, [["path"], ["annotation"]])
        # input image
        jpg = tf.read_file(filename)
        image = tf.image.decode_jpeg(jpg, channels=3)
        image = tf.cast(image, tf.float32)
        # target depth map, scaled to [0, 1]
        depth_png = tf.read_file(depth_filename)
        depth = tf.image.decode_png(depth_png, channels=1)
        depth = tf.cast(depth, tf.float32)
        depth = tf.div(depth, 255.0)
        # resize to the network input / output resolutions
        image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
        depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
        # validity mask: 1 where a ground-truth depth exists, 0 otherwise
        invalid_depth = tf.sign(depth)
        # generate batches
        images, depths, invalid_depths = tf.train.batch(
            [image, depth, invalid_depth],
            batch_size=self.batch_size,
            num_threads=4,
            capacity=50 + 3 * self.batch_size,
        )
        return images, depths, invalid_depths


def output_predict(depths, images, output_dir):
    print("output predict into %s" % output_dir)
    if not gfile.Exists(output_dir):
        gfile.MakeDirs(output_dir)
    for i, (image, depth) in enumerate(zip(images, depths)):
        pilimg = Image.fromarray(np.uint8(image))
        image_name = "%s/%05d_org.png" % (output_dir, i)
        pilimg.save(image_name)
        # depth is (height, width, 1); move the channel axis to the front
        depth = depth.transpose(2, 0, 1)
        if np.max(depth) != 0:
            ra_depth = (depth / np.max(depth)) * 255.0
        else:
            ra_depth = depth * 255.0
        depth_pil = Image.fromarray(np.uint8(ra_depth[0]), mode="L")
        depth_name = "%s/%05d.png" % (output_dir, i)
        depth_pil.save(depth_name)
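

# The block below is a minimal smoke test for the input pipeline, not part of
# training (task.py is the entry point). It assumes a train.csv, as written by
# prepare_data.py, exists in the current directory.
if __name__ == "__main__":
    dataset = DataSet(batch_size=2)
    images, depths, invalid_depths = dataset.csv_inputs("train.csv")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        images_val, depths_val, masks_val = sess.run([images, depths, invalid_depths])
        print("images:", images_val.shape)   # (2, 228, 304, 3)
        print("depths:", depths_val.shape)   # (2, 55, 74, 1)
        print("masks:", masks_val.shape)     # (2, 55, 74, 1)
        coord.request_stop()
        coord.join(threads)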

--------------------------------------------------------------------------------
/images/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/input.png

--------------------------------------------------------------------------------
/images/network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/network.png

--------------------------------------------------------------------------------
/images/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/output.png

--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
# encoding: utf-8

# tensorflow
import tensorflow as tf
import math
from model_part import conv2d
from model_part import fc


def inference(images, reuse=False, trainable=True):
    """Coarse network: predicts a global, low-resolution (55x74) depth map."""
    coarse1_conv = conv2d('coarse1', images, [11, 11, 3, 96], [96], [1, 4, 4, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse1 = tf.nn.max_pool(coarse1_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1')
    coarse2_conv = conv2d('coarse2', coarse1, [5, 5, 96, 256], [256], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse2 = tf.nn.max_pool(coarse2_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
    coarse3 = conv2d('coarse3', coarse2, [3, 3, 256, 384], [384], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse4 = conv2d('coarse4', coarse3, [3, 3, 384, 384], [384], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse5 = conv2d('coarse5', coarse4, [3, 3, 384, 256], [256], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse6 = fc('coarse6', coarse5, [6 * 10 * 256, 4096], [4096], reuse=reuse, trainable=trainable)
    coarse7 = fc('coarse7', coarse6, [4096, 4070], [4070], reuse=reuse, trainable=trainable)
    # 4070 = 55 * 74: reshape the fully connected output back into a depth map
    coarse7_output = tf.reshape(coarse7, [-1, 55, 74, 1])
    return coarse7_output


def inference_refine(images, coarse7_output, keep_conv, reuse=False, trainable=True):
    """Fine network: refines the coarse prediction with local image detail."""
    fine1_conv = conv2d('fine1', images, [9, 9, 3, 63], [63], [1, 2, 2, 1], padding='VALID', reuse=reuse, trainable=trainable)
    fine1 = tf.nn.max_pool(fine1_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='fine_pool1')
    fine1_dropout = tf.nn.dropout(fine1, keep_conv)
    # 63 fine feature maps + 1 coarse depth channel = 64 channels
    fine2 = tf.concat([fine1_dropout, coarse7_output], 3)
    fine3 = conv2d('fine3', fine2, [5, 5, 64, 64], [64], [1, 1, 1, 1], padding='SAME', reuse=reuse, trainable=trainable)
    fine3_dropout = tf.nn.dropout(fine3, keep_conv)
    fine4 = conv2d('fine4', fine3_dropout, [5, 5, 64, 1], [1], [1, 1, 1, 1], padding='SAME', reuse=reuse, trainable=trainable)
    return fine4

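
# loss() below computes, over the n = 55 * 74 output pixels, the same
# scale-invariant error used in Eigen et al. (2014) with lambda = 0.5
# (applied here to the normalized depths rather than log depths):
#
#     L(y, y*) = (1/n) * sum_i(d_i^2) - (lambda / n^2) * (sum_i(d_i))^2,
#     d_i = y_i - y*_i
#
# invalid_depths is the validity mask from dataset.py (tf.sign(depth)),
# so pixels without a ground-truth depth contribute d_i = 0.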
def loss(logits, depths, invalid_depths):
    logits_flat = tf.reshape(logits, [-1, 55 * 74])
    depths_flat = tf.reshape(depths, [-1, 55 * 74])
    invalid_depths_flat = tf.reshape(invalid_depths, [-1, 55 * 74])

    # mask out pixels that have no ground-truth depth
    predict = tf.multiply(logits_flat, invalid_depths_flat)
    target = tf.multiply(depths_flat, invalid_depths_flat)
    d = tf.subtract(predict, target)
    square_d = tf.square(d)
    sum_square_d = tf.reduce_sum(square_d, 1)
    sum_d = tf.reduce_sum(d, 1)
    square_sum_d = tf.square(sum_d)
    cost = tf.reduce_mean(sum_square_d / (55.0 * 74.0) - 0.5 * square_sum_d / math.pow(55 * 74, 2))
    tf.add_to_collection('losses', cost)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def _add_loss_summaries(total_loss):
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    for l in losses + [total_loss]:
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))
    return loss_averages_op

--------------------------------------------------------------------------------
/model_part.py:
--------------------------------------------------------------------------------
import tensorflow as tf

TOWER_NAME = 'tower'
UPDATE_OPS_COLLECTION = '_update_ops_'


def _variable_with_weight_decay(name, shape, stddev, wd, trainable=True):
    var = _variable_on_gpu(name, shape, tf.truncated_normal_initializer(stddev=stddev), trainable=trainable)
    if wd:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var


def _variable_on_gpu(name, shape, initializer, trainable=True):
    # forward trainable so that trainable=False actually freezes the variable
    var = tf.get_variable(name, shape, initializer=initializer, trainable=trainable)
    return var


def conv2d(scope_name, inputs, shape, bias_shape, stride, padding='VALID', wd=0.0, reuse=False, trainable=True):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()
        kernel = _variable_with_weight_decay(
            'weights',
            shape=shape,
            stddev=0.01,
            wd=wd,
            trainable=trainable
        )
        conv = tf.nn.conv2d(inputs, kernel, stride, padding=padding)
        biases = _variable_on_gpu('biases', bias_shape, tf.constant_initializer(0.1), trainable=trainable)
        bias = tf.nn.bias_add(conv, biases)
        conv_ = tf.nn.relu(bias, name=scope.name)
        return conv_


def fc(scope_name, inputs, shape, bias_shape, wd=0.04, reuse=False, trainable=True):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()
        flat = tf.reshape(inputs, [-1, shape[0]])
        weights = _variable_with_weight_decay(
            'weights',
            shape,
            stddev=0.01,
            wd=wd,
            trainable=trainable
        )
        biases = _variable_on_gpu('biases', bias_shape, tf.constant_initializer(0.1), trainable=trainable)
        fc = tf.nn.relu_layer(flat, weights, biases, name=scope.name)
        return fc

--------------------------------------------------------------------------------
/prepare_data.py:
--------------------------------------------------------------------------------
# encoding: utf-8
import os
import numpy as np
import h5py
from PIL import Image
import random
import wget


def convert_nyu(path):
    imgdir = os.path.join("data", "nyu_datasets")
    if not os.path.exists(imgdir):
        os.makedirs(imgdir)

    nyuurl = 'http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/nyu_depth_v2_labeled.mat'
    mat_file = os.path.join("data", "nyu_depth_v2_labeled.mat")
    if not os.path.exists(mat_file):
        filename = wget.download(nyuurl, out="data")
        print('\ndownloaded: ', filename)

    print("load dataset: %s" % path)
    f = h5py.File(path, 'r')

    trains = []
    for i, (image, depth) in enumerate(zip(f['images'], f['depths'])):
        ra_image = image.transpose(2, 1, 0)
        ra_depth = depth.transpose(1, 0)
        re_depth = (ra_depth / np.max(ra_depth)) * 255.0
        image_pil = Image.fromarray(np.uint8(ra_image))
        depth_pil = Image.fromarray(np.uint8(re_depth))
        image_name = os.path.join("data", "nyu_datasets", "%05d.jpg" % i)
        image_pil.save(image_name)
        depth_name = os.path.join("data", "nyu_datasets", "%05d.png" % i)
        depth_pil.save(depth_name)

        trains.append((image_name, depth_name))

    random.shuffle(trains)

    # start from a fresh train.csv
    if os.path.exists('train.csv'):
        os.remove('train.csv')

    with open('train.csv', 'w') as output:
        for (image_name, depth_name) in trains:
            output.write("%s,%s" % (image_name, depth_name))
            output.write("\n")


if __name__ == '__main__':
    nyu_path = 'data/nyu_depth_v2_labeled.mat'
    convert_nyu(nyu_path)

--------------------------------------------------------------------------------
/task.py:
--------------------------------------------------------------------------------
# encoding: utf-8

from datetime import datetime
from tensorflow.python.platform import gfile
import numpy as np
import tensorflow as tf
from dataset import DataSet
from dataset import output_predict
import model
import train_operation as op

MAX_STEPS = 10000000
LOG_DEVICE_PLACEMENT = True
BATCH_SIZE = 8
TRAIN_FILE = "train.csv"
COARSE_DIR = "coarse"
REFINE_DIR = "refine"

REFINE_TRAIN = True
FINE_TUNE = True
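
# Two-stage training, following the paper: first train the coarse network
# (REFINE_TRAIN = False, FINE_TUNE = False), which saves checkpoints under
# COARSE_DIR; then set both flags to True to train the refine network on top
# of the restored, frozen coarse network, saving checkpoints under REFINE_DIR.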


def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        dataset = DataSet(BATCH_SIZE)
        images, depths, invalid_depths = dataset.csv_inputs(TRAIN_FILE)
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        if REFINE_TRAIN:
            print("refine train.")
            # the coarse network is kept frozen while the refine network trains
            coarse = model.inference(images, trainable=False)
            logits = model.inference_refine(images, coarse, keep_conv)
        else:
            print("coarse train.")
            logits = model.inference(images)
        loss = model.loss(logits, depths, invalid_depths)
        train_op = op.train(loss, global_step, BATCH_SIZE)
        init_op = tf.global_variables_initializer()

        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)

        # collect coarse and refine parameters so they can be saved and restored separately
        coarse_params = {}
        refine_params = {}
        if REFINE_TRAIN:
            for variable in tf.global_variables():
                variable_name = variable.name
                print("parameter: %s" % variable_name)
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % variable_name)
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        # define savers
        print(coarse_params)
        saver_coarse = tf.train.Saver(coarse_params)
        if REFINE_TRAIN:
            saver_refine = tf.train.Saver(refine_params)
        # fine tune: restore previously trained weights when checkpoints exist
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print("Pretrained coarse Model Loading.")
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
                print("Pretrained coarse Model Restored.")
            else:
                print("No Pretrained coarse Model.")
            if REFINE_TRAIN:
                refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
                if refine_ckpt and refine_ckpt.model_checkpoint_path:
                    print("Pretrained refine Model Loading.")
                    saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                    print("Pretrained refine Model Restored.")
                else:
                    print("No Pretrained refine Model.")

        # train
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for step in range(MAX_STEPS):
            index = 0
            for i in range(1000):
                _, loss_value, logits_val, images_val = sess.run(
                    [train_op, loss, logits, images],
                    feed_dict={keep_conv: 0.8, keep_hidden: 0.5})
                if index % 10 == 0:
                    print("%s: %d[epoch]: %d[iteration]: train loss %f" % (datetime.now(), step, index, loss_value))
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                if index % 500 == 0:
                    if REFINE_TRAIN:
                        output_predict(logits_val, images_val, "data/predict_refine_%05d_%05d" % (step, i))
                    else:
                        output_predict(logits_val, images_val, "data/predict_%05d_%05d" % (step, i))
                index += 1

            if step % 5 == 0 or (step + 1) == MAX_STEPS:
                if REFINE_TRAIN:
                    refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                    saver_refine.save(sess, refine_checkpoint_path, global_step=step)
                else:
                    coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()


def main(argv=None):
    if not gfile.Exists(COARSE_DIR):
        gfile.MakeDirs(COARSE_DIR)
    if not gfile.Exists(REFINE_DIR):
        gfile.MakeDirs(REFINE_DIR)
    train()


if __name__ == '__main__':
    tf.app.run()

--------------------------------------------------------------------------------
/train_operation.py:
--------------------------------------------------------------------------------
# encoding: utf-8

import tensorflow as tf

NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 500
NUM_EPOCHS_PER_DECAY = 30
INITIAL_LEARNING_RATE = 0.0001
LEARNING_RATE_DECAY_FACTOR = 0.9
MOVING_AVERAGE_DECAY = 0.999999


def _add_loss_summaries(total_loss):
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    for l in losses + [total_loss]:
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))
    return loss_averages_op

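
# With the defaults above and BATCH_SIZE = 8 in task.py, one nominal epoch is
# 500 / 8 = 62.5 batches, so the learning rate is multiplied by 0.9 every
# decay_steps = int(62.5 * 30) = 1875 optimizer steps (staircase decay).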

def train(total_loss, global_step, batch_size):
    num_batches_per_epoch = float(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN) / batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
    lr = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE,
        global_step,
        decay_steps,
        LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    tf.summary.scalar('learning_rate', lr)
    loss_averages_op = _add_loss_summaries(total_loss)
    # compute gradients only after the loss moving averages have been updated
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    # histogram summaries for trainable variables and their gradients
    for var in tf.trainable_variables():
        print(var.op.name)
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

--------------------------------------------------------------------------------