├── .gitignore
├── README.md
├── data
│   └── readme.md
├── dataset.py
├── images
│   ├── input.png
│   ├── network.png
│   └── output.png
├── model.py
├── model_part.py
├── prepare_data.py
├── task.py
└── train_operation.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# cnn_depth_tensorflow
cnn_depth_tensorflow is a TensorFlow implementation of depth map estimation from a single RGB image.

It is based on the paper "Depth Map Prediction from a Single Image using a Multi-Scale Deep Network" (Eigen et al., 2014):
https://arxiv.org/abs/1406.2283

![network](images/network.png)

# Requirements
- TensorFlow 1.x
- NumPy
- h5py
- Pillow (PIL)
- wget

# How to train
- Download the training data. Please see readme.md in the data directory.
- Convert the .mat file to png images (this also writes a `train.csv` index; see the example after this list):
```
python prepare_data.py
```

- Let's train:
```
python task.py
```

- Predicted depth images are written to the data directory as training progresses.
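
`prepare_data.py` pairs each RGB image with its depth map in `train.csv`, one pair per line, and `task.py` reads that file through `dataset.py`. The rows are shuffled, but the layout looks like this:

```
data/nyu_datasets/00000.jpg,data/nyu_datasets/00000.png
data/nyu_datasets/00001.jpg,data/nyu_datasets/00001.png
```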

# Example
- input

![input](images/input.png)

- output

![output](images/output.png)

---

Copyright (c) 2016 Masahiro Imai
Released under the MIT license

--------------------------------------------------------------------------------
/data/readme.md:
--------------------------------------------------------------------------------
NYU Depth Dataset V2
http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html

prepare_data.py downloads nyu_depth_v2_labeled.mat into this directory automatically if it is not already present.

--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.platform import gfile
import numpy as np
from PIL import Image

IMAGE_HEIGHT = 228
IMAGE_WIDTH = 304
TARGET_HEIGHT = 55
TARGET_WIDTH = 74


class DataSet:
    def __init__(self, batch_size):
        self.batch_size = batch_size

    def csv_inputs(self, csv_file_path):
        filename_queue = tf.train.string_input_producer([csv_file_path], shuffle=True)
        reader = tf.TextLineReader()
        _, serialized_example = reader.read(filename_queue)
        filename, depth_filename = tf.decode_csv(serialized_example, [["path"], ["annotation"]])
        # input image
        jpg = tf.read_file(filename)
        image = tf.image.decode_jpeg(jpg, channels=3)
        image = tf.cast(image, tf.float32)
        # target depth map, scaled to [0, 1]
        depth_png = tf.read_file(depth_filename)
        depth = tf.image.decode_png(depth_png, channels=1)
        depth = tf.cast(depth, tf.float32)
        depth = tf.div(depth, 255.0)
        # resize to the network input / output resolutions
        image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
        depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
        # validity mask: 1 where a ground-truth depth exists, 0 otherwise
        invalid_depth = tf.sign(depth)
        # generate batches
        images, depths, invalid_depths = tf.train.batch(
            [image, depth, invalid_depth],
            batch_size=self.batch_size,
            num_threads=4,
            capacity=50 + 3 * self.batch_size,
        )
        return images, depths, invalid_depths


def output_predict(depths, images, output_dir):
    print("output predict into %s" % output_dir)
    if not gfile.Exists(output_dir):
        gfile.MakeDirs(output_dir)
    for i, (image, depth) in enumerate(zip(images, depths)):
        pilimg = Image.fromarray(np.uint8(image))
        image_name = "%s/%05d_org.png" % (output_dir, i)
        pilimg.save(image_name)
        # depth is (height, width, 1); move the channel axis to the front
        depth = depth.transpose(2, 0, 1)
        if np.max(depth) != 0:
            ra_depth = (depth / np.max(depth)) * 255.0
        else:
            ra_depth = depth * 255.0
        depth_pil = Image.fromarray(np.uint8(ra_depth[0]), mode="L")
        depth_name = "%s/%05d.png" % (output_dir, i)
        depth_pil.save(depth_name)
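

# The block below is a minimal smoke test for the input pipeline, not part of
# training (task.py is the entry point). It assumes a train.csv, as written by
# prepare_data.py, exists in the current directory.
if __name__ == "__main__":
    dataset = DataSet(batch_size=2)
    images, depths, invalid_depths = dataset.csv_inputs("train.csv")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        images_val, depths_val, masks_val = sess.run([images, depths, invalid_depths])
        print("images:", images_val.shape)   # (2, 228, 304, 3)
        print("depths:", depths_val.shape)   # (2, 55, 74, 1)
        print("masks:", masks_val.shape)     # (2, 55, 74, 1)
        coord.request_stop()
        coord.join(threads)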

--------------------------------------------------------------------------------
/images/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/input.png

--------------------------------------------------------------------------------
/images/network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/network.png

--------------------------------------------------------------------------------
/images/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/output.png

--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
# encoding: utf-8

# tensorflow
import tensorflow as tf
import math
from model_part import conv2d
from model_part import fc


def inference(images, reuse=False, trainable=True):
    """Coarse network: predicts a global, low-resolution (55x74) depth map."""
    coarse1_conv = conv2d('coarse1', images, [11, 11, 3, 96], [96], [1, 4, 4, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse1 = tf.nn.max_pool(coarse1_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1')
    coarse2_conv = conv2d('coarse2', coarse1, [5, 5, 96, 256], [256], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse2 = tf.nn.max_pool(coarse2_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
    coarse3 = conv2d('coarse3', coarse2, [3, 3, 256, 384], [384], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse4 = conv2d('coarse4', coarse3, [3, 3, 384, 384], [384], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse5 = conv2d('coarse5', coarse4, [3, 3, 384, 256], [256], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
    coarse6 = fc('coarse6', coarse5, [6 * 10 * 256, 4096], [4096], reuse=reuse, trainable=trainable)
    coarse7 = fc('coarse7', coarse6, [4096, 4070], [4070], reuse=reuse, trainable=trainable)
    # 4070 = 55 * 74: reshape the fully connected output back into a depth map
    coarse7_output = tf.reshape(coarse7, [-1, 55, 74, 1])
    return coarse7_output


def inference_refine(images, coarse7_output, keep_conv, reuse=False, trainable=True):
    """Fine network: refines the coarse prediction with local image detail."""
    fine1_conv = conv2d('fine1', images, [9, 9, 3, 63], [63], [1, 2, 2, 1], padding='VALID', reuse=reuse, trainable=trainable)
    fine1 = tf.nn.max_pool(fine1_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='fine_pool1')
    fine1_dropout = tf.nn.dropout(fine1, keep_conv)
    # 63 fine feature maps + 1 coarse depth channel = 64 channels
    fine2 = tf.concat([fine1_dropout, coarse7_output], 3)
    fine3 = conv2d('fine3', fine2, [5, 5, 64, 64], [64], [1, 1, 1, 1], padding='SAME', reuse=reuse, trainable=trainable)
    fine3_dropout = tf.nn.dropout(fine3, keep_conv)
    fine4 = conv2d('fine4', fine3_dropout, [5, 5, 64, 1], [1], [1, 1, 1, 1], padding='SAME', reuse=reuse, trainable=trainable)
    return fine4

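
# loss() below computes, over the n = 55 * 74 output pixels, the same
# scale-invariant error used in Eigen et al. (2014) with lambda = 0.5
# (applied here to the normalized depths rather than log depths):
#
#     L(y, y*) = (1/n) * sum_i(d_i^2) - (lambda / n^2) * (sum_i(d_i))^2,
#     d_i = y_i - y*_i
#
# invalid_depths is the validity mask from dataset.py (tf.sign(depth)),
# so pixels without a ground-truth depth contribute d_i = 0.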
def loss(logits, depths, invalid_depths):
    logits_flat = tf.reshape(logits, [-1, 55 * 74])
    depths_flat = tf.reshape(depths, [-1, 55 * 74])
    invalid_depths_flat = tf.reshape(invalid_depths, [-1, 55 * 74])

    # mask out pixels that have no ground-truth depth
    predict = tf.multiply(logits_flat, invalid_depths_flat)
    target = tf.multiply(depths_flat, invalid_depths_flat)
    d = tf.subtract(predict, target)
    square_d = tf.square(d)
    sum_square_d = tf.reduce_sum(square_d, 1)
    sum_d = tf.reduce_sum(d, 1)
    square_sum_d = tf.square(sum_d)
    cost = tf.reduce_mean(sum_square_d / (55.0 * 74.0) - 0.5 * square_sum_d / math.pow(55 * 74, 2))
    tf.add_to_collection('losses', cost)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def _add_loss_summaries(total_loss):
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    for l in losses + [total_loss]:
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))
    return loss_averages_op

--------------------------------------------------------------------------------
/model_part.py:
--------------------------------------------------------------------------------
import tensorflow as tf

TOWER_NAME = 'tower'
UPDATE_OPS_COLLECTION = '_update_ops_'


def _variable_with_weight_decay(name, shape, stddev, wd, trainable=True):
    var = _variable_on_gpu(name, shape, tf.truncated_normal_initializer(stddev=stddev), trainable=trainable)
    if wd:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var


def _variable_on_gpu(name, shape, initializer, trainable=True):
    # forward trainable so that trainable=False actually freezes the variable
    var = tf.get_variable(name, shape, initializer=initializer, trainable=trainable)
    return var


def conv2d(scope_name, inputs, shape, bias_shape, stride, padding='VALID', wd=0.0, reuse=False, trainable=True):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()
        kernel = _variable_with_weight_decay(
            'weights',
            shape=shape,
            stddev=0.01,
            wd=wd,
            trainable=trainable
        )
        conv = tf.nn.conv2d(inputs, kernel, stride, padding=padding)
        biases = _variable_on_gpu('biases', bias_shape, tf.constant_initializer(0.1), trainable=trainable)
        bias = tf.nn.bias_add(conv, biases)
        conv_ = tf.nn.relu(bias, name=scope.name)
        return conv_


def fc(scope_name, inputs, shape, bias_shape, wd=0.04, reuse=False, trainable=True):
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()
        flat = tf.reshape(inputs, [-1, shape[0]])
        weights = _variable_with_weight_decay(
            'weights',
            shape,
            stddev=0.01,
            wd=wd,
            trainable=trainable
        )
        biases = _variable_on_gpu('biases', bias_shape, tf.constant_initializer(0.1), trainable=trainable)
        fc = tf.nn.relu_layer(flat, weights, biases, name=scope.name)
        return fc

--------------------------------------------------------------------------------
/prepare_data.py:
--------------------------------------------------------------------------------
# encoding: utf-8
import os
import numpy as np
import h5py
from PIL import Image
import random
import wget


def convert_nyu(path):
    imgdir = os.path.join("data", "nyu_datasets")
    if not os.path.exists(imgdir):
        os.makedirs(imgdir)

    nyuurl = 'http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/nyu_depth_v2_labeled.mat'
    mat_file = os.path.join("data", "nyu_depth_v2_labeled.mat")
    if not os.path.exists(mat_file):
        filename = wget.download(nyuurl, out="data")
        print('\ndownloaded: ', filename)

    print("load dataset: %s" % path)
    f = h5py.File(path, 'r')

    trains = []
    for i, (image, depth) in enumerate(zip(f['images'], f['depths'])):
        ra_image = image.transpose(2, 1, 0)
        ra_depth = depth.transpose(1, 0)
        re_depth = (ra_depth / np.max(ra_depth)) * 255.0
        image_pil = Image.fromarray(np.uint8(ra_image))
        depth_pil = Image.fromarray(np.uint8(re_depth))
        image_name = os.path.join("data", "nyu_datasets", "%05d.jpg" % i)
        image_pil.save(image_name)
        depth_name = os.path.join("data", "nyu_datasets", "%05d.png" % i)
        depth_pil.save(depth_name)

        trains.append((image_name, depth_name))

    random.shuffle(trains)

    # start from a fresh train.csv
    if os.path.exists('train.csv'):
        os.remove('train.csv')

    with open('train.csv', 'w') as output:
        for (image_name, depth_name) in trains:
            output.write("%s,%s" % (image_name, depth_name))
            output.write("\n")


if __name__ == '__main__':
    nyu_path = 'data/nyu_depth_v2_labeled.mat'
    convert_nyu(nyu_path)

--------------------------------------------------------------------------------
/task.py:
--------------------------------------------------------------------------------
# encoding: utf-8

from datetime import datetime
from tensorflow.python.platform import gfile
import numpy as np
import tensorflow as tf
from dataset import DataSet
from dataset import output_predict
import model
import train_operation as op

MAX_STEPS = 10000000
LOG_DEVICE_PLACEMENT = True
BATCH_SIZE = 8
TRAIN_FILE = "train.csv"
COARSE_DIR = "coarse"
REFINE_DIR = "refine"

REFINE_TRAIN = True
FINE_TUNE = True
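
# Two-stage training, following the paper: first train the coarse network
# (REFINE_TRAIN = False, FINE_TUNE = False), which saves checkpoints under
# COARSE_DIR; then set both flags to True to train the refine network on top
# of the restored, frozen coarse network, saving checkpoints under REFINE_DIR.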


def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        dataset = DataSet(BATCH_SIZE)
        images, depths, invalid_depths = dataset.csv_inputs(TRAIN_FILE)
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        if REFINE_TRAIN:
            print("refine train.")
            # the coarse network is kept frozen while the refine network trains
            coarse = model.inference(images, trainable=False)
            logits = model.inference_refine(images, coarse, keep_conv)
        else:
            print("coarse train.")
            logits = model.inference(images)
        loss = model.loss(logits, depths, invalid_depths)
        train_op = op.train(loss, global_step, BATCH_SIZE)
        init_op = tf.global_variables_initializer()

        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)

        # collect coarse and refine parameters so they can be saved and restored separately
        coarse_params = {}
        refine_params = {}
        if REFINE_TRAIN:
            for variable in tf.global_variables():
                variable_name = variable.name
                print("parameter: %s" % variable_name)
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % variable_name)
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        # define savers
        print(coarse_params)
        saver_coarse = tf.train.Saver(coarse_params)
        if REFINE_TRAIN:
            saver_refine = tf.train.Saver(refine_params)
        # fine tune: restore previously trained weights when checkpoints exist
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print("Pretrained coarse Model Loading.")
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
                print("Pretrained coarse Model Restored.")
            else:
                print("No Pretrained coarse Model.")
            if REFINE_TRAIN:
                refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
                if refine_ckpt and refine_ckpt.model_checkpoint_path:
                    print("Pretrained refine Model Loading.")
                    saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                    print("Pretrained refine Model Restored.")
                else:
                    print("No Pretrained refine Model.")

        # train
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for step in range(MAX_STEPS):
            index = 0
            for i in range(1000):
                _, loss_value, logits_val, images_val = sess.run(
                    [train_op, loss, logits, images],
                    feed_dict={keep_conv: 0.8, keep_hidden: 0.5})
                if index % 10 == 0:
                    print("%s: %d[epoch]: %d[iteration]: train loss %f" % (datetime.now(), step, index, loss_value))
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                if index % 500 == 0:
                    if REFINE_TRAIN:
                        output_predict(logits_val, images_val, "data/predict_refine_%05d_%05d" % (step, i))
                    else:
                        output_predict(logits_val, images_val, "data/predict_%05d_%05d" % (step, i))
                index += 1

            if step % 5 == 0 or (step + 1) == MAX_STEPS:
                if REFINE_TRAIN:
                    refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                    saver_refine.save(sess, refine_checkpoint_path, global_step=step)
                else:
                    coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()


def main(argv=None):
    if not gfile.Exists(COARSE_DIR):
        gfile.MakeDirs(COARSE_DIR)
    if not gfile.Exists(REFINE_DIR):
        gfile.MakeDirs(REFINE_DIR)
    train()


if __name__ == '__main__':
    tf.app.run()

--------------------------------------------------------------------------------
/train_operation.py:
--------------------------------------------------------------------------------
# encoding: utf-8

import tensorflow as tf

NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 500
NUM_EPOCHS_PER_DECAY = 30
INITIAL_LEARNING_RATE = 0.0001
LEARNING_RATE_DECAY_FACTOR = 0.9
MOVING_AVERAGE_DECAY = 0.999999


def _add_loss_summaries(total_loss):
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])
    for l in losses + [total_loss]:
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))
    return loss_averages_op

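
# With the defaults above and BATCH_SIZE = 8 in task.py, one nominal epoch is
# 500 / 8 = 62.5 batches, so the learning rate is multiplied by 0.9 every
# decay_steps = int(62.5 * 30) = 1875 optimizer steps (staircase decay).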

def train(total_loss, global_step, batch_size):
    num_batches_per_epoch = float(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN) / batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
    lr = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE,
        global_step,
        decay_steps,
        LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    tf.summary.scalar('learning_rate', lr)
    loss_averages_op = _add_loss_summaries(total_loss)
    # compute gradients only after the loss moving averages have been updated
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    # histogram summaries for trainable variables and their gradients
    for var in tf.trainable_variables():
        print(var.op.name)
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

--------------------------------------------------------------------------------