├── .gitignore
├── README.md
├── data
│   └── readme.md
├── dataset.py
├── images
│   ├── input.png
│   ├── network.png
│   └── output.png
├── model.py
├── model_part.py
├── prepare_data.py
├── task.py
└── train_operation.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cnn_depth_tensorflow
2 | cnn_depth_tensorflow is a TensorFlow implementation of single-image depth estimation.
3 |
4 | The original paper is "Depth Map Prediction from a Single Image using a Multi-Scale Deep Network":
5 | https://arxiv.org/abs/1406.2283
6 |
7 | 
8 |
9 | # Requirements
10 | - TensorFlow 0.10+
11 | - NumPy, h5py, Pillow (PIL), wget
12 |
13 | # How to train
14 | - Download the training data (see readme.md in the data directory).
15 | - Convert the .mat file to PNG images (this also writes train.csv; a sample is shown below):
16 | ```
17 | python prepare_data.py
18 | ```
19 |
20 | - Let's train:
21 | ```
22 | python task.py
23 | ```
24 |
25 | - Predicted images are written to the data directory as training progresses.
26 |
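27 | `prepare_data.py` also writes `train.csv`, with one image/depth pair per line in shuffled order. The paths below assume the default output directory:
28 |
29 | ```
30 | data/nyu_datasets/00000.jpg,data/nyu_datasets/00000.png
31 | data/nyu_datasets/00001.jpg,data/nyu_datasets/00001.png
32 | ```
33 |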
34 | # Example
35 | - input
36 |
37 | 
38 |
39 | - output
40 |
41 | 
42 |
43 | ---
44 |
45 | Copyright (c) 2016 Masahiro Imai
46 | Released under the MIT license
47 |
--------------------------------------------------------------------------------
/data/readme.md:
--------------------------------------------------------------------------------
1 | NYU Depth Dataset V2
2 | http://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html
3 |
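4 | Running `python prepare_data.py` from the repository root downloads `nyu_depth_v2_labeled.mat` into this directory automatically (if it is not already present) and converts it into JPEG/PNG pairs under `data/nyu_datasets/`.
5 |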
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.platform import gfile
3 | import numpy as np
4 | from PIL import Image
5 |
6 | IMAGE_HEIGHT = 228
7 | IMAGE_WIDTH = 304
8 | TARGET_HEIGHT = 55
9 | TARGET_WIDTH = 74
10 |
11 | class DataSet:
12 | def __init__(self, batch_size):
13 | self.batch_size = batch_size
14 |
15 | def csv_inputs(self, csv_file_path):
16 | filename_queue = tf.train.string_input_producer([csv_file_path], shuffle=True)
17 | reader = tf.TextLineReader()
18 | _, serialized_example = reader.read(filename_queue)
19 | filename, depth_filename = tf.decode_csv(serialized_example, [["path"], ["annotation"]])
20 | # input
21 | jpg = tf.read_file(filename)
22 | image = tf.image.decode_jpeg(jpg, channels=3)
23 | image = tf.cast(image, tf.float32)
24 | # target
25 | depth_png = tf.read_file(depth_filename)
26 | depth = tf.image.decode_png(depth_png, channels=1)
27 | depth = tf.cast(depth, tf.float32)
28 | depth = tf.div(depth, [255.0])
29 | #depth = tf.cast(depth, tf.int64)
30 | # resize
31 | image = tf.image.resize_images(image, (IMAGE_HEIGHT, IMAGE_WIDTH))
32 | depth = tf.image.resize_images(depth, (TARGET_HEIGHT, TARGET_WIDTH))
33 |         invalid_depth = tf.sign(depth)  # 1.0 where ground-truth depth is present, 0.0 where missing; used to mask the loss
34 | # generate batch
35 | images, depths, invalid_depths = tf.train.batch(
36 | [image, depth, invalid_depth],
37 | batch_size=self.batch_size,
38 | num_threads=4,
39 | capacity= 50 + 3 * self.batch_size,
40 | )
41 | return images, depths, invalid_depths
42 |
43 |
44 | def output_predict(depths, images, output_dir):
45 | print("output predict into %s" % output_dir)
46 | if not gfile.Exists(output_dir):
47 | gfile.MakeDirs(output_dir)
48 | for i, (image, depth) in enumerate(zip(images, depths)):
49 | pilimg = Image.fromarray(np.uint8(image))
50 | image_name = "%s/%05d_org.png" % (output_dir, i)
51 | pilimg.save(image_name)
52 | depth = depth.transpose(2, 0, 1)
53 | if np.max(depth) != 0:
54 | ra_depth = (depth/np.max(depth))*255.0
55 | else:
56 | ra_depth = depth*255.0
57 | depth_pil = Image.fromarray(np.uint8(ra_depth[0]), mode="L")
58 | depth_name = "%s/%05d.png" % (output_dir, i)
59 | depth_pil.save(depth_name)
60 |
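61 | # Usage sketch (added for illustration; not part of the original module).
62 | # csv_inputs builds queue-based input ops, so the returned tensors must be
63 | # evaluated in a session with queue runners started, roughly:
64 | #
65 | #   dataset = DataSet(batch_size=8)
66 | #   images, depths, invalid_depths = dataset.csv_inputs("train.csv")
67 | #   with tf.Session() as sess:
68 | #       coord = tf.train.Coordinator()
69 | #       threads = tf.train.start_queue_runners(sess=sess, coord=coord)
70 | #       imgs, dps = sess.run([images, depths])
71 | #       output_predict(dps, imgs, "data/sample_out")
72 | #       coord.request_stop()
73 | #       coord.join(threads)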
--------------------------------------------------------------------------------
/images/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/input.png
--------------------------------------------------------------------------------
/images/network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/network.png
--------------------------------------------------------------------------------
/images/output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MasazI/cnn_depth_tensorflow/7959165c8924394154c4229a4b24c163e6dc70e4/images/output.png
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | # tensorflow
4 | import tensorflow as tf
5 | import math
6 | from model_part import conv2d
7 | from model_part import fc
8 |
9 | def inference(images, reuse=False, trainable=True):
10 | coarse1_conv = conv2d('coarse1', images, [11, 11, 3, 96], [96], [1, 4, 4, 1], padding='VALID', reuse=reuse, trainable=trainable)
11 | coarse1 = tf.nn.max_pool(coarse1_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1')
12 | coarse2_conv = conv2d('coarse2', coarse1, [5, 5, 96, 256], [256], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
13 |     coarse2 = tf.nn.max_pool(coarse2_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')
14 | coarse3 = conv2d('coarse3', coarse2, [3, 3, 256, 384], [384], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
15 | coarse4 = conv2d('coarse4', coarse3, [3, 3, 384, 384], [384], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
16 | coarse5 = conv2d('coarse5', coarse4, [3, 3, 384, 256], [256], [1, 1, 1, 1], padding='VALID', reuse=reuse, trainable=trainable)
17 | coarse6 = fc('coarse6', coarse5, [6*10*256, 4096], [4096], reuse=reuse, trainable=trainable)
18 | coarse7 = fc('coarse7', coarse6, [4096, 4070], [4070], reuse=reuse, trainable=trainable)
19 | coarse7_output = tf.reshape(coarse7, [-1, 55, 74, 1])
20 | return coarse7_output
21 |
22 |
23 | def inference_refine(images, coarse7_output, keep_conv, reuse=False, trainable=True):
24 | fine1_conv = conv2d('fine1', images, [9, 9, 3, 63], [63], [1, 2, 2, 1], padding='VALID', reuse=reuse, trainable=trainable)
25 | fine1 = tf.nn.max_pool(fine1_conv, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='fine_pool1')
26 | fine1_dropout = tf.nn.dropout(fine1, keep_conv)
27 | fine2 = tf.concat([fine1_dropout, coarse7_output], 3)
28 | fine3 = conv2d('fine3', fine2, [5, 5, 64, 64], [64], [1, 1, 1, 1], padding='SAME', reuse=reuse, trainable=trainable)
29 | fine3_dropout = tf.nn.dropout(fine3, keep_conv)
30 | fine4 = conv2d('fine4', fine3_dropout, [5, 5, 64, 1], [1], [1, 1, 1, 1], padding='SAME', reuse=reuse, trainable=trainable)
31 | return fine4
32 |
33 |
34 | def loss(logits, depths, invalid_depths):
35 | logits_flat = tf.reshape(logits, [-1, 55*74])
36 | depths_flat = tf.reshape(depths, [-1, 55*74])
37 | invalid_depths_flat = tf.reshape(invalid_depths, [-1, 55*74])
38 |
39 | predict = tf.multiply(logits_flat, invalid_depths_flat)
40 | target = tf.multiply(depths_flat, invalid_depths_flat)
41 | d = tf.subtract(predict, target)
42 | square_d = tf.square(d)
43 | sum_square_d = tf.reduce_sum(square_d, 1)
44 | sum_d = tf.reduce_sum(d, 1)
45 |     square_sum_d = tf.square(sum_d)
46 |     cost = tf.reduce_mean(sum_square_d / (55.0*74.0) - 0.5*square_sum_d / math.pow(55*74, 2))
47 | tf.add_to_collection('losses', cost)
48 | return tf.add_n(tf.get_collection('losses'), name='total_loss')
49 |
50 |
51 | def _add_loss_summaries(total_loss):
52 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
53 | losses = tf.get_collection('losses')
54 | loss_averages_op = loss_averages.apply(losses + [total_loss])
55 | for l in losses + [total_loss]:
56 | tf.summary.scalar(l.op.name + ' (raw)', l)
57 | tf.summary.scalar(l.op.name, loss_averages.average(l))
58 | return loss_averages_op
59 |
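60 | # Note (added for clarity): loss() is the scale-invariant error of Eigen et al.
61 | # (arXiv:1406.2283) with lambda = 0.5 over the n = 55*74 output pixels,
62 | #
63 | #     L = (1/n) * sum_i(d_i^2) - (lambda / n^2) * (sum_i d_i)^2,   d_i = y_i - y*_i,
64 | #
65 | # where pixels without ground-truth depth are zeroed out via invalid_depths
66 | # before the difference is taken (no log transform is applied here).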
--------------------------------------------------------------------------------
/model_part.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | TOWER_NAME = 'tower'
4 | UPDATE_OPS_COLLECTION = '_update_ops_'
5 |
6 |
7 | def _variable_with_weight_decay(name, shape, stddev, wd, trainable=True):
8 | var = _variable_on_gpu(name, shape, tf.truncated_normal_initializer(stddev=stddev))
9 | if wd:
10 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
11 | tf.add_to_collection('losses', weight_decay)
12 | return var
13 |
14 |
15 | def _variable_on_gpu(name, shape, initializer):
16 |     var = tf.get_variable(name, shape, initializer=initializer)  # note: despite the function name, no device is pinned here
17 | return var
18 |
19 |
20 | def conv2d(scope_name, inputs, shape, bias_shape, stride, padding='VALID', wd=0.0, reuse=False, trainable=True):
21 | with tf.variable_scope(scope_name) as scope:
22 | if reuse is True:
23 | scope.reuse_variables()
24 | kernel = _variable_with_weight_decay(
25 | 'weights',
26 | shape=shape,
27 | stddev=0.01,
28 | wd=wd,
29 | trainable=trainable
30 | )
31 | conv = tf.nn.conv2d(inputs, kernel, stride, padding=padding)
32 | biases = _variable_on_gpu('biases', bias_shape, tf.constant_initializer(0.1))
33 | bias = tf.nn.bias_add(conv, biases)
34 | conv_ = tf.nn.relu(bias, name=scope.name)
35 | return conv_
36 |
37 |
38 | def fc(scope_name, inputs, shape, bias_shape, wd=0.04, reuse=False, trainable=True):
39 | with tf.variable_scope(scope_name) as scope:
40 | if reuse is True:
41 | scope.reuse_variables()
42 | flat = tf.reshape(inputs, [-1, shape[0]])
43 | weights = _variable_with_weight_decay(
44 | 'weights',
45 | shape,
46 | stddev=0.01,
47 | wd=wd,
48 | trainable=trainable
49 | )
50 | biases = _variable_on_gpu('biases', bias_shape, tf.constant_initializer(0.1))
51 | fc = tf.nn.relu_layer(flat, weights, biases, name=scope.name)
52 | return fc
53 |
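54 | # Usage sketch (added for illustration; not part of the original module).
55 | # conv2d/fc create their variables inside a named variable scope, e.g.:
56 | #
57 | #   x = tf.placeholder(tf.float32, [None, 228, 304, 3])
58 | #   h = conv2d('conv1', x, shape=[11, 11, 3, 96], bias_shape=[96],
59 | #              stride=[1, 4, 4, 1], padding='VALID')
60 | #
61 | # Calling conv2d again with the same scope name and reuse=True shares the
62 | # existing 'conv1/weights' and 'conv1/biases' variables.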
--------------------------------------------------------------------------------
/prepare_data.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 | import os
3 | import numpy as np
4 | import h5py
5 | from PIL import Image
6 | import random
7 | import wget
8 |
9 |
10 | def convert_nyu(path):
11 |     imgdir = os.path.join("data", "nyu_datasets")
12 | if not os.path.exists(imgdir):
13 | os.makedirs(imgdir)
14 |
15 | nyuurl = 'http://horatio.cs.nyu.edu/mit/silberman/nyu_depth_v2/nyu_depth_v2_labeled.mat'
16 | file = os.path.join("data", "nyu_depth_v2_labeled.mat")
17 | if not os.path.exists(file):
18 | filename = wget.download(nyuurl, out="data")
19 | print('\n downloaded: ', filename)
20 |
21 | print("load dataset: %s" % (path))
22 |     f = h5py.File(path, 'r')
23 |
24 | trains = []
25 | for i, (image, depth) in enumerate(zip(f['images'], f['depths'])):
26 | ra_image = image.transpose(2, 1, 0)
27 | ra_depth = depth.transpose(1, 0)
28 | re_depth = (ra_depth/np.max(ra_depth))*255.0
29 | image_pil = Image.fromarray(np.uint8(ra_image))
30 | depth_pil = Image.fromarray(np.uint8(re_depth))
31 | image_name = os.path.join("data", "nyu_datasets", "%05d.jpg" % (i))
32 | image_pil.save(image_name)
33 | depth_name = os.path.join("data", "nyu_datasets", "%05d.png" % (i))
34 | depth_pil.save(depth_name)
35 |
36 | trains.append((image_name, depth_name))
37 |
38 | random.shuffle(trains)
39 |
40 |     if os.path.exists('train.csv'):
41 |         os.remove('train.csv')
42 |
43 | with open('train.csv', 'w') as output:
44 | for (image_name, depth_name) in trains:
45 | output.write("%s,%s" % (image_name, depth_name))
46 | output.write("\n")
47 |
48 | if __name__ == '__main__':
49 | current_directory = os.getcwd()
50 | nyu_path = 'data/nyu_depth_v2_labeled.mat'
51 | convert_nyu(nyu_path)
52 |
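53 | # Note (added for clarity): the labeled NYU Depth v2 .mat holds 1449 aligned
54 | # RGB/depth pairs. This script writes each pair as data/nyu_datasets/<index>.jpg
55 | # (RGB) and data/nyu_datasets/<index>.png (depth rescaled to 0-255), and lists
56 | # the shuffled pairs, one "image,depth" line each, in train.csv.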
--------------------------------------------------------------------------------
/task.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 |
3 | from datetime import datetime
4 | from tensorflow.python.platform import gfile
5 | import numpy as np
6 | import tensorflow as tf
7 | from dataset import DataSet
8 | from dataset import output_predict
9 | import model
10 | import train_operation as op
11 |
12 | MAX_STEPS = 10000000
13 | LOG_DEVICE_PLACEMENT = True
14 | BATCH_SIZE = 8
15 | TRAIN_FILE = "train.csv"
16 | COARSE_DIR = "coarse"
17 | REFINE_DIR = "refine"
18 |
19 | REFINE_TRAIN = True
20 | FINE_TUNE = True
21 |
22 | def train():
23 | with tf.Graph().as_default():
24 | global_step = tf.Variable(0, trainable=False)
25 | dataset = DataSet(BATCH_SIZE)
26 | images, depths, invalid_depths = dataset.csv_inputs(TRAIN_FILE)
27 | keep_conv = tf.placeholder(tf.float32)
28 | keep_hidden = tf.placeholder(tf.float32)
29 | if REFINE_TRAIN:
30 | print("refine train.")
31 |             coarse = model.inference(images, trainable=False)
32 |             logits = model.inference_refine(images, coarse, keep_conv)
33 | else:
34 | print("coarse train.")
35 |             logits = model.inference(images)
36 | loss = model.loss(logits, depths, invalid_depths)
37 | train_op = op.train(loss, global_step, BATCH_SIZE)
38 | init_op = tf.global_variables_initializer()#tf.initialize_all_variables()
39 |
40 | # Session
41 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
42 | sess.run(init_op)
43 |
44 | # parameters
45 | coarse_params = {}
46 | refine_params = {}
47 | if REFINE_TRAIN:
48 | for variable in tf.global_variables():#tf.all_variables():
49 | variable_name = variable.name
50 | print("parameter: %s" % (variable_name))
51 | if variable_name.find("/") < 0 or variable_name.count("/") != 1:
52 | continue
53 | if variable_name.find('coarse') >= 0:
54 | coarse_params[variable_name] = variable
55 | print("parameter: %s" %(variable_name))
56 | if variable_name.find('fine') >= 0:
57 | refine_params[variable_name] = variable
58 | else:
59 | for variable in tf.trainable_variables():
60 | variable_name = variable.name
61 | print("parameter: %s" %(variable_name))
62 | if variable_name.find("/") < 0 or variable_name.count("/") != 1:
63 | continue
64 | if variable_name.find('coarse') >= 0:
65 | coarse_params[variable_name] = variable
66 | if variable_name.find('fine') >= 0:
67 | refine_params[variable_name] = variable
68 | # define saver
69 | print(coarse_params)
70 | saver_coarse = tf.train.Saver(coarse_params)
71 | if REFINE_TRAIN:
72 | saver_refine = tf.train.Saver(refine_params)
73 | # fine tune
74 | if FINE_TUNE:
75 | coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
76 | if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
77 | print("Pretrained coarse Model Loading.")
78 | saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
79 | print("Pretrained coarse Model Restored.")
80 | else:
81 | print("No Pretrained coarse Model.")
82 | if REFINE_TRAIN:
83 | refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
84 | if refine_ckpt and refine_ckpt.model_checkpoint_path:
85 | print("Pretrained refine Model Loading.")
86 | saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
87 | print("Pretrained refine Model Restored.")
88 | else:
89 | print("No Pretrained refine Model.")
90 |
91 | # train
92 | coord = tf.train.Coordinator()
93 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
94 | for step in range(MAX_STEPS):
95 | index = 0
96 | for i in range(1000):
97 | _, loss_value, logits_val, images_val = sess.run([train_op, loss, logits, images], feed_dict={keep_conv: 0.8, keep_hidden: 0.5})
98 | if index % 10 == 0:
99 | print("%s: %d[epoch]: %d[iteration]: train loss %f" % (datetime.now(), step, index, loss_value))
100 | assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
101 | if index % 500 == 0:
102 | if REFINE_TRAIN:
103 | output_predict(logits_val, images_val, "data/predict_refine_%05d_%05d" % (step, i))
104 | else:
105 | output_predict(logits_val, images_val, "data/predict_%05d_%05d" % (step, i))
106 | index += 1
107 |
108 |             if step % 5 == 0 or (step + 1) == MAX_STEPS:
109 | if REFINE_TRAIN:
110 | refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
111 | saver_refine.save(sess, refine_checkpoint_path, global_step=step)
112 | else:
113 | coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
114 | saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
115 | coord.request_stop()
116 | coord.join(threads)
117 | sess.close()
118 |
119 |
120 | def main(argv=None):
121 | if not gfile.Exists(COARSE_DIR):
122 | gfile.MakeDirs(COARSE_DIR)
123 | if not gfile.Exists(REFINE_DIR):
124 | gfile.MakeDirs(REFINE_DIR)
125 | train()
126 |
127 |
128 | if __name__ == '__main__':
129 | tf.app.run()
130 |
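131 | # Training workflow (added note): the two flags above select the phase.
132 | #   1. Coarse phase: set REFINE_TRAIN = False and train; checkpoints are saved
133 | #      under ./coarse.
134 | #   2. Refine phase: set REFINE_TRAIN = True and FINE_TUNE = True; the coarse
135 | #      checkpoint is restored and frozen (trainable=False) while the refinement
136 | #      layers are trained, with checkpoints saved under ./refine.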
--------------------------------------------------------------------------------
/train_operation.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | import tensorflow as tf
4 |
5 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 500
6 | NUM_EPOCHS_PER_DECAY = 30
7 | INITIAL_LEARNING_RATE = 0.0001
8 | LEARNING_RATE_DECAY_FACTOR = 0.9
9 | MOVING_AVERAGE_DECAY = 0.999999
10 |
11 |
12 | def _add_loss_summaries(total_loss):
13 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
14 | losses = tf.get_collection('losses')
15 | loss_averages_op = loss_averages.apply(losses + [total_loss])
16 | for l in losses + [total_loss]:
17 | tf.summary.scalar(l.op.name + ' (raw)', l)
18 | tf.summary.scalar(l.op.name, loss_averages.average(l))
19 | return loss_averages_op
20 |
21 |
22 | def train(total_loss, global_step, batch_size):
23 | num_batches_per_epoch = float(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN) / batch_size
24 | decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
25 | lr = tf.train.exponential_decay(
26 | INITIAL_LEARNING_RATE,
27 | global_step,
28 | decay_steps,
29 | LEARNING_RATE_DECAY_FACTOR,
30 | staircase=True)
31 | tf.summary.scalar('learning_rate', lr)
32 | loss_averages_op = _add_loss_summaries(total_loss)
33 | with tf.control_dependencies([loss_averages_op]):
34 | opt = tf.train.AdamOptimizer(lr)
35 | grads = opt.compute_gradients(total_loss)
36 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
37 | for var in tf.trainable_variables():
38 | print(var.op.name)
39 | tf.summary.histogram(var.op.name, var)
40 | for grad, var in grads:
41 | if grad is not None:
42 | tf.summary.histogram(var.op.name + '/gradients', grad)
43 | variable_averages = tf.train.ExponentialMovingAverage(
44 | MOVING_AVERAGE_DECAY, global_step)
45 | variables_averages_op = variable_averages.apply(tf.trainable_variables())
46 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
47 | train_op = tf.no_op(name='train')
48 |
49 | return train_op
50 |
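51 | # Worked example (added note): with the constants above and BATCH_SIZE = 8 from
52 | # task.py, num_batches_per_epoch = 500 / 8 = 62.5 and decay_steps = int(62.5 * 30)
53 | # = 1875, so the learning rate starts at 1e-4 and is multiplied by 0.9 every
54 | # 1875 optimizer steps (staircase decay).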
--------------------------------------------------------------------------------