├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── capnet └── capsnet.py ├── cifar ├── README.md ├── __init__.py ├── cifar10.png └── cifar_conv.py ├── gan ├── README.md ├── gan.png └── gan.py ├── kaggle ├── Avito │ ├── __init__.py │ ├── avito.py │ ├── avito2.py │ └── baseline_lgb.csv ├── CostaRicanHouseholdPovertyLevelPrediction │ └── kernel │ │ └── kernel.ipynb ├── DigitalRecognizer │ ├── __init__.py │ ├── digital_recognizer.py │ └── predict.csv ├── SantanderValuePrediction │ ├── SantanderPredict.ipynb │ ├── pipline.py │ └── santander.py ├── TalkingData │ ├── __init__.py │ └── talking_data.py ├── __init__.py ├── titanic │ ├── README.md │ ├── __init__.py │ ├── titanic.png │ └── titanic.py └── zillow │ ├── __init__.py │ ├── location.py │ ├── log_error.py │ ├── log_error_hist.py │ ├── missing_data.py │ ├── month.py │ └── train_data_shape.py ├── mnist ├── __init__.py ├── fully_connected_feed.py ├── fully_connected_feed_simple.py ├── mnist.py ├── mnist_conv.py ├── mnist_simple.py ├── mnist_softmax.py ├── mnist_with_summaries.py └── mnist_with_summary.py ├── reading └── capsnet │ └── drbc.pdf ├── self_driving ├── README.md ├── __init__.py ├── lane_detect │ ├── README.md │ ├── __init__.py │ ├── comma_ai_lane_detect.py │ ├── lane_detect.png │ └── udacity_lane_detect.py ├── optical_flow │ ├── __init__.py │ └── python │ │ ├── __init__.py │ │ ├── common.py │ │ ├── opt_flow.py │ │ ├── tst_scene_render.py │ │ └── video.py ├── road_seg │ ├── README.md │ ├── __init__.py │ ├── convnet.py │ ├── fcn8_vgg.py │ ├── road_seg.png │ ├── test_fcn8_vgg.py │ └── unet.py ├── segnet │ ├── README.md │ ├── __init__.py │ ├── evaluate.py │ ├── evaluate_kitti.py │ ├── evaluate_test.py │ ├── merge_output.sh │ ├── prepare_camvid.py │ ├── prepare_camvid.sh │ ├── prepare_kitti.py │ ├── prepare_kitti.sh │ ├── prepare_kitti_test.py │ ├── prepare_kitti_test.sh │ ├── segnet.png │ ├── segnet_vgg.py │ ├── segnet_vgg_test.py │ ├── train.py │ └── train_kitti.py └── steering │ ├── __init__.py │ ├── driving_data.py │ ├── evaluate.py │ ├── model.py │ ├── model_resnet50.py │ ├── model_saliency.py │ ├── split_data.sh │ └── train.py ├── utils ├── __init__.py ├── camvid.py ├── camvid_test.py ├── cifar.py ├── cifar_test.py ├── dataset.py ├── kitti.py ├── kitti_segnet.py ├── my_image.py ├── my_image_test.py ├── svhn.py ├── udacity_data.py ├── udacity_data_test.py ├── udacity_train.txt ├── udacity_val.txt └── utils.py └── vae ├── README.md ├── __init__.py ├── vae_mnist.png ├── vae_mnist.py ├── vaegan_cifar.py └── vaegan_svhn.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Built application files 2 | *.apk 3 | *.ap_ 4 | 5 | # Files for the Dalvik VM 6 | *.dex 7 | 8 | # Java class files 9 | *.class 10 | 11 | # Generated files 12 | bin/ 13 | gen/ 14 | .idea/ 15 | 16 | # Gradle files 17 | .gradle/ 18 | build/ 19 | 20 | # Local configuration file (sdk path, etc) 21 | local.properties 22 | 23 | # Proguard folder generated by Eclipse 24 | proguard/ 25 | 26 | # Log Files 27 | *.log 28 | 29 | .DS_Store 30 | 31 | *.pyc 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Machine Learning 2 | ================ 3 | 4 | Welcome to my blog [听雨居](https://limengweb.wordpress.com). It contains detailed descriptions of the code here.
-------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/__init__.py -------------------------------------------------------------------------------- /capnet/capsnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, unicode_literals 2 | 3 | # %matplotlib inline  -- notebook-only magic; it is a syntax error in a plain .py script 4 | import matplotlib 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | tf.reset_default_graph() 10 | np.random.seed(42) 11 | tf.set_random_seed(42) 12 | 13 | from tensorflow.examples.tutorials.mnist import input_data 14 | 15 | mnist = input_data.read_data_sets("/tmp/data/") 16 | 17 | n_samples = 5 18 | 19 | plt.figure(figsize=(n_samples * 2, 3)) 20 | for index in range(n_samples): 21 | plt.subplot(1, n_samples, index + 1) 22 | sample_image = mnist.train.images[index].reshape(28, 28) 23 | plt.imshow(sample_image, cmap="binary") 24 | plt.axis("off") 25 | 26 | plt.show() 27 |
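capsnet.py currently stops after loading and plotting MNIST samples; no capsule layers are implemented yet. As a hedged sketch of the machinery the file is presumably building toward (an assumption based on the file name and the "Dynamic Routing Between Capsules" paper kept under reading/capsnet), the squash nonlinearity from that paper could look like this in the same TF 1.x style; `squash` and its arguments are illustrative, not part of the repository:

```python
import tensorflow as tf

def squash(s, axis=-1, epsilon=1e-7, name=None):
    # Shrinks short vectors toward zero and long vectors toward unit
    # length while preserving direction, as in Sabour et al. (2017).
    with tf.name_scope(name, default_name="squash"):
        squared_norm = tf.reduce_sum(tf.square(s), axis=axis, keep_dims=True)
        safe_norm = tf.sqrt(squared_norm + epsilon)  # avoids NaN gradients at 0
        return (squared_norm / (1. + squared_norm)) * (s / safe_norm)
```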
-------------------------------------------------------------------------------- /cifar/README.md: -------------------------------------------------------------------------------- 1 | [Recognizing CIFAR-10 with Convolutional Neural Networks](https://limengweb.wordpress.com/2016/12/31/%E5%88%A9%E7%94%A8%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%E8%AF%86%E5%88%ABcifar-10/) 2 | 3 | ![cifar10](cifar10.png) 4 |
5 | -------------------------------------------------------------------------------- /cifar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/cifar/__init__.py -------------------------------------------------------------------------------- /cifar/cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/cifar/cifar10.png -------------------------------------------------------------------------------- /cifar/cifar_conv.py: -------------------------------------------------------------------------------- 1 | """A convolutional neural network for CIFAR-10 classification. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | from utils import cifar 9 | from utils.utils import put_kernels_on_grid 10 | 11 | EPOCH = 36000 12 | BATCH_SIZE = 128 13 | 14 | 15 | def weight_variable_with_decay(shape, wd): 16 | initial = tf.truncated_normal(shape, stddev=0.05, dtype=tf.float32) 17 | var = tf.Variable(initial, name='weights') 18 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss') 19 | tf.add_to_collection('losses', weight_decay) 20 | return var 21 | 22 | 23 | def bias_variable(shape): 24 | initial = tf.constant(0.0, shape=shape, dtype=tf.float32) 25 | return tf.Variable(initial, name='biases') 26 | 27 | 28 | def conv2d(x, W): 29 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 30 | 31 | 32 | def max_pool_2x2(x): 33 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], 34 | strides=[1, 2, 2, 1], padding='SAME') 35 | 36 | 37 | def conv_layer(layer_name, input, in_dim, in_ch, out_dim, out_size, summary_conv=False): 38 | with tf.name_scope(layer_name): 39 | # Initialize weights and bias 40 | W_conv = weight_variable_with_decay([in_dim, in_dim, in_ch, out_dim], 0.004) 41 | b_conv = bias_variable([out_dim]) 42 | 43 | # Log weights and bias 44 | tf.summary.histogram("weights", W_conv) 45 | tf.summary.histogram("biases", b_conv) 46 | 47 | # Draw weights in 8x8 grid for the first conv layer 48 | if summary_conv: 49 | kernel_grid = put_kernels_on_grid(W_conv, (8, 8)) 50 | tf.summary.image("kernel", kernel_grid, max_outputs=1) 51 | 52 | # Draw conv activation in 8x8 grid 53 | activation = tf.nn.bias_add(conv2d(input, W_conv), b_conv) 54 | # Only draw the activation for the first image in a batch 55 | activation_sample = tf.slice(activation, [0, 0, 0, 0], [1, out_size, out_size, out_dim]) 56 | activation_grid = put_kernels_on_grid(tf.transpose(activation_sample, [1, 2, 0, 3]), (8, 8)) 57 | tf.summary.image("conv/activations", activation_grid, max_outputs=1) 58 | 59 | # Draw relu activation in 8x8 grid 60 | activation = tf.nn.relu(activation) 61 | # Only draw the activation for the first image in a batch 62 | activation_sample = tf.slice(activation, [0, 0, 0, 0], [1, out_size, out_size, out_dim]) 63 | activation_grid = put_kernels_on_grid(tf.transpose(activation_sample, [1, 2, 0, 3]), (8, 8)) 64 | tf.summary.image("relu/activations", activation_grid, max_outputs=1) 65 | 66 | # 2x2 max pooling 67 | pool = max_pool_2x2(activation) 68 | 69 | return tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm') 70 | 71 | 72 | def fc_layer(layer_name, input, in_dim, out_dim, activation=True): 73 | with tf.name_scope(layer_name):
74 | # Initialize weights and bias 75 | W_fc = weight_variable_with_decay([in_dim, out_dim], 0.004) 76 | b_fc = bias_variable([out_dim]) 77 | 78 | # Log weights and bias 79 | tf.summary.histogram("weights", W_fc) 80 | tf.summary.histogram("biases", b_fc) 81 | 82 | # Don't apply the activation function to the last fc layer 83 | if activation: 84 | return tf.nn.relu(tf.nn.bias_add(tf.matmul(input, W_fc), b_fc)) 85 | else: 86 | return tf.nn.bias_add(tf.matmul(input, W_fc), b_fc) 87 | 88 | 89 | def loss(logits, labels): 90 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)) 91 | tf.add_to_collection('losses', cross_entropy) 92 | total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss') 93 | tf.summary.scalar('loss', total_loss) 94 | return total_loss 95 | 96 | 97 | def learning_rate(global_step): 98 | starter_learning_rate = 0.001 99 | learning_rate_1 = tf.train.exponential_decay( 100 | starter_learning_rate, global_step, EPOCH * 0.2, 0.1, staircase=True) 101 | learning_rate_2 = tf.train.exponential_decay( 102 | learning_rate_1, global_step, EPOCH * 0.4, 0.5, staircase=True) 103 | decayed_learning_rate = tf.train.exponential_decay( 104 | learning_rate_2, global_step, EPOCH * 0.6, 0.8, staircase=True) 105 | tf.summary.scalar('learning_rate', decayed_learning_rate) 106 | return decayed_learning_rate 107 | 108 | 109 | def main(_): 110 | cifar10 = cifar.Cifar() 111 | cifar10.ReadDataSets(one_hot=True) 112 | 113 | keep_prob = tf.placeholder(tf.float32) 114 | 115 | # Create the model 116 | x = tf.placeholder(tf.float32, [None, 3, 32, 32]) 117 | 118 | # Define loss and optimizer 119 | y_ = tf.placeholder(tf.float32, [None, 10]) 120 | 121 | x_image = tf.transpose(x, [0, 2, 3, 1]) 122 | 123 | tf.summary.image("images", x_image, max_outputs=1) 124 | 125 | h_pool1 = conv_layer("conv_layer1", x_image, 5, 3, 64, 32, summary_conv=True) 126 | h_pool2 = conv_layer("conv_layer2", h_pool1, 5, 64, 64, 16) 127 | 128 | h_conv3_flat = tf.reshape(h_pool2, [-1, 8 * 8 * 64]) 129 | 130 | h_fc1 = fc_layer('fc_layer1', h_conv3_flat, 8 * 8 * 64, 384, activation=True) 131 | h_fc2 = fc_layer('fc_layer2', h_fc1, 384, 192, activation=True) 132 | y_conv = fc_layer('fc_layer3', h_fc2, 192, 10, activation=False) 133 | 134 | global_step = tf.Variable(0, trainable=False) 135 | lr = learning_rate(global_step) 136 | 137 | total_loss = loss(y_conv, y_) 138 | optimizer = tf.train.AdamOptimizer(lr) 139 | grads_and_vars = optimizer.compute_gradients(total_loss) 140 | with tf.name_scope("conv_layer1_grad"): 141 | kernel_grad_grid = put_kernels_on_grid(grads_and_vars[0][0], (8, 8)) 142 | tf.summary.image("weight_grad", kernel_grad_grid, max_outputs=1) 143 | 144 | train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 145 | correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) 146 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 147 | 148 | sess = tf.InteractiveSession() 149 | 150 | merged = tf.summary.merge_all() 151 | train_writer = tf.summary.FileWriter('train', sess.graph) 152 | 153 | sess.run(tf.global_variables_initializer()) 154 | 155 | for i in range(EPOCH): 156 | batch = cifar10.train.next_batch(BATCH_SIZE) 157 | if i % 100 == 0: 158 | test_accuracy = accuracy.eval(feed_dict={x: cifar10.test.images, y_: cifar10.test.labels}) 159 | print("step %d, test accuracy %g" % (i, test_accuracy)) 160 | summary, _ = sess.run([merged, train_step], feed_dict={x: batch[0], y_: batch[1]})
161 | train_writer.add_summary(summary, i) 162 | 163 | print("test accuracy %g" % accuracy.eval(feed_dict={ 164 | x: cifar10.test.images, y_: cifar10.test.labels})) 165 | 166 | 167 | if __name__ == '__main__': 168 | tf.app.run(main=main) 169 |
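The three chained `tf.train.exponential_decay` ops in `learning_rate()` above compose: with `staircase=True`, each contributes `decay_rate ** (step // decay_steps)`, so the effective schedule is the product of all three factors. A small sketch (plain Python, derived from the constants above) that makes the resulting schedule explicit:

```python
# Closed form of cifar_conv.py's learning_rate() with EPOCH = 36000:
#   lr(s) = 0.001 * 0.1**(s // 7200) * 0.5**(s // 14400) * 0.8**(s // 21600)
def lr_schedule(step):
    return 0.001 * 0.1 ** (step // 7200) * 0.5 ** (step // 14400) * 0.8 ** (step // 21600)

for s in [0, 7200, 14400, 21600, 28800, 35999]:
    print(s, lr_schedule(s))  # prints the boundaries of the staircase
```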
-------------------------------------------------------------------------------- /gan/README.md: -------------------------------------------------------------------------------- 1 | [A Brief Analysis of Generative Adversarial Networks](https://limengweb.wordpress.com/2017/02/19/%E6%B5%85%E6%9E%90%E7%94%9F%E6%88%90%E5%AF%B9%E6%8A%97%E7%BD%91%E7%BB%9C/) 2 | 3 | ![gan](gan.png) 4 |
5 | -------------------------------------------------------------------------------- /gan/gan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/gan/gan.png -------------------------------------------------------------------------------- /gan/gan.py: -------------------------------------------------------------------------------- 1 | """Generative Adversarial Networks 2 | 3 | An example of distribution approximation using Generative Adversarial Networks in TensorFlow. 4 | """ 5 | import os 6 | 7 | os.environ["KERAS_BACKEND"] = "tensorflow" 8 | 9 | from keras.layers import Dense 10 | from keras.models import Sequential 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | import tensorflow as tf 14 | import seaborn as sns 15 | 16 | sns.set(color_codes=True) 17 | np.random.seed(688) 18 | 19 | RANDOM_PORTION = 0.01 20 | HIDDEN_SIZE = 16 21 | BATCH_SIZE = 256 22 | EPOCH = 15000 23 | SAMPLE_RATE = 50 24 | 25 | 26 | class DataDistribution(object): 27 | def __init__(self): 28 | self.mu = 4 29 | self.sigma = .5 30 | 31 | def sample(self, N): 32 | samples = np.random.normal(self.mu, self.sigma, N) 33 | samples.sort() 34 | return samples 35 | 36 | 37 | class GeneratorDistribution(object): 38 | def __init__(self, low, high): 39 | self._low = low 40 | self._high = high 41 | 42 | def sample(self, N): 43 | return np.linspace(self._low, self._high, N) + np.random.random(N) * RANDOM_PORTION 44 | 45 | 46 | def generator(hidden_size): 47 | model = Sequential() 48 | 49 | model.add(Dense(hidden_size, activation='softplus', batch_input_shape=(BATCH_SIZE, 1), init='normal', name="g0")) 50 | model.add(Dense(1, init='normal', name="g1")) 51 | 52 | return model 53 | 54 | 55 | def discriminator(hidden_size): 56 | model = Sequential() 57 | 58 | model.add(Dense(hidden_size * 2, activation='tanh', batch_input_shape=(BATCH_SIZE, 1), init='normal', name="d0")) 59 | model.add(Dense(hidden_size * 2, activation='tanh', init='normal', name="d1")) 60 | model.add(Dense(hidden_size * 2, activation='tanh', init='normal', name="d2")) 61 | model.add(Dense(1, activation='sigmoid', init='normal', name="d3")) 62 | 63 | return model 64 | 65 | 66 | def optimizer(loss, var_list): 67 | initial_learning_rate = 0.005 68 | decay = 0.95 69 | num_decay_steps = 150 70 | batch = tf.Variable(0) 71 | learning_rate = tf.train.exponential_decay( 72 | initial_learning_rate, 73 | batch, 74 | num_decay_steps, 75 | decay, 76 | staircase=True 77 | ) 78 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize( 79 | loss, 80 | global_step=batch, 81 | var_list=var_list 82 | ) 83 | return optimizer 84 | 85 | 86 | def export_animation(anim_frames): 87 | i = 0 88 | for t_data, g_data in anim_frames: 89 | f, ax = plt.subplots(figsize=(12, 8)) 90 | f.suptitle('Generative Adversarial Network', fontsize=15) 91 | plt.xlabel('Data values') 92 | plt.ylabel('Probability density') 93 | ax.set_xlim(-2, 10) 94 | ax.set_ylim(0, 1.2) 95 | sns.distplot(t_data, hist=False, rug=True, color='r', label='Target Data', ax=ax) 96 | sns.distplot(g_data, hist=False, rug=True, color='g', label='Generated Data', ax=ax) 97 | f.savefig("images/frame_" + str(i) + ".png") 98 | print("Frame index: ", i * SAMPLE_RATE) 99 | f.clf() 100 | plt.close() 101 | i += 1 102 | 103 | # Generate mp4 from images: 104 | # avconv -r 10 -i frame_%d.png -b:v 1000k gan.mp4 105 | # convert -delay 20 -loop 0 output/decision_*.png myimage.gif 106 |
107 | def train(_): 108 | anim_frames = [] 109 | with tf.variable_scope('GAN'): 110 | G = generator(HIDDEN_SIZE) 111 | D = discriminator(HIDDEN_SIZE) 112 | 113 | Z = G.input 114 | X = D.input 115 | tf.summary.histogram("target", X) 116 | 117 | D1 = D(X) 118 | G_train = G(Z) 119 | tf.summary.histogram("generated", G_train) 120 | D2 = D(G_train) 121 | 122 | loss_d = tf.reduce_mean(-tf.log(D1) - tf.log(1 - D2)) 123 | loss_g = tf.reduce_mean(-tf.log(D2)) 124 | 125 | tf.summary.scalar("loss_d", loss_d) 126 | tf.summary.scalar("loss_g", loss_g) 127 | 128 | g_params = G.trainable_weights 129 | d_params = D.trainable_weights 130 | 131 | opt_g = optimizer(loss_g, g_params) 132 | opt_d = optimizer(loss_d, d_params) 133 | 134 | with tf.Session() as session: 135 | merged = tf.summary.merge_all() 136 | train_writer = tf.summary.FileWriter('train', session.graph) 137 | 138 | session.run(tf.global_variables_initializer()) 139 | 140 | for step in range(EPOCH): 141 | # update discriminator 142 | x = DataDistribution().sample(BATCH_SIZE) 143 | gen = GeneratorDistribution(-2, 10) 144 | z = gen.sample(BATCH_SIZE) 145 | _, _, summary = session.run([loss_d, opt_d, merged], { 146 | X: np.reshape(x, (BATCH_SIZE, 1)), 147 | Z: np.reshape(z, (BATCH_SIZE, 1)) 148 | }) 149 | 150 | # update generator 151 | z = gen.sample(BATCH_SIZE) 152 | _, _, summary = session.run([loss_g, opt_g, merged], { 153 | X: np.reshape(x, (BATCH_SIZE, 1)), 154 | Z: np.reshape(z, (BATCH_SIZE, 1)) 155 | }) 156 | 157 | G_gen = session.run([G_train], { 158 | X: np.reshape(x, (BATCH_SIZE, 1)), 159 | Z: np.reshape(z, (BATCH_SIZE, 1)) 160 | }) 161 | 162 | train_writer.add_summary(summary, step) 163 | 164 | if step % SAMPLE_RATE == 0: 165 | anim_frames.append((x, G_gen)) 166 | print("step:", step) 167 | 168 | export_animation(anim_frames) 169 | 170 | 171 | if __name__ == "__main__": 172 | tf.app.run(main=train) 173 | -------------------------------------------------------------------------------- /kaggle/Avito/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/Avito/__init__.py
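gan.py takes `tf.log` of the discriminator's sigmoid outputs, which yields NaNs once D saturates at exactly 0 or 1. A numerically safer variant (a sketch of an alternative, not what gan.py does) keeps the pre-sigmoid logits, i.e. the discriminator's last Dense layer would drop its sigmoid activation, and uses `tf.nn.sigmoid_cross_entropy_with_logits`:

```python
import tensorflow as tf

# Sketch: stable GAN losses computed from raw discriminator logits.
def gan_losses(d_logits_real, d_logits_fake):
    bce = tf.nn.sigmoid_cross_entropy_with_logits
    loss_d = tf.reduce_mean(
        bce(labels=tf.ones_like(d_logits_real), logits=d_logits_real) +
        bce(labels=tf.zeros_like(d_logits_fake), logits=d_logits_fake))
    # Non-saturating generator loss, same intent as gan.py's -log D(G(z))
    loss_g = tf.reduce_mean(
        bce(labels=tf.ones_like(d_logits_fake), logits=d_logits_fake))
    return loss_d, loss_g
```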
-------------------------------------------------------------------------------- /kaggle/Avito/avito2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv) 2 | import matplotlib.pyplot as plt 3 | from sklearn import preprocessing, model_selection, metrics 4 | import lightgbm as lgb 5 | 6 | pd.options.mode.chained_assignment = None 7 | pd.options.display.max_columns = 999 8 | 9 | train_df = pd.read_csv("C:\\Users\\jowet\\Downloads\\kaggle\\avito\\train.csv", parse_dates=["activation_date"]) 10 | test_df = pd.read_csv("C:\\Users\\jowet\\Downloads\\kaggle\\avito\\test.csv", parse_dates=["activation_date"]) 11 | print("Train file rows and columns are : ", train_df.shape) 12 | print("Test file rows and columns are : ", test_df.shape) 13 | 14 | # Target and ID variables # 15 | train_y = train_df["deal_probability"].values 16 | test_id = test_df["item_id"].values 17 | 18 | # New variable on weekday # 19 | train_df["activation_weekday"] = train_df["activation_date"].dt.weekday 20 | test_df["activation_weekday"] = test_df["activation_date"].dt.weekday 21 | 22 | # Label encode the categorical variables # 23 | cat_vars = ["region", "city", "parent_category_name", "category_name", "user_type", "param_1", "param_2", "param_3"] 24 | for col in cat_vars: 25 | lbl = preprocessing.LabelEncoder() 26 | lbl.fit(list(train_df[col].values.astype('str')) + list(test_df[col].values.astype('str'))) 27 | train_df[col] = lbl.transform(list(train_df[col].values.astype('str'))) 28 | test_df[col] = lbl.transform(list(test_df[col].values.astype('str'))) 29 | 30 | cols_to_drop = ["item_id", "user_id", "title", "description", "activation_date", "image"] 31 | train_X = train_df.drop(cols_to_drop + ["deal_probability"], axis=1) 32 | test_X = test_df.drop(cols_to_drop, axis=1) 33 | 34 | 35 | def run_lgb(train_X, train_y, val_X, val_y, test_X): 36 | params = { 37 | "objective": "regression", 38 | "metric": "rmse", 39 | "num_leaves": 30, 40 | "learning_rate": 0.1, 41 | "bagging_fraction": 0.7, 42 | "feature_fraction": 0.7, 43 | "bagging_freq": 5, 44 | "bagging_seed": 2018, 45 | "verbosity": -1 46 | } 47 | 48 | lgtrain = lgb.Dataset(train_X, label=train_y) 49 | lgval = lgb.Dataset(val_X, label=val_y) 50 | evals_result = {} 51 | model = lgb.train(params, lgtrain, 10000, valid_sets=[lgval], early_stopping_rounds=100, verbose_eval=20, 52 | evals_result=evals_result) 53 | 54 | pred_test_y = model.predict(test_X, num_iteration=model.best_iteration) 55 | return pred_test_y, model, evals_result 56 | 57 | 58 | # Splitting the data for model training# 59 | dev_X = train_X.iloc[:-200000, :] 60 | val_X = train_X.iloc[-200000:, :] 61 | dev_y = train_y[:-200000] 62 | val_y = train_y[-200000:] 63 | print(dev_X.shape, val_X.shape, test_X.shape) 64 | 65 | # Training the model # 66 | pred_test, model, evals_result = run_lgb(dev_X, dev_y, val_X, val_y, test_X) 67 | 68 | # Making a submission file # 69 | pred_test[pred_test > 1] = 1 70 | pred_test[pred_test < 0] = 0 71 | sub_df = pd.DataFrame({"item_id": test_id}) 72 | sub_df["deal_probability"] = pred_test 73 | sub_df.to_csv("baseline_lgb.csv", index=False) 74 | 75 | fig, ax = plt.subplots(figsize=(12, 18)) 76 | lgb.plot_importance(model, max_num_features=50, height=0.8, ax=ax) 77 | ax.grid(False) 78 | plt.title("LightGBM - Feature Importance", fontsize=15) 79 | plt.show() 80 |
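run_lgb above captures `evals_result` but the script never reads it. A short sketch of how that history can be inspected after training (names taken from avito2.py; `'valid_0'` is LightGBM's default name for the first validation set):

```python
# Sketch: inspect the eval history captured by run_lgb.
rmse_curve = evals_result['valid_0']['rmse']  # one value per boosting round
print("best iteration:", model.best_iteration)
print("best validation RMSE:", min(rmse_curve))
```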
-------------------------------------------------------------------------------- /kaggle/DigitalRecognizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/DigitalRecognizer/__init__.py -------------------------------------------------------------------------------- /kaggle/DigitalRecognizer/digital_recognizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pandas as pd 3 | import numpy as np 4 | import keras 5 | from keras.models import Sequential 6 | from keras.layers import Dense, Dropout, Flatten 7 | from keras.layers import Conv2D, MaxPooling2D 8 | from keras import backend as K 9 | 10 | batch_size = 128 11 | num_classes = 10 12 | epochs = 12 13 | 14 | # input image dimensions 15 | img_rows, img_cols = 28, 28 16 | 17 | train = pd.read_csv('C:\\Users\\jowet\\Downloads\\kaggle\\digit_recognizer\\train.csv') 18 | test = pd.read_csv('C:\\Users\\jowet\\Downloads\\kaggle\\digit_recognizer\\test.csv') 19 | 20 | x_train = train.drop(['label'], axis=1).values 21 | y_train = train['label'].values 22 | x_test = test.values 23 | 24 | print(x_train.shape) 25 | print(y_train.shape) 26 | print(x_test.shape) 27 | 28 | if K.image_data_format() == 'channels_first': 29 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 30 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 31 | input_shape = (1, img_rows, img_cols) 32 | else: 33 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 34 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 35 | input_shape = (img_rows, img_cols, 1) 36 | 37 | x_train = x_train.astype('float32') 38 | x_test = x_test.astype('float32') 39 | x_train /= 255 40 | x_test /= 255 41 | print('x_train shape:', x_train.shape) 42 | print(x_train.shape[0], 'train samples') 43 | print(x_test.shape[0], 'test samples') 44 | 45 | # convert class vectors to binary class matrices 46 | y_train = keras.utils.to_categorical(y_train, num_classes) 47 | 48 | model = Sequential() 49 | model.add(Conv2D(32, kernel_size=(3, 3), 50 | activation='relu', 51 | input_shape=input_shape)) 52 | model.add(Conv2D(64, (3, 3), activation='relu')) 53 | model.add(MaxPooling2D(pool_size=(2, 2))) 54 | model.add(Dropout(0.25)) 55 | model.add(Flatten()) 56 | model.add(Dense(128, activation='relu')) 57 | model.add(Dropout(0.5)) 58 | model.add(Dense(num_classes, activation='softmax')) 59 | 60 | model.compile(loss=keras.losses.categorical_crossentropy, 61 | optimizer=keras.optimizers.Adadelta(), 62 | metrics=['accuracy']) 63 | 64 | model.fit(x_train, y_train, 65 | batch_size=batch_size, 66 | epochs=epochs, 67 | verbose=1) 68 | result = model.predict(x_test, verbose=0) 69 | 70 | predict = np.argmax(result, axis=1) 71 | sub_df = pd.DataFrame({"ImageId": range(1, len(predict) + 1)}) 72 | sub_df["Label"] = predict 73 | sub_df.to_csv("predict.csv", index=False) 74 | -------------------------------------------------------------------------------- /kaggle/SantanderValuePrediction/santander.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from xgboost import XGBRegressor 4 | import pandas as pd 5 | 6 | train = pd.read_csv("C:\\Users\\jowet\\Downloads\\Santander\\train.csv") 7 | test = pd.read_csv("C:\\Users\\jowet\\Downloads\\Santander\\test.csv") 8 | 9 | train.drop('ID', axis=1, inplace=True) 10 | 11 | y_train = train.pop('target') 12 | pred_index = test.pop('ID') 13 | 14 | reg = XGBRegressor() 15 | reg.fit(train, y_train) 16 | y_pred = reg.predict(test) 17 | 18 | submit = pd.DataFrame() 19 | submit['ID'] = pred_index 20 | submit['target'] = y_pred 21 | submit.to_csv('my_XGB_prediction.csv', index=False) 22 |
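santander.py fits XGBRegressor on the raw target. Assuming the Santander Value Prediction competition's RMSLE metric, a common refinement (a sketch, not what the script does) is to train in log space and invert the transform for the submission:

```python
import numpy as np
from xgboost import XGBRegressor

# Sketch: fit on log1p(target) when the metric is log-based (RMSLE).
reg = XGBRegressor()
reg.fit(train, np.log1p(y_train))     # train/y_train as defined in santander.py
y_pred = np.expm1(reg.predict(test))  # back to the original scale
```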
-------------------------------------------------------------------------------- /kaggle/TalkingData/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/TalkingData/__init__.py -------------------------------------------------------------------------------- /kaggle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/__init__.py -------------------------------------------------------------------------------- /kaggle/titanic/README.md: -------------------------------------------------------------------------------- 1 | [Titanic: A Machine Learning Application](https://limengweb.wordpress.com/2017/09/30/%E6%B3%B0%E5%9D%A6%E5%B0%BC%E5%85%8B%EF%BC%9A%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E5%BA%94%E7%94%A8/) 2 |
3 | ![titanic](titanic.png) 4 |
5 | -------------------------------------------------------------------------------- /kaggle/titanic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/titanic/__init__.py -------------------------------------------------------------------------------- /kaggle/titanic/titanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/titanic/titanic.png -------------------------------------------------------------------------------- /kaggle/titanic/titanic.py: -------------------------------------------------------------------------------- 1 | # remove warnings 2 | import warnings 3 | import pandas as pd 4 | #from matplotlib import pyplot as plt 5 | import numpy as np 6 | 7 | from sklearn.pipeline import make_pipeline 8 | from sklearn.ensemble import RandomForestClassifier 9 | from sklearn.feature_selection import SelectKBest 10 | from sklearn.cross_validation import StratifiedKFold 11 | from sklearn.grid_search import GridSearchCV 12 | from sklearn.ensemble.gradient_boosting import GradientBoostingClassifier 13 | from sklearn.cross_validation import cross_val_score 14 | 15 | from sklearn.ensemble import ExtraTreesClassifier 16 | from sklearn.feature_selection import SelectFromModel 17 | 18 | warnings.filterwarnings('ignore') 19 | pd.options.display.max_rows = 100 20 | 21 | def get_combined_data(): 22 | # reading train data 23 | train = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/train.csv') 24 | 25 | # reading test data 26 | test = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/test.csv') 27 | 28 | # extracting and then removing the targets from the training data 29 | targets = train.Survived 30 | train.drop('Survived',1,inplace=True) 31 | 32 | 33 | # merging train data and test data for future feature engineering 34 | combined = train.append(test) 35 | combined.reset_index(inplace=True) 36 | combined.drop('index',inplace=True,axis=1) 37 | 38 | return combined, targets 39 | 40 | 41 | def create_titles(combined): 42 | # we extract the title from each name 43 | combined['Title'] = combined['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip()) 44 | 45 | # a map of more aggregated titles 46 | Title_Dictionary = { 47 | "Capt": "Officer", 48 | "Col": "Officer", 49 | "Major": "Officer", 50 | "Jonkheer": "Royalty", 51 | "Don": "Royalty", 52 | "Sir" : "Royalty", 53 | "Dr": "Officer", 54 | "Rev": "Officer", 55 | "the Countess": "Royalty", 56 | "Dona": "Royalty", 57 | "Mme": "Mrs", 58 | "Mlle": "Miss", 59 | "Ms": "Mrs", 60 | "Mr" : "Mr", 61 | "Mrs" : "Mrs", 62 | "Miss" : "Miss", 63 | "Master" : "Master", 64 | "Lady" : "Royalty" 65 | } 66 | 67 | # we map each title 68 | combined['Title'] = combined.Title.map(Title_Dictionary) 69 | 70 | 71 | def process_age(combined): 72 | # a function that fills the missing values of the Age variable 73 | def fillAges(row): 74 | if row['Sex']=='female' and row['Pclass'] == 1: 75 | if row['Title'] == 'Miss': 76 | return 30 77 | elif row['Title'] == 'Mrs': 78 | return 45 79 | elif row['Title'] == 'Officer': 80 | return 49 81 | elif row['Title'] == 'Royalty': 82 | return 39 83 | elif row['Sex']=='female' and row['Pclass'] == 2: 84 | if row['Title'] == 'Miss': 85 | return 20 86 | elif row['Title'] == 'Mrs': 87 | return 30 88 | elif 
row['Sex']=='female' and row['Pclass'] == 3: 89 | if row['Title'] == 'Miss': 90 | return 18 91 | elif row['Title'] == 'Mrs': 92 | return 31 93 | elif row['Sex']=='male' and row['Pclass'] == 1: 94 | if row['Title'] == 'Master': 95 | return 6 96 | elif row['Title'] == 'Mr': 97 | return 41.5 98 | elif row['Title'] == 'Officer': 99 | return 52 100 | elif row['Title'] == 'Royalty': 101 | return 40 102 | elif row['Sex']=='male' and row['Pclass'] == 2: 103 | if row['Title'] == 'Master': 104 | return 2 105 | elif row['Title'] == 'Mr': 106 | return 30 107 | elif row['Title'] == 'Officer': 108 | return 41.5 109 | elif row['Sex']=='male' and row['Pclass'] == 3: 110 | if row['Title'] == 'Master': 111 | return 6 112 | elif row['Title'] == 'Mr': 113 | return 26 114 | 115 | combined.Age = combined.apply( 116 | lambda r : fillAges(r) if np.isnan(r['Age']) else r['Age'], axis=1) 117 | 118 | 119 | def process_names(combined): 120 | # we clean the Name variable 121 | combined.drop('Name',axis=1,inplace=True) 122 | 123 | # encoding in dummy variable 124 | titles_dummies = pd.get_dummies(combined['Title'],prefix='Title') 125 | combined = pd.concat([combined,titles_dummies],axis=1) 126 | 127 | # removing the title variable 128 | combined.drop('Title',axis=1,inplace=True) 129 | 130 | return combined 131 | 132 | 133 | def process_fares(combined): 134 | # there's one missing fare value - replacing it with the mean. 135 | combined.Fare.fillna(combined.Fare.mean(),inplace=True) 136 | 137 | 138 | def process_embarked(combined): 139 | # two missing embarked values - filling them with the most frequent one (S) 140 | combined.Embarked.fillna('S',inplace=True) 141 | 142 | # dummy encoding 143 | embarked_dummies = pd.get_dummies(combined['Embarked'],prefix='Embarked') 144 | combined = pd.concat([combined,embarked_dummies],axis=1) 145 | combined.drop('Embarked',axis=1,inplace=True) 146 | 147 | return combined 148 | 149 | 150 | def process_cabin(combined): 151 | # replacing missing cabins with U (for Uknown) 152 | combined.Cabin.fillna('U',inplace=True) 153 | 154 | # mapping each Cabin value with the cabin letter 155 | combined['Cabin'] = combined['Cabin'].map(lambda c : c[0]) 156 | 157 | # dummy encoding ... 
158 | cabin_dummies = pd.get_dummies(combined['Cabin'],prefix='Cabin') 159 | 160 | combined = pd.concat([combined,cabin_dummies],axis=1) 161 | 162 | combined.drop('Cabin',axis=1,inplace=True) 163 | 164 | return combined 165 | 166 | 167 | def process_sex(combined): 168 | # mapping string values to numerical one 169 | combined['Sex'] = combined['Sex'].map({'male':1,'female':0}) 170 | 171 | 172 | def process_pclass(combined): 173 | # encoding into 3 categories: 174 | pclass_dummies = pd.get_dummies(combined['Pclass'],prefix="Pclass") 175 | 176 | # adding dummy variables 177 | combined = pd.concat([combined,pclass_dummies],axis=1) 178 | 179 | # removing "Pclass" 180 | 181 | combined.drop('Pclass',axis=1,inplace=True) 182 | 183 | return combined 184 | 185 | 186 | def process_ticket(combined): 187 | # a function that extracts each prefix of the ticket, 188 | # returns 'XXX' if no prefix (i.e the ticket is a digit) 189 | def cleanTicket(ticket): 190 | ticket = ticket.replace('.','') 191 | ticket = ticket.replace('/','') 192 | ticket = ticket.split() 193 | ticket = list(map(lambda t: t.strip(), ticket)) 194 | ticket = list(filter(lambda t: not t.isdigit(), ticket)) 195 | if len(ticket) > 0: 196 | return ticket[0] 197 | else: 198 | return 'XXX' 199 | 200 | # Extracting dummy variables from tickets: 201 | combined['Ticket'] = combined['Ticket'].map(cleanTicket) 202 | tickets_dummies = pd.get_dummies(combined['Ticket'],prefix='Ticket') 203 | combined = pd.concat([combined, tickets_dummies],axis=1) 204 | combined.drop('Ticket',inplace=True,axis=1) 205 | return combined 206 | 207 | 208 | def process_family(combined): 209 | # introducing a new feature : the size of families (including the passenger) 210 | combined['FamilySize'] = combined['Parch'] + combined['SibSp'] + 1 211 | 212 | # introducing other features based on the family size 213 | combined['Singleton'] = combined['FamilySize'].map(lambda s : 1 if s == 1 else 0) 214 | combined['SmallFamily'] = combined['FamilySize'].map(lambda s : 1 if 2<=s<=4 else 0) 215 | combined['LargeFamily'] = combined['FamilySize'].map(lambda s : 1 if 5<=s else 0) 216 | 217 | 218 | def scale_all_features(combined): 219 | features = list(combined.columns) 220 | features.remove('PassengerId') 221 | combined[features] = combined[features].apply(lambda x: x/x.max(), axis=0) 222 | 223 | 224 | combined, targets = get_combined_data() 225 | create_titles(combined) 226 | process_age(combined) 227 | combined = process_names(combined) 228 | process_fares(combined) 229 | combined = process_embarked(combined) 230 | combined = process_cabin(combined) 231 | process_sex(combined) 232 | combined = process_pclass(combined) 233 | combined = process_ticket(combined) 234 | process_family(combined) 235 | scale_all_features(combined) 236 |
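scale_all_features above normalizes each column by dividing by its maximum. A sketch of the same step done with sklearn's MinMaxScaler (an alternative, not what titanic.py does; it additionally shifts by each column's minimum):

```python
from sklearn.preprocessing import MinMaxScaler

# Sketch: column-wise scaling with sklearn instead of x / x.max().
def scale_all_features_sklearn(combined):
    features = [c for c in combined.columns if c != 'PassengerId']
    combined[features] = MinMaxScaler().fit_transform(combined[features])
```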
237 | 238 | def compute_score(clf, X, y, scoring='accuracy'): 239 | xval = cross_val_score(clf, X, y, cv=5, scoring=scoring) 240 | return np.mean(xval) 241 | 242 | 243 | def recover_train_test_target(combined): 244 | train_set = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/train.csv') 245 | 246 | targets = train_set.Survived 247 | train = combined.loc[0:890] 248 | test = combined.loc[891:] 249 | 250 | return train, test, targets 251 | 252 | 253 | train, test, targets = recover_train_test_target(combined) 254 | 255 | clf = ExtraTreesClassifier(n_estimators=200) 256 | clf = clf.fit(train, targets) 257 | 258 | features = pd.DataFrame() 259 | features['feature'] = train.columns 260 | features['importance'] = clf.feature_importances_ 261 | print(features.sort_values(['importance'], ascending=False)) 262 | 263 | model = SelectFromModel(clf, prefit=True) 264 | train_new = model.transform(train) 265 | print(train_new.shape) 266 | 267 | test_new = model.transform(test) 268 | print(test_new.shape) 269 | 270 | forest = RandomForestClassifier(max_features='sqrt') 271 | 272 | parameter_grid = { 273 | 'max_depth': [4, 5, 6, 7, 8], 274 | 'n_estimators': [200, 210, 240, 250], 275 | 'criterion': ['gini', 'entropy'] 276 | } 277 | 278 | cross_validation = StratifiedKFold(targets, n_folds=5) 279 | 280 | grid_search = GridSearchCV(forest, 281 | param_grid=parameter_grid, 282 | cv=cross_validation) 283 | 284 | grid_search.fit(train_new, targets) 285 | 286 | print('Best score: {}'.format(grid_search.best_score_)) 287 | print('Best parameters: {}'.format(grid_search.best_params_)) 288 | 289 | output = grid_search.predict(test_new).astype(int) 290 | df_output = pd.DataFrame() 291 | df_output['PassengerId'] = test['PassengerId'] 292 | df_output['Survived'] = output 293 | df_output[['PassengerId', 'Survived']].to_csv('logistic_regression_predictions.csv', index=False) -------------------------------------------------------------------------------- /kaggle/zillow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/zillow/__init__.py -------------------------------------------------------------------------------- /kaggle/zillow/location.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | 5 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False) 6 | 7 | sns.jointplot(x=properties_data.latitude.values, y=properties_data.longitude.values, size=10) 8 | plt.ylabel('Longitude', fontsize=12) 9 | plt.xlabel('Latitude', fontsize=12) 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /kaggle/zillow/log_error.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 5 | 6 | plt.figure(figsize=(10, 10)) 7 | plt.scatter(range(train_data.shape[0]), train_data.sort_values(by='logerror').logerror) 8 | plt.xlabel('index') 9 | plt.ylabel('logerror') 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /kaggle/zillow/log_error_hist.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 6 | 7 | plt.figure(figsize=(10, 10)) 8 | up_limit = np.percentile(train_data.logerror, 99) 9 | low_limit = np.percentile(train_data.logerror, 1) 10 | tmp_data = train_data[(train_data.logerror < up_limit) & (train_data.logerror > low_limit)] 11 | plt.hist(tmp_data.logerror, bins=50) 12 | plt.xlabel('logerror') 13 | 14 | plt.show() 15 | -------------------------------------------------------------------------------- /kaggle/zillow/missing_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import 
matplotlib.pyplot as plt 3 | 4 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False) 5 | 6 | missing_df = properties_data.isnull().sum(axis=0).reset_index() 7 | missing_df.columns = ['column_name', 'missing_count'] 8 | missing_df = missing_df[missing_df.missing_count > 0] 9 | missing_df = missing_df.sort_values(by='missing_count') 10 | missing_df.plot(kind='barh') 11 | plt.yticks(range(missing_df.shape[0]), missing_df.column_name.values) 12 | 13 | plt.show() 14 | -------------------------------------------------------------------------------- /kaggle/zillow/month.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 5 | 6 | plt.figure(figsize=(10, 10)) 7 | datetime_data = pd.to_datetime(train_data.transactiondate) 8 | datetime_data.dt.month.value_counts().sort_index(axis=0).plot(kind='bar') 9 | plt.xlabel('month') 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /kaggle/zillow/train_data_shape.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 4 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False) 5 | 6 | print(train_data.shape) 7 | print(properties_data.shape) 8 | -------------------------------------------------------------------------------- /mnist/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Imports mnist tutorial libraries used by tutorial examples.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | -------------------------------------------------------------------------------- /mnist/mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Builds the MNIST network. 17 | 18 | Implements the inference/loss/training pattern for model building. 19 | 20 | 1. inference() - Builds the model as far as is required for running the network 21 | forward to make predictions. 22 | 2. loss() - Adds to the inference model the layers required to generate loss. 23 | 3. training() - Adds to the loss model the Ops required to generate and 24 | apply gradients. 25 | 26 | This file is used by the various "fully_connected_*.py" files and not meant to 27 | be run. 28 | """ 29 | from __future__ import absolute_import 30 | from __future__ import division 31 | from __future__ import print_function 32 | 33 | import math 34 | 35 | import tensorflow as tf 36 | 37 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 38 | NUM_CLASSES = 10 39 | 40 | # The MNIST images are always 28x28 pixels. 41 | IMAGE_SIZE = 28 42 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 43 | 44 | 45 | def inference(images, hidden1_units, hidden2_units): 46 | """Build the MNIST model up to where it may be used for inference. 47 | 48 | Args: 49 | images: Images placeholder, from inputs(). 50 | hidden1_units: Size of the first hidden layer. 51 | hidden2_units: Size of the second hidden layer. 52 | 53 | Returns: 54 | softmax_linear: Output tensor with the computed logits. 55 | """ 56 | # Hidden 1 57 | with tf.name_scope('hidden1'): 58 | weights = tf.Variable( 59 | tf.truncated_normal([IMAGE_PIXELS, hidden1_units], 60 | stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))), 61 | name='weights') 62 | biases = tf.Variable(tf.zeros([hidden1_units]), 63 | name='biases') 64 | hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases) 65 | # Hidden 2 66 | with tf.name_scope('hidden2'): 67 | weights = tf.Variable( 68 | tf.truncated_normal([hidden1_units, hidden2_units], 69 | stddev=1.0 / math.sqrt(float(hidden1_units))), 70 | name='weights') 71 | biases = tf.Variable(tf.zeros([hidden2_units]), 72 | name='biases') 73 | hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases) 74 | # Linear 75 | with tf.name_scope('softmax_linear'): 76 | weights = tf.Variable( 77 | tf.truncated_normal([hidden2_units, NUM_CLASSES], 78 | stddev=1.0 / math.sqrt(float(hidden2_units))), 79 | name='weights') 80 | biases = tf.Variable(tf.zeros([NUM_CLASSES]), 81 | name='biases') 82 | logits = tf.matmul(hidden2, weights) + biases 83 | return logits 84 | 85 | 86 | def loss(logits, labels): 87 | """Calculates the loss from the logits and the labels. 88 | 89 | Args: 90 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 91 | labels: Labels tensor, int32 - [batch_size]. 92 | 93 | Returns: 94 | loss: Loss tensor of type float. 95 | """ 96 | labels = tf.to_int64(labels) 97 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 98 | labels=labels, logits=logits, name='xentropy') 99 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') 100 | return loss 101 | 102 | 103 | def training(loss, learning_rate): 104 | """Sets up the training Ops. 105 | 106 | Creates a summarizer to track the loss over time in TensorBoard. 107 | 108 | Creates an optimizer and applies the gradients to all trainable variables. 109 | 110 | The Op returned by this function is what must be passed to the 111 | `sess.run()` call to cause the model to train. 112 | 113 | Args: 114 | loss: Loss tensor, from loss().
115 | learning_rate: The learning rate to use for gradient descent. 116 | 117 | Returns: 118 | train_op: The Op for training. 119 | """ 120 | # Add a scalar summary for the snapshot loss. 121 | tf.summary.scalar('loss', loss) 122 | # Create the gradient descent optimizer with the given learning rate. 123 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 124 | # Create a variable to track the global step. 125 | global_step = tf.Variable(0, name='global_step', trainable=False) 126 | # Use the optimizer to apply the gradients that minimize the loss 127 | # (and also increment the global step counter) as a single training step. 128 | train_op = optimizer.minimize(loss, global_step=global_step) 129 | return train_op 130 | 131 | 132 | def evaluation(logits, labels): 133 | """Evaluate the quality of the logits at predicting the label. 134 | 135 | Args: 136 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 137 | labels: Labels tensor, int32 - [batch_size], with values in the 138 | range [0, NUM_CLASSES). 139 | 140 | Returns: 141 | A scalar int32 tensor with the number of examples (out of batch_size) 142 | that were predicted correctly. 143 | """ 144 | # For a classifier model, we can use the in_top_k Op. 145 | # It returns a bool tensor with shape [batch_size] that is true for 146 | # the examples where the label is in the top k (here k=1) 147 | # of all logits for that example. 148 | correct = tf.nn.in_top_k(logits, labels, 1) 149 | # Return the number of true entries. 150 | return tf.reduce_sum(tf.cast(correct, tf.int32)) 151 | -------------------------------------------------------------------------------- /mnist/mnist_conv.py: -------------------------------------------------------------------------------- 1 | """A convolutional neural network for MNIST classification. 
2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import sys 9 | 10 | # Import data 11 | from tensorflow.examples.tutorials.mnist import input_data 12 | 13 | import tensorflow as tf 14 | 15 | FLAGS = None 16 | 17 | 18 | def weight_variable(shape): 19 | initial = tf.truncated_normal(shape, stddev=0.1) 20 | return tf.Variable(initial) 21 | 22 | 23 | def bias_variable(shape): 24 | initial = tf.constant(0.1, shape=shape) 25 | return tf.Variable(initial) 26 | 27 | 28 | def conv2d(x, W): 29 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 30 | 31 | 32 | def max_pool_2x2(x): 33 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], 34 | strides=[1, 2, 2, 1], padding='SAME') 35 | 36 | 37 | def main(_): 38 | mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) 39 | 40 | # Create the model 41 | x = tf.placeholder(tf.float32, [None, 784]) 42 | 43 | # Define loss and optimizer 44 | y_ = tf.placeholder(tf.float32, [None, 10]) 45 | 46 | x_image = tf.reshape(x, [-1, 28, 28, 1]) 47 | 48 | W_conv1 = weight_variable([5, 5, 1, 32]) 49 | b_conv1 = bias_variable([32]) 50 | 51 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) 52 | h_pool1 = max_pool_2x2(h_conv1) 53 | 54 | W_conv2 = weight_variable([5, 5, 32, 64]) 55 | b_conv2 = bias_variable([64]) 56 | 57 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 58 | h_pool2 = max_pool_2x2(h_conv2) 59 | 60 | W_fc1 = weight_variable([7 * 7 * 64, 1024]) 61 | b_fc1 = bias_variable([1024]) 62 | 63 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) 64 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) 65 | 66 | keep_prob = tf.placeholder(tf.float32) 67 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) 68 | 69 | W_fc2 = weight_variable([1024, 10]) 70 | b_fc2 = bias_variable([10]) 71 | 72 | y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 73 | 74 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)) 75 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 76 | correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) 77 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 78 | 79 | sess = tf.InteractiveSession() 80 | sess.run(tf.global_variables_initializer()) 81 | 82 | for i in range(20000): 83 | batch = mnist.train.next_batch(50) 84 | if i % 100 == 0: 85 | train_accuracy = accuracy.eval(feed_dict={ 86 | x: mnist.validation.images, y_: mnist.validation.labels, keep_prob: 1.0}) 87 | print("step %d, training accuracy %g" % (i, train_accuracy)) 88 | train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5}) 89 | 90 | print("test accuracy %g" % accuracy.eval(feed_dict={ 91 | x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})) 92 | 93 | if __name__ == '__main__': 94 | parser = argparse.ArgumentParser() 95 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', 96 | help='Directory for storing input data') 97 | FLAGS, unparsed = parser.parse_known_args() 98 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 99 |
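mnist_conv.py evaluates accuracy with a single feed of the full test set, which can exhaust GPU memory for larger models. A sketch of batched evaluation, reusing `x`, `y_`, `keep_prob`, and `correct_prediction` from `main` above (the helper name is illustrative):

```python
# Sketch: sum correct predictions over test batches instead of one big feed.
num_correct = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

def batched_test_accuracy(sess, mnist, batch=1000):
    total, n = 0.0, mnist.test.num_examples
    for i in range(0, n, batch):
        total += sess.run(num_correct, feed_dict={
            x: mnist.test.images[i:i + batch],
            y_: mnist.test.labels[i:i + batch],
            keep_prob: 1.0})
    return total / n
```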
-------------------------------------------------------------------------------- /mnist/mnist_simple.py: -------------------------------------------------------------------------------- 1 | """Builds the MNIST network. 2 | 3 | Simplifies the MNIST model-building work. 4 | 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import math 11 | 12 | import tensorflow as tf 13 | 14 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 15 | NUM_CLASSES = 10 16 | 17 | # The MNIST images are always 28x28 pixels. 18 | IMAGE_SIZE = 28 19 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 20 | 21 | 22 | def variable_summaries(var, name): 23 | """Attach a lot of summaries to a Tensor.""" 24 | with tf.name_scope('summaries'): 25 | mean = tf.reduce_mean(var) 26 | tf.summary.scalar('mean/' + name, mean) 27 | with tf.name_scope('stddev'): 28 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 29 | tf.summary.scalar('stddev/' + name, stddev) 30 | tf.summary.scalar('max/' + name, tf.reduce_max(var)) 31 | tf.summary.scalar('min/' + name, tf.reduce_min(var)) 32 | tf.summary.histogram(name, var) 33 | 34 | 35 | def nn_layer(input_tensor, input_dim, output_dim, layer_name): 36 | with tf.name_scope(layer_name): 37 | weights = tf.Variable( 38 | tf.truncated_normal([input_dim, output_dim], 39 | stddev=1.0 / math.sqrt(float(input_dim))), 40 | name='weights') 41 | variable_summaries(weights, layer_name + '/weights') 42 | biases = tf.Variable(tf.zeros([output_dim]), name='biases') 43 | variable_summaries(biases, layer_name + '/biases') 44 | return tf.nn.relu(tf.matmul(input_tensor, weights) + biases) 45 | 46 | 47 | def inference(images, hidden1_units, hidden2_units): 48 | """Build the MNIST model up to where it may be used for inference. 49 | 50 | Args: 51 | images: Images placeholder, from inputs(). 52 | hidden1_units: Size of the first hidden layer. 53 | hidden2_units: Size of the second hidden layer. 54 | 55 | Returns: 56 | softmax_linear: Output tensor with the computed logits. 57 | """ 58 | hidden1 = nn_layer(images, IMAGE_PIXELS, hidden1_units, 'layer1') 59 | hidden2 = nn_layer(hidden1, hidden1_units, hidden2_units, 'layer2') 60 | logits = nn_layer(hidden2, hidden2_units, NUM_CLASSES, 'layer3') 61 | return logits 62 | 63 | 64 | def loss(logits, labels): 65 | """Calculates the loss from the logits and the labels. 66 | 67 | Args: 68 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 69 | labels: Labels tensor, int32 - [batch_size]. 70 | 71 | Returns: 72 | loss: Loss tensor of type float. 73 | """ 74 | labels = tf.to_int64(labels) 75 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 76 | labels=labels, logits=logits, name='xentropy') 77 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') 78 | return loss 79 | 80 | 81 | def training(loss, learning_rate): 82 | """Sets up the training Ops. 83 | 84 | Creates a summarizer to track the loss over time in TensorBoard. 85 | 86 | Creates an optimizer and applies the gradients to all trainable variables. 87 | 88 | The Op returned by this function is what must be passed to the 89 | `sess.run()` call to cause the model to train. 90 | 91 | Args: 92 | loss: Loss tensor, from loss(). 93 | learning_rate: The learning rate to use for gradient descent. 94 | 95 | Returns: 96 | train_op: The Op for training. 97 | """ 98 | # Add a scalar summary for the snapshot loss. 99 | tf.summary.scalar('loss', loss) 100 | # Create the gradient descent optimizer with the given learning rate. 101 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 102 | # Create a variable to track the global step.
103 | global_step = tf.Variable(0, name='global_step', trainable=False) 104 | # Use the optimizer to apply the gradients that minimize the loss 105 | # (and also increment the global step counter) as a single training step. 106 | train_op = optimizer.minimize(loss, global_step=global_step) 107 | return train_op 108 | 109 | 110 | def evaluation(logits, labels): 111 | """Evaluate the quality of the logits at predicting the label. 112 | 113 | Args: 114 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 115 | labels: Labels tensor, int32 - [batch_size], with values in the 116 | range [0, NUM_CLASSES). 117 | 118 | Returns: 119 | A scalar int32 tensor with the number of examples (out of batch_size) 120 | that were predicted correctly. 121 | """ 122 | # For a classifier model, we can use the in_top_k Op. 123 | # It returns a bool tensor with shape [batch_size] that is true for 124 | # the examples where the label is in the top k (here k=1) 125 | # of all logits for that example. 126 | correct = tf.nn.in_top_k(logits, labels, 1) 127 | # Return the number of true entries. 128 | return tf.reduce_sum(tf.cast(correct, tf.int32)) 129 | -------------------------------------------------------------------------------- /mnist/mnist_softmax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A very simple MNIST classifier. 17 | 18 | See extensive documentation at 19 | http://tensorflow.org/tutorials/mnist/beginners/index.md 20 | """ 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import argparse 26 | import sys 27 | 28 | # Import data 29 | from tensorflow.examples.tutorials.mnist import input_data 30 | 31 | import tensorflow as tf 32 | 33 | FLAGS = None 34 | 35 | 36 | def main(_): 37 | mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) 38 | 39 | # Create the model 40 | x = tf.placeholder(tf.float32, [None, 784]) 41 | W = tf.Variable(tf.zeros([784, 10])) 42 | b = tf.Variable(tf.zeros([10])) 43 | y = tf.matmul(x, W) + b 44 | 45 | # Define loss and optimizer 46 | y_ = tf.placeholder(tf.float32, [None, 10]) 47 | 48 | # The raw formulation of cross-entropy, 49 | # 50 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)), 51 | # reduction_indices=[1])) 52 | # 53 | # can be numerically unstable. 54 | # 55 | # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw 56 | # outputs of 'y', and then average across the batch. 
57 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_)) 58 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 59 | 60 | sess = tf.InteractiveSession() 61 | # Train 62 | tf.global_variables_initializer().run() 63 | for _ in range(1000): 64 | batch_xs, batch_ys = mnist.train.next_batch(100) 65 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) 66 | 67 | # Test trained model 68 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 69 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 70 | print(sess.run(accuracy, feed_dict={x: mnist.test.images, 71 | y_: mnist.test.labels})) 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', 76 | help='Directory for storing input data') 77 | FLAGS, unparsed = parser.parse_known_args() 78 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 79 | -------------------------------------------------------------------------------- /mnist/mnist_with_summaries.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A simple MNIST classifier which displays summaries in TensorBoard. 16 | 17 | This is an unimpressive MNIST model, but it is a good example of using 18 | tf.name_scope to make a graph legible in the TensorBoard graph explorer, and of 19 | naming summary tags so that they are grouped meaningfully in TensorBoard. 20 | 21 | It demonstrates the functionality of every TensorBoard dashboard. 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import argparse 28 | import sys 29 | 30 | import tensorflow as tf 31 | 32 | from tensorflow.examples.tutorials.mnist import input_data 33 | 34 | FLAGS = None 35 | 36 | 37 | def train(): 38 | # Import data 39 | mnist = input_data.read_data_sets(FLAGS.data_dir, 40 | one_hot=True, 41 | fake_data=FLAGS.fake_data) 42 | 43 | sess = tf.InteractiveSession() 44 | # Create a multilayer model. 45 | 46 | # Input placeholders 47 | with tf.name_scope('input'): 48 | x = tf.placeholder(tf.float32, [None, 784], name='x-input') 49 | y_ = tf.placeholder(tf.float32, [None, 10], name='y-input') 50 | 51 | with tf.name_scope('input_reshape'): 52 | image_shaped_input = tf.reshape(x, [-1, 28, 28, 1]) 53 | tf.summary.image('input', image_shaped_input, 10) 54 | 55 | # We can't initialize these variables to 0 - the network will get stuck. 
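# Why zero initialization gets stuck: with identical (all-zero) weights, every
# hidden unit in a layer computes the same activation and therefore receives
# the same gradient update, so the units remain identical forever and the
# layer collapses to a single effective neuron. Drawing the initial weights
# from a truncated normal, as below, breaks this symmetry; biases can safely
# start at a small constant.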
56 | def weight_variable(shape): 57 | """Create a weight variable with appropriate initialization.""" 58 | initial = tf.truncated_normal(shape, stddev=0.1) 59 | return tf.Variable(initial) 60 | 61 | def bias_variable(shape): 62 | """Create a bias variable with appropriate initialization.""" 63 | initial = tf.constant(0.1, shape=shape) 64 | return tf.Variable(initial) 65 | 66 | def variable_summaries(var): 67 | """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" 68 | with tf.name_scope('summaries'): 69 | mean = tf.reduce_mean(var) 70 | tf.summary.scalar('mean', mean) 71 | with tf.name_scope('stddev'): 72 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 73 | tf.summary.scalar('stddev', stddev) 74 | tf.summary.scalar('max', tf.reduce_max(var)) 75 | tf.summary.scalar('min', tf.reduce_min(var)) 76 | tf.summary.histogram('histogram', var) 77 | 78 | def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu): 79 | """Reusable code for making a simple neural net layer. 80 | 81 | It does a matrix multiply, bias add, and then uses relu to nonlinearize. 82 | It also sets up name scoping so that the resultant graph is easy to read, 83 | and adds a number of summary ops. 84 | """ 85 | # Adding a name scope ensures logical grouping of the layers in the graph. 86 | with tf.name_scope(layer_name): 87 | # This Variable will hold the state of the weights for the layer 88 | with tf.name_scope('weights'): 89 | weights = weight_variable([input_dim, output_dim]) 90 | variable_summaries(weights) 91 | with tf.name_scope('biases'): 92 | biases = bias_variable([output_dim]) 93 | variable_summaries(biases) 94 | with tf.name_scope('Wx_plus_b'): 95 | preactivate = tf.matmul(input_tensor, weights) + biases 96 | tf.summary.histogram('pre_activations', preactivate) 97 | activations = act(preactivate, name='activation') 98 | tf.summary.histogram('activations', activations) 99 | return activations 100 | 101 | hidden1 = nn_layer(x, 784, 500, 'layer1') 102 | 103 | with tf.name_scope('dropout'): 104 | keep_prob = tf.placeholder(tf.float32) 105 | tf.summary.scalar('dropout_keep_probability', keep_prob) 106 | dropped = tf.nn.dropout(hidden1, keep_prob) 107 | 108 | # Do not apply softmax activation yet, see below. 109 | y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity) 110 | 111 | with tf.name_scope('cross_entropy'): 112 | # The raw formulation of cross-entropy, 113 | # 114 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)), 115 | # reduction_indices=[1])) 116 | # 117 | # can be numerically unstable. 118 | # 119 | # So here we use tf.nn.softmax_cross_entropy_with_logits on the 120 | # raw outputs of the nn_layer above, and then average across 121 | # the batch. 
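# Compatibility note for the call below: TensorFlow 1.x made labels and logits
# keyword-only, so on 1.x the line must read
#   diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
# The positional (y, y_) form only works on the 0.x releases.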
122 | diff = tf.nn.softmax_cross_entropy_with_logits(y, y_) 123 | with tf.name_scope('total'): 124 | cross_entropy = tf.reduce_mean(diff) 125 | tf.summary.scalar('cross_entropy', cross_entropy) 126 | 127 | with tf.name_scope('train'): 128 | train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize( 129 | cross_entropy) 130 | 131 | with tf.name_scope('accuracy'): 132 | with tf.name_scope('correct_prediction'): 133 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 134 | with tf.name_scope('accuracy'): 135 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 136 | tf.summary.scalar('accuracy', accuracy) 137 | 138 | # Merge all the summaries and write them out to /tmp/mnist_logs (by default) 139 | merged = tf.summary.merge_all() 140 | train_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/train', 141 | sess.graph) 142 | test_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/test') 143 | tf.global_variables_initializer().run() 144 | 145 | # Train the model, and also write summaries. 146 | # Every 10th step, measure test-set accuracy, and write test summaries 147 | # All other steps, run train_step on training data, & add training summaries 148 | 149 | def feed_dict(train): 150 | """Make a TensorFlow feed_dict: maps data onto Tensor placeholders.""" 151 | if train or FLAGS.fake_data: 152 | xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data) 153 | k = FLAGS.dropout 154 | else: 155 | xs, ys = mnist.test.images, mnist.test.labels 156 | k = 1.0 157 | return {x: xs, y_: ys, keep_prob: k} 158 | 159 | for i in range(FLAGS.max_steps): 160 | if i % 10 == 0: # Record summaries and test-set accuracy 161 | summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False)) 162 | test_writer.add_summary(summary, i) 163 | print('Accuracy at step %s: %s' % (i, acc)) 164 | else: # Record train set summaries, and train 165 | if i % 100 == 99: # Record execution stats 166 | run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 167 | run_metadata = tf.RunMetadata() 168 | summary, _ = sess.run([merged, train_step], 169 | feed_dict=feed_dict(True), 170 | options=run_options, 171 | run_metadata=run_metadata) 172 | train_writer.add_run_metadata(run_metadata, 'step%03d' % i) 173 | train_writer.add_summary(summary, i) 174 | print('Adding run metadata for', i) 175 | else: # Record a summary 176 | summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True)) 177 | train_writer.add_summary(summary, i) 178 | train_writer.close() 179 | test_writer.close() 180 | 181 | 182 | def main(_): 183 | if tf.gfile.Exists(FLAGS.log_dir): 184 | tf.gfile.DeleteRecursively(FLAGS.log_dir) 185 | tf.gfile.MakeDirs(FLAGS.log_dir) 186 | train() 187 | 188 | 189 | if __name__ == '__main__': 190 | parser = argparse.ArgumentParser() 191 | parser.add_argument('--fake_data', nargs='?', const=True, type=bool, 192 | default=False, 193 | help='If true, uses fake data for unit testing.') 194 | parser.add_argument('--max_steps', type=int, default=1000, 195 | help='Number of steps to run trainer.') 196 | parser.add_argument('--learning_rate', type=float, default=0.001, 197 | help='Initial learning rate') 198 | parser.add_argument('--dropout', type=float, default=0.9, 199 | help='Keep probability for training dropout.') 200 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', 201 | help='Directory for storing input data') 202 | parser.add_argument('--log_dir', type=str, default='/tmp/tensorflow/mnist/logs/mnist_with_summaries', 203 | 
help='Summaries log directory') 204 | FLAGS, unparsed = parser.parse_known_args() 205 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 206 | -------------------------------------------------------------------------------- /mnist/mnist_with_summary.py: -------------------------------------------------------------------------------- 1 | """Builds the MNIST network. 2 | 3 | Simplify the MNIST model building work. 4 | 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import math 11 | 12 | import tensorflow as tf 13 | 14 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 15 | NUM_CLASSES = 10 16 | 17 | # The MNIST images are always 28x28 pixels. 18 | IMAGE_SIZE = 28 19 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 20 | 21 | 22 | def variable_summaries(var, name): 23 | """Attach a lot of summaries to a Tensor.""" 24 | with tf.name_scope('summaries'): 25 | mean = tf.reduce_mean(var) 26 | tf.scalar_summary('mean/' + name, mean) 27 | with tf.name_scope('stddev'): 28 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 29 | tf.scalar_summary('stddev/' + name, stddev) 30 | tf.scalar_summary('max/' + name, tf.reduce_max(var)) 31 | tf.scalar_summary('min/' + name, tf.reduce_min(var)) 32 | tf.histogram_summary(name, var) 33 | 34 | 35 | def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu): 36 | """Reusable code for making a simple neural net layer. 37 | 38 | It does a matrix multiply, bias add, and then uses relu to nonlinearize. 39 | It also sets up name scoping so that the resultant graph is easy to read, 40 | and adds a number of summary ops. 41 | """ 42 | # Adding a name scope ensures logical grouping of the layers in the graph. 
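# Note: this module is a pared-down sketch rather than a runnable script --
# weight_variable and bias_variable are assumed to be the initializer helpers
# defined in mnist_with_summaries.py above, and x, y_, sess and FLAGS are
# assumed to be supplied by a surrounding training script; none of them are
# defined in this file.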
43 | with tf.name_scope(layer_name): 44 | # This Variable will hold the state of the weights for the layer 45 | with tf.name_scope('weights'): 46 | weights = weight_variable([input_dim, output_dim]) 47 | variable_summaries(weights, layer_name + '/weights') 48 | with tf.name_scope('biases'): 49 | biases = bias_variable([output_dim]) 50 | variable_summaries(biases, layer_name + '/biases') 51 | with tf.name_scope('Wx_plus_b'): 52 | preactivate = tf.matmul(input_tensor, weights) + biases 53 | tf.histogram_summary(layer_name + '/pre_activations', preactivate) 54 | activations = act(preactivate, 'activation') 55 | tf.histogram_summary(layer_name + '/activations', activations) 56 | return activations 57 | 58 | hidden1 = nn_layer(x, 784, 500, 'layer1') 59 | 60 | with tf.name_scope('dropout'): 61 | keep_prob = tf.placeholder(tf.float32) 62 | tf.scalar_summary('dropout_keep_probability', keep_prob) 63 | dropped = tf.nn.dropout(hidden1, keep_prob) 64 | 65 | y = nn_layer(dropped, 500, 10, 'layer2', act=tf.nn.softmax) 66 | 67 | with tf.name_scope('cross_entropy'): 68 | diff = y_ * tf.log(y) 69 | with tf.name_scope('total'): 70 | cross_entropy = -tf.reduce_mean(diff) 71 | tf.scalar_summary('cross entropy', cross_entropy) 72 | 73 | with tf.name_scope('train'): 74 | train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cross_entropy) 75 | 76 | with tf.name_scope('accuracy'): 77 | with tf.name_scope('correct_prediction'): 78 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 79 | with tf.name_scope('accuracy'): 80 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 81 | tf.scalar_summary('accuracy', accuracy) 82 | 83 | # Merge all the summaries and write them out to /tmp/mnist_logs (by default) 84 | merged = tf.merge_all_summaries() 85 | train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train', 86 | sess.graph) 87 | test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test') 88 | tf.initialize_all_variables().run() 89 | -------------------------------------------------------------------------------- /reading/capsnet/drbc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/reading/capsnet/drbc.pdf -------------------------------------------------------------------------------- /self_driving/README.md: -------------------------------------------------------------------------------- 1 | Machine Learning 2 | ================ 3 | 4 | Welcome to my blog [听雨居](https://limengweb.wordpress.com). It contains detailed description of the code here. -------------------------------------------------------------------------------- /self_driving/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/__init__.py -------------------------------------------------------------------------------- /self_driving/lane_detect/README.md: -------------------------------------------------------------------------------- 1 | [基于OpenCV的车道分割线提取](https://limengweb.wordpress.com/2017/08/19/%E5%9F%BA%E4%BA%8Eopencv%E7%9A%84%E8%BD%A6%E9%81%93%E5%88%86%E5%89%B2%E7%BA%BF%E6%8F%90%E5%8F%96/) 2 |
3 | <img src="lane_detect.png"/>
4 |
5 | -------------------------------------------------------------------------------- /self_driving/lane_detect/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/lane_detect/__init__.py -------------------------------------------------------------------------------- /self_driving/lane_detect/comma_ai_lane_detect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | from self_driving.optical_flow.python import video 5 | from scipy import misc 6 | 7 | 8 | def color_seg(img_raw, red_thresh=0, green_thresh=0, blue_thresh=0): 9 | img_color_mask = np.copy(img_raw) 10 | red_mask = img_raw[:,:,0] < red_thresh 11 | green_mask = img_raw[:,:,1] < green_thresh 12 | rgb_mask = np.logical_or(red_mask, green_mask) 13 | img_color_mask[rgb_mask] = [0,0,0] 14 | return img_color_mask 15 | 16 | 17 | def draw_lines(img, lines, color=[255, 0, 0], thickness=2): 18 | for line in lines: 19 | for x1,y1,x2,y2 in line: 20 | cv2.line(img, (x1, y1), (x2, y2), color, thickness) 21 | 22 | 23 | def draw_lines_extrapolate(img, lines, color=[255, 0, 0], thickness=2): 24 | # Assume lines on left and right have opposite signed slopes 25 | left_xs = [] 26 | left_ys = [] 27 | right_xs = [] 28 | right_ys = [] 29 | for line in lines: 30 | for x1, y1, x2, y2 in line: 31 | if x2 - x1 == 0: continue; # Infinite slope 32 | slope = float(y2-y1) / float(x2-x1) 33 | if .5 <= abs(slope) < 1.0: # Discard unlikely slopes 34 | if slope > 0: 35 | left_xs.extend([x1, x2]) 36 | left_ys.extend([y1, y2]) 37 | else: 38 | right_xs.extend([x1, x2]) 39 | right_ys.extend([y1, y2]) 40 | 41 | y1 = img.shape[0] - 120 # Bottom of image 42 | y2 = img.shape[0] / 2 + 10 # Middle of view 43 | y1 = int(y1); y2 = int(y2); 44 | 45 | if left_xs and left_ys: 46 | left_fit = np.polyfit(left_xs, left_ys, 1) 47 | x1_left = (y1 - left_fit[1]) / left_fit[0] 48 | x2_left = (y2 - left_fit[1]) / left_fit[0] 49 | x1_left = int(x1_left); x2_left = int(x2_left); 50 | cv2.line(img, (x1_left, y1), (x2_left, y2), color, thickness) 51 | 52 | if right_xs and right_ys: 53 | right_fit = np.polyfit(right_xs, right_ys, 1) 54 | x1_right = (y1 - right_fit[1]) / right_fit[0] 55 | x2_right = (y2 - right_fit[1]) / right_fit[0] 56 | x1_right = int(x1_right); x2_right = int(x2_right); 57 | cv2.line(img, (x1_right, y1), (x2_right, y2), color, thickness) 58 | 59 | 60 | if __name__ == '__main__': 61 | try: 62 | fn = sys.argv[1] 63 | except IndexError: 64 | fn = 0 65 | cam = video.create_capture(fn) 66 | index = 0 67 | while True: 68 | ret, img = cam.read() 69 | 70 | if img is None: 71 | break 72 | 73 | rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 74 | 75 | gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY) 76 | 77 | # Define a kernel size and apply Gaussian smoothing 78 | kernel_size = 3 79 | blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0) 80 | 81 | # Define our parameters for Canny and apply 82 | low_threshold = 50 83 | high_threshold = 150 84 | edges = cv2.Canny(blur_gray, low_threshold, high_threshold) 85 | 86 | # Next we'll create a masked edges image using cv2.fillPoly() 87 | mask = np.zeros_like(edges) 88 | ignore_mask_color = 255 89 | 90 | # This time we are defining a four sided polygon to mask 91 | imshape = img.shape 92 | vertices = np.array([[(0, imshape[0] - 120), 93 | (imshape[1] / 2 - 80, imshape[0] / 2 + 10), 94 | (imshape[1] / 2 
+ 80, imshape[0] / 2 + 10), 95 | (imshape[1], imshape[0] - 120)]], 96 | dtype=np.int32) 97 | cv2.fillPoly(mask, vertices, ignore_mask_color) 98 | masked_edges = cv2.bitwise_and(edges, mask) 99 | 100 | # Define the Hough transform parameters 101 | # Make a blank the same size as our image to draw on 102 | rho = 1 # distance resolution in pixels of the Hough grid 103 | theta = np.pi / 180 # angular resolution in radians of the Hough grid 104 | threshold = 5 # minimum number of votes (intersections in Hough grid cell) 105 | min_line_length = 10 # minimum number of pixels making up a line 106 | max_line_gap = 2 # maximum gap in pixels between connectable line segments 107 | line_image = np.copy(img) * 0 # creating a blank to draw lines on 108 | 109 | # Run Hough on edge detected image 110 | # Output "lines" is an array containing endpoints of detected line segments 111 | lines = cv2.HoughLinesP(masked_edges, rho, theta, threshold, np.array([]), 112 | min_line_length, max_line_gap) 113 | 114 | if lines is None: 115 | continue 116 | 117 | draw_lines_extrapolate(line_image, lines, thickness=8) 118 | #draw_lines(line_image, lines, thickness=8) 119 | 120 | # Create a "color" binary image to combine with line image 121 | color_edges = np.dstack((masked_edges, masked_edges, masked_edges)) 122 | 123 | # Draw the lines on the edge image 124 | lines_edges = cv2.addWeighted(rgb, 1, line_image, 1, 0) 125 | misc.imsave(sys.argv[2] + 'frame_%d.png' % index, lines_edges) 126 | index += 1 127 | cv2.destroyAllWindows() 128 | -------------------------------------------------------------------------------- /self_driving/lane_detect/lane_detect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/lane_detect/lane_detect.png -------------------------------------------------------------------------------- /self_driving/lane_detect/udacity_lane_detect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | from self_driving.optical_flow.python import video 5 | from scipy import misc 6 | 7 | 8 | def color_seg(img_raw, red_thresh=0, green_thresh=0, blue_thresh=0): 9 | img_color_mask = np.copy(img_raw) 10 | red_mask = img_raw[:,:,0] < red_thresh 11 | green_mask = img_raw[:,:,1] < green_thresh 12 | rgb_mask = np.logical_or(red_mask, green_mask) 13 | img_color_mask[rgb_mask] = [0,0,0] 14 | return img_color_mask 15 | 16 | 17 | def draw_lines_extrapolate(img, lines, color=[255, 0, 0], thickness=2): 18 | # Assume lines on left and right have opposite signed slopes 19 | left_xs = [] 20 | left_ys = [] 21 | right_xs = [] 22 | right_ys = [] 23 | for line in lines: 24 | for x1, y1, x2, y2 in line: 25 | if x2 - x1 == 0: continue; # Infinite slope 26 | slope = float(y2-y1) / float(x2-x1) 27 | if .5 <= abs(slope) < 1.0: # Discard unlikely slopes 28 | if slope > 0: 29 | left_xs.extend([x1, x2]) 30 | left_ys.extend([y1, y2]) 31 | else: 32 | right_xs.extend([x1, x2]) 33 | right_ys.extend([y1, y2]) 34 | 35 | left_fit = np.polyfit(left_xs, left_ys, 1) 36 | right_fit = np.polyfit(right_xs, right_ys, 1) 37 | 38 | y1 = img.shape[0] # Bottom of image 39 | y2 = img.shape[0] / 2+ 50 # Middle of view 40 | x1_left = (y1 - left_fit[1]) / left_fit[0] 41 | x2_left = (y2 - left_fit[1]) / left_fit[0] 42 | x1_right = (y1 - right_fit[1]) / right_fit[0] 43 | x2_right = (y2 - right_fit[1]) / right_fit[0] 44 | y1 = int(y1); y2 = 
int(y2); 45 | x1_left = int(x1_left); x2_left = int(x2_left); 46 | x1_right = int(x1_right); x2_right = int(x2_right); 47 | 48 | cv2.line(img, (x1_left, y1), (x2_left, y2), color, thickness) 49 | cv2.line(img, (x1_right, y1), (x2_right, y2), color, thickness) 50 | 51 | 52 | if __name__ == '__main__': 53 | try: 54 | fn = sys.argv[1] 55 | except IndexError: 56 | fn = 0 57 | cam = video.create_capture(fn) 58 | index = 0 59 | while True: 60 | ret, img = cam.read() 61 | 62 | if img is None: 63 | break 64 | 65 | rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 66 | masked_img = color_seg(rgb, red_thresh=200, green_thresh=150, blue_thresh=0) 67 | 68 | gray = cv2.cvtColor(masked_img, cv2.COLOR_RGB2GRAY) 69 | 70 | # Define a kernel size and apply Gaussian smoothing 71 | kernel_size = 5 72 | blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0) 73 | 74 | # Define our parameters for Canny and apply 75 | low_threshold = 50 76 | high_threshold = 150 77 | edges = cv2.Canny(blur_gray, low_threshold, high_threshold) 78 | 79 | # Next we'll create a masked edges image using cv2.fillPoly() 80 | mask = np.zeros_like(edges) 81 | ignore_mask_color = 255 82 | 83 | # This time we are defining a four sided polygon to mask 84 | imshape = img.shape 85 | vertices = np.array([[(0 + 120, imshape[0]), 86 | (imshape[1] / 2 - 15, imshape[0] / 2 + 40), 87 | (imshape[1] / 2 + 15, imshape[0] / 2 + 40), 88 | (imshape[1] - 50, imshape[0])]], 89 | dtype=np.int32) 90 | cv2.fillPoly(mask, vertices, ignore_mask_color) 91 | masked_edges = cv2.bitwise_and(edges, mask) 92 | 93 | # Define the Hough transform parameters 94 | # Make a blank the same size as our image to draw on 95 | rho = 1 # distance resolution in pixels of the Hough grid 96 | theta = np.pi / 180 # angular resolution in radians of the Hough grid 97 | threshold = 5 # minimum number of votes (intersections in Hough grid cell) 98 | min_line_length = 10 # minimum number of pixels making up a line 99 | max_line_gap = 2 # maximum gap in pixels between connectable line segments 100 | line_image = np.copy(img) * 0 # creating a blank to draw lines on 101 | 102 | # Run Hough on edge detected image 103 | # Output "lines" is an array containing endpoints of detected line segments 104 | lines = cv2.HoughLinesP(masked_edges, rho, theta, threshold, np.array([]), 105 | min_line_length, max_line_gap) 106 | 107 | if lines is None: 108 | continue 109 | 110 | draw_lines_extrapolate(line_image, lines, thickness=8) 111 | 112 | # Draw the lines on the edge image 113 | lines_edges = cv2.addWeighted(rgb, 1, line_image, 1, 0) 114 | misc.imsave(sys.argv[2] + 'frame_%d.png' % index, lines_edges) 115 | index += 1 116 | cv2.destroyAllWindows() 117 | -------------------------------------------------------------------------------- /self_driving/optical_flow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/optical_flow/__init__.py -------------------------------------------------------------------------------- /self_driving/optical_flow/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/optical_flow/python/__init__.py -------------------------------------------------------------------------------- /self_driving/optical_flow/python/common.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | This module contains some common routines used by other samples. 5 | ''' 6 | 7 | # Python 2/3 compatibility 8 | from __future__ import print_function 9 | import sys 10 | PY3 = sys.version_info[0] == 3 11 | 12 | if PY3: 13 | from functools import reduce 14 | 15 | import numpy as np 16 | import cv2 17 | 18 | # built-in modules 19 | import os 20 | import itertools as it 21 | from contextlib import contextmanager 22 | 23 | image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm'] 24 | 25 | class Bunch(object): 26 | def __init__(self, **kw): 27 | self.__dict__.update(kw) 28 | def __str__(self): 29 | return str(self.__dict__) 30 | 31 | def splitfn(fn): 32 | path, fn = os.path.split(fn) 33 | name, ext = os.path.splitext(fn) 34 | return path, name, ext 35 | 36 | def anorm2(a): 37 | return (a*a).sum(-1) 38 | def anorm(a): 39 | return np.sqrt( anorm2(a) ) 40 | 41 | def homotrans(H, x, y): 42 | xs = H[0, 0]*x + H[0, 1]*y + H[0, 2] 43 | ys = H[1, 0]*x + H[1, 1]*y + H[1, 2] 44 | s = H[2, 0]*x + H[2, 1]*y + H[2, 2] 45 | return xs/s, ys/s 46 | 47 | def to_rect(a): 48 | a = np.ravel(a) 49 | if len(a) == 2: 50 | a = (0, 0, a[0], a[1]) 51 | return np.array(a, np.float64).reshape(2, 2) 52 | 53 | def rect2rect_mtx(src, dst): 54 | src, dst = to_rect(src), to_rect(dst) 55 | cx, cy = (dst[1] - dst[0]) / (src[1] - src[0]) 56 | tx, ty = dst[0] - src[0] * (cx, cy) 57 | M = np.float64([[ cx, 0, tx], 58 | [ 0, cy, ty], 59 | [ 0, 0, 1]]) 60 | return M 61 | 62 | 63 | def lookat(eye, target, up = (0, 0, 1)): 64 | fwd = np.asarray(target, np.float64) - eye 65 | fwd /= anorm(fwd) 66 | right = np.cross(fwd, up) 67 | right /= anorm(right) 68 | down = np.cross(fwd, right) 69 | R = np.float64([right, down, fwd]) 70 | tvec = -np.dot(R, eye) 71 | return R, tvec 72 | 73 | def mtx2rvec(R): 74 | w, u, vt = cv2.SVDecomp(R - np.eye(3)) 75 | p = vt[0] + u[:,0]*w[0] # same as np.dot(R, vt[0]) 76 | c = np.dot(vt[0], p) 77 | s = np.dot(vt[1], p) 78 | axis = np.cross(vt[0], vt[1]) 79 | return axis * np.arctan2(s, c) 80 | 81 | def draw_str(dst, target, s): 82 | x, y = target 83 | cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.LINE_AA) 84 | cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.LINE_AA) 85 | 86 | class Sketcher: 87 | def __init__(self, windowname, dests, colors_func): 88 | self.prev_pt = None 89 | self.windowname = windowname 90 | self.dests = dests 91 | self.colors_func = colors_func 92 | self.dirty = False 93 | self.show() 94 | cv2.setMouseCallback(self.windowname, self.on_mouse) 95 | 96 | def show(self): 97 | cv2.imshow(self.windowname, self.dests[0]) 98 | 99 | def on_mouse(self, event, x, y, flags, param): 100 | pt = (x, y) 101 | if event == cv2.EVENT_LBUTTONDOWN: 102 | self.prev_pt = pt 103 | elif event == cv2.EVENT_LBUTTONUP: 104 | self.prev_pt = None 105 | 106 | if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON: 107 | for dst, color in zip(self.dests, self.colors_func()): 108 | cv2.line(dst, self.prev_pt, pt, color, 5) 109 | self.dirty = True 110 | self.prev_pt = pt 111 | self.show() 112 | 113 | 114 | # palette data from matplotlib/_cm.py 115 | _jet_data = {'red': ((0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89,1, 1), 116 | (1, 0.5, 0.5)), 117 | 'green': ((0., 0, 0), (0.125,0, 0), (0.375,1, 1), (0.64,1, 1), 118 | (0.91,0,0), (1, 0, 0)), 119 | 'blue': ((0., 0.5, 0.5), (0.11, 1, 
1), (0.34, 1, 1), (0.65,0, 0), 120 | (1, 0, 0))} 121 | 122 | cmap_data = { 'jet' : _jet_data } 123 | 124 | def make_cmap(name, n=256): 125 | data = cmap_data[name] 126 | xs = np.linspace(0.0, 1.0, n) 127 | channels = [] 128 | eps = 1e-6 129 | for ch_name in ['blue', 'green', 'red']: 130 | ch_data = data[ch_name] 131 | xp, yp = [], [] 132 | for x, y1, y2 in ch_data: 133 | xp += [x, x+eps] 134 | yp += [y1, y2] 135 | ch = np.interp(xs, xp, yp) 136 | channels.append(ch) 137 | return np.uint8(np.array(channels).T*255) 138 | 139 | def nothing(*arg, **kw): 140 | pass 141 | 142 | def clock(): 143 | return cv2.getTickCount() / cv2.getTickFrequency() 144 | 145 | @contextmanager 146 | def Timer(msg): 147 | print(msg, '...',) 148 | start = clock() 149 | try: 150 | yield 151 | finally: 152 | print("%.2f ms" % ((clock()-start)*1000)) 153 | 154 | class StatValue: 155 | def __init__(self, smooth_coef = 0.5): 156 | self.value = None 157 | self.smooth_coef = smooth_coef 158 | def update(self, v): 159 | if self.value is None: 160 | self.value = v 161 | else: 162 | c = self.smooth_coef 163 | self.value = c * self.value + (1.0-c) * v 164 | 165 | class RectSelector: 166 | def __init__(self, win, callback): 167 | self.win = win 168 | self.callback = callback 169 | cv2.setMouseCallback(win, self.onmouse) 170 | self.drag_start = None 171 | self.drag_rect = None 172 | def onmouse(self, event, x, y, flags, param): 173 | x, y = np.int16([x, y]) # BUG 174 | if event == cv2.EVENT_LBUTTONDOWN: 175 | self.drag_start = (x, y) 176 | return 177 | if self.drag_start: 178 | if flags & cv2.EVENT_FLAG_LBUTTON: 179 | xo, yo = self.drag_start 180 | x0, y0 = np.minimum([xo, yo], [x, y]) 181 | x1, y1 = np.maximum([xo, yo], [x, y]) 182 | self.drag_rect = None 183 | if x1-x0 > 0 and y1-y0 > 0: 184 | self.drag_rect = (x0, y0, x1, y1) 185 | else: 186 | rect = self.drag_rect 187 | self.drag_start = None 188 | self.drag_rect = None 189 | if rect: 190 | self.callback(rect) 191 | def draw(self, vis): 192 | if not self.drag_rect: 193 | return False 194 | x0, y0, x1, y1 = self.drag_rect 195 | cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2) 196 | return True 197 | @property 198 | def dragging(self): 199 | return self.drag_rect is not None 200 | 201 | 202 | def grouper(n, iterable, fillvalue=None): 203 | '''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx''' 204 | args = [iter(iterable)] * n 205 | if PY3: 206 | output = it.zip_longest(fillvalue=fillvalue, *args) 207 | else: 208 | output = it.izip_longest(fillvalue=fillvalue, *args) 209 | return output 210 | 211 | def mosaic(w, imgs): 212 | '''Make a grid from images. 
213 | 214 | w -- number of grid columns 215 | imgs -- images (must have same size and format) 216 | ''' 217 | imgs = iter(imgs) 218 | if PY3: 219 | img0 = next(imgs) 220 | else: 221 | img0 = imgs.next() 222 | pad = np.zeros_like(img0) 223 | imgs = it.chain([img0], imgs) 224 | rows = grouper(w, imgs, pad) 225 | return np.vstack(map(np.hstack, rows)) 226 | 227 | def getsize(img): 228 | h, w = img.shape[:2] 229 | return w, h 230 | 231 | def mdot(*args): 232 | return reduce(np.dot, args) 233 | 234 | def draw_keypoints(vis, keypoints, color = (0, 255, 255)): 235 | for kp in keypoints: 236 | x, y = kp.pt 237 | cv2.circle(vis, (int(x), int(y)), 2, color) 238 | -------------------------------------------------------------------------------- /self_driving/optical_flow/python/opt_flow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Python 2/3 compatibility 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import cv2 8 | import video 9 | from scipy import misc 10 | 11 | NUM_FRAME = 20400 12 | 13 | 14 | def draw_flow(img, flow, step=16): 15 | h, w = img.shape[:2] 16 | y, x = np.mgrid[step / 2:h:step, step / 2:w:step].reshape(2, -1).astype(int) 17 | fx, fy = flow[y, x].T 18 | lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2) 19 | lines = np.int32(lines + 0.5) 20 | vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 21 | cv2.polylines(vis, lines, 0, (0, 255, 0)) 22 | for (x1, y1), (x2, y2) in lines: 23 | cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1) 24 | return vis 25 | 26 | 27 | def draw_hsv(flow): 28 | h, w = flow.shape[:2] 29 | fx, fy = flow[:, :, 0], flow[:, :, 1] 30 | ang = np.arctan2(fy, fx) + np.pi 31 | v = np.sqrt(fx * fx + fy * fy) 32 | hsv = np.zeros((h, w, 3), np.uint8) 33 | hsv[..., 0] = ang * (180 / np.pi / 2) 34 | hsv[..., 1] = 255 35 | hsv[..., 2] = np.minimum(v * 4, 255) 36 | bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) 37 | return bgr 38 | 39 | 40 | def warp_flow(img, flow): 41 | h, w = flow.shape[:2] 42 | flow = -flow 43 | flow[:, :, 0] += np.arange(w) 44 | flow[:, :, 1] += np.arange(h)[:, np.newaxis] 45 | res = cv2.remap(img, flow, None, cv2.INTER_LINEAR) 46 | return res 47 | 48 | 49 | if __name__ == '__main__': 50 | import sys 51 | 52 | print(__doc__) 53 | try: 54 | fn = sys.argv[1] 55 | except IndexError: 56 | fn = 0 57 | 58 | fr = NUM_FRAME 59 | cam = video.create_capture(fn) 60 | ret, prev = cam.read() 61 | fr -= 1 62 | prevgray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY) 63 | show_hsv = False 64 | show_glitch = False 65 | cur_glitch = prev.copy() 66 | index = 0 67 | 68 | while fr > 0: 69 | ret, img = cam.read() 70 | fr -= 1 71 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 72 | flow = cv2.calcOpticalFlowFarneback(prevgray, gray, 0.5, 3, 15, 3, 5, 1.2, 0) 73 | prevgray = gray 74 | 75 | hsv = draw_hsv(flow)[120:420, 70:-70] 76 | misc.imsave('/usr/local/google/home/limeng/Downloads/speed_est/data/train_data/frame_%d.png' % index, hsv) 77 | cv2.imshow('flow HSV', hsv) 78 | index += 1 79 | cv2.destroyAllWindows() 80 | -------------------------------------------------------------------------------- /self_driving/optical_flow/python/tst_scene_render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Python 2/3 compatibility 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | from numpy import pi, sin, cos 9 | 10 | import cv2 11 | 12 | defaultSize = 512 13 | 14 | class TestSceneRender(): 15 | 16 | def 
__init__(self, bgImg = None, fgImg = None, 17 | deformation = False, speed = 0.25, **params): 18 | self.time = 0.0 19 | self.timeStep = 1.0 / 30.0 20 | self.foreground = fgImg 21 | self.deformation = deformation 22 | self.speed = speed 23 | 24 | if bgImg is not None: 25 | self.sceneBg = bgImg.copy() 26 | else: 27 | self.sceneBg = np.zeros(defaultSize, defaultSize, np.uint8) 28 | 29 | self.w = self.sceneBg.shape[0] 30 | self.h = self.sceneBg.shape[1] 31 | 32 | if fgImg is not None: 33 | self.foreground = fgImg.copy() 34 | self.center = self.currentCenter = (int(self.w/2 - fgImg.shape[0]/2), int(self.h/2 - fgImg.shape[1]/2)) 35 | 36 | self.xAmpl = self.sceneBg.shape[0] - (self.center[0] + fgImg.shape[0]) 37 | self.yAmpl = self.sceneBg.shape[1] - (self.center[1] + fgImg.shape[1]) 38 | 39 | self.initialRect = np.array([ (self.h/2, self.w/2), (self.h/2, self.w/2 + self.w/10), 40 | (self.h/2 + self.h/10, self.w/2 + self.w/10), (self.h/2 + self.h/10, self.w/2)]).astype(int) 41 | self.currentRect = self.initialRect 42 | 43 | def getXOffset(self, time): 44 | return int( self.xAmpl*cos(time*self.speed)) 45 | 46 | 47 | def getYOffset(self, time): 48 | return int(self.yAmpl*sin(time*self.speed)) 49 | 50 | def setInitialRect(self, rect): 51 | self.initialRect = rect 52 | 53 | def getRectInTime(self, time): 54 | 55 | if self.foreground is not None: 56 | tmp = np.array(self.center) + np.array((self.getXOffset(time), self.getYOffset(time))) 57 | x0, y0 = tmp 58 | x1, y1 = tmp + self.foreground.shape[0:2] 59 | return np.array([y0, x0, y1, x1]) 60 | else: 61 | x0, y0 = self.initialRect[0] + np.array((self.getXOffset(time), self.getYOffset(time))) 62 | x1, y1 = self.initialRect[2] + np.array((self.getXOffset(time), self.getYOffset(time))) 63 | return np.array([y0, x0, y1, x1]) 64 | 65 | def getCurrentRect(self): 66 | 67 | if self.foreground is not None: 68 | 69 | x0 = self.currentCenter[0] 70 | y0 = self.currentCenter[1] 71 | x1 = self.currentCenter[0] + self.foreground.shape[0] 72 | y1 = self.currentCenter[1] + self.foreground.shape[1] 73 | return np.array([y0, x0, y1, x1]) 74 | else: 75 | x0, y0 = self.currentRect[0] 76 | x1, y1 = self.currentRect[2] 77 | return np.array([x0, y0, x1, y1]) 78 | 79 | def getNextFrame(self): 80 | img = self.sceneBg.copy() 81 | 82 | if self.foreground is not None: 83 | self.currentCenter = (self.center[0] + self.getXOffset(self.time), self.center[1] + self.getYOffset(self.time)) 84 | img[self.currentCenter[0]:self.currentCenter[0]+self.foreground.shape[0], 85 | self.currentCenter[1]:self.currentCenter[1]+self.foreground.shape[1]] = self.foreground 86 | else: 87 | self.currentRect = self.initialRect + np.int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) 88 | if self.deformation: 89 | self.currentRect[1:3] += self.h/20*cos(self.time) 90 | cv2.fillConvexPoly(img, self.currentRect, (0, 0, 255)) 91 | 92 | self.time += self.timeStep 93 | return img 94 | 95 | def resetTime(self): 96 | self.time = 0.0 97 | 98 | 99 | if __name__ == '__main__': 100 | 101 | backGr = cv2.imread('../data/graf1.png') 102 | fgr = cv2.imread('../data/box.png') 103 | 104 | render = TestSceneRender(backGr, fgr) 105 | 106 | while True: 107 | 108 | img = render.getNextFrame() 109 | cv2.imshow('img', img) 110 | 111 | ch = cv2.waitKey(3) 112 | if ch == 27: 113 | break 114 | #import os 115 | #print (os.environ['PYTHONPATH']) 116 | cv2.destroyAllWindows() 117 | -------------------------------------------------------------------------------- /self_driving/optical_flow/python/video.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Video capture sample. 5 | 6 | Sample shows how VideoCapture class can be used to acquire video 7 | frames from a camera of a movie file. Also the sample provides 8 | an example of procedural video generation by an object, mimicking 9 | the VideoCapture interface (see Chess class). 10 | 11 | 'create_capture' is a convinience function for capture creation, 12 | falling back to procedural video in case of error. 13 | 14 | Usage: 15 | video.py [--shotdir ] [source0] [source1] ...' 16 | 17 | sourceN is an 18 | - integer number for camera capture 19 | - name of video file 20 | - synth: for procedural video 21 | 22 | Synth examples: 23 | synth:bg=../data/lena.jpg:noise=0.1 24 | synth:class=chess:bg=../data/lena.jpg:noise=0.1:size=640x480 25 | 26 | Keys: 27 | ESC - exit 28 | SPACE - save current frame to directory 29 | 30 | ''' 31 | 32 | # Python 2/3 compatibility 33 | from __future__ import print_function 34 | 35 | import numpy as np 36 | from numpy import pi, sin, cos 37 | 38 | import cv2 39 | 40 | # built-in modules 41 | from time import clock 42 | 43 | # local modules 44 | from tst_scene_render import TestSceneRender 45 | import common 46 | 47 | 48 | class VideoSynthBase(object): 49 | def __init__(self, size=None, noise=0.0, bg=None, **params): 50 | self.bg = None 51 | self.frame_size = (640, 480) 52 | if bg is not None: 53 | self.bg = cv2.imread(bg, 1) 54 | h, w = self.bg.shape[:2] 55 | self.frame_size = (w, h) 56 | 57 | if size is not None: 58 | w, h = map(int, size.split('x')) 59 | self.frame_size = (w, h) 60 | self.bg = cv2.resize(self.bg, self.frame_size) 61 | 62 | self.noise = float(noise) 63 | 64 | def render(self, dst): 65 | pass 66 | 67 | def read(self, dst=None): 68 | w, h = self.frame_size 69 | 70 | if self.bg is None: 71 | buf = np.zeros((h, w, 3), np.uint8) 72 | else: 73 | buf = self.bg.copy() 74 | 75 | self.render(buf) 76 | 77 | if self.noise > 0.0: 78 | noise = np.zeros((h, w, 3), np.int8) 79 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise) 80 | buf = cv2.add(buf, noise, dtype=cv2.CV_8UC3) 81 | return True, buf 82 | 83 | def isOpened(self): 84 | return True 85 | 86 | 87 | class Book(VideoSynthBase): 88 | def __init__(self, **kw): 89 | super(Book, self).__init__(**kw) 90 | backGr = cv2.imread('../data/graf1.png') 91 | fgr = cv2.imread('../data/box.png') 92 | self.render = TestSceneRender(backGr, fgr, speed=1) 93 | 94 | def read(self, dst=None): 95 | noise = np.zeros(self.render.sceneBg.shape, np.int8) 96 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise) 97 | 98 | return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3) 99 | 100 | 101 | class Cube(VideoSynthBase): 102 | def __init__(self, **kw): 103 | super(Cube, self).__init__(**kw) 104 | self.render = TestSceneRender(cv2.imread('../data/pca_test1.jpg'), deformation=True, speed=1) 105 | 106 | def read(self, dst=None): 107 | noise = np.zeros(self.render.sceneBg.shape, np.int8) 108 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise) 109 | 110 | return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3) 111 | 112 | 113 | class Chess(VideoSynthBase): 114 | def __init__(self, **kw): 115 | super(Chess, self).__init__(**kw) 116 | 117 | w, h = self.frame_size 118 | 119 | self.grid_size = sx, sy = 10, 7 120 | white_quads = [] 121 | black_quads = [] 122 | for i, j in np.ndindex(sy, sx): 123 | q = [[j, i, 0], [j + 1, i, 0], [j + 1, i + 1, 0], [j, i 
+ 1, 0]] 124 | [white_quads, black_quads][(i + j) % 2].append(q) 125 | self.white_quads = np.float32(white_quads) 126 | self.black_quads = np.float32(black_quads) 127 | 128 | fx = 0.9 129 | self.K = np.float64([[fx * w, 0, 0.5 * (w - 1)], 130 | [0, fx * w, 0.5 * (h - 1)], 131 | [0.0, 0.0, 1.0]]) 132 | 133 | self.dist_coef = np.float64([-0.2, 0.1, 0, 0]) 134 | self.t = 0 135 | 136 | def draw_quads(self, img, quads, color=(0, 255, 0)): 137 | img_quads = cv2.projectPoints(quads.reshape(-1, 3), self.rvec, self.tvec, self.K, self.dist_coef)[0] 138 | img_quads.shape = quads.shape[:2] + (2,) 139 | for q in img_quads: 140 | cv2.fillConvexPoly(img, np.int32(q * 4), color, cv2.LINE_AA, shift=2) 141 | 142 | def render(self, dst): 143 | t = self.t 144 | self.t += 1.0 / 30.0 145 | 146 | sx, sy = self.grid_size 147 | center = np.array([0.5 * sx, 0.5 * sy, 0.0]) 148 | phi = pi / 3 + sin(t * 3) * pi / 8 149 | c, s = cos(phi), sin(phi) 150 | ofs = np.array([sin(1.2 * t), cos(1.8 * t), 0]) * sx * 0.2 151 | eye_pos = center + np.array([cos(t) * c, sin(t) * c, s]) * 15.0 + ofs 152 | target_pos = center + ofs 153 | 154 | R, self.tvec = common.lookat(eye_pos, target_pos) 155 | self.rvec = common.mtx2rvec(R) 156 | 157 | self.draw_quads(dst, self.white_quads, (245, 245, 245)) 158 | self.draw_quads(dst, self.black_quads, (10, 10, 10)) 159 | 160 | 161 | classes = dict(chess=Chess, book=Book, cube=Cube) 162 | 163 | presets = dict( 164 | empty='synth:', 165 | lena='synth:bg=../data/lena.jpg:noise=0.1', 166 | chess='synth:class=chess:bg=../data/lena.jpg:noise=0.1:size=640x480', 167 | book='synth:class=book:bg=../data/graf1.png:noise=0.1:size=640x480', 168 | cube='synth:class=cube:bg=../data/pca_test1.jpg:noise=0.0:size=640x480' 169 | ) 170 | 171 | 172 | def create_capture(source=0, fallback=presets['chess']): 173 | '''source: or '||synth [:= [:...]]' 174 | ''' 175 | source = str(source).strip() 176 | chunks = source.split(':') 177 | # handle drive letter ('c:', ...) 
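# e.g. source 'c:/video/clip.avi' splits on ':' into ['c', '/video/clip.avi'];
# the branch below re-joins a single alphabetic chunk with the next one so a
# Windows drive letter is not mistaken for a 'synth:'-style scheme prefix.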
178 | if len(chunks) > 1 and len(chunks[0]) == 1 and chunks[0].isalpha(): 179 | chunks[1] = chunks[0] + ':' + chunks[1] 180 | del chunks[0] 181 | 182 | source = chunks[0] 183 | try: 184 | source = int(source) 185 | except ValueError: 186 | pass 187 | params = dict(s.split('=') for s in chunks[1:]) 188 | 189 | cap = None 190 | if source == 'synth': 191 | Class = classes.get(params.get('class', None), VideoSynthBase) 192 | try: 193 | cap = Class(**params) 194 | except: 195 | pass 196 | else: 197 | cap = cv2.VideoCapture(source) 198 | if 'size' in params: 199 | w, h = map(int, params['size'].split('x')) 200 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, w) 201 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h) 202 | if cap is None or not cap.isOpened(): 203 | print('Warning: unable to open video source: ', source) 204 | if fallback is not None: 205 | return create_capture(fallback, None) 206 | return cap 207 | 208 | 209 | if __name__ == '__main__': 210 | import sys 211 | import getopt 212 | 213 | print(__doc__) 214 | 215 | args, sources = getopt.getopt(sys.argv[1:], '', 'shotdir=') 216 | args = dict(args) 217 | shotdir = args.get('--shotdir', '.') 218 | if len(sources) == 0: 219 | sources = [0] 220 | 221 | caps = list(map(create_capture, sources)) 222 | shot_idx = 0 223 | while True: 224 | imgs = [] 225 | for i, cap in enumerate(caps): 226 | ret, img = cap.read() 227 | imgs.append(img) 228 | cv2.imshow('capture %d' % i, img) 229 | ch = cv2.waitKey(1) 230 | if ch == 27: 231 | break 232 | if ch == ord(' '): 233 | for i, img in enumerate(imgs): 234 | fn = '%s/shot_%d_%03d.bmp' % (shotdir, i, shot_idx) 235 | cv2.imwrite(fn, img) 236 | print(fn, 'saved') 237 | shot_idx += 1 238 | cv2.destroyAllWindows() 239 | -------------------------------------------------------------------------------- /self_driving/road_seg/README.md: -------------------------------------------------------------------------------- 1 | [利用全卷积网络进行车道识别](https://limengweb.wordpress.com/2017/05/03/%E5%88%A9%E7%94%A8%E5%85%A8%E5%8D%B7%E7%A7%AF%E7%BD%91%E7%BB%9C%E8%BF%9B%E8%A1%8C%E8%BD%A6%E9%81%93%E8%AF%86%E5%88%AB/) 2 |
3 | <img src="road_seg.png"/>
4 |
5 | -------------------------------------------------------------------------------- /self_driving/road_seg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/road_seg/__init__.py -------------------------------------------------------------------------------- /self_driving/road_seg/convnet.py: -------------------------------------------------------------------------------- 1 | """A full convolutional neural network for road segmentation. 2 | 3 | nohup python -u -m self_driving.road_seg.convnet > self_driving/road_seg/output.txt 2>&1 & 4 | 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import copy 11 | import tensorflow as tf 12 | from utils import kitti 13 | from self_driving.road_seg import fcn8_vgg 14 | import scipy as scp 15 | import scipy.misc 16 | import matplotlib as mpl 17 | import matplotlib.cm 18 | 19 | EPOCH = 5000 20 | N_cl = 2 21 | UU_TRAIN_SET_SIZE = 98 - 9 22 | UU_TEST_SET_SIZE = 9 23 | 24 | 25 | def _compute_cross_entropy_mean(labels, softmax): 26 | cross_entropy = -tf.reduce_sum( 27 | tf.multiply(labels * tf.log(softmax), [1, 1]), reduction_indices=[1]) 28 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='xentropy_mean') 29 | return cross_entropy_mean 30 | 31 | 32 | def loss(logits, labels): 33 | with tf.name_scope('loss'): 34 | labels = tf.to_float(tf.reshape(labels, (-1, 2))) 35 | logits = tf.reshape(logits, (-1, 2)) 36 | epsilon = 1e-9 37 | softmax = tf.nn.softmax(logits) + epsilon 38 | 39 | cross_entropy_mean = _compute_cross_entropy_mean(labels, softmax) 40 | 41 | enc_loss = tf.add_n(tf.get_collection('losses'), name='total_loss') 42 | dec_loss = tf.add_n(tf.get_collection('dec_losses'), name='total_loss') 43 | fc_loss = tf.add_n(tf.get_collection('fc_wlosses'), name='total_loss') 44 | weight_loss = enc_loss + dec_loss + fc_loss 45 | 46 | total_loss = cross_entropy_mean + weight_loss 47 | 48 | losses = {} 49 | losses['total_loss'] = total_loss 50 | losses['xentropy'] = cross_entropy_mean 51 | losses['weight_loss'] = weight_loss 52 | 53 | return losses 54 | 55 | 56 | def f1_score(logits, labels): 57 | true_labels = tf.to_float(tf.reshape(labels, (-1, 2)))[:, 1] 58 | pred = tf.to_float(tf.reshape(logits, [-1])) 59 | 60 | true_positives = tf.reduce_sum(pred * true_labels) 61 | false_positives = tf.reduce_sum(pred * (1 - true_labels)) 62 | 63 | precision = true_positives / (true_positives + false_positives) 64 | recall = true_positives / tf.reduce_sum(labels) 65 | 66 | f1_score = 2 * precision * recall / (precision + recall) 67 | 68 | return f1_score, precision, recall 69 | 70 | 71 | def learning_rate(global_step): 72 | starter_learning_rate = 1e-5 73 | learning_rate_1 = tf.train.exponential_decay( 74 | starter_learning_rate, global_step, EPOCH * 0.2, 0.1, staircase=True) 75 | learning_rate_2 = tf.train.exponential_decay( 76 | learning_rate_1, global_step, EPOCH * 0.4, 0.5, staircase=True) 77 | decayed_learning_rate = tf.train.exponential_decay( 78 | learning_rate_2, global_step, EPOCH * 0.6, 0.8, staircase=True) 79 | tf.summary.scalar('learning_rate', decayed_learning_rate) 80 | return decayed_learning_rate 81 | 82 | 83 | def color_image(image, num_classes=20): 84 | norm = mpl.colors.Normalize(vmin=0., vmax=num_classes) 85 | mycm = mpl.cm.get_cmap('Set1') 86 | return mycm(norm(image)) 87 | 88 | 89 | def save_output(index, 
training_image, prediction, label): 90 | prediction_label = 1 - prediction[0] 91 | output_image = copy.copy(training_image) 92 | # Save prediction 93 | up_color = color_image(prediction[0], 2) 94 | scp.misc.imsave('output/decision_%d.png' % index, up_color) 95 | # Merge true positive with training images' green channel 96 | true_positive = prediction_label * label[..., 0][0] 97 | merge_green = (1 - true_positive) * training_image[..., 1] + true_positive * 255 98 | output_image[..., 1] = merge_green 99 | # Merge false positive with training images' red channel 100 | false_positive = prediction_label * label[..., 1][0] 101 | merge_red = (1 - false_positive) * training_image[..., 0] + false_positive * 255 102 | output_image[..., 0] = merge_red 103 | # Merge false negative with training images' blue channel 104 | false_negative = (1 - prediction_label) * label[..., 0][0] 105 | merge_blue = (1 - false_negative) * training_image[..., 2] + false_negative * 255 106 | output_image[..., 2] = merge_blue 107 | # Save images 108 | scp.misc.imsave('merge/decision_%d.png' % index, output_image) 109 | 110 | 111 | def main(_): 112 | kitti_data = kitti.Kitti() 113 | 114 | x_image = tf.placeholder(tf.float32, [1, None, None, 3]) 115 | y_ = tf.placeholder(tf.float32, [1, None, None, N_cl]) 116 | 117 | tf.summary.image("images", x_image, max_outputs=1) 118 | 119 | vgg_fcn = fcn8_vgg.FCN8VGG(vgg16_npy_path="data/vgg16.npy") 120 | vgg_fcn.build(x_image, debug=True, num_classes=N_cl) 121 | 122 | losses = loss(vgg_fcn.upscore32, y_) 123 | f1, precision, recall = f1_score(vgg_fcn.pred_up, y_) 124 | total_loss = losses['total_loss'] 125 | tf.summary.scalar("Loss", total_loss) 126 | tf.summary.scalar("F1 Score", f1) 127 | tf.summary.scalar("Precision", precision) 128 | tf.summary.scalar("Recall", recall) 129 | 130 | global_step = tf.Variable(0, trainable=False) 131 | lr = learning_rate(global_step) 132 | optimizer = tf.train.AdamOptimizer(lr) 133 | grads_and_vars = optimizer.compute_gradients(total_loss) 134 | 135 | grads, tvars = zip(*grads_and_vars) 136 | clipped_grads, norm = tf.clip_by_global_norm(grads, 1.0) 137 | grads_and_vars = zip(clipped_grads, tvars) 138 | 139 | train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 140 | 141 | sess = tf.InteractiveSession() 142 | merged = tf.summary.merge_all() 143 | train_writer = tf.summary.FileWriter('train', sess.graph) 144 | sess.run(tf.global_variables_initializer()) 145 | 146 | for i in range(EPOCH): 147 | print("step %d" % i) 148 | t_img, t_label = kitti_data.next_batch(i % UU_TRAIN_SET_SIZE) 149 | pred, _ = sess.run([vgg_fcn.pred_up, train_step], 150 | feed_dict={x_image: t_img, y_: t_label}) 151 | if i % 5 == 0: 152 | for test_index in range(UU_TEST_SET_SIZE): 153 | test_img, test_label = kitti_data.next_batch(test_index + UU_TRAIN_SET_SIZE) 154 | pred, summary = sess.run([vgg_fcn.pred_up, merged], 155 | feed_dict={x_image: test_img, y_: test_label}) 156 | save_output(test_index + UU_TRAIN_SET_SIZE, test_img[0], pred, test_label) 157 | train_writer.add_summary(summary, i) 158 | 159 | 160 | if __name__ == '__main__': 161 | tf.app.run(main=main) -------------------------------------------------------------------------------- /self_driving/road_seg/road_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/road_seg/road_seg.png 
-------------------------------------------------------------------------------- /self_driving/road_seg/test_fcn8_vgg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import scipy as scp 4 | import scipy.misc 5 | import matplotlib as mpl 6 | import matplotlib.cm 7 | import logging 8 | import tensorflow as tf 9 | import sys 10 | import fcn8_vgg 11 | 12 | 13 | def main(_): 14 | logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', 15 | level=logging.INFO, 16 | stream=sys.stdout) 17 | img1 = scp.misc.imread("/Users/limeng/Downloads/kitti/data_road/training/image_2/uu_000000.png") 18 | with tf.Session() as sess: 19 | images = tf.placeholder("float") 20 | feed_dict = {images: img1} 21 | batch_images = tf.expand_dims(images, 0) 22 | 23 | vgg_fcn = fcn8_vgg.FCN8VGG(vgg16_npy_path="/Users/limeng/Downloads/vgg16.npy") 24 | with tf.name_scope("content_vgg"): 25 | vgg_fcn.build(batch_images, debug=True, num_classes=2) 26 | 27 | print('Finished building Network.') 28 | 29 | logging.warning("Score weights are initialized random.") 30 | logging.warning("Do not expect meaningful results.") 31 | 32 | logging.info("Start Initializing Variabels.") 33 | 34 | init = tf.global_variables_initializer() 35 | sess.run(init) 36 | 37 | print('Running the Network') 38 | tensors = [vgg_fcn.pred, vgg_fcn.pred_up] 39 | down, up = sess.run(tensors, feed_dict=feed_dict) 40 | 41 | down_color = color_image(down[0], 2) 42 | up_color = color_image(up[0], 2) 43 | 44 | scp.misc.imsave('fcn8_downsampled.png', down_color) 45 | scp.misc.imsave('fcn8_upsampled.png', up_color) 46 | 47 | 48 | def color_image(image, num_classes=20): 49 | norm = mpl.colors.Normalize(vmin=0., vmax=num_classes) 50 | mycm = mpl.cm.get_cmap('Set1') 51 | return mycm(norm(image)) 52 | 53 | 54 | if __name__ == '__main__': 55 | tf.app.run(main=main) 56 | -------------------------------------------------------------------------------- /self_driving/road_seg/unet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from keras.models import Model 3 | from keras.layers import Input 4 | from keras.layers.core import Dropout 5 | from keras.layers.convolutional import Convolution2D, Deconvolution2D, MaxPooling2D, ZeroPadding2D 6 | 7 | 8 | N_cl = 2 9 | C = 32 10 | 11 | 12 | def get_model(): 13 | # KITTI data set. 
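# Note on the Input() call below: Keras' Input(shape=...) expects the
# per-sample shape without the batch axis (the batch dimension is prepended
# automatically), so the leading None makes this a 5-D tensor. For
# channels-first KITTI frames the intended shape is presumably
# (3, 375, 1242), i.e. (channels, rows, cols).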
14 |     # Keras' Input shape excludes the batch dimension (channels-first layout).
15 |     main_input = Input(shape=(3, 1242, 375), dtype='float32', name='kitti_data')
16 | 
17 |     conv1_1 = ZeroPadding2D((10, 10))(main_input)
18 |     conv1_1 = Convolution2D(64, 3, 3, activation='relu')(conv1_1)  # 1260 * 393 * 64
19 |     conv1_2 = ZeroPadding2D((1, 1))(conv1_1)
20 |     conv1_2 = Convolution2D(64, 3, 3, activation='relu')(conv1_2)  # 1260 * 393 * 64
21 |     pool1 = ZeroPadding2D((0, 1))(conv1_2)  # 1260 * 394 * 64
22 |     pool1 = MaxPooling2D((2, 2), strides=(2, 2))(pool1)  # 630 * 197 * 64
23 | 
24 |     conv2_1 = ZeroPadding2D((1, 1))(pool1)
25 |     conv2_1 = Convolution2D(128, 3, 3, activation='relu')(conv2_1)  # 630 * 197 * 128
26 |     conv2_2 = ZeroPadding2D((1, 1))(conv2_1)
27 |     conv2_2 = Convolution2D(128, 3, 3, activation='relu')(conv2_2)  # 630 * 197 * 128
28 |     pool2 = ZeroPadding2D((0, 1))(conv2_2)  # 630 * 198 * 128
29 |     pool2 = MaxPooling2D((2, 2), strides=(2, 2))(pool2)  # 315 * 99 * 128
30 | 
31 |     conv3_1 = ZeroPadding2D((1, 1))(pool2)
32 |     conv3_1 = Convolution2D(256, 3, 3, activation='relu')(conv3_1)  # 315 * 99 * 256
33 |     conv3_2 = ZeroPadding2D((1, 1))(conv3_1)
34 |     conv3_2 = Convolution2D(256, 3, 3, activation='relu')(conv3_2)  # 315 * 99 * 256
35 |     conv3_3 = ZeroPadding2D((1, 1))(conv3_2)
36 |     conv3_3 = Convolution2D(256, 3, 3, activation='relu')(conv3_3)  # 315 * 99 * 256
37 |     pool3 = ZeroPadding2D((1, 1))(conv3_3)  # 316 * 100 * 256
38 |     pool3 = MaxPooling2D((2, 2), strides=(2, 2))(pool3)  # 158 * 50 * 256
39 | 
40 |     conv4_1 = ZeroPadding2D((1, 1))(pool3)
41 |     conv4_1 = Convolution2D(512, 3, 3, activation='relu')(conv4_1)  # 158 * 50 * 512
42 |     conv4_2 = ZeroPadding2D((1, 1))(conv4_1)
43 |     conv4_2 = Convolution2D(512, 3, 3, activation='relu')(conv4_2)  # 158 * 50 * 512
44 |     conv4_3 = ZeroPadding2D((1, 1))(conv4_2)
45 |     conv4_3 = Convolution2D(512, 3, 3, activation='relu')(conv4_3)  # 158 * 50 * 512
46 |     pool4 = MaxPooling2D((2, 2), strides=(2, 2))(conv4_3)  # 79 * 25 * 512
47 | 
48 |     conv5_1 = ZeroPadding2D((1, 1))(pool4)
49 |     conv5_1 = Convolution2D(512, 3, 3, activation='relu')(conv5_1)  # 79 * 25 * 512
50 |     conv5_2 = ZeroPadding2D((1, 1))(conv5_1)
51 |     conv5_2 = Convolution2D(512, 3, 3, activation='relu')(conv5_2)  # 79 * 25 * 512
52 |     conv5_3 = ZeroPadding2D((1, 1))(conv5_2)
53 |     conv5_3 = Convolution2D(512, 3, 3, activation='relu')(conv5_3)  # 79 * 25 * 512
54 |     pool5 = ZeroPadding2D((1, 1))(conv5_3)  # 80 * 26 * 512
55 |     pool5 = MaxPooling2D((2, 2), strides=(2, 2))(pool5)  # 40 * 13 * 512
56 | 
57 |     # FC_conv1
58 |     fc6 = ZeroPadding2D((1, 1))(pool5)
59 |     fc6 = Convolution2D(1024, 3, 3, activation='relu')(fc6)  # 40 * 13 * 1024
60 |     fc6 = Dropout(0.5)(fc6)
61 |     # FC_conv2
62 |     fc7 = Convolution2D(1024, 1, 1, activation='relu')(fc6)  # 40 * 13 * 1024
63 |     fc7 = Dropout(0.5)(fc7)
64 | 
65 |     score_fc7 = Convolution2D(N_cl, 1, 1, activation='relu')(fc7)  # 40 * 13 * N_cl
66 |     score_fc7_up = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 80, 26))(score_fc7)
67 | 
68 |     # Scale the pool4 skip connection for compatibility. The arithmetic is
69 |     # wrapped in Lambda/merge layers so the Keras functional graph stays
70 |     # traceable (raw tf ops cannot be fed back into Keras layers).
71 |     scale_pool4 = Lambda(lambda t: t * 0.01)(pool4)
72 |     scale_pool4 = ZeroPadding2D((1, 1))(scale_pool4)  # 80 * 26 * 512
73 |     score_pool4 = Convolution2D(N_cl, 1, 1, activation='relu')(scale_pool4)  # 80 * 26 * N_cl
74 |     fuse_pool4 = merge([score_fc7_up, score_pool4], mode='sum')
75 |     score_pool4_up = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 158, 50))(fuse_pool4)
76 | 
77 |     # Scale the pool3 skip connection for compatibility.
78 |     scale_pool3 = Lambda(lambda t: t * 0.0001)(pool3)
79 |     score_pool3 = Convolution2D(N_cl, 1, 1, activation='relu')(scale_pool3)  # 158 * 50 * N_cl
80 |     fuse_pool3 = merge([score_pool4_up, score_pool3], mode='sum')
81 |     score = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 1242, 375))(fuse_pool3)
82 | 
83 |     model = Model(input=main_input, output=score)
84 | 
85 |     return model
86 | 
-------------------------------------------------------------------------------- /self_driving/segnet/README.md: --------------------------------------------------------------------------------
1 | [TensorFlow implementation of segmentation networks](https://limengweb.wordpress.com/2017/08/06/%E5%88%86%E5%89%B2%E7%BD%91%E7%BB%9C%E7%9A%84tensorflow%E5%AE%9E%E7%8E%B0/)
2 | 
3 |

4 |
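Training and evaluation entry points (taken from the module docstrings in this directory):

    nohup python -u -m self_driving.segnet.train > self_driving/segnet/output.txt 2>&1 &
    nohup python -u -m self_driving.segnet.evaluate > self_driving/segnet/output.txt 2>&1 &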
5 | -------------------------------------------------------------------------------- /self_driving/segnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/segnet/__init__.py -------------------------------------------------------------------------------- /self_driving/segnet/evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluate SegNet. 2 | 3 | nohup python -u -m self_driving.segnet.evaluate > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import camvid 10 | from scipy import misc 11 | 12 | LOG_DIR = 'save' 13 | BATCH_SIZE = 4 14 | EPOCH = 25 15 | IMAGE_HEIGHT = 720 16 | IMAGE_WIDTH = 960 17 | IMAGE_CHANNEL = 3 18 | NUM_CLASSES = 32 19 | 20 | test_dir = "/usr/local/google/home/limeng/Downloads/camvid/val.txt" 21 | 22 | colors = [ 23 | [64, 128, 64], # Animal 24 | [192, 0, 128], # Archway 25 | [0, 128, 192], # Bicyclist 26 | [0, 128, 64], # Bridge 27 | [128, 0, 0], # Building 28 | [64, 0, 128], # Car 29 | [64, 0, 192], # CartLuggagePram 30 | [192, 128, 64], # Child 31 | [192, 192, 128], # Column_Pole 32 | [64, 64, 128], # Fence 33 | [128, 0, 192], # LaneMkgsDriv 34 | [192, 0, 64], # LaneMkgsNonDriv 35 | [128, 128, 64], # Misc_Text 36 | [192, 0, 192], # MotorcycleScooter 37 | [128, 64, 64], # OtherMoving 38 | [64, 192, 128], # ParkingBlock 39 | [64, 64, 0], # Pedestrian 40 | [128, 64, 128], # Road 41 | [128, 128, 192], # RoadShoulder 42 | [0, 0, 192], # Sidewalk 43 | [192, 128, 128], # SignSymbol 44 | [128, 128, 128], # Sky 45 | [64, 128, 192], # SUVPickupTruck 46 | [0, 0, 64], # TrafficCone 47 | [0, 64, 64], # TrafficLight 48 | [192, 64, 128], # Train 49 | [128, 128, 0], # Tree 50 | [192, 128, 192], # Truck_Bus 51 | [64, 0, 64], # Tunnel 52 | [192, 192, 0], # VegetationMisc 53 | [0, 0, 0], # Void 54 | [64, 192, 0] # Wall 55 | ] 56 | 57 | 58 | def color_mask(tensor, color): 59 | return tf.reduce_all(tf.equal(tensor, color), 3) 60 | 61 | 62 | def one_hot(labels): 63 | color_tensors = tf.unstack(colors) 64 | channel_tensors = list(map(lambda color: color_mask(labels, color), color_tensors)) 65 | one_hot_labels = tf.cast(tf.stack(channel_tensors, 3), 'float32') 66 | return one_hot_labels 67 | 68 | 69 | def rgb(logits): 70 | softmax = tf.nn.softmax(logits) 71 | argmax = tf.argmax(softmax, 3) 72 | color_map = tf.constant(colors, dtype=tf.float32) 73 | n = color_map.get_shape().as_list()[0] 74 | one_hot = tf.one_hot(argmax, n, dtype=tf.float32) 75 | one_hot_matrix = tf.reshape(one_hot, [-1, n]) 76 | rgb_matrix = tf.matmul(one_hot_matrix, color_map) 77 | rgb_tensor = tf.reshape(rgb_matrix, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 3]) 78 | return tf.cast(rgb_tensor, tf.float32) 79 | 80 | 81 | def main(_): 82 | test_image_filenames, test_label_filenames = camvid.get_filename_list(test_dir) 83 | index = 0 84 | 85 | with tf.Graph().as_default(): 86 | with tf.device('/cpu:0'): 87 | config = tf.ConfigProto() 88 | config.gpu_options.allocator_type = 'BFC' 89 | sess = tf.InteractiveSession(config=config) 90 | 91 | images, labels = camvid.CamVidInputs(test_image_filenames, 92 | test_label_filenames, 93 | BATCH_SIZE, 94 | shuffle=False) 95 | 96 | saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "segnet.ckpt.meta")) 97 | saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR)) 98 | 99 | graph = tf.get_default_graph() 100 | train_data = 
graph.get_tensor_by_name("train_data:0") 101 | train_label = graph.get_tensor_by_name("train_labels:0") 102 | is_training = graph.get_tensor_by_name("is_training:0") 103 | logits = tf.get_collection("logits")[0] 104 | 105 | # Start the queue runners. 106 | coord = tf.train.Coordinator() 107 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 108 | 109 | for i in range(EPOCH): 110 | image_batch, label_batch = sess.run([images, labels]) 111 | feed_dict = { 112 | train_data: image_batch, 113 | train_label: label_batch, 114 | is_training: True 115 | } 116 | prediction = rgb(logits) 117 | pred = sess.run([prediction], feed_dict)[0] 118 | for batch in range(BATCH_SIZE): 119 | misc.imsave('output/segnet_camvid/decision_%d.png' % index, pred[batch]) 120 | misc.imsave('output/segnet_camvid/train_%d.png' % index, image_batch[batch]) 121 | index += 1 122 | 123 | coord.request_stop() 124 | coord.join(threads) 125 | 126 | 127 | if __name__ == '__main__': 128 | tf.app.run(main=main) 129 | -------------------------------------------------------------------------------- /self_driving/segnet/evaluate_kitti.py: -------------------------------------------------------------------------------- 1 | """Evaluate SegNet. 2 | 3 | nohup python -u -m self_driving.segnet.evaluate_kitti > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import kitti_segnet 10 | from scipy import misc 11 | 12 | LOG_DIR = 'backup/segnet_kitti' 13 | EPOCH = 237 14 | BATCH_SIZE = 1 15 | IMAGE_HEIGHT = 375 16 | IMAGE_WIDTH = 1242 17 | NUM_CLASSES = 2 18 | 19 | test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/test.txt" 20 | 21 | colors = [ 22 | [255, 0, 255], 23 | [255, 0, 0], 24 | ] 25 | 26 | def color_mask(tensor, color): 27 | return tf.reduce_all(tf.equal(tensor, color), 3) 28 | 29 | 30 | def one_hot(labels): 31 | color_tensors = tf.unstack(colors) 32 | channel_tensors = list(map(lambda color: color_mask(labels, color), color_tensors)) 33 | one_hot_labels = tf.cast(tf.stack(channel_tensors, 3), 'float32') 34 | return one_hot_labels 35 | 36 | 37 | def rgb(logits): 38 | softmax = tf.nn.softmax(logits) 39 | argmax = tf.argmax(softmax, 3) 40 | color_map = tf.constant(colors, dtype=tf.float32) 41 | n = color_map.get_shape().as_list()[0] 42 | one_hot = tf.one_hot(argmax, n, dtype=tf.float32) 43 | one_hot_matrix = tf.reshape(one_hot, [-1, n]) 44 | rgb_matrix = tf.matmul(one_hot_matrix, color_map) 45 | rgb_tensor = tf.reshape(rgb_matrix, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 3]) 46 | return tf.cast(rgb_tensor, tf.float32) 47 | 48 | 49 | def main(_): 50 | test_image_filenames, test_label_filenames = kitti_segnet.get_filename_list(test_dir) 51 | index = 0 52 | 53 | with tf.Graph().as_default(): 54 | with tf.device('/cpu:0'): 55 | config = tf.ConfigProto() 56 | config.gpu_options.allocator_type = 'BFC' 57 | sess = tf.InteractiveSession(config=config) 58 | 59 | images, labels = kitti_segnet.CamVidInputs(test_image_filenames, 60 | test_label_filenames, 61 | BATCH_SIZE, 62 | shuffle=False) 63 | 64 | saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "segnet.ckpt.meta")) 65 | saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR)) 66 | 67 | graph = tf.get_default_graph() 68 | train_data = graph.get_tensor_by_name("train_data:0") 69 | train_label = graph.get_tensor_by_name("train_labels:0") 70 | is_training = graph.get_tensor_by_name("is_training:0") 71 | logits = tf.get_collection("logits")[0] 72 | 73 | # Start the queue runners. 
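            # (start_queue_runners launches the background threads that feed
            # the CamVidInputs pipeline; without them, sess.run on the batch
            # tensors below would block indefinitely.)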
74 |             coord = tf.train.Coordinator()
75 |             threads = tf.train.start_queue_runners(sess=sess, coord=coord)
76 | 
77 |             for i in range(EPOCH):
78 |                 image_batch, label_batch = sess.run([images, labels])
79 |                 feed_dict = {
80 |                     train_data: image_batch,
81 |                     train_label: label_batch,
82 |                     is_training: True
83 |                 }
84 |                 prediction = rgb(logits)
85 |                 pred = sess.run([prediction], feed_dict)[0]
86 |                 for batch in range(BATCH_SIZE):
87 |                     misc.imsave('output/segnet_kitti/decision_%d.png' % index, pred[batch])
88 |                     misc.imsave('output/segnet_kitti/train_%d.png' % index, image_batch[batch])
89 |                     index += 1
90 | 
91 |             coord.request_stop()
92 |             coord.join(threads)
93 | 
94 | 
95 | if __name__ == '__main__':
96 |     tf.app.run(main=main)
97 | 
-------------------------------------------------------------------------------- /self_driving/segnet/evaluate_test.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.framework import constant_op
6 | from tensorflow.python.platform import test
7 | from self_driving.segnet import evaluate
8 | import tensorflow as tf
9 | 
10 | 
11 | class EvaluateTest(test.TestCase):
12 | 
13 |     def testTfArgmax(self):
14 |         '''[[[[  1.   2.]
15 |             [  3.   4.]
16 |             [  5.   6.]]
17 |            [[  8.   7.]
18 |             [  9.  10.]
19 |             [ 11.  12.]]
20 |            [[ 13.  14.]
21 |             [ 16.  15.]
22 |             [ 17.  18.]]]]'''
23 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 7.0, 9.0,
24 |                         10.0, 11.0, 12.0, 13.0, 14.0, 16.0, 15.0, 17.0, 18.0]
25 |         with self.test_session(use_gpu=False) as sess:
26 |             t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
27 |             argmax_op = tf.argmax(t, axis=3)
28 |             argmax = sess.run([argmax_op])
29 |             self.assertAllEqual(argmax, [[[[1, 1, 1], [0, 1, 1], [1, 0, 1]]]])
30 | 
31 | 
32 |     def testColorImage(self):
33 |         '''[[[[  1.   2.]
34 |             [  3.   4.]
35 |             [  5.   6.]]
36 |            [[  8.   7.]
37 |             [  9.  10.]
38 |             [ 11.  12.]]
39 |            [[ 13.  14.]
40 |             [ 16.  15.]
41 |             [ 17.  18.]]]]'''
42 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 7.0, 9.0,
43 |                         10.0, 11.0, 12.0, 13.0, 14.0, 16.0, 15.0, 17.0, 18.0]
44 |         with self.test_session(use_gpu=False):
45 |             t = constant_op.constant(tensor_input, shape=[3, 3, 1, 2])
46 |             argmax_op = tf.argmax(t, axis=3)
47 |             up_color = evaluate.color_image(argmax_op.eval(), 1.)
48 | self.assertAllClose(up_color, [[[[0.60000002, 0.60000002, 0.60000002, 1.]], 49 | [[0.60000002, 0.60000002, 0.60000002, 1.]], 50 | [[0.60000002, 0.60000002, 0.60000002, 1.]]], 51 | [[[0.89411765, 0.10196079, 0.10980392, 1.]], 52 | [[0.60000002, 0.60000002, 0.60000002, 1.]], 53 | [[0.60000002, 0.60000002, 0.60000002, 1.]]], 54 | [[[0.60000002, 0.60000002, 0.60000002, 1.]], 55 | [[0.89411765, 0.10196079, 0.10980392, 1.]], 56 | [[0.60000002, 0.60000002, 0.60000002, 1.]]]]) 57 | 58 | 59 | if __name__ == "__main__": 60 | test.main() -------------------------------------------------------------------------------- /self_driving/segnet/merge_output.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | result_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_camvid" 4 | output_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_camvid/result" 5 | 6 | #result_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_kitti" 7 | #output_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_kitti/result" 8 | 9 | echo "Merge output" 10 | 11 | train_file_names=($(ls -v $result_dir/train_*.png)) 12 | output_file_names=($(ls -v $result_dir/decision_*.png)) 13 | 14 | output_data_size=${#train_file_names[@]} 15 | 16 | for (( i=0; i<${output_data_size}; i++ )); 17 | do 18 | convert ${output_file_names[$i]} ${train_file_names[$i]} +append $output_dir/frame_$i.png 19 | done 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_camvid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from scipy import misc 4 | 5 | data_image_dir = "/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full" 6 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full/image_2" 7 | 8 | 9 | IMAGE_HEIGHT = 720 10 | IMAGE_WIDTH = 960 11 | IMAGE_DEPTH = 3 12 | 13 | 14 | color2index = { 15 | (64, 128, 64) : 0, # Animal 16 | (192, 0, 128) : 1, # Archway 17 | (0, 128, 192) : 2, # Bicyclist 18 | (0, 128, 64) : 3, # Bridge 19 | (128, 0, 0) : 4, # Building 20 | (64, 0, 128) : 5, # Car 21 | (64, 0, 192) : 6, # CartLuggagePram 22 | (192, 128, 64) : 7, # Child 23 | (192, 192, 128) : 8, # Column_Pole 24 | (64, 64, 128) :9, # Fence 25 | (128, 0, 192) : 10, # LaneMkgsDriv 26 | (192, 0, 64) : 11, # LaneMkgsNonDriv 27 | (128, 128, 64) : 12, # Misc_Text 28 | (192, 0, 192) : 13, # MotorcycleScooter 29 | (128, 64, 64) : 14, # OtherMoving 30 | (64, 192, 128) : 15, # ParkingBlock 31 | (64, 64, 0) : 16, # Pedestrian 32 | (128, 64, 128) : 17, # Road 33 | (128, 128, 192) : 18, # RoadShoulder 34 | (0, 0, 192) : 19, # Sidewalk 35 | (192, 128, 128) : 20, # SignSymbol 36 | (128, 128, 128) : 21, # Sky 37 | (64, 128, 192) : 22, # SUVPickupTruck 38 | (0, 0, 64) : 23, # TrafficCone 39 | (0, 64, 64) : 24, # TrafficLight 40 | (192, 64, 128) : 25, # Train 41 | (128, 128, 0) : 26, # Tree 42 | (192, 128, 192) : 27, # Truck_Bus 43 | (64, 0, 64) : 28, # Tunnel 44 | (192, 192, 0) : 29, # VegetationMisc 45 | (0, 0, 0) : 30, # Void 46 | (64, 192, 0) : 31, # Wall 47 | } 48 | 49 | 50 | def im2index(im): 51 | height, width, ch = im.shape 52 | assert ch == IMAGE_DEPTH 53 | if height != IMAGE_HEIGHT or width != IMAGE_WIDTH: 54 | print("Size: (%d, %d, %d) cannot be used." 
% (height, width, ch)) 55 | return None 56 | m_lable = np.zeros((height, width), dtype=np.uint8) 57 | for w in range(width): 58 | for h in range(height): 59 | r, g, b = im[h, w, :] 60 | if (r, g, b) in color2index: 61 | m_lable[h, w] = color2index[(r, g, b)] 62 | else: 63 | m_lable[h, w] = 30 64 | return m_lable 65 | 66 | 67 | def convert_to_label_data(file_name): 68 | assert os.path.isfile(file_name), 'Cannot find: %s' % file_name 69 | return im2index(misc.imread(file_name, mode='RGB')) 70 | 71 | 72 | def main(): 73 | for file in os.listdir(data_image_dir): 74 | if file.endswith(".png"): 75 | print("Try to converting %s" % file) 76 | gt_label = convert_to_label_data(os.path.join(data_image_dir, file)) 77 | if gt_label is not None: 78 | misc.imsave(os.path.join(image_dir, file), gt_label) 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_camvid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | data_dir="/usr/local/google/home/limeng/Downloads/camvid/701_StillsRaw_full" 4 | label_data_dir="/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full/image_2" 5 | 6 | echo "Camvid dataset" 7 | 8 | rm -f train.txt 9 | touch train.txt 10 | 11 | data_file_names=($(ls $data_dir)) 12 | label_file_names=($(ls $label_data_dir)) 13 | data_size=${#data_file_names[@]} 14 | 15 | for (( i=0; i<${data_size}; i++ )); 16 | do 17 | echo $data_dir/${data_file_names[$i]} $label_data_dir/${label_file_names[$i]} >> train.txt 18 | done 19 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from scipy import misc 4 | 5 | data_image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/data_image_2" 6 | image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/image_2" 7 | data_label_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/data_label_2" 8 | label_output_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/gt_image_2" 9 | 10 | 11 | IMAGE_HEIGHT = 375 12 | IMAGE_WIDTH = 1242 13 | IMAGE_DEPTH = 3 14 | 15 | 16 | # R G B 17 | # 255 0 255 road 18 | # 0 0 255 road 19 | # 255 0 0 valid 20 | # 0 0 0 invalid 21 | color2index = { 22 | (255, 0, 255) : 0, 23 | (0, 0, 255) : 0, 24 | (255, 0, 0) : 1, 25 | (0, 0, 0) : 1, 26 | } 27 | 28 | 29 | def im2index(im): 30 | height, width, ch = im.shape 31 | assert ch == IMAGE_DEPTH 32 | if height != IMAGE_HEIGHT or width != IMAGE_WIDTH: 33 | print("Size: (%d, %d, %d) cannot be used." 
% (height, width, ch)) 34 | return None 35 | m_lable = np.zeros((height, width), dtype=np.uint8) 36 | for w in range(width): 37 | for h in range(height): 38 | r, g, b = im[h, w, :] 39 | m_lable[h, w] = color2index[(r, g, b)] 40 | return m_lable 41 | 42 | 43 | def convert_to_label_data(file_name): 44 | assert os.path.isfile(file_name), 'Cannot find: %s' % file_name 45 | return im2index(misc.imread(file_name, mode='RGB')) 46 | 47 | 48 | def main(): 49 | for file in os.listdir(data_image_dir): 50 | if file.endswith(".png"): 51 | print("Try to copy %s" % file) 52 | im = misc.imread(os.path.join(data_image_dir, file), mode='RGB') 53 | height, width, ch = im.shape 54 | assert ch == IMAGE_DEPTH 55 | if height == IMAGE_HEIGHT and width == IMAGE_WIDTH and ch == IMAGE_DEPTH: 56 | misc.imsave(os.path.join(image_dir, file), im) 57 | else: 58 | print("Size: (%d, %d, %d) cannot be used." % (height, width, ch)) 59 | 60 | for file in os.listdir(data_label_dir): 61 | if file.endswith(".png"): 62 | print("Try to converting %s" % file) 63 | gt_label = convert_to_label_data(os.path.join(data_label_dir, file)) 64 | if gt_label is not None: 65 | misc.imsave(os.path.join(label_output_dir, file), gt_label) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/training" 4 | 5 | echo "KITTI dataset" 6 | 7 | rm -f train.txt 8 | touch train.txt 9 | 10 | append_data_items() 11 | { 12 | train_file_names=($(ls $train_data_dir/image_2/$1)) 13 | gt_file_names=($(ls $train_data_dir/gt_image_2/$1)) 14 | 15 | train_data_size=${#train_file_names[@]} 16 | 17 | for (( i=0; i<${train_data_size}; i++ )); 18 | do 19 | echo ${train_file_names[$i]} ${gt_file_names[$i]} >> train.txt 20 | done 21 | } 22 | 23 | append_data_items "um_*" 24 | append_data_items "umm_*" 25 | append_data_items "uu_*" 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from scipy import misc 3 | 4 | data_test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/data_image_2" 5 | test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/image_2" 6 | 7 | 8 | IMAGE_HEIGHT = 375 9 | IMAGE_WIDTH = 1242 10 | IMAGE_DEPTH = 3 11 | 12 | 13 | def main(): 14 | for file in os.listdir(data_test_dir): 15 | if file.endswith(".png"): 16 | print("Try to copy %s" % file) 17 | im = misc.imread(os.path.join(data_test_dir, file), mode='RGB') 18 | height, width, ch = im.shape 19 | assert ch == IMAGE_DEPTH 20 | if height == IMAGE_HEIGHT and width == IMAGE_WIDTH and ch == IMAGE_DEPTH: 21 | misc.imsave(os.path.join(test_dir, file), im) 22 | else: 23 | print("Size: (%d, %d, %d) cannot be used." 
% (height, width, ch)) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | test_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/image_2" 4 | fake_label_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/testing" 5 | 6 | echo "KITTI test dataset" 7 | 8 | rm -f test.txt 9 | touch test.txt 10 | 11 | test_file_names=($(ls $test_data_dir)) 12 | test_data_size=${#test_file_names[@]} 13 | 14 | for (( i=0; i<${test_data_size}; i++ )); 15 | do 16 | echo $test_data_dir/${test_file_names[$i]} $fake_label_data_dir/umm_road_000000.png >> test.txt 17 | done 18 | -------------------------------------------------------------------------------- /self_driving/segnet/segnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/segnet/segnet.png -------------------------------------------------------------------------------- /self_driving/segnet/segnet_vgg.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | VGG16_NPY_PATH = 'vgg16.npy' 9 | WD = 5e-4 10 | 11 | data_dict = np.load(VGG16_NPY_PATH, encoding='latin1').item() 12 | 13 | 14 | def activation_summary(var): 15 | tensor_name = var.op.name 16 | tf.summary.histogram(tensor_name + '/activations', var) 17 | tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(var)) 18 | 19 | 20 | def variable_summaries(var): 21 | if not tf.get_variable_scope().reuse: 22 | name = var.op.name 23 | with tf.name_scope('summaries'): 24 | mean = tf.reduce_mean(var) 25 | tf.summary.scalar(name + '/mean', mean) 26 | with tf.name_scope('stddev'): 27 | stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean))) 28 | tf.summary.scalar(name + '/sttdev', stddev) 29 | tf.summary.scalar(name + '/max', tf.reduce_max(var)) 30 | tf.summary.scalar(name + '/min', tf.reduce_min(var)) 31 | tf.summary.histogram(name, var) 32 | 33 | 34 | def load_conv_filter(name): 35 | init = tf.constant_initializer(value=data_dict[name][0], 36 | dtype=tf.float32) 37 | shape = data_dict[name][0].shape 38 | var = tf.get_variable(name=name + "_weight", initializer=init, shape=shape) 39 | if not tf.get_variable_scope().reuse: 40 | weight_decay = tf.multiply(tf.nn.l2_loss(var), WD, name=name + '_weight_decay') 41 | tf.add_to_collection('losses', weight_decay) 42 | variable_summaries(var) 43 | return var 44 | 45 | 46 | def get_conv_filter(name, shape): 47 | init = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32) 48 | var = tf.get_variable(name=name + "_weight", initializer=init) 49 | weight_decay = tf.multiply(tf.nn.l2_loss(var), WD, name=name + '_weight_decay') 50 | tf.add_to_collection('losses', weight_decay) 51 | variable_summaries(var) 52 | return var 53 | 54 | 55 | def load_conv_bias(name): 56 | bias_wights = data_dict[name][1] 57 | shape = data_dict[name][1].shape 58 | init = tf.constant_initializer(value=bias_wights, 59 | dtype=tf.float32) 60 | var = tf.get_variable(name=name + "_bias", initializer=init, shape=shape) 61 | variable_summaries(var) 62 | return var 63 | 64 | 
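# How the 'losses' collection assembled by the filter loaders above is
# consumed downstream (a sketch; the loss() in train.py does exactly this):
#
#     tf.add_to_collection('losses', cross_entropy_mean)
#     total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
#
# so every weight-decay term contributes to the optimized objective.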
65 | def get_conv_bias(name, shape): 66 | init = tf.constant(0.0, shape=shape) 67 | var = tf.get_variable(name=name + "_bias", initializer=init) 68 | variable_summaries(var) 69 | return var 70 | 71 | 72 | def conv2d(bottom, weight): 73 | return tf.nn.conv2d(bottom, weight, strides=[1, 1, 1, 1], padding='SAME') 74 | 75 | 76 | def batch_norm_layer(bottom, is_training, scope): 77 | return tf.cond(is_training, 78 | lambda: tf.contrib.layers.batch_norm(bottom, 79 | is_training=True, 80 | center=False, 81 | scope=scope+"_bn"), 82 | lambda: tf.contrib.layers.batch_norm(bottom, 83 | is_training=False, 84 | center=False, 85 | scope=scope+"_bn", 86 | reuse=True)) 87 | 88 | 89 | def conv_layer_with_bn(bottom=None, is_training=True, shape=None, name=None): 90 | with tf.variable_scope(name) as scope: 91 | if shape: 92 | weight = get_conv_filter(name, shape) 93 | bias = get_conv_bias(name, [shape[3]]) 94 | else: 95 | weight = load_conv_filter(name) 96 | bias = load_conv_bias(name) 97 | conv = tf.nn.bias_add(conv2d(bottom, weight), bias) 98 | conv = batch_norm_layer(conv, is_training, scope.name) 99 | conv = tf.nn.relu(conv, name="relu") 100 | activation_summary(conv) 101 | return conv 102 | 103 | 104 | def max_pool_with_argmax(bottom): 105 | with tf.name_scope('max_pool_arg_max'): 106 | with tf.device('/gpu:0'): 107 | _, indices = tf.nn.max_pool_with_argmax( 108 | bottom, 109 | ksize=[1, 2, 2, 1], 110 | strides=[1, 2, 2, 1], 111 | padding='SAME') 112 | indices = tf.stop_gradient(indices) 113 | bottom = tf.nn.max_pool(bottom, 114 | ksize=[1, 2, 2, 1], 115 | strides=[1, 2, 2, 1], 116 | padding='SAME') 117 | return bottom, indices 118 | 119 | 120 | def max_unpool_with_argmax(bottom, mask, output_shape=None): 121 | with tf.name_scope('max_unpool_with_argmax'): 122 | ksize = [1, 2, 2, 1] 123 | input_shape = bottom.get_shape().as_list() 124 | # calculation new shape 125 | if output_shape is None: 126 | output_shape = (input_shape[0], 127 | input_shape[1] * ksize[1], 128 | input_shape[2] * ksize[2], 129 | input_shape[3]) 130 | # calculation indices for batch, height, width and feature maps 131 | one_like_mask = tf.ones_like(mask) 132 | batch_range = tf.reshape(tf.range(output_shape[0], 133 | dtype=tf.int64), 134 | shape=[input_shape[0], 1, 1, 1]) 135 | b = one_like_mask * batch_range 136 | y = mask // (output_shape[2] * output_shape[3]) 137 | x = mask % (output_shape[2] * output_shape[3]) // output_shape[3] 138 | feature_range = tf.range(output_shape[3], dtype=tf.int64) 139 | f = one_like_mask * feature_range 140 | # transpose indices & reshape update values to one dimension 141 | updates_size = tf.size(bottom) 142 | indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size])) 143 | values = tf.reshape(bottom, [updates_size]) 144 | return tf.scatter_nd(indices, values, output_shape) 145 | 146 | 147 | def inference(images, is_training, num_classes): 148 | training = tf.equal(is_training, tf.constant(True)) 149 | conv1_1 = conv_layer_with_bn(bottom=images, is_training=training, name="conv1_1") 150 | conv1_2 = conv_layer_with_bn(bottom=conv1_1, is_training=training, name="conv1_2") 151 | pool1, pool1_indices = max_pool_with_argmax(conv1_2) 152 | 153 | print("pool1: ", pool1.shape) 154 | 155 | conv2_1 = conv_layer_with_bn(bottom=pool1, is_training=training, name="conv2_1") 156 | conv2_2 = conv_layer_with_bn(bottom=conv2_1, is_training=training, name="conv2_2") 157 | pool2, pool2_indices = max_pool_with_argmax(conv2_2) 158 | 159 | print("pool2: ", pool2.shape) 160 | 161 | conv3_1 = 
conv_layer_with_bn(bottom=pool2, is_training=training, name="conv3_1") 162 | conv3_2 = conv_layer_with_bn(bottom=conv3_1, is_training=training, name="conv3_2") 163 | conv3_3 = conv_layer_with_bn(bottom=conv3_2, is_training=training, name="conv3_3") 164 | pool3, pool3_indices = max_pool_with_argmax(conv3_3) 165 | 166 | print("pool3: ", pool3.shape) 167 | 168 | conv4_1 = conv_layer_with_bn(bottom=pool3, is_training=training, name="conv4_1") 169 | conv4_2 = conv_layer_with_bn(bottom=conv4_1, is_training=training, name="conv4_2") 170 | conv4_3 = conv_layer_with_bn(bottom=conv4_2, is_training=training, name="conv4_3") 171 | pool4, pool4_indices = max_pool_with_argmax(conv4_3) 172 | 173 | print("pool4: ", pool4.shape) 174 | 175 | conv5_1 = conv_layer_with_bn(bottom=pool4, is_training=training, name="conv5_1") 176 | conv5_2 = conv_layer_with_bn(bottom=conv5_1, is_training=training, name="conv5_2") 177 | conv5_3 = conv_layer_with_bn(bottom=conv5_2, is_training=training, name="conv5_3") 178 | pool5, pool5_indices = max_pool_with_argmax(conv5_3) 179 | 180 | print("pool5: ", pool5.shape) 181 | 182 | # End of encoders 183 | # start of decoders 184 | 185 | up_sample_5 = max_unpool_with_argmax(pool5, 186 | pool5_indices, 187 | output_shape=conv5_3.shape) 188 | up_conv5 = conv_layer_with_bn(bottom=up_sample_5, 189 | shape=[3, 3, 512, 512], 190 | is_training=training, 191 | name="up_conv5") 192 | 193 | print("up_conv5: ", up_conv5.shape) 194 | 195 | up_sample_4 = max_unpool_with_argmax(up_conv5, 196 | pool4_indices, 197 | output_shape=conv4_3.shape) 198 | up_conv4 = conv_layer_with_bn(bottom=up_sample_4, 199 | shape=[3, 3, 512, 256], 200 | is_training=training, 201 | name="up_conv4") 202 | 203 | print("up_conv4: ", up_conv4.shape) 204 | 205 | up_sample_3 = max_unpool_with_argmax(up_conv4, 206 | pool3_indices, 207 | output_shape=conv3_3.shape) 208 | up_conv3 = conv_layer_with_bn(bottom=up_sample_3, 209 | shape=[3, 3, 256, 128], 210 | is_training=training, 211 | name="up_conv3") 212 | 213 | print("up_conv3: ", up_conv3.shape) 214 | 215 | up_sample_2 = max_unpool_with_argmax(up_conv3, 216 | pool2_indices, 217 | output_shape=conv2_2.shape) 218 | up_conv2 = conv_layer_with_bn(bottom=up_sample_2, 219 | shape=[3, 3, 128, 64], 220 | is_training=training, 221 | name="up_conv2") 222 | 223 | print("up_conv2: ", up_conv2.shape) 224 | 225 | up_sample_1 = max_unpool_with_argmax(up_conv2, 226 | pool1_indices, 227 | output_shape=conv1_2.shape) 228 | logits = conv_layer_with_bn(bottom=up_sample_1, 229 | shape=[3, 3, 64, num_classes], 230 | is_training=training, 231 | name="up_conv1") 232 | 233 | print("logits: ", logits.shape) 234 | tf.add_to_collection("logits", logits) 235 | 236 | return logits 237 | -------------------------------------------------------------------------------- /self_driving/segnet/segnet_vgg_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from tensorflow.python.framework import constant_op 6 | from tensorflow.python.platform import test 7 | from self_driving.segnet import segnet_vgg 8 | import tensorflow as tf 9 | import numpy as np 10 | 11 | NUM_CLASSES = 11 12 | 13 | class PoolingTest(test.TestCase): 14 | 15 | def testMaxPoolingWithArgmax(self): 16 | # MaxPoolWithArgMax is implemented only on CUDA. 17 | if not test.is_gpu_available(cuda_only=True): 18 | return 19 | '''[[[[ 1. 2.] 20 | [ 3. 4.] 21 | [ 5. 6.]] 22 | [[ 7. 8.] 
23 |             [  9.  10.]
24 |             [ 11.  12.]]
25 |            [[ 13.  14.]
26 |             [ 15.  16.]
27 |             [ 17.  18.]]]]'''
28 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
29 |                         10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]
30 |         with self.test_session(use_gpu=True) as sess:
31 |             t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
32 |             out_op, argmax_op = segnet_vgg.max_pool_with_argmax(t)
33 |             out, argmax = sess.run([out_op, argmax_op])
34 |             self.assertShapeEqual(out, out_op)
35 |             self.assertShapeEqual(argmax, argmax_op)
36 |             '''[[[9, 10]
37 |                 [11, 12]]
38 |                [[15, 16]
39 |                 [17, 18]]]'''
40 |             self.assertAllClose(out.ravel(), [9., 10., 11., 12., 15., 16., 17., 18.])
41 |             self.assertAllEqual(argmax.ravel(), [8, 9, 10, 11, 14, 15, 16, 17])
42 | 
43 |     def testMaxUnpoolingWithArgmax(self):
44 |         '''[[[[  1.   2.]
45 |             [  3.   4.]
46 |             [  5.   6.]]
47 |            [[  7.   8.]
48 |             [  9.  10.]
49 |             [ 11.  12.]]
50 |            [[ 13.  14.]
51 |             [ 15.  16.]
52 |             [ 17.  18.]]]]'''
53 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
54 |                         10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]
55 |         with self.test_session(use_gpu=True) as sess:
56 |             t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
57 |             out_op, argmax_op = segnet_vgg.max_pool_with_argmax(t)
58 |             out_op = segnet_vgg.max_unpool_with_argmax(out_op,
59 |                                                        argmax_op,
60 |                                                        output_shape=np.int64([1, 3, 3, 2]))
61 |             out = sess.run([out_op])
62 |             self.assertAllClose(out, [[[[[  0.,   0.],
63 |                                          [  0.,   0.],
64 |                                          [  0.,   0.]],
65 |                                         [[  0.,   0.],
66 |                                          [  9.,  10.],
67 |                                          [ 11.,  12.]],
68 |                                         [[  0.,   0.],
69 |                                          [ 15.,  16.],
70 |                                          [ 17.,  18.]]]]])
71 | 
72 |     def testLoadConvBias(self):
73 |         with self.test_session(use_gpu=True) as sess:
74 |             bias = segnet_vgg.load_conv_bias("conv1_1")
75 |             sess.run(tf.global_variables_initializer())
76 |             self.assertEqual(bias.get_shape(), [64,])
77 |             self.assertAllClose(tf.reduce_sum(bias).eval(), 32.08903503417969)
78 | 
79 |     def testLoadConvFilter(self):
80 |         with self.test_session(use_gpu=True) as sess:
81 |             weight = segnet_vgg.load_conv_filter("conv1_1")
82 |             sess.run(tf.global_variables_initializer())
83 |             self.assertEqual(weight.get_shape(), [3, 3, 3, 64])
84 |             self.assertAllClose(tf.reduce_sum(weight).eval(), -4.212705612182617)
85 | 
86 |     def testConvLayerWithBn(self):
87 |         config = tf.ConfigProto()
88 |         config.gpu_options.allocator_type = 'BFC'
89 |         tensor_input = tf.ones([10, 495, 289, 3], tf.float32)
90 |         with self.test_session(use_gpu=True, config=config) as sess:
91 |             conv_op = segnet_vgg.conv_layer_with_bn(bottom=tensor_input, is_training=tf.constant(True), name="conv1_1")
92 |             sess.run(tf.global_variables_initializer())
93 |             conv_out = sess.run([conv_op])
94 |             self.assertEqual(np.array(conv_out).shape, (1, 10, 495, 289, 64))
95 | 
96 |     def testDecoderConvLayerWithBn(self):
97 |         config = tf.ConfigProto()
98 |         config.gpu_options.allocator_type = 'BFC'
99 |         tensor_input = tf.ones([10, 495, 289, 3], tf.float32)
100 |         with self.test_session(use_gpu=True, config=config) as sess:
101 |             conv_op = segnet_vgg.conv_layer_with_bn(bottom=tensor_input,
102 |                                                     shape=[3, 3, 3, 128],  # decoder convs pass an explicit shape
103 |                                                     is_training=tf.constant(True), name="up_conv1")
104 |             sess.run(tf.global_variables_initializer())
105 |             conv_out = sess.run([conv_op])
106 |             self.assertEqual(np.array(conv_out).shape, (1, 10, 495, 289, 128))
107 | 
108 |     def testInference(self):
109 |         config = tf.ConfigProto()
110 |         config.gpu_options.allocator_type = 'BFC'
111 |         train_data = tf.ones([10, 495, 289, 3], tf.float32)
112 |         with self.test_session(use_gpu=True, config=config) as sess:
113 |             model_op = segnet_vgg.inference(train_data, tf.constant(True), NUM_CLASSES)
114 |             sess.run(tf.global_variables_initializer())
115 | model_out = sess.run([model_op]) 116 | self.assertEqual(np.array(model_out).shape, (1, 10, 495, 289, NUM_CLASSES)) 117 | 118 | 119 | if __name__ == "__main__": 120 | test.main() 121 | -------------------------------------------------------------------------------- /self_driving/segnet/train.py: -------------------------------------------------------------------------------- 1 | """Train SegNet with camvid dataset. 2 | 3 | nohup python -u -m self_driving.segnet.train > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import camvid 10 | import segnet_vgg 11 | 12 | LOG_DIR = 'save' 13 | EPOCH = 6000 14 | BATCH_SIZE = 4 15 | IMAGE_HEIGHT = 720 16 | IMAGE_WIDTH = 960 17 | IMAGE_CHANNEL = 3 18 | NUM_CLASSES = 32 19 | INITIAL_LEARNING_RATE = 0.0001 20 | 21 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/train.txt" 22 | val_dir = "/usr/local/google/home/limeng/Downloads/camvid/val.txt" 23 | 24 | 25 | def loss(logits, labels): 26 | logits = tf.reshape(logits, [-1, NUM_CLASSES]) 27 | labels = tf.reshape(labels, [-1]) 28 | 29 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 30 | logits=logits, labels=labels, name='cross_entropy') 31 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 32 | tf.add_to_collection('losses', cross_entropy_mean) 33 | return tf.add_n(tf.get_collection('losses'), name='total_loss') 34 | 35 | 36 | def train(total_loss): 37 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 38 | with tf.control_dependencies(update_ops): 39 | global_step = tf.Variable(0, name='global_step', trainable=False) 40 | learning_rate = tf.train.exponential_decay( 41 | INITIAL_LEARNING_RATE, global_step, EPOCH * 0.2, 0.9, staircase=True) 42 | tf.summary.scalar('total_loss', total_loss) 43 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 44 | return optimizer.minimize(total_loss, global_step=global_step) 45 | 46 | 47 | def main(_): 48 | image_filenames, label_filenames = camvid.get_filename_list(image_dir) 49 | val_image_filenames, val_label_filenames = camvid.get_filename_list(val_dir) 50 | 51 | with tf.Graph().as_default(): 52 | with tf.device('/cpu:0'): 53 | # config = tf.ConfigProto(device_count = {'GPU': 0}) 54 | config = tf.ConfigProto() 55 | config.gpu_options.allocator_type = 'BFC' 56 | sess = tf.InteractiveSession(config=config) 57 | 58 | train_data = tf.placeholder(tf.float32, 59 | shape=[BATCH_SIZE, 60 | IMAGE_HEIGHT, 61 | IMAGE_WIDTH, 62 | IMAGE_CHANNEL], 63 | name='train_data') 64 | train_labels = tf.placeholder(tf.int64, 65 | shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 1], 66 | name='train_labels') 67 | is_training = tf.placeholder(tf.bool, name='is_training') 68 | 69 | images, labels = camvid.CamVidInputs(image_filenames, 70 | label_filenames, 71 | BATCH_SIZE) 72 | val_images, val_labels = camvid.CamVidInputs(val_image_filenames, 73 | val_label_filenames, 74 | BATCH_SIZE) 75 | 76 | logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES) 77 | total_loss = loss(logits, train_labels) 78 | train_op = train(total_loss) 79 | check_op = tf.add_check_numerics_ops() 80 | 81 | merged_summary_op = tf.summary.merge_all() 82 | summary_writer = tf.summary.FileWriter('train', sess.graph) 83 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) 84 | if not os.path.exists(LOG_DIR): 85 | os.makedirs(LOG_DIR) 86 | checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt") 87 | 88 | sess.run(tf.global_variables_initializer()) 89 | 90 | # Start 
the queue runners. 91 | coord = tf.train.Coordinator() 92 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 93 | 94 | for i in range(EPOCH): 95 | image_batch, label_batch = sess.run([images, labels]) 96 | feed_dict = { 97 | train_data: image_batch, 98 | train_labels: label_batch, 99 | is_training: True 100 | } 101 | _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op], 102 | feed_dict=feed_dict) 103 | if i % 10 == 0: 104 | print("Start validating...") 105 | val_images_batch, val_labels_batch = sess.run([val_images, val_labels]) 106 | loss_value = total_loss.eval(feed_dict={train_data: val_images_batch, 107 | train_labels: val_labels_batch, 108 | is_training: True}) 109 | print("Epoch: %d, Loss: %g" % (i, loss_value)) 110 | saver.save(sess, checkpoint_path) 111 | # write logs at every iteration 112 | summary_writer.add_summary(summary, i) 113 | 114 | coord.request_stop() 115 | coord.join(threads) 116 | 117 | 118 | if __name__ == '__main__': 119 | tf.app.run(main=main) 120 | -------------------------------------------------------------------------------- /self_driving/segnet/train_kitti.py: -------------------------------------------------------------------------------- 1 | """Train SegNet with KITTI dataset. 2 | 3 | nohup python -u -m self_driving.segnet.train_kitti > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import kitti_segnet 10 | import segnet_vgg 11 | 12 | LOG_DIR = 'save' 13 | EPOCH = 4000 14 | BATCH_SIZE = 1 15 | IMAGE_HEIGHT = 375 16 | IMAGE_WIDTH = 1242 17 | IMAGE_CHANNEL = 3 18 | NUM_CLASSES = 2 19 | INITIAL_LEARNING_RATE = 0.0001 20 | 21 | image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/train.txt" 22 | val_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/train.txt" 23 | 24 | 25 | def loss(logits, labels): 26 | logits = tf.reshape(logits, [-1, NUM_CLASSES]) 27 | labels = tf.reshape(labels, [-1]) 28 | 29 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 30 | logits=logits, labels=labels, name='cross_entropy') 31 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 32 | tf.add_to_collection('losses', cross_entropy_mean) 33 | return tf.add_n(tf.get_collection('losses'), name='total_loss') 34 | 35 | 36 | def train(total_loss): 37 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 38 | with tf.control_dependencies(update_ops): 39 | global_step = tf.Variable(0, name='global_step', trainable=False) 40 | learning_rate = tf.train.exponential_decay( 41 | INITIAL_LEARNING_RATE, global_step, EPOCH * 0.2, 0.9, staircase=True) 42 | tf.summary.scalar('total_loss', total_loss) 43 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 44 | return optimizer.minimize(total_loss, global_step=global_step) 45 | 46 | 47 | def main(_): 48 | image_filenames, label_filenames = kitti_segnet.get_filename_list(image_dir) 49 | val_image_filenames, val_label_filenames = kitti_segnet.get_filename_list(val_dir) 50 | 51 | with tf.Graph().as_default(): 52 | with tf.device('/cpu:0'): 53 | # config = tf.ConfigProto(device_count = {'GPU': 0}) 54 | config = tf.ConfigProto() 55 | config.gpu_options.allocator_type = 'BFC' 56 | sess = tf.InteractiveSession(config=config) 57 | 58 | train_data = tf.placeholder(tf.float32, 59 | shape=[BATCH_SIZE, 60 | IMAGE_HEIGHT, 61 | IMAGE_WIDTH, 62 | IMAGE_CHANNEL], 63 | name='train_data') 64 | train_labels = tf.placeholder(tf.int64, 65 | shape=[BATCH_SIZE, 
IMAGE_HEIGHT, IMAGE_WIDTH, 1], 66 | name='train_labels') 67 | is_training = tf.placeholder(tf.bool, name='is_training') 68 | 69 | images, labels = kitti_segnet.CamVidInputs(image_filenames, 70 | label_filenames, 71 | BATCH_SIZE) 72 | val_images, val_labels = kitti_segnet.CamVidInputs(val_image_filenames, 73 | val_label_filenames, 74 | BATCH_SIZE) 75 | 76 | logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES) 77 | total_loss = loss(logits, train_labels) 78 | train_op = train(total_loss) 79 | check_op = tf.add_check_numerics_ops() 80 | 81 | merged_summary_op = tf.summary.merge_all() 82 | summary_writer = tf.summary.FileWriter('train', sess.graph) 83 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) 84 | if not os.path.exists(LOG_DIR): 85 | os.makedirs(LOG_DIR) 86 | checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt") 87 | 88 | sess.run(tf.global_variables_initializer()) 89 | 90 | # Start the queue runners. 91 | coord = tf.train.Coordinator() 92 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 93 | 94 | for i in range(EPOCH): 95 | image_batch, label_batch = sess.run([images, labels]) 96 | feed_dict = { 97 | train_data: image_batch, 98 | train_labels: label_batch, 99 | is_training: True 100 | } 101 | _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op], 102 | feed_dict=feed_dict) 103 | if i % 10 == 0: 104 | print("Start validating...") 105 | val_images_batch, val_labels_batch = sess.run([val_images, val_labels]) 106 | loss_value = total_loss.eval(feed_dict={train_data: val_images_batch, 107 | train_labels: val_labels_batch, 108 | is_training: True}) 109 | print("Epoch: %d, Loss: %g" % (i, loss_value)) 110 | saver.save(sess, checkpoint_path) 111 | # write logs at every iteration 112 | summary_writer.add_summary(summary, i) 113 | 114 | coord.request_stop() 115 | coord.join(threads) 116 | 117 | 118 | if __name__ == '__main__': 119 | tf.app.run(main=main) 120 | -------------------------------------------------------------------------------- /self_driving/steering/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/steering/__init__.py -------------------------------------------------------------------------------- /self_driving/steering/driving_data.py: -------------------------------------------------------------------------------- 1 | import scipy.misc 2 | import random 3 | 4 | xs = [] 5 | ys = [] 6 | 7 | #points to the end of the last batch 8 | train_batch_pointer = 0 9 | val_batch_pointer = 0 10 | 11 | #read data.txt 12 | with open("/usr/local/google/home/limeng/Downloads/nvida/data.txt") as f: 13 | for line in f: 14 | xs.append("/usr/local/google/home/limeng/Downloads/nvida/driving_dataset/" + line.split()[0]) 15 | #the paper by Nvidia uses the inverse of the turning radius, 16 | #but steering wheel angle is proportional to the inverse of turning radius 17 | #so the steering wheel angle in radians is used as the output 18 | ys.append(float(line.split()[1]) * scipy.pi / 180) 19 | 20 | #get number of images 21 | num_images = len(xs) 22 | 23 | #shuffle list of images 24 | c = list(zip(xs, ys)) 25 | random.shuffle(c) 26 | xs, ys = zip(*c) 27 | 28 | train_xs = xs[:int(len(xs) * 0.8)] 29 | train_ys = ys[:int(len(xs) * 0.8)] 30 | 31 | val_xs = xs[-int(len(xs) * 0.2):] 32 | val_ys = ys[-int(len(xs) * 0.2):] 33 | 34 | num_train_images = len(train_xs) 35 | num_val_images = 
len(val_xs)
36 | 
37 | 
38 | def load_train_batch(batch_size):
39 |     global train_batch_pointer
40 |     x_out = []
41 |     y_out = []
42 |     for i in range(0, batch_size):
43 |         x_out.append(
44 |             scipy.misc.imresize(
45 |                 scipy.misc.imread(
46 |                     train_xs[(train_batch_pointer + i) % num_train_images]), [66, 200]) / 255.0)
47 |         y_out.append([train_ys[(train_batch_pointer + i) % num_train_images]])
48 |     train_batch_pointer += batch_size
49 |     return x_out, y_out
50 | 
51 | 
52 | def load_val_batch(batch_size):
53 |     global val_batch_pointer
54 |     x_out = []
55 |     y_out = []
56 |     for i in range(0, batch_size):
57 |         x_out.append(
58 |             scipy.misc.imresize(
59 |                 scipy.misc.imread(
60 |                     val_xs[(val_batch_pointer + i) % num_val_images]), [66, 200]) / 255.0)
61 |         y_out.append([val_ys[(val_batch_pointer + i) % num_val_images]])
62 |     val_batch_pointer += batch_size
63 |     return x_out, y_out
64 | 
-------------------------------------------------------------------------------- /self_driving/steering/evaluate.py: --------------------------------------------------------------------------------
1 | """Evaluate the steering model.
2 | 
3 | nohup python -u -m self_driving.steering.evaluate > self_driving/steering/output.txt 2>&1 &
4 | 
5 | """
6 | 
7 | import os
8 | import tensorflow as tf
9 | from utils import udacity_data
10 | 
11 | LOG_DIR = 'save'
12 | BATCH_SIZE = 128
13 | EPOCH = udacity_data.NUM_VAL_IMAGES // BATCH_SIZE
14 | OUTPUT = "steering_out.txt"
15 | 
16 | 
17 | def main(_):
18 |     udacity_data.read_data(shuffe=False)
19 |     with tf.Graph().as_default():
20 |         config = tf.ConfigProto()
21 |         config.gpu_options.allocator_type = 'BFC'
22 |         sess = tf.InteractiveSession(config=config)
23 | 
24 |         saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "steering.ckpt.meta"))
25 |         saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR))
26 | 
27 |         graph = tf.get_default_graph()
28 |         x_image = graph.get_tensor_by_name("x_image:0")
29 |         y_label = graph.get_tensor_by_name("y_label:0")
30 |         keep_prob = graph.get_tensor_by_name("keep_prob:0")
31 |         logits = tf.get_collection("logits")[0]
32 | 
33 |         if os.path.exists(OUTPUT):
34 |             os.remove(OUTPUT)
35 | 
36 |         for epoch in range(EPOCH):
37 |             image_batch, label_batch = udacity_data.load_val_batch(BATCH_SIZE)
38 |             feed_dict = {
39 |                 x_image: image_batch,
40 |                 y_label: label_batch,
41 |                 keep_prob: 1.0  # dropout is disabled at evaluation time
42 |             }
43 |             prediction = sess.run([logits], feed_dict)
44 |             with open(OUTPUT, 'a') as out:
45 |                 for batch in range(BATCH_SIZE):
46 |                     out.write("%s %.10f\n" % (udacity_data.val_xs[epoch * BATCH_SIZE + batch],
47 |                                               prediction[0][batch]))
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     tf.app.run(main=main)
52 | 
-------------------------------------------------------------------------------- /self_driving/steering/model.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | def weight_variable(shape):
5 |     initial = tf.truncated_normal(shape, stddev=0.1)
6 |     return tf.Variable(initial)
7 | 
8 | 
9 | def bias_variable(shape):
10 |     initial = tf.constant(0.1, shape=shape)
11 |     return tf.Variable(initial)
12 | 
13 | 
14 | def conv2d(x, W, stride):
15 |     return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='VALID')
16 | 
17 | 
18 | def inference(x_image, keep_prob, is_training=True):
19 |     #first convolutional layer
20 |     W_conv1 = weight_variable([5, 5, 3, 24])
21 |     b_conv1 = bias_variable([24])
22 | 
23 |     h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 2) + b_conv1, 'relu_conv1')
24 |     h_conv1_norm = tf.contrib.layers.batch_norm(h_conv1, is_training=is_training, trainable=True)
25 | 
26 |     #second convolutional layer
27 |     W_conv2 = weight_variable([5, 5, 24, 36])
28 |     b_conv2 = bias_variable([36])
29 | 
30 |     h_conv2 = tf.nn.relu(conv2d(h_conv1_norm, W_conv2, 2) + b_conv2, 'relu_conv2')
31 |     h_conv2_norm = tf.contrib.layers.batch_norm(h_conv2, is_training=is_training, trainable=True)
32 | 
33 |     #third convolutional layer
34 |     W_conv3 = weight_variable([5, 5, 36, 48])
35 |     b_conv3 = bias_variable([48])
36 | 
37 |     h_conv3 = tf.nn.relu(conv2d(h_conv2_norm, W_conv3, 2) + b_conv3, 'relu_conv3')
38 |     h_conv3_norm = tf.contrib.layers.batch_norm(h_conv3, is_training=is_training, trainable=True)
39 | 
40 |     #fourth convolutional layer
41 |     W_conv4 = weight_variable([3, 3, 48, 64])
42 |     b_conv4 = bias_variable([64])
43 | 
44 |     h_conv4 = tf.nn.relu(conv2d(h_conv3_norm, W_conv4, 1) + b_conv4, 'relu_conv4')
45 |     h_conv4_norm = tf.contrib.layers.batch_norm(h_conv4, is_training=is_training, trainable=True)
46 | 
47 |     #fifth convolutional layer
48 |     W_conv5 = weight_variable([3, 3, 64, 64])
49 |     b_conv5 = bias_variable([64])
50 | 
51 |     h_conv5 = tf.nn.relu(conv2d(h_conv4_norm, W_conv5, 1) + b_conv5, 'relu_conv5')
52 |     h_conv5_norm = tf.contrib.layers.batch_norm(h_conv5, is_training=is_training, trainable=True)
53 | 
54 |     #FCL 1
55 |     W_fc1 = weight_variable([1152, 1164])
56 |     b_fc1 = bias_variable([1164])
57 | 
58 |     h_conv5_flat = tf.reshape(h_conv5_norm, [-1, 1152])
59 |     h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1, 'relu_fc1')
60 |     h_fc1_norm = tf.contrib.layers.batch_norm(h_fc1, is_training=is_training, trainable=True)
61 |     h_fc1_drop = tf.nn.dropout(h_fc1_norm, keep_prob)
62 | 
63 |     #FCL 2
64 |     W_fc2 = weight_variable([1164, 100])
65 |     b_fc2 = bias_variable([100])
66 | 
67 |     h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, 'relu_fc2')
68 |     h_fc2_norm = tf.contrib.layers.batch_norm(h_fc2, is_training=is_training, trainable=True)
69 |     h_fc2_drop = tf.nn.dropout(h_fc2_norm, keep_prob)
70 | 
71 |     #FCL 3
72 |     W_fc3 = weight_variable([100, 50])
73 |     b_fc3 = bias_variable([50])
74 | 
75 |     h_fc3 = tf.nn.relu(tf.matmul(h_fc2_drop, W_fc3) + b_fc3, 'relu_fc3')
76 |     h_fc3_norm = tf.contrib.layers.batch_norm(h_fc3, is_training=is_training, trainable=True)
77 |     h_fc3_drop = tf.nn.dropout(h_fc3_norm, keep_prob)
78 | 
79 |     #FCL 4
80 |     W_fc4 = weight_variable([50, 10])
81 |     b_fc4 = bias_variable([10])
82 | 
83 |     h_fc4 = tf.nn.relu(tf.matmul(h_fc3_drop, W_fc4) + b_fc4, 'relu_fc4')
84 |     h_fc4_norm = tf.contrib.layers.batch_norm(h_fc4, is_training=is_training, trainable=True)
85 |     h_fc4_drop = tf.nn.dropout(h_fc4_norm, keep_prob)
86 | 
87 |     #Output
88 |     W_fc5 = weight_variable([10, 1])
89 |     b_fc5 = bias_variable([1])
90 | 
91 |     y = tf.multiply(tf.atan(tf.matmul(h_fc4_drop, W_fc5) + b_fc5), 2) #scale the atan output
92 |     tf.add_to_collection("logits", y)
93 | 
94 |     return y
95 | 
-------------------------------------------------------------------------------- /self_driving/steering/model_resnet50.py: --------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.model_resnet50 > self_driving/steering/output.txt 2>&1 &
2 | 
3 | from keras import applications
4 | from keras import optimizers
5 | from keras.models import Sequential
6 | from keras.models import Model
7 | from keras.layers import Dropout, Flatten, Dense
8 | from utils import my_image
9 | from keras import backend as K
10 | from keras.callbacks import ModelCheckpoint
11 | 
12 | # dimensions of our images.
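# (224x224 RGB is the canonical ResNet50 input size; the steering angle is
# regressed as a single scalar by the Dense(1) head built below.)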
13 | img_width, img_height = 224, 224
14 | 
15 | train_data_dir = 'utils/udacity_train.txt'
16 | validation_data_dir = 'utils/udacity_val.txt'
17 | nb_train_samples = 33808
18 | nb_validation_samples = 10558
19 | epochs = 50
20 | batch_size = 32
21 | 
22 | # build the resnet50 network
23 | base_model = applications.ResNet50(include_top=False, input_shape=(224, 224, 3))
24 | print('Model loaded.')
25 | 
26 | # build a classifier model to put on top of the convolutional model
27 | top_model = Sequential()
28 | top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
29 | top_model.add(Dense(512, activation='relu'))
30 | top_model.add(Dense(256, activation='relu'))
31 | top_model.add(Dense(64, activation='relu'))
32 | top_model.add(Dense(1))
33 | 
34 | # add the model on top of the convolutional base
35 | # model.add(top_model)
36 | model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
37 | 
38 | # set the first 15 layers to non-trainable
39 | # (their weights will not be updated during fine-tuning)
40 | for layer in model.layers[:15]:
41 |     layer.trainable = False
42 | 
43 | def root_mean_squared_error(y_true, y_pred):
44 |     return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
45 | 
46 | # compile the model with an Adam optimizer
47 | # and an RMSE loss for the regression target.
48 | model.compile(loss=root_mean_squared_error,
49 |               optimizer=optimizers.Adam(lr=0.001),
50 |               metrics=['accuracy'])
51 | 
52 | # prepare data augmentation configuration
53 | train_datagen = my_image.MyImageDataGenerator(rescale=1. / 255)
54 | 
55 | test_datagen = my_image.MyImageDataGenerator(rescale=1. / 255)
56 | 
57 | train_generator = train_datagen.flow(
58 |     train_data_dir,
59 |     [img_width, img_height, 3],
60 |     shuffle=True)
61 | 
62 | validation_generator = test_datagen.flow(
63 |     validation_data_dir,
64 |     [img_width, img_height, 3],
65 |     shuffle=True)
66 | 
67 | # checkpoint
68 | filepath = "save/steering_resnet50-{epoch:02d}-{val_loss:.4f}.hdf5"
69 | checkpoint = ModelCheckpoint(
70 |     filepath,
71 |     monitor='val_loss',
72 |     save_best_only=True,
73 |     mode='min')
74 | callbacks_list = [checkpoint]
75 | 
76 | model.summary()
77 | 
78 | # fine-tune the model
79 | model.fit_generator(
80 |     train_generator,
81 |     steps_per_epoch=nb_train_samples // batch_size,
82 |     epochs=epochs,
83 |     validation_data=validation_generator,
84 |     validation_steps=nb_validation_samples // batch_size,
85 |     callbacks=callbacks_list)
86 | 
-------------------------------------------------------------------------------- /self_driving/steering/model_saliency.py: --------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.model_saliency > self_driving/steering/output.txt 2>&1 &
2 | 
3 | from keras import applications
4 | from keras.models import Sequential
5 | from scipy import misc
6 | from keras.models import Model
7 | from keras.layers import Dropout, Flatten, Dense
8 | from vis.visualization import visualize_saliency, overlay
9 | from vis.utils import utils
10 | import numpy as np
11 | from keras.preprocessing.image import img_to_array
12 | import os
13 | 
14 | VAL_DATASET = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/center/"
15 | 
16 | # dimensions of our images.
17 | img_width, img_height = 224, 224
18 | model_weights_path = 'save/steering_resnet50-22-0.0603.hdf5'
19 | 
20 | # build the resnet50 network
21 | base_model = applications.ResNet50(include_top=False,
22 |                                    input_shape=(224, 224, 3))
23 | print('Model loaded.')
24 | 
25 | # build a classifier model to put on top of the convolutional model
26 | top_model = Sequential()
27 | top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
28 | top_model.add(Dense(512, activation='relu'))
29 | top_model.add(Dense(256, activation='relu'))
30 | top_model.add(Dense(64, activation='relu'))
31 | top_model.add(Dense(1))
32 | 
33 | model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
34 | model.load_weights(model_weights_path)
35 | 
36 | with open("output/steering/steering_val.txt", 'a') as out:
37 |     for img in os.listdir(VAL_DATASET):
38 |         img_data = utils.load_img(VAL_DATASET + img, target_size=(224, 224))
39 |         img_input = np.expand_dims(img_to_array(img_data), axis=0)
40 |         out.write("%s %.10f\n" % (img, model.predict(img_input / 255.)[0][0]))
41 |         out.flush()
42 |         heat_map = visualize_saliency(model,
43 |                                       -2,
44 |                                       filter_indices=None,
45 |                                       seed_input=img_data,
46 |                                       backprop_modifier='guided')
47 |         misc.imsave("output/steering/%s" % img, overlay(img_data, heat_map, alpha=0.3))
48 | 
-------------------------------------------------------------------------------- /self_driving/steering/split_data.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Split the Nvidia dataset into train data and test data
3 | 
4 | src_dir="driving_dataset"
5 | train_dst_dir="train_data"
6 | test_dst_dir="test_data"
7 | 
8 | train_data_size=40000
9 | data_size=45568
10 | 
11 | echo "Split Nvidia driving dataset into train data and test data"
12 | 
13 | rm -rf $train_dst_dir $test_dst_dir
14 | mkdir $train_dst_dir $test_dst_dir
15 | 
16 | # train_data
17 | i=0
18 | while [ $i -lt $train_data_size ]
19 | do
20 |     cp $src_dir/"$i.jpg" $train_dst_dir/"$i.jpg"
21 |     true $(( i++ ))
22 | done
23 | 
24 | # test_data
25 | while [ $i -lt $data_size ]
26 | do
27 |     cp $src_dir/"$i.jpg" $test_dst_dir/"$i.jpg"
28 |     true $(( i++ ))
29 | done
30 | 
31 | 
-------------------------------------------------------------------------------- /self_driving/steering/train.py: --------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.train > self_driving/steering/output.txt 2>&1 &
2 | 
3 | import os
4 | import tensorflow as tf
5 | from utils import udacity_data
6 | import model
7 | 
8 | LOG_DIR = 'save'
9 | EPOCH = 32
10 | BATCH_SIZE = 128
11 | LEARNING_RATE = 1e-3
12 | STEP_PER_EPOCH = udacity_data.NUM_TRAIN_IMAGES // BATCH_SIZE
13 | 
14 | 
15 | def loss(pred, labels):
16 |     train_vars = tf.trainable_variables()
17 |     norm = tf.add_n([tf.nn.l2_loss(v) for v in train_vars])
18 |     # create a summary to monitor L2 norm
19 |     tf.summary.scalar('L2 Normalization', norm)
20 |     losses = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(pred, labels))))
21 |     # create a summary to monitor loss
22 |     tf.summary.scalar('Loss', losses)
23 |     return norm, losses, losses + norm * 0.0005
24 | 
25 | 
26 | def train(total_loss):
27 |     global_step = tf.Variable(0, name='global_step', trainable=False)
28 |     # create a summary to monitor total loss
29 |     tf.summary.scalar('Total Loss', total_loss)
30 |     optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
31 |     return optimizer.minimize(total_loss, global_step=global_step)
32 | 
33 | 
34 | def main(_):
35 |     with tf.Graph().as_default():
36 |         config = tf.ConfigProto()
37 |         config.gpu_options.allocator_type = 'BFC'
38 |         sess = tf.InteractiveSession(config=config)
39 | 
40 |         x_image = tf.placeholder(tf.float32, shape=[None, 66, 200, 3], name="x_image")
41 |         y_label = tf.placeholder(tf.float32, shape=[None, 1], name="y_label")
42 |         keep_prob = tf.placeholder(tf.float32, name="keep_prob")
43 | 
44 |         y_pred = model.inference(x_image, keep_prob)
45 |         norm, losses, total_loss = loss(y_pred, y_label)
46 |         train_op = train(total_loss)
47 | 
48 |         merged_summary_op = tf.summary.merge_all()
49 |         summary_writer = tf.summary.FileWriter('train', sess.graph)
50 |         saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
51 |         if not os.path.exists(LOG_DIR):
52 |             os.makedirs(LOG_DIR)
53 |         checkpoint_path = os.path.join(LOG_DIR, "steering.ckpt")
54 | 
55 |         sess.run(tf.global_variables_initializer())
56 | 
57 |         udacity_data.read_data()
58 | 
59 |         for epoch in range(EPOCH):
60 |             for i in range(STEP_PER_EPOCH):
61 |                 steps = epoch * STEP_PER_EPOCH + i
62 | 
63 |                 xs, ys = udacity_data.load_train_batch(BATCH_SIZE)
64 | 
65 |                 _, summary = sess.run([train_op, merged_summary_op],
66 |                                       feed_dict={x_image: xs, y_label: ys, keep_prob: 0.7})
67 | 
68 |                 if i % 10 == 0:
69 |                     xs, ys = udacity_data.load_val_batch(BATCH_SIZE)
70 |                     loss_value = losses.eval(feed_dict={x_image: xs, y_label: ys, keep_prob: 1.0})
71 |                     print("Epoch: %d, Step: %d, Loss: %g" % (epoch, steps, loss_value))
72 | 
73 |                 # write logs at every iteration
74 |                 summary_writer.add_summary(summary, steps)
75 | 
76 |                 if i % 32 == 0:
77 |                     if not os.path.exists(LOG_DIR):
78 |                         os.makedirs(LOG_DIR)
79 |                     saver.save(sess, checkpoint_path)
80 | 
81 | 
82 | if __name__ == '__main__':
83 |     tf.app.run(main=main)
84 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/utils/__init__.py
--------------------------------------------------------------------------------
/utils/camvid.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from tensorflow.python.framework import dtypes
4 | 
5 | IMAGE_HEIGHT = 720
6 | IMAGE_WIDTH = 960
7 | IMAGE_DEPTH = 3
8 | 
9 | NUM_CLASSES = 32
10 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 580
11 | NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 580
12 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 580
13 | 
14 | def _generate_image_and_label_batch(image, label, min_queue_examples,
15 |                                     batch_size, shuffle):
16 |     """Construct a queued batch of images and labels.
17 |     Args:
18 |         image: 3-D Tensor of [height, width, 3] of type float32.
19 |         label: 3-D Tensor of [height, width, 1] of type int32.
20 |         min_queue_examples: int32, minimum number of samples to retain
21 |             in the queue that provides batches of examples.
22 |         batch_size: Number of images per batch.
23 |         shuffle: boolean indicating whether to use a shuffling queue.
24 |     Returns:
25 |         images: Images. 4D tensor of [batch_size, height, width, 3] size.
26 |         labels: Labels. 3D tensor of [batch_size, height, width, 1] size.
27 |     """
28 |     # Create a queue that shuffles the examples, and then
29 |     # read 'batch_size' images + labels from the example queue.
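    # On the magic numbers below (the usual TF input-queue heuristic, noted here
    # for clarity): shuffle_batch dequeues uniformly at random from a buffer that
    # always holds at least `min_after_dequeue` elements, so a larger buffer
    # shuffles better, while `capacity` bounds memory; capacity is conventionally
    # min_after_dequeue plus a few batches of headroom, hence the
    # `min_queue_examples + 3 * batch_size` used here.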
30 |     num_preprocess_threads = 1
31 |     if shuffle:
32 |         images, label_batch = tf.train.shuffle_batch(
33 |             [image, label],
34 |             batch_size=batch_size,
35 |             num_threads=num_preprocess_threads,
36 |             capacity=min_queue_examples + 3 * batch_size,
37 |             min_after_dequeue=min_queue_examples)
38 |     else:
39 |         images, label_batch = tf.train.batch(
40 |             [image, label],
41 |             batch_size=batch_size,
42 |             num_threads=num_preprocess_threads,
43 |             capacity=min_queue_examples + 3 * batch_size)
44 | 
45 |     return images, label_batch
46 | 
47 | 
48 | def CamVid_reader_seq(filename_queue, seq_length):
49 |     image_seq_filenames = tf.split(axis=0,
50 |                                    num_or_size_splits=seq_length,
51 |                                    value=filename_queue[0])
52 |     label_seq_filenames = tf.split(axis=0,
53 |                                    num_or_size_splits=seq_length,
54 |                                    value=filename_queue[1])
55 | 
56 |     image_seq = []
57 |     label_seq = []
58 |     for im, la in zip(image_seq_filenames, label_seq_filenames):
59 |         imageValue = tf.read_file(tf.squeeze(im))
60 |         labelValue = tf.read_file(tf.squeeze(la))
61 |         image_bytes = tf.image.decode_png(imageValue)
62 |         label_bytes = tf.image.decode_png(labelValue)
63 |         image = tf.cast(tf.reshape(image_bytes,
64 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH)), tf.float32)
65 |         label = tf.cast(tf.reshape(label_bytes,
66 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, 1)), tf.int64)
67 |         image_seq.append(image)
68 |         label_seq.append(label)
69 |     return image_seq, label_seq
70 | 
71 | 
72 | def CamVid_reader(filename_queue):
73 |     image_filename = filename_queue[0]
74 |     label_filename = filename_queue[1]
75 | 
76 |     imageValue = tf.read_file(image_filename)
77 |     labelValue = tf.read_file(label_filename)
78 | 
79 |     image_bytes = tf.image.decode_png(imageValue)
80 |     label_bytes = tf.image.decode_png(labelValue)
81 | 
82 |     image = tf.reshape(image_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
83 |     label = tf.reshape(label_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
84 | 
85 |     return image, label
86 | 
87 | 
88 | def get_filename_list(path):
89 |     fd = open(path)
90 |     image_filenames = []
91 |     label_filenames = []
92 |     for i in fd:
93 |         i = i.strip().split(" ")
94 |         image_filenames.append(i[0])
95 |         label_filenames.append(i[1])
96 |     return image_filenames, label_filenames
97 | 
98 | 
99 | def CamVidInputs(image_filenames, label_filenames, batch_size, shuffle=True):
100 | 
101 |     images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
102 |     labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)
103 | 
104 |     filename_queue = tf.train.slice_input_producer([images, labels], shuffle=shuffle)
105 | 
106 |     image, label = CamVid_reader(filename_queue)
107 |     reshaped_image = tf.cast(image, tf.float32)
108 | 
109 |     min_fraction_of_examples_in_queue = 0.05
110 |     min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
111 |                              min_fraction_of_examples_in_queue)
112 |     print('Filling queue with %d CamVid images before starting to train. '
113 |           'This will take a few minutes.' % min_queue_examples)
114 | 
115 |     # Generate a batch of images and labels by building up a queue of examples.
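    # A usage sketch (assumed here; camvid_test.py below does the same): the
    # returned tensors are queue-backed, so a session must start the queue
    # runners before evaluating them, e.g.
    #     coord = tf.train.Coordinator()
    #     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    #     image_batch, label_batch = sess.run([images, labels])
    #     coord.request_stop(); coord.join(threads)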
116 |     return _generate_image_and_label_batch(reshaped_image, label,
117 |                                             min_queue_examples, batch_size,
118 |                                             shuffle=shuffle)
119 | 
--------------------------------------------------------------------------------
/utils/camvid_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.platform import test
6 | 
7 | import camvid
8 | import tensorflow as tf
9 | 
10 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/data/train.txt"
11 | 
12 | 
13 | class CamvidTest(test.TestCase):
14 | 
15 |     def testGetFileNameList(self):
16 |         image_filenames, label_filenames = camvid.get_filename_list(image_dir)
17 |         self.assertEqual(len(image_filenames), 367)
18 | 
19 |     def testCamVidInputs(self):
20 |         config = tf.ConfigProto()
21 |         config.gpu_options.allocator_type = 'BFC'
22 |         with self.test_session(use_gpu=True, config=config) as sess:
23 |             image_filenames, label_filenames = camvid.get_filename_list(image_dir)
24 |             images, labels = camvid.CamVidInputs(image_filenames, label_filenames, 32)
25 |             # Start the queue runners.
26 |             coord = tf.train.Coordinator()
27 |             threads = tf.train.start_queue_runners(sess=sess, coord=coord)
28 |             images_batch, labels_batch = sess.run([images, labels])
29 |             self.assertEqual(images.get_shape(), [32, 360, 480, 3])
30 |             self.assertEqual(labels.get_shape(), [32, 360, 480, 1])
31 |             coord.request_stop()
32 |             coord.join(threads)
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     test.main()
37 | 
--------------------------------------------------------------------------------
/utils/cifar.py:
--------------------------------------------------------------------------------
1 | """Load data from CIFAR-10 dataset
2 | 
3 | The archive contains the files data_batch_1, data_batch_2, ..., data_batch_5,
4 | as well as test_batch. Each of these files is a Python "pickled" object
5 | produced with cPickle. Here is a Python routine which will open such a file
6 | and return a dictionary:
7 | """
8 | 
9 | import os.path
10 | import pickle
11 | import tarfile
12 | import urllib2
13 | 
14 | import numpy
15 | import dataset
16 | 
17 | FLAGS = None
18 | 
19 | CIFAR10_DOWNLOAD_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
20 | CIFAR10_FILE_NAME = 'cifar-10-python.tar.gz'
21 | CIFAR10_TRAIN_PREFIX = 'cifar-10-batches-py/data_batch_'
22 | CIFAR10_TEST = 'cifar-10-batches-py/test_batch'
23 | CIFAR10_DATA = 'data'
24 | CIFAR10_LABEL = 'labels'
25 | 
26 | 
27 | class Cifar(object):
28 |     def __init__(self):
29 |         self.train = dataset.DataSet()
30 |         self.test = dataset.DataSet()
31 | 
32 |     def ReadDataSets(self, data_dir=".", one_hot=False, raw=False):
33 |         file_path = os.path.join(data_dir, CIFAR10_FILE_NAME)
34 |         if not os.path.isfile(file_path):
35 |             _DownloadCifar10(data_dir)
36 | 
37 |         UnzipTarGzFile(file_path)
38 | 
39 |         xs = []
40 |         ys = []
41 |         for j in range(5):
42 |             d = Unpickle(os.path.join(data_dir, CIFAR10_TRAIN_PREFIX + str(j + 1)))
43 |             x = d[CIFAR10_DATA]
44 |             y = d[CIFAR10_LABEL]
45 |             xs.append(x)
46 |             ys.append(y)
47 | 
48 |         d = Unpickle(os.path.join(data_dir, CIFAR10_TEST))
49 |         xs.append(d[CIFAR10_DATA])
50 |         ys.append(d[CIFAR10_LABEL])
51 | 
52 |         x = numpy.concatenate(xs) / numpy.float32(255)
53 |         y = numpy.concatenate(ys)
54 |         if not raw:
55 |             x = numpy.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
56 |             x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
57 | 
58 |         # subtract per-pixel mean
59 |         pixel_mean = numpy.mean(x[0:50000], axis=0)
60 |         x -= pixel_mean
61 | 
62 |         # split into train and test sets
63 |         if not raw:
64 |             self.train.images = x[0:50000, :, :, :]
65 |         else:
66 |             self.train.images = x[0:50000]
67 |         self.train.labels = y[0:50000]
68 | 
69 |         if not raw:
70 |             self.test.images = x[50000:, :, :, :]
71 |         else:
72 |             self.test.images = x[50000:]  # fixed: previously overwrote train.images and left test.images empty
73 |         self.test.labels = y[50000:]
74 | 
75 |         if one_hot:
76 |             train_labels = numpy.zeros((50000, 10), dtype=numpy.float32)
77 |             test_labels = numpy.zeros((10000, 10), dtype=numpy.float32)
78 | 
79 |             for i in range(50000):
80 |                 train_labels[i, self.train.labels[i]] = 1.
81 |             self.train.labels = train_labels
82 | 
83 |             for j in range(10000):
84 |                 test_labels[j, self.test.labels[j]] = 1.
85 |             self.test.labels = test_labels
86 | 
87 | 
88 | def _DownloadCifar10(data_dir):
89 |     _EnsureDir(data_dir)
90 |     cifar10_zip_file = urllib2.urlopen(CIFAR10_DOWNLOAD_URL)
91 |     with open(os.path.join(data_dir, CIFAR10_FILE_NAME), 'wb') as output:
92 |         output.write(cifar10_zip_file.read())
93 | 
94 | 
95 | def UnzipTarGzFile(file_path):
96 |     with tarfile.open(file_path) as tar:
97 |         tar.extractall()
98 | 
99 | 
100 | 
101 | def _EnsureDir(directory):
102 |     if not os.path.exists(directory):
103 |         os.makedirs(directory)
104 | 
105 | 
106 | def Unpickle(file_path):
107 |     with open(file_path, mode='rb') as f:
108 |         data = pickle.load(f)
109 |         return data
110 | 
--------------------------------------------------------------------------------
/utils/cifar_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import cifar  # the module lives alongside this test in utils/
4 | 
5 | 
6 | class CifarTest(unittest.TestCase):
7 |     def setUp(self):
8 |         self._cifar = cifar.Cifar()
9 | 
10 |     def testReadDataSets(self):
11 |         self._cifar.ReadDataSets()
12 |         self.assertEqual(len(self._cifar.train.images), 50000)
13 |         self.assertEqual(len(self._cifar.train.labels), 50000)
14 |         self.assertEqual(len(self._cifar.test.images), 10000)
15 |         self.assertEqual(len(self._cifar.test.labels), 10000)
16 | 
17 |     def testReadDataSetsOneHotEnabled(self):
18 |         self._cifar.ReadDataSets(one_hot=True)
19 | 
20 |         self.assertEqual(len(self._cifar.train.images), 50000)
21 |         self.assertEqual(len(self._cifar.train.images[0]), 3072)
22 |         self.assertEqual(len(self._cifar.train.labels[0]), 10)
23 |         self.assertEqual(1, self._cifar.train.labels[0][6])
24 | 
25 |         self.assertEqual(len(self._cifar.test.images), 10000)
26 |         self.assertEqual(len(self._cifar.test.labels[0]), 10)
27 |         self.assertEqual(1, self._cifar.test.labels[0][3])
28 | 
29 | if __name__ == '__main__':
30 |     unittest.main()
31 | 
--------------------------------------------------------------------------------
/utils/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | 
3 | 
4 | class DataSet(object):
5 |     def __init__(self):
6 |         self._images = numpy.array([])
7 |         self._labels = numpy.array([])
8 |         self._index_in_epoch = 0
9 | 
10 |     @property
11 |     def images(self):
12 |         return self._images
13 | 
14 |     @property
15 |     def labels(self):
16 |         return self._labels
17 | 
18 |     @labels.setter
19 |     def labels(self, value):
20 |         self._labels = value
21 | 
22 |     @images.setter
23 |     def images(self, value):
24 |         self._images = value
25 | 
26 |     def appendImage(self, images):
27 |         arr = self._images.tolist()
28 |         arr.extend(images)
29 |         self._images = numpy.array(arr)
30 | 
31 |     def appendLabel(self, labels):
32 |         arr = self._labels.tolist()
33 |         arr.extend(labels)
34 |         self._labels = numpy.array(arr)
35 | 
36 |     def next_batch(self, batch_size):
37 |         start = self._index_in_epoch
38 |         self._index_in_epoch += batch_size
39 |         if self._index_in_epoch > len(self._images):
40 |             perm = numpy.arange(len(self._images))
41 |             numpy.random.shuffle(perm)
42 |             self._images = self._images[perm]
43 |             self._labels = self._labels[perm]
44 |             self._index_in_epoch = batch_size
45 |             start = 0
46 |         end = self._index_in_epoch
47 |         return self._images[start:end], self._labels[start:end]
48 | 
--------------------------------------------------------------------------------
/utils/kitti.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | import tensorflow as tf
4 | import scipy as scp
5 | import scipy.misc
6 | 
7 | KITTI_TRAIN_DIR_PREFIX = '/usr/local/google/home/limeng/Downloads/kitti/data_road/training/image_2/'
8 | KITTI_GT_DIR_PREFIX = '/usr/local/google/home/limeng/Downloads/kitti/data_road/training/gt_image_2/'
9 | 
10 | UM_TRAIN_TEMPLATE = "um_0000%02d.png"
11 | UU_TRAIN_TEMPLATE = "uu_0000%02d.png"
12 | UMM_TRAIN_TEMPLATE = "umm_0000%02d.png"
13 | 
14 | UU_GT_ROAD_TEMPLATE = "uu_road_0000%02d.png"
15 | UM_GT_LANE_TEMPLATE = "um_lane_0000%02d.png"
16 | UM_GT_ROAD_TEMPLATE = "um_road_0000%02d.png"
17 | UMM_GT_ROAD_TEMPLATE = "umm_road_0000%02d.png"
18 | 
19 | 
20 | class Kitti(object):
21 |     def __init__(self):
22 |         self._images = []
23 |         self._labels = []
24 |         self._file_count = 0
25 |         self._read_datasets()
26 | 
27 |     def _read_datasets(self,
28 |                        train_data_dir=KITTI_TRAIN_DIR_PREFIX,
29 |                        gt_data_dir=KITTI_GT_DIR_PREFIX,
30 |                        cat='uu'):
31 |         assert os.path.isdir(train_data_dir), 'Cannot find: %s' % train_data_dir
32 | 
33 |         self._file_count = 98
34 |         train_file_temp = UU_TRAIN_TEMPLATE
35 |         gt_file_temp = UU_GT_ROAD_TEMPLATE
36 |         if cat == 'um':
37 |             self._file_count = 95
38 |             train_file_temp = UM_TRAIN_TEMPLATE
39 |             gt_file_temp = UM_GT_ROAD_TEMPLATE
40 |         elif cat == 'umm':
41 |             self._file_count = 96
42 |             train_file_temp = UMM_TRAIN_TEMPLATE
43 |             gt_file_temp = UMM_GT_ROAD_TEMPLATE
44 | 
45 |         for i in range(0, self._file_count):
46 |             train_file_name = train_data_dir + train_file_temp % i
47 |             gt_file_name = gt_data_dir + gt_file_temp % i
48 |             print(train_file_name)
49 |             x = get_training_data(train_file_name)
50 |             y = get_ground_truth(gt_file_name)
51 | 
52 |             self._images.append(x)
53 |             self._labels.append(y)
54 | 
55 |     def next_batch(self, batch_id):
56 |         return self._images[batch_id], self._labels[batch_id]
57 | 
58 | 
59 | def get_training_data(file_name):
60 |     assert os.path.isfile(file_name), 'Cannot find: %s' % file_name
61 |     training_data = scp.misc.imread(file_name, mode='RGB')
62 |     return numpy.expand_dims(training_data, axis=0)
63 | 
64 | 
65 | def get_ground_truth(fileNameGT):
66 |     assert os.path.isfile(fileNameGT), 'Cannot find: %s' % fileNameGT
67 |     full_gt = scp.misc.imread(fileNameGT, mode='RGB')
68 |     roadArea = (full_gt[:, :, 2] > 0)
69 |     notRoadArea = (full_gt[:, :, 2] == 0)
70 |     gt_data = numpy.dstack((roadArea, notRoadArea))
71 |     return numpy.expand_dims(gt_data, axis=0)
72 | 
73 | 
74 | def main(_):
75 |     kitti = Kitti()
76 |     for i in range(0, 20):
77 |         img, label = kitti.next_batch(i)  # fixed: next_batch requires a batch_id
78 |         print("images")
79 |         print(img.shape)
80 |         print("labels")
81 |         print(label.shape)
82 | 
83 | 
84 | if __name__ == '__main__':
85 |     tf.app.run(main=main)
86 | 
--------------------------------------------------------------------------------
/utils/kitti_segnet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from tensorflow.python.framework import dtypes
4 | 
5 | IMAGE_HEIGHT = 375
6 | IMAGE_WIDTH = 1242
7 | IMAGE_DEPTH = 3
8 | 
9 | NUM_CLASSES = 3
10 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 100
11 | NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 100
12 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 100
13 | 
14 | def _generate_image_and_label_batch(image, label, min_queue_examples,
15 |                                     batch_size, shuffle):
16 |     """Construct a queued batch of images and labels.
17 |     Args:
18 |         image: 3-D Tensor of [height, width, 3] of type float32.
19 |         label: 3-D Tensor of [height, width, 1] of type int32.
20 |         min_queue_examples: int32, minimum number of samples to retain
21 |             in the queue that provides batches of examples.
22 |         batch_size: Number of images per batch.
23 |         shuffle: boolean indicating whether to use a shuffling queue.
24 |     Returns:
25 |         images: Images. 4D tensor of [batch_size, height, width, 3] size.
26 |         labels: Labels. 3D tensor of [batch_size, height, width, 1] size.
27 |     """
28 |     # Create a queue that shuffles the examples, and then
29 |     # read 'batch_size' images + labels from the example queue.
30 |     num_preprocess_threads = 1
31 |     if shuffle:
32 |         images, label_batch = tf.train.shuffle_batch(
33 |             [image, label],
34 |             batch_size=batch_size,
35 |             num_threads=num_preprocess_threads,
36 |             capacity=min_queue_examples + 3 * batch_size,
37 |             min_after_dequeue=min_queue_examples)
38 |     else:
39 |         images, label_batch = tf.train.batch(
40 |             [image, label],
41 |             batch_size=batch_size,
42 |             num_threads=num_preprocess_threads,
43 |             capacity=min_queue_examples + 3 * batch_size)
44 | 
45 |     return images, label_batch
46 | 
47 | 
48 | def CamVid_reader_seq(filename_queue, seq_length):
49 |     image_seq_filenames = tf.split(axis=0,
50 |                                    num_or_size_splits=seq_length,
51 |                                    value=filename_queue[0])
52 |     label_seq_filenames = tf.split(axis=0,
53 |                                    num_or_size_splits=seq_length,
54 |                                    value=filename_queue[1])
55 | 
56 |     image_seq = []
57 |     label_seq = []
58 |     for im, la in zip(image_seq_filenames, label_seq_filenames):
59 |         imageValue = tf.read_file(tf.squeeze(im))
60 |         labelValue = tf.read_file(tf.squeeze(la))
61 |         image_bytes = tf.image.decode_png(imageValue)
62 |         label_bytes = tf.image.decode_png(labelValue)
63 |         image = tf.cast(tf.reshape(image_bytes,
64 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH)), tf.float32)
65 |         label = tf.cast(tf.reshape(label_bytes,
66 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, 1)), tf.int64)
67 |         image_seq.append(image)
68 |         label_seq.append(label)
69 |     return image_seq, label_seq
70 | 
71 | 
72 | def CamVid_reader(filename_queue):
73 |     image_filename = filename_queue[0]
74 |     label_filename = filename_queue[1]
75 | 
76 |     imageValue = tf.read_file(image_filename)
77 |     labelValue = tf.read_file(label_filename)
78 | 
79 |     image_bytes = tf.image.decode_png(imageValue)
80 |     label_bytes = tf.image.decode_png(labelValue)
81 | 
82 |     image = tf.reshape(image_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
83 |     label = tf.reshape(label_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
84 | 
85 |     return image, label
86 | 
87 | 
88 | def get_filename_list(path):
89 |     fd = open(path)
90 |     image_filenames = []
91 |     label_filenames = []
92 |     for i in fd:
93 |         i = i.strip().split(" ")
94 |         image_filenames.append(i[0])
95 |         label_filenames.append(i[1])
96 |     return image_filenames, label_filenames
97 | 
98 | 
99 | def CamVidInputs(image_filenames, label_filenames, batch_size, shuffle=True):
100 | 
101 |     images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
102 |     labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)
103 | 
104 |     filename_queue = tf.train.slice_input_producer([images, labels], shuffle=shuffle)
105 | 
106 |     image, label = CamVid_reader(filename_queue)
107 |     reshaped_image = tf.cast(image, tf.float32)
108 | 
109 |     min_fraction_of_examples_in_queue = 0.05
110 |     min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
111 |                              min_fraction_of_examples_in_queue)
112 |     print('Filling queue with %d CamVid images before starting to train. '
113 |           'This will take a few minutes.' % min_queue_examples)
114 | 
115 |     # Generate a batch of images and labels by building up a queue of examples.
116 |     return _generate_image_and_label_batch(reshaped_image, label,
117 |                                            min_queue_examples, batch_size,
118 |                                            shuffle=shuffle)
119 | 
--------------------------------------------------------------------------------
/utils/my_image.py:
--------------------------------------------------------------------------------
1 | import scipy.misc
2 | import numpy as np
3 | import os
4 | 
5 | from keras.preprocessing.image import ImageDataGenerator
6 | from keras.preprocessing.image import Iterator
7 | from keras import backend as K
8 | from keras.preprocessing import image
9 | 
10 | class MyImageDataGenerator(ImageDataGenerator):
11 | 
12 |     def flow(self, file, image_size, batch_size=32, shuffle=True, seed=None,
13 |              save_to_dir=None, save_prefix='', save_format='png'):
14 |         return FileIterator(
15 |             file, image_size, self,
16 |             batch_size=batch_size,
17 |             shuffle=shuffle,
18 |             seed=seed,
19 |             data_format=self.data_format,
20 |             save_to_dir=save_to_dir,
21 |             save_prefix=save_prefix,
22 |             save_format=save_format)
23 | 
24 | 
25 | class FileIterator(Iterator):
26 |     """Iterator yielding data from a file.
27 | 
28 |     The file should be in the following format:
29 | 
30 |         <image path> <label>
31 |         ...
32 |         <image path> <label>
33 | 
34 |     # Arguments
35 |         file: Path to the file to read the image list and label data.
36 |         image_size: Image size, [height, width, channel]
37 |         image_data_generator: Instance of `ImageDataGenerator`
38 |             to use for random transformations and normalization.
39 |         batch_size: Integer, size of a batch.
40 |         shuffle: Boolean, whether to shuffle the data between epochs.
41 |         seed: Random seed for data shuffling.
42 |         data_format: String, one of `channels_first`, `channels_last`.
43 |         save_to_dir: Optional directory where to save the pictures
44 |             being yielded, in a viewable format. This is useful
45 |             for visualizing the random transformations being
46 |             applied, for debugging purposes.
47 |         save_prefix: String prefix to use for saving sample
48 |             images (if `save_to_dir` is set).
49 |         save_format: Format to use for saving sample images
50 |             (if `save_to_dir` is set).
51 |     """
52 | 
53 |     def __init__(self, file, image_size, image_data_generator,
54 |                  batch_size=32, shuffle=False, seed=None,
55 |                  data_format=None,
56 |                  save_to_dir=None, save_prefix='', save_format='png'):
57 |         if not os.path.exists(file):
58 |             raise ValueError('Cannot find file: %s' % file)
59 | 
60 |         if data_format is None:
61 |             data_format = K.image_data_format()
62 | 
63 |         split_lines = [line.rstrip('\n').split(' ') for line in open(file, 'r')]
64 |         self.x = np.asarray([e[0] for e in split_lines])
65 |         self.y = np.asarray([float(e[1]) for e in split_lines])
66 |         self.image_size = image_size
67 |         self.image_data_generator = image_data_generator
68 |         self.data_format = data_format
69 |         self.save_to_dir = save_to_dir
70 |         self.save_prefix = save_prefix
71 |         self.save_format = save_format
72 |         super(FileIterator, self).__init__(self.x.shape[0], batch_size, shuffle, seed)
73 | 
74 |     def next(self):
75 |         # Keeps under lock only the mechanism which advances
76 |         # the indexing of each batch.
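        # (Only the shared index generator is mutated across worker threads;
        # the image loading and augmentation below operate on per-batch locals,
        # so several workers can decode and transform images concurrently.)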
77 |         with self.lock:
78 |             index_array, current_index, current_batch_size = next(self.index_generator)
79 |         # The transformation of images is not under thread lock
80 |         # so it can be done in parallel
81 |         batch_x = np.zeros(tuple([current_batch_size] + list(self.image_size)), dtype=K.floatx())
82 |         for i, j in enumerate(index_array):
83 |             x = scipy.misc.imread(self.x[j])
84 |             x = scipy.misc.imresize(x, self.image_size)
85 |             x = self.image_data_generator.random_transform(x.astype(K.floatx()))
86 |             x = self.image_data_generator.standardize(x)
87 |             batch_x[i] = x
88 |         if self.save_to_dir:
89 |             for i in range(current_batch_size):
90 |                 img = image.array_to_img(batch_x[i], self.data_format, scale=True)
91 |                 fname = '{prefix}_{index}_{hash}.{format}'.format(prefix=self.save_prefix,
92 |                                                                   index=current_index + i,
93 |                                                                   hash=np.random.randint(1e4),
94 |                                                                   format=self.save_format)
95 |                 img.save(os.path.join(self.save_to_dir, fname))
96 |         batch_y = self.y[index_array]
97 |         return batch_x, batch_y
--------------------------------------------------------------------------------
/utils/my_image_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.platform import test
6 | import my_image
7 | 
8 | class MyImageTest(test.TestCase):
9 | 
10 |     def testReadData(self):
11 |         myImageDataGenerator = my_image.MyImageDataGenerator()
12 |         generator = myImageDataGenerator.flow("udacity_train.txt",
13 |                                               [224, 224, 3],
14 |                                               shuffle=False,
15 |                                               save_to_dir='test')
16 |         images, labels = generator.next()
17 |         self.assertAllEqual(images.shape, [32, 224, 224, 3])
18 |         self.assertAllEqual(labels.shape, [32])
19 |         self.assertAllClose(labels[0], 0.0490969472)
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     test.main()
--------------------------------------------------------------------------------
/utils/svhn.py:
--------------------------------------------------------------------------------
1 | """Load data from SVHN dataset
2 | """
3 | 
4 | import os.path
5 | import dataset
6 | import numpy
7 | import scipy.io
8 | 
9 | FLAGS = None
10 | 
11 | SVHN_TRAIN_FILE_NAME = 'train_32x32.mat'
12 | SVHN_TEST_FILE_NAME = 'test_32x32.mat'
13 | SVHN_DATA = 'X'
14 | SVHN_LABEL = 'y'
15 | 
16 | 
17 | class SVHN(object):
18 |     def __init__(self):
19 |         self.train = dataset.DataSet()
20 |         self.test = dataset.DataSet()
21 | 
22 |     def ReadDataSets(self, data_dir=".", one_hot=False):
23 |         train_file_path = os.path.join(data_dir, SVHN_TRAIN_FILE_NAME)
24 |         test_file_path = os.path.join(data_dir, SVHN_TEST_FILE_NAME)
25 |         if not os.path.isfile(train_file_path) or not os.path.isfile(test_file_path):
26 |             print("SVHN dataset not found.")  # fixed: bail out if either file is missing, not only if both are
27 |             return
28 | 
29 |         read_input = scipy.io.loadmat(train_file_path)
30 |         self.train.images = read_input[SVHN_DATA]
31 |         self.train.labels = read_input[SVHN_LABEL]
32 | 
33 |         read_input = scipy.io.loadmat(test_file_path)
34 |         self.test.images = read_input[SVHN_DATA]
35 |         self.test.labels = read_input[SVHN_LABEL]
36 | 
37 |         self.train.images = numpy.swapaxes(self.train.images, 0, 3)
38 |         self.train.images = numpy.swapaxes(self.train.images, 1, 2)
39 |         self.train.images = numpy.swapaxes(self.train.images, 2, 3)
40 | 
41 |         self.train.images = self.train.images.reshape((73257, -1))
42 | 
43 |         self.test.images = numpy.swapaxes(self.test.images, 0, 3)
44 |         self.test.images = numpy.swapaxes(self.test.images, 1, 2)
45 |         self.test.images = numpy.swapaxes(self.test.images, 2, 3)
46 | 
47 |         self.test.images = self.test.images.reshape((26032, -1))
48 | 
49 |         self.train.images = self.train.images / numpy.float32(255)
50 |         self.test.images = self.test.images / numpy.float32(255)
51 | 
52 |         if one_hot:
53 |             train_labels = numpy.zeros((73257, 10), dtype=numpy.float32)
54 |             test_labels = numpy.zeros((26032, 10), dtype=numpy.float32)
55 | 
56 |             for i in range(73257):
57 |                 train_labels[i, self.train.labels[i] - 1] = 1.
58 |             self.train.labels = train_labels
59 | 
60 |             for j in range(26032):
61 |                 test_labels[j, self.test.labels[j] - 1] = 1.
62 |             self.test.labels = test_labels
63 | 
--------------------------------------------------------------------------------
/utils/udacity_data.py:
--------------------------------------------------------------------------------
1 | import scipy.misc
2 | import random
3 | import pandas as pd
4 | import tensorflow as tf
5 | 
6 | # points to the end of the last batch
7 | train_batch_pointer = 0
8 | val_batch_pointer = 0
9 | 
10 | train_xs = []
11 | train_ys = []
12 | val_xs = []
13 | val_ys = []
14 | 
15 | TRAIN_IMG_PREFIX = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_%s/"
16 | TRAIN_CSV = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_%s/interpolated.csv"
17 | VAL_IMG_PREFIX = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/"
18 | VAL_CSV = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/interpolated.csv"
19 | 
20 | NUM_TRAIN_IMAGES = 33808
21 | NUM_VAL_IMAGES = 5279
22 | 
23 | 
24 | def read_csv(csv_file_name, img_prefix):
25 |     x_out = []
26 |     data_csv = pd.read_csv(csv_file_name)
27 |     data = data_csv[[x.startswith("center") for x in data_csv["filename"]]]
28 |     for file_name in data["filename"]:
29 |         x_out.append(img_prefix + file_name)
30 |     return x_out, data["angle"]
31 | 
32 | 
33 | def read_data(shuffle=True):
34 |     global train_xs
35 |     global train_ys
36 |     global val_xs
37 |     global val_ys
38 | 
39 |     # Read train set
40 |     for idx in range(1, 7):
41 |         if idx == 3:
42 |             continue
43 |         x_out, y_out = read_csv(TRAIN_CSV % idx, TRAIN_IMG_PREFIX % idx)
44 |         train_xs.extend(x_out)
45 |         train_ys.extend(y_out)
46 |     # Read val set
47 |     val_xs, val_ys = read_csv(VAL_CSV, VAL_IMG_PREFIX)
48 | 
49 |     # shuffle train set
50 |     c = list(zip(train_xs, train_ys))
51 |     if shuffle:
52 |         random.shuffle(c)
53 |     # with open("train.txt", 'a') as out:
54 |     #     for item in c:
55 |     #         out.write("%s %.10f\n" % (item[0], item[1]))
56 |     train_xs, train_ys = zip(*c)
57 |     # shuffle val set
58 |     c = list(zip(val_xs, val_ys))
59 |     # with open("val.txt", 'a') as out:
60 |     #     for item in c:
61 |     #         out.write("%s %.10f\n" % (item[0], item[1]))
62 |     if shuffle:
63 |         random.shuffle(c)
64 |     val_xs, val_ys = zip(*c)
65 | 
66 | 
67 | def load_train_batch(batch_size):
68 |     global train_batch_pointer
69 |     global train_xs
70 |     global train_ys
71 | 
72 |     x_out = []
73 |     y_out = []
74 |     for i in range(0, batch_size):
75 |         image = scipy.misc.imread(train_xs[(train_batch_pointer + i) % NUM_TRAIN_IMAGES], mode="RGB")
76 |         x_out.append(scipy.misc.imresize(image[-300:], [66, 200]) / 255.0)
77 |         y_out.append([train_ys[(train_batch_pointer + i) % NUM_TRAIN_IMAGES]])
78 |     train_batch_pointer += batch_size
79 |     return x_out, y_out
80 | 
81 | 
82 | def load_val_batch(batch_size):
83 |     global val_batch_pointer
84 |     global val_xs
85 |     global val_ys
86 | 
87 |     x_out = []
88 |     y_out = []
89 |     for i in range(0, batch_size):
90 |         image = scipy.misc.imread(val_xs[(val_batch_pointer + i) % NUM_VAL_IMAGES], mode="RGB")
91 |         x_out.append(scipy.misc.imresize(image[-300:], [66, 200]) / 255.0)
92 |         y_out.append([val_ys[(val_batch_pointer + i) % NUM_VAL_IMAGES]])
93 |     val_batch_pointer += batch_size
94 |     return x_out, y_out
95 | 
96 | 
97 | def main(_):
98 |     read_data()
99 | 
100 | if __name__ == '__main__':
101 |     tf.app.run(main=main)
--------------------------------------------------------------------------------
/utils/udacity_data_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.platform import test
6 | from scipy import misc
7 | import udacity_data
8 | 
9 | IMG_TRAIN = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_1/center/1479424215880976321.png"
10 | IMG_VAL = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/center/1479425441182877835.png"
11 | 
12 | class UdacityDataTest(test.TestCase):
13 | 
14 |     def testReadData(self):
15 |         udacity_data.read_data()
16 |         self.assertAllEqual(len(udacity_data.train_xs), 33808)
17 |         self.assertAllEqual(len(udacity_data.train_ys), 33808)
18 |         self.assertAllEqual(len(udacity_data.val_xs), 5279)
19 |         self.assertAllEqual(len(udacity_data.val_ys), 5279)
20 |         self.assertTrue(IMG_TRAIN in udacity_data.train_xs)
21 |         self.assertAllClose(udacity_data.train_ys[udacity_data.train_xs.index(IMG_TRAIN)], 0.0010389391)
22 |         self.assertTrue(IMG_VAL in udacity_data.val_xs)
23 |         self.assertAllClose(udacity_data.val_ys[udacity_data.val_xs.index(IMG_VAL)], -0.0169280299)
24 | 
25 |     def testLoadValBatch(self):  # renamed: a second testReadData would have shadowed the first
26 |         udacity_data.read_data()
27 |         x_out, y_out = udacity_data.load_val_batch(64)
28 |         misc.imsave('test.png', x_out[0])
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     test.main()
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def put_kernels_on_grid(kernel, (grid_Y, grid_X), pad=1):
4 |     '''Visualize conv. features as an image (mostly for the 1st layer).
5 |     Place kernel into a grid, with some paddings between adjacent filters.
6 |     Args:
7 |         kernel: tensor of shape [Y, X, NumChannels, NumKernels]
8 |         (grid_Y, grid_X): shape of the grid. Require: NumKernels == grid_Y * grid_X
9 |             User is responsible for how to break it into the two factors.
10 |         pad: number of black pixels around each filter (between them)
11 | 
12 |     Return:
13 |         Tensor of shape [(Y+pad)*grid_Y, (X+pad)*grid_X, NumChannels, 1].
14 |     '''
15 |     # pad X and Y
16 |     x1 = tf.pad(kernel, tf.constant([[pad, 0], [pad, 0], [0, 0], [0, 0]]))
17 | 
18 |     # X and Y dimensions, w.r.t. padding
19 |     Y = kernel.get_shape()[0] + pad
20 |     X = kernel.get_shape()[1] + pad
21 |     ch = kernel.get_shape()[2]
22 | 
23 |     # put NumKernels to the 1st dimension
24 |     x2 = tf.transpose(x1, (3, 0, 1, 2))
25 |     # organize grid on Y axis
26 |     x3 = tf.reshape(x2, tf.stack([grid_X, Y * grid_Y, X, ch]))  # tf.pack was renamed tf.stack in TF 1.0
27 | 
28 |     # switch X and Y axes
29 |     x4 = tf.transpose(x3, (0, 2, 1, 3))
30 |     # organize grid on X axis
31 |     x5 = tf.reshape(x4, tf.stack([1, X * grid_X, Y * grid_Y, ch]))
32 | 
33 |     # back to normal order (not combining with the next step for clarity)
34 |     x6 = tf.transpose(x5, (2, 1, 3, 0))
35 | 
36 |     # to tf.summary.image order [batch_size, height, width, channels],
37 |     # where in this case batch_size == 1
38 |     x7 = tf.transpose(x6, (3, 0, 1, 2))
39 | 
40 |     # scale to [0, 1]
41 |     x_min = tf.reduce_min(x7)
42 |     x_max = tf.reduce_max(x7)
43 |     x8 = (x7 - x_min) / (x_max - x_min)
44 | 
45 |     return x8
46 | 
--------------------------------------------------------------------------------
/vae/README.md:
--------------------------------------------------------------------------------
1 | [Understanding Variational Inference in Depth](https://limengweb.wordpress.com/2017/11/13/%E6%B7%B1%E5%85%A5%E7%90%86%E8%A7%A3%E5%8F%98%E5%88%86%E6%8E%A8%E6%96%AD/)
2 | 
3 |

4 |
5 | 
--------------------------------------------------------------------------------
/vae/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/vae/__init__.py
--------------------------------------------------------------------------------
/vae/vae_mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/vae/vae_mnist.png
--------------------------------------------------------------------------------
/vae/vae_mnist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """A Variational Autoencoder for MNIST.
5 | """
6 | 
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | 
11 | from keras.layers import Input, Dense, Lambda, Conv2D, Conv2DTranspose, \
12 |     Flatten, Reshape
13 | from keras.models import Model
14 | from keras import backend as K
15 | from keras.datasets import mnist
16 | from keras import metrics
17 | import tensorflow as tf
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 | from scipy.stats import norm
21 | 
22 | EPOCH = 5
23 | INPUT_DIM = 784
24 | BATCH_SIZE = 64
25 | HIDDEN_VAR_DIM = 7 * 7 * 32
26 | LATENT_VAR_DIM = 2
27 | 
28 | # input image dimensions
29 | 
30 | (img_rows, img_cols, img_chns) = (28, 28, 1)
31 | 
32 | if K.image_data_format() == 'channels_first':
33 |     original_img_size = (img_chns, img_rows, img_cols)
34 |     output_shape = (BATCH_SIZE, 32, 7, 7)
35 | else:
36 |     original_img_size = (img_rows, img_cols, img_chns)
37 |     output_shape = (BATCH_SIZE, 7, 7, 32)
38 | 
39 | 
40 | def sampling(args):
41 |     (z_mean, z_var) = args
42 |     epsilon = K.random_normal(shape=(K.shape(z_mean)[0],
43 |                               LATENT_VAR_DIM), mean=0., stddev=1.)
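    # Reparameterization trick: sampling z ~ N(z_mean, z_var^2) directly would
    # not be differentiable w.r.t. the encoder outputs, so we sample
    # eps ~ N(0, 1) and shift/scale it instead. Note that z_var acts as a
    # standard deviation here, consistent with the KL term in vae_loss below.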
44 |     return z_mean + z_var * epsilon
45 | 
46 | 
47 | def encode(x):
48 |     input_reshape = Reshape(original_img_size)(x)
49 |     conv1 = Conv2D(16, 5, strides=(2, 2), padding='same',
50 |                    activation='relu')(input_reshape)
51 |     conv2 = Conv2D(32, 5, strides=(2, 2), padding='same',
52 |                    activation='relu')(conv1)
53 |     hidden = Flatten()(conv2)
54 |     z_mean = Dense(LATENT_VAR_DIM, activation='relu')(hidden)
55 |     z_var = Dense(LATENT_VAR_DIM, activation='relu')(hidden)
56 |     return (z_mean, z_var)
57 | 
58 | 
59 | def decode(z):
60 |     hidden = Dense(HIDDEN_VAR_DIM, activation='relu')(z)
61 |     hidden_reshape = Reshape(output_shape[1:])(hidden)
62 |     deconv1 = Conv2DTranspose(16, 5, strides=(2, 2), padding='same',
63 |                               activation='relu')(hidden_reshape)
64 |     deconv2 = Conv2DTranspose(1, 5, strides=(2, 2), padding='same',
65 |                               activation='sigmoid')(deconv1)
66 |     return Flatten()(deconv2)
67 | 
68 | 
69 | def main(_):
70 |     x = Input(shape=(INPUT_DIM, ))
71 |     (z_mean, z_var) = encode(x)
72 |     z = Lambda(sampling)([z_mean, z_var])
73 |     x_decoded = decode(z)
74 |     model = Model(inputs=x, outputs=x_decoded)
75 | 
76 |     def vae_loss(y_true, y_pred):
77 |         generation_loss = img_rows * img_cols \
78 |             * metrics.binary_crossentropy(x, x_decoded)
79 |         kl_loss = 0.5 * tf.reduce_sum(K.square(z_mean)
80 |             + K.square(z_var) - K.log(K.square(z_var + 1e-8)) - 1,
81 |             axis=1)
82 |         return tf.reduce_mean(generation_loss + kl_loss)
83 | 
84 |     model.compile(optimizer='rmsprop', loss=vae_loss)
85 | 
86 |     # train the VAE on MNIST digits
87 | 
88 |     ((x_train, y_train), (x_test, y_test)) = mnist.load_data()
89 | 
90 |     x_train = x_train.astype('float32') / 255.
91 |     x_test = x_test.astype('float32') / 255.
92 |     x_train = x_train.reshape((len(x_train),
93 |                                np.prod(x_train.shape[1:])))
94 |     x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
95 | 
96 |     print(model.summary())
97 | 
98 |     model.fit(
99 |         x_train,
100 |         x_train,  # reconstruction target: vae_loss compares x to x_decoded, so the inputs double as targets
101 |         shuffle=True,
102 |         epochs=EPOCH,
103 |         batch_size=BATCH_SIZE,
104 |         validation_data=(x_test, x_test),
105 |     )
106 | 
107 |     generator = K.function([model.layers[8].input],
108 |                            [model.layers[12].output])
109 | 
110 |     # display a 2D manifold of the digits
111 | 
112 |     n = 15  # figure with 15x15 digits
113 |     digit_size = 28
114 |     figure = np.zeros((digit_size * n, digit_size * n))
115 | 
116 |     # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
117 |     # to produce values of the latent variables z, since the prior of the latent space is Gaussian
118 | 
119 |     grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
120 |     grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
121 | 
122 |     for (i, yi) in enumerate(grid_x):
123 |         for (j, xi) in enumerate(grid_y):
124 |             z_sample = np.array([[xi, yi]])
125 |             z_sample = np.tile(z_sample,
126 |                                BATCH_SIZE).reshape(BATCH_SIZE, 2)
127 |             x_decoded = generator([z_sample])[0]
128 |             digit = x_decoded[0].reshape(digit_size, digit_size)
129 | 
130 |             figure[i * digit_size:(i + 1) * digit_size, j * digit_size:
131 |                    (j + 1) * digit_size] = digit
132 | 
133 |     plt.figure(figsize=(10, 10))
134 |     plt.imshow(figure, cmap='Greys_r')
135 |     plt.show()
136 | 
137 | 
138 | if __name__ == '__main__':
139 |     tf.app.run(main=main)
140 | 
--------------------------------------------------------------------------------
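For reference, the KL term in `vae_loss` above is the standard closed form for a diagonal Gaussian posterior against a standard normal prior (a textbook identity, stated here for clarity; in the code `z_mean` plays the role of mu and `z_var` the role of sigma):

```latex
D_{\mathrm{KL}}\!\left(\mathcal{N}(\mu,\,\sigma^2 I)\;\|\;\mathcal{N}(0,\,I)\right)
  = \frac{1}{2} \sum_{d=1}^{D} \left( \mu_d^2 + \sigma_d^2 - \log \sigma_d^2 - 1 \right)
```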