├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── capnet
│   └── capsnet.py
├── cifar
│   ├── README.md
│   ├── __init__.py
│   ├── cifar10.png
│   └── cifar_conv.py
├── gan
│   ├── README.md
│   ├── gan.png
│   └── gan.py
├── kaggle
│   ├── Avito
│   │   ├── __init__.py
│   │   ├── avito.py
│   │   ├── avito2.py
│   │   └── baseline_lgb.csv
│   ├── CostaRicanHouseholdPovertyLevelPrediction
│   │   └── kernel
│   │       └── kernel.ipynb
│   ├── DigitalRecognizer
│   │   ├── __init__.py
│   │   ├── digital_recognizer.py
│   │   └── predict.csv
│   ├── SantanderValuePrediction
│   │   ├── SantanderPredict.ipynb
│   │   ├── pipline.py
│   │   └── santander.py
│   ├── TalkingData
│   │   ├── __init__.py
│   │   └── talking_data.py
│   ├── __init__.py
│   ├── titanic
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── titanic.png
│   │   └── titanic.py
│   └── zillow
│       ├── __init__.py
│       ├── location.py
│       ├── log_error.py
│       ├── log_error_hist.py
│       ├── missing_data.py
│       ├── month.py
│       └── train_data_shape.py
├── mnist
│   ├── __init__.py
│   ├── fully_connected_feed.py
│   ├── fully_connected_feed_simple.py
│   ├── mnist.py
│   ├── mnist_conv.py
│   ├── mnist_simple.py
│   ├── mnist_softmax.py
│   ├── mnist_with_summaries.py
│   └── mnist_with_summary.py
├── reading
│   └── capsnet
│       └── drbc.pdf
├── self_driving
│   ├── README.md
│   ├── __init__.py
│   ├── lane_detect
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── comma_ai_lane_detect.py
│   │   ├── lane_detect.png
│   │   └── udacity_lane_detect.py
│   ├── optical_flow
│   │   ├── __init__.py
│   │   └── python
│   │       ├── __init__.py
│   │       ├── common.py
│   │       ├── opt_flow.py
│   │       ├── tst_scene_render.py
│   │       └── video.py
│   ├── road_seg
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── convnet.py
│   │   ├── fcn8_vgg.py
│   │   ├── road_seg.png
│   │   ├── test_fcn8_vgg.py
│   │   └── unet.py
│   ├── segnet
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── evaluate.py
│   │   ├── evaluate_kitti.py
│   │   ├── evaluate_test.py
│   │   ├── merge_output.sh
│   │   ├── prepare_camvid.py
│   │   ├── prepare_camvid.sh
│   │   ├── prepare_kitti.py
│   │   ├── prepare_kitti.sh
│   │   ├── prepare_kitti_test.py
│   │   ├── prepare_kitti_test.sh
│   │   ├── segnet.png
│   │   ├── segnet_vgg.py
│   │   ├── segnet_vgg_test.py
│   │   ├── train.py
│   │   └── train_kitti.py
│   └── steering
│       ├── __init__.py
│       ├── driving_data.py
│       ├── evaluate.py
│       ├── model.py
│       ├── model_resnet50.py
│       ├── model_saliency.py
│       ├── split_data.sh
│       └── train.py
├── utils
│   ├── __init__.py
│   ├── camvid.py
│   ├── camvid_test.py
│   ├── cifar.py
│   ├── cifar_test.py
│   ├── dataset.py
│   ├── kitti.py
│   ├── kitti_segnet.py
│   ├── my_image.py
│   ├── my_image_test.py
│   ├── svhn.py
│   ├── udacity_data.py
│   ├── udacity_data_test.py
│   ├── udacity_train.txt
│   ├── udacity_val.txt
│   └── utils.py
└── vae
    ├── README.md
    ├── __init__.py
    ├── vae_mnist.png
    ├── vae_mnist.py
    ├── vaegan_cifar.py
    └── vaegan_svhn.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Built application files
2 | *.apk
3 | *.ap_
4 |
5 | # Files for the Dalvik VM
6 | *.dex
7 |
8 | # Java class files
9 | *.class
10 |
11 | # Generated files
12 | bin/
13 | gen/
14 | .idea/
15 |
16 | # Gradle files
17 | .gradle/
18 | build/
19 |
20 | # Local configuration file (sdk path, etc)
21 | local.properties
22 |
23 | # Proguard folder generated by Eclipse
24 | proguard/
25 |
26 | # Log Files
27 | *.log
28 |
29 | .DS_Store
30 |
31 | *.pyc
32 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Machine Learning
2 | ================
3 |
4 | Welcome to my blog [听雨居](https://limengweb.wordpress.com). It contains detailed descriptions of the code in this repository.
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/__init__.py
--------------------------------------------------------------------------------
/capnet/capsnet.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function, unicode_literals
2 |
3 | # %matplotlib inline  (IPython magic; enable only when running in a notebook)
4 | import matplotlib
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import tensorflow as tf
8 |
9 | tf.reset_default_graph()
10 | np.random.seed(42)
11 | tf.set_random_seed(42)
12 |
13 | from tensorflow.examples.tutorials.mnist import input_data
14 |
15 | mnist = input_data.read_data_sets("/tmp/data/")
16 |
17 | n_samples = 5
18 |
19 | plt.figure(figsize=(n_samples * 2, 3))
20 | for index in range(n_samples):
21 | plt.subplot(1, n_samples, index + 1)
22 | sample_image = mnist.train.images[index].reshape(28, 28)
23 | plt.imshow(sample_image, cmap="binary")
24 | plt.axis("off")
25 |
26 | plt.show()
27 |
--------------------------------------------------------------------------------
/cifar/README.md:
--------------------------------------------------------------------------------
1 | [Recognizing CIFAR-10 with a Convolutional Neural Network](https://limengweb.wordpress.com/2016/12/31/%E5%88%A9%E7%94%A8%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%E8%AF%86%E5%88%ABcifar-10/)
2 |
3 | ![cifar10](cifar10.png)
4 |
5 |
--------------------------------------------------------------------------------
/cifar/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/cifar/__init__.py
--------------------------------------------------------------------------------
/cifar/cifar10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/cifar/cifar10.png
--------------------------------------------------------------------------------
/cifar/cifar_conv.py:
--------------------------------------------------------------------------------
1 | """A convolutional neural network for CIFAR-10 classification.
2 | """
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import tensorflow as tf
8 | from utils import cifar
9 | from utils.utils import put_kernels_on_grid
10 |
11 | EPOCH = 36000
12 | BATCH_SIZE = 128
13 |
14 |
15 | def weight_variable_with_decay(shape, wd):
16 | initial = tf.truncated_normal(shape, stddev=0.05, dtype=tf.float32)
17 | var = tf.Variable(initial, 'weights')
18 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
19 | tf.add_to_collection('losses', weight_decay)
20 | return var
21 |
22 |
23 | def bias_variable(shape):
24 | initial = tf.constant(0.0, shape=shape, dtype=tf.float32)
25 | return tf.Variable(initial, 'biases')
26 |
27 |
28 | def conv2d(x, W):
29 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
30 |
31 |
32 | def max_pool_2x2(x):
33 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
34 | strides=[1, 2, 2, 1], padding='SAME')
35 |
36 |
37 | def conv_layer(layer_name, input, in_dim, in_ch, out_dim, out_size, summary_conv=False):
38 | with tf.name_scope(layer_name):
39 | # Initialize weights and bias
40 | W_conv = weight_variable_with_decay([in_dim, in_dim, in_ch, out_dim], 0.004)
41 | b_conv = bias_variable([out_dim])
42 |
43 | # Log weights and bias
44 | tf.summary.histogram("weights", W_conv)
45 | tf.summary.histogram("biases", b_conv)
46 |
47 | # Draw weights in 8x8 grid for the first conv layer
48 | if summary_conv:
49 | kernel_grid = put_kernels_on_grid(W_conv, (8, 8))
50 | tf.summary.image("kernel", kernel_grid, max_outputs=1)
51 |
52 | # Draw conv activation in 8x8 grid
53 | activation = tf.nn.bias_add(conv2d(input, W_conv), b_conv)
54 | # Only draw the activation for the first image in a batch
55 | activation_sample = tf.slice(activation, [0, 0, 0, 0], [1, out_size, out_size, out_dim])
56 | activation_grid = put_kernels_on_grid(tf.transpose(activation_sample, [1, 2, 0, 3]), (8, 8))
57 | tf.summary.image("conv/activatins", activation_grid, max_outputs=1)
58 |
59 | # Draw relu activation in 8x8 grid
60 | activation = tf.nn.relu(activation)
61 | # Only draw the activation for the first image in a batch
62 | activation_sample = tf.slice(activation, [0, 0, 0, 0], [1, out_size, out_size, out_dim])
63 | activation_grid = put_kernels_on_grid(tf.transpose(activation_sample, [1, 2, 0, 3]), (8, 8))
64 | tf.summary.image("relu/activatins", activation_grid, max_outputs=1)
65 |
66 | # 2x2 max pooling
67 | pool = max_pool_2x2(activation)
68 |
69 | return tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm')
70 |
71 |
72 | def fc_layer(layer_name, input, in_dim, out_dim, activation=True):
73 | with tf.name_scope(layer_name):
74 | # Initialize weights and bias
75 | W_fc = weight_variable_with_decay([in_dim, out_dim], 0.004)
76 | b_fc = bias_variable([out_dim])
77 |
78 | # Log weights and bias
79 | tf.summary.histogram("weights", W_fc)
80 | tf.summary.histogram("biases", b_fc)
81 |
82 | # The last fc layer outputs raw logits, so skip the activation there
83 | if activation:
84 | return tf.nn.relu(tf.nn.bias_add(tf.matmul(input, W_fc), b_fc))
85 | else:
86 | return tf.nn.bias_add(tf.matmul(input, W_fc), b_fc)
87 |
88 |
89 | def loss(logits, labels):
90 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
91 | tf.add_to_collection('losses', cross_entropy)
92 | total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
93 | tf.summary.scalar('loss', total_loss)
94 | return total_loss
95 |
96 |
97 | def learning_rate(global_step):
98 | starter_learning_rate = 0.001
99 | learning_rate_1 = tf.train.exponential_decay(
100 | starter_learning_rate, global_step, EPOCH * 0.2, 0.1, staircase=True)
101 | learning_rate_2 = tf.train.exponential_decay(
102 | learning_rate_1, global_step, EPOCH * 0.4, 0.5, staircase=True)
103 | decayed_learning_rate = tf.train.exponential_decay(
104 | learning_rate_2, global_step, EPOCH * 0.6, 0.8, staircase=True)
105 | tf.summary.scalar('learning_rate', decayed_learning_rate)
106 | return decayed_learning_rate
107 |
108 |
109 | def main(_):
110 | cifar10 = cifar.Cifar()
111 | cifar10.ReadDataSets(one_hot=True)
112 |
113 | keep_prob = tf.placeholder(tf.float32)
114 |
115 | # Create the model
116 | x = tf.placeholder(tf.float32, [None, 3, 32, 32])
117 |
118 | # Define loss and optimizer
119 | y_ = tf.placeholder(tf.float32, [None, 10])
120 |
121 | x_image = tf.transpose(x, [0, 2, 3, 1])
122 |
123 | tf.summary.image("images", x_image, max_outputs=1)
124 |
125 | h_pool1 = conv_layer("conv_layer1", x_image, 5, 3, 64, 32, summary_conv=True)
126 | h_pool2 = conv_layer("conv_layer2", h_pool1, 5, 64, 64, 16)
127 |
128 | h_conv3_flat = tf.reshape(h_pool2, [-1, 8 * 8 * 64])
129 |
130 | h_fc1 = fc_layer('fc_layer1', h_conv3_flat, 8 * 8 * 64, 384, activation=True)
131 | h_fc2 = fc_layer('fc_layer2', h_fc1, 384, 192, activation=True)
132 | y_conv = fc_layer('fc_layer3', h_fc2, 192, 10, activation=False)
133 |
134 | global_step = tf.Variable(0, trainable=False)
135 | lr = learning_rate(global_step)
136 |
137 | total_loss = loss(y_conv, y_)
138 | optimizer = tf.train.AdamOptimizer(lr)
139 | grads_and_vars = optimizer.compute_gradients(total_loss)
140 | with tf.name_scope("conv_layer1_grad"):
141 | kernel_grad_grid = put_kernels_on_grid(grads_and_vars[0][0], (8, 8))
142 | tf.summary.image("weight_grad", kernel_grad_grid, max_outputs=1)
143 |
144 | train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
145 | correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
146 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
147 |
148 | sess = tf.InteractiveSession()
149 |
150 | merged = tf.summary.merge_all()
151 | train_writer = tf.summary.FileWriter('train', sess.graph)
152 |
153 | sess.run(tf.global_variables_initializer())
154 |
155 | for i in range(EPOCH):
156 | batch = cifar10.train.next_batch(BATCH_SIZE)
157 | if i % 100 == 0:
158 | test_accuracy = accuracy.eval(feed_dict={x: cifar10.test.images, y_: cifar10.test.labels})
159 | print("step %d, test accuracy %g" % (i, test_accuracy))
160 | summary, _ = sess.run([merged, train_step], feed_dict={x: batch[0], y_: batch[1]})
161 | train_writer.add_summary(summary, i)
162 |
163 | print("test accuracy %g" % accuracy.eval(feed_dict={
164 | x: cifar10.test.images, y_: cifar10.test.labels}))
165 |
166 |
167 | if __name__ == '__main__':
168 | tf.app.run(main=main)
169 |
--------------------------------------------------------------------------------
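The learning_rate() function above stacks three staircase exponential_decay ops, which compose multiplicatively into a single piecewise-constant schedule. A standalone sketch of the effective rate (plain Python, assuming the same EPOCH = 36000 constant; not part of the repo):

```python
# Hypothetical standalone rewrite of the schedule cifar_conv.py builds with
# three chained tf.train.exponential_decay ops (staircase=True): each decay
# multiplies in rate ** floor(step / decay_steps).
EPOCH = 36000  # same constant as cifar_conv.py

def effective_lr(step, base=0.001):
    lr = base
    lr *= 0.1 ** (step // int(EPOCH * 0.2))  # x0.1 every 7200 steps
    lr *= 0.5 ** (step // int(EPOCH * 0.4))  # x0.5 every 14400 steps
    lr *= 0.8 ** (step // int(EPOCH * 0.6))  # x0.8 every 21600 steps
    return lr

for step in (0, 7200, 14400, 21600, 28800):
    print(step, effective_lr(step))
```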
/gan/README.md:
--------------------------------------------------------------------------------
1 | [A Brief Look at Generative Adversarial Networks](https://limengweb.wordpress.com/2017/02/19/%E6%B5%85%E6%9E%90%E7%94%9F%E6%88%90%E5%AF%B9%E6%8A%97%E7%BD%91%E7%BB%9C/)
2 |
3 | ![gan](gan.png)
4 |
5 |
--------------------------------------------------------------------------------
/gan/gan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/gan/gan.png
--------------------------------------------------------------------------------
/gan/gan.py:
--------------------------------------------------------------------------------
1 | """Generative Adversarial Networks
2 |
3 | An example of distribution approximation using Generative Adversarial Networks in TensorFlow.
4 | """
5 | import os
6 |
7 | os.environ["KERAS_BACKEND"] = "tensorflow"
8 |
9 | from keras.layers import Dense
10 | from keras.models import Sequential
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | import tensorflow as tf
14 | import seaborn as sns
15 |
16 | sns.set(color_codes=True)
17 | np.random.seed(688)
18 |
19 | RANDOM_PORTION = 0.01
20 | HIDDEN_SIZE = 16
21 | BATCH_SIZE = 256
22 | EPOCH = 15000
23 | SAMPLE_RATE = 50
24 |
25 |
26 | class DataDistribution(object):
27 | def __init__(self):
28 | self.mu = 4
29 | self.sigma = .5
30 |
31 | def sample(self, N):
32 | samples = np.random.normal(self.mu, self.sigma, N)
33 | samples.sort()
34 | return samples
35 |
36 |
37 | class GeneratorDistribution(object):
38 | def __init__(self, low, high):
39 | self._low = low
40 | self._high = high
41 |
42 | def sample(self, N):
43 | return np.linspace(self._low, self._high, N) + np.random.random(N) * RANDOM_PORTION
44 |
45 |
46 | def generator(hidden_size):
47 | model = Sequential()
48 |
49 | model.add(Dense(hidden_size, activation='softplus', batch_input_shape=(BATCH_SIZE, 1), init='normal', name="g0"))
50 | model.add(Dense(1, init='normal', name="g1"))
51 |
52 | return model
53 |
54 |
55 | def discriminator(hidden_size):
56 | model = Sequential()
57 |
58 | model.add(Dense(hidden_size * 2, activation='tanh', batch_input_shape=(BATCH_SIZE, 1), init='normal', name="d0"))
59 | model.add(Dense(hidden_size * 2, activation='tanh', init='normal', name="d1"))
60 | model.add(Dense(hidden_size * 2, activation='tanh', init='normal', name="d2"))
61 | model.add(Dense(1, activation='sigmoid', init='normal', name="d3"))
62 |
63 | return model
64 |
65 |
66 | def optimizer(loss, var_list):
67 | initial_learning_rate = 0.005
68 | decay = 0.95
69 | num_decay_steps = 150
70 | batch = tf.Variable(0)
71 | learning_rate = tf.train.exponential_decay(
72 | initial_learning_rate,
73 | batch,
74 | num_decay_steps,
75 | decay,
76 | staircase=True
77 | )
78 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
79 | loss,
80 | global_step=batch,
81 | var_list=var_list
82 | )
83 | return optimizer
84 |
85 |
86 | def export_animation(anim_frames):
87 | i = 0
88 | for t_data, g_data in anim_frames:
89 | f, ax = plt.subplots(figsize=(12, 8))
90 | f.suptitle('Generative Adversarial Network', fontsize=15)
91 | plt.xlabel('Data values')
92 | plt.ylabel('Probability density')
93 | ax.set_xlim(-2, 10)
94 | ax.set_ylim(0, 1.2)
95 | sns.distplot(t_data, hist=False, rug=True, color='r', label='Target Data', ax=ax)
96 | sns.distplot(g_data, hist=False, rug=True, color='g', label='Generated Data', ax=ax)
97 | f.savefig("images/frame_" + str(i) + ".png")
98 | print "Frame index: ", i * SAMPLE_RATE
99 | f.clf()
100 | plt.close()
101 | i += 1
102 |
103 | # Generate mp4 from images:
104 | # avconv -r 10 -i frame_%d.png -b:v 1000k gan.mp4
105 | # convert -delay 20 -loop 0 output/decision_*.png myimage.gif
106 |
107 | def train(_):
108 | anim_frames = []
109 | with tf.variable_scope('GAN'):
110 | G = generator(HIDDEN_SIZE)
111 | D = discriminator(HIDDEN_SIZE)
112 |
113 | Z = G.input
114 | X = D.input
115 | tf.summary.histogram("target", X)
116 |
117 | D1 = D(X)
118 | G_train = G(Z)
119 | tf.summary.histogram("generated", G_train)
120 | D2 = D(G_train)
121 |
122 | loss_d = tf.reduce_mean(-tf.log(D1) - tf.log(1 - D2))
123 | loss_g = tf.reduce_mean(-tf.log(D2))
124 |
125 | tf.summary.scalar("loss_d", loss_d)
126 | tf.summary.scalar("loss_g", loss_g)
127 |
128 | g_params = G.trainable_weights
129 | d_params = D.trainable_weights
130 |
131 | opt_g = optimizer(loss_g, g_params)
132 | opt_d = optimizer(loss_d, d_params)
133 |
134 | with tf.Session() as session:
135 | merged = tf.summary.merge_all()
136 | train_writer = tf.summary.FileWriter('train', session.graph)
137 |
138 | session.run(tf.global_variables_initializer())
139 |
140 | for step in range(EPOCH):
141 | # update discriminator
142 | x = DataDistribution().sample(BATCH_SIZE)
143 | gen = GeneratorDistribution(-2, 10)
144 | z = gen.sample(BATCH_SIZE)
145 | _, _, summary = session.run([loss_d, opt_d, merged], {
146 | X: np.reshape(x, (BATCH_SIZE, 1)),
147 | Z: np.reshape(z, (BATCH_SIZE, 1))
148 | })
149 |
150 | # update generator
151 | z = gen.sample(BATCH_SIZE)
152 | _, _, summary = session.run([loss_g, opt_g, merged], {
153 | X: np.reshape(x, (BATCH_SIZE, 1)),
154 | Z: np.reshape(z, (BATCH_SIZE, 1))
155 | })
156 |
157 | G_gen = session.run(G_train, {
158 | X: np.reshape(x, (BATCH_SIZE, 1)),
159 | Z: np.reshape(z, (BATCH_SIZE, 1))
160 | })
161 |
162 | train_writer.add_summary(summary, step)
163 |
164 | if step % SAMPLE_RATE == 0:
165 | anim_frames.append((x, G_gen))
166 | print "step: ", step
167 |
168 | export_animation(anim_frames)
169 |
170 |
171 | if __name__ == "__main__":
172 | tf.app.run(main=train)
173 |
--------------------------------------------------------------------------------
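One caveat about the losses in train(): tf.log(D1) and tf.log(1 - D2) return -inf once the discriminator saturates at exactly 0 or 1. A minimal sketch, not from the repo, of the same losses with clipped discriminator outputs (d_real/d_fake stand in for D1 = D(X) and D2 = D(G(Z)); EPS is a hypothetical constant):

```python
import tensorflow as tf

EPS = 1e-7  # hypothetical clipping constant

def gan_losses(d_real, d_fake):
    # Keep the sigmoid outputs strictly inside (0, 1) before taking logs.
    d_real = tf.clip_by_value(d_real, EPS, 1.0 - EPS)
    d_fake = tf.clip_by_value(d_fake, EPS, 1.0 - EPS)
    loss_d = tf.reduce_mean(-tf.log(d_real) - tf.log(1.0 - d_fake))
    loss_g = tf.reduce_mean(-tf.log(d_fake))
    return loss_d, loss_g
```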
/kaggle/Avito/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/Avito/__init__.py
--------------------------------------------------------------------------------
/kaggle/Avito/avito2.py:
--------------------------------------------------------------------------------
1 | import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
2 | import matplotlib.pyplot as plt
3 | from sklearn import preprocessing, model_selection, metrics
4 | import lightgbm as lgb
5 |
6 | pd.options.mode.chained_assignment = None
7 | pd.options.display.max_columns = 999
8 |
9 | train_df = pd.read_csv("C:\\Users\\jowet\\Downloads\\kaggle\\avito\\train.csv", parse_dates=["activation_date"])
10 | test_df = pd.read_csv("C:\\Users\\jowet\\Downloads\\kaggle\\avito\\test.csv", parse_dates=["activation_date"])
11 | print("Train file rows and columns are : ", train_df.shape)
12 | print("Test file rows and columns are : ", test_df.shape)
13 |
14 | # Target and ID variables #
15 | train_y = train_df["deal_probability"].values
16 | test_id = test_df["item_id"].values
17 |
18 | # New variable on weekday #
19 | train_df["activation_weekday"] = train_df["activation_date"].dt.weekday
20 | test_df["activation_weekday"] = test_df["activation_date"].dt.weekday
21 |
22 | # Label encode the categorical variables #
23 | cat_vars = ["region", "city", "parent_category_name", "category_name", "user_type", "param_1", "param_2", "param_3"]
24 | for col in cat_vars:
25 | lbl = preprocessing.LabelEncoder()
26 | lbl.fit(list(train_df[col].values.astype('str')) + list(test_df[col].values.astype('str')))
27 | train_df[col] = lbl.transform(list(train_df[col].values.astype('str')))
28 | test_df[col] = lbl.transform(list(test_df[col].values.astype('str')))
29 |
30 | cols_to_drop = ["item_id", "user_id", "title", "description", "activation_date", "image"]
31 | train_X = train_df.drop(cols_to_drop + ["deal_probability"], axis=1)
32 | test_X = test_df.drop(cols_to_drop, axis=1)
33 |
34 |
35 | def run_lgb(train_X, train_y, val_X, val_y, test_X):
36 | params = {
37 | "objective": "regression",
38 | "metric": "rmse",
39 | "num_leaves": 30,
40 | "learning_rate": 0.1,
41 | "bagging_fraction": 0.7,
42 | "feature_fraction": 0.7,
43 | "bagging_frequency": 5,
44 | "bagging_seed": 2018,
45 | "verbosity": -1
46 | }
47 |
48 | lgtrain = lgb.Dataset(train_X, label=train_y)
49 | lgval = lgb.Dataset(val_X, label=val_y)
50 | evals_result = {}
51 | model = lgb.train(params, lgtrain, 10000, valid_sets=[lgval], early_stopping_rounds=100, verbose_eval=20,
52 | evals_result=evals_result)
53 |
54 | pred_test_y = model.predict(test_X, num_iteration=model.best_iteration)
55 | return pred_test_y, model, evals_result
56 |
57 |
58 | # Splitting the data for model training#
59 | dev_X = train_X.iloc[:-200000, :]
60 | val_X = train_X.iloc[-200000:, :]
61 | dev_y = train_y[:-200000]
62 | val_y = train_y[-200000:]
63 | print(dev_X.shape, val_X.shape, test_X.shape)
64 |
65 | # Training the model #
66 | pred_test, model, evals_result = run_lgb(dev_X, dev_y, val_X, val_y, test_X)
67 |
68 | # Making a submission file #
69 | pred_test[pred_test > 1] = 1
70 | pred_test[pred_test < 0] = 0
71 | sub_df = pd.DataFrame({"item_id": test_id})
72 | sub_df["deal_probability"] = pred_test
73 | sub_df.to_csv("baseline_lgb.csv", index=False)
74 |
75 | fig, ax = plt.subplots(figsize=(12, 18))
76 | lgb.plot_importance(model, max_num_features=50, height=0.8, ax=ax)
77 | ax.grid(False)
78 | plt.title("LightGBM - Feature Importance", fontsize=15)
79 | plt.show()
80 |
--------------------------------------------------------------------------------
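A detail worth noting in avito2.py: the LabelEncoder is fit on the concatenated train and test values. A toy sketch of why that matters (a category that appears only in test would otherwise make transform() raise a ValueError; the values below are made up):

```python
from sklearn import preprocessing

train_vals = ["moscow", "kazan"]   # hypothetical city values
test_vals = ["moscow", "sochi"]    # "sochi" never appears in train

lbl = preprocessing.LabelEncoder()
lbl.fit(train_vals + test_vals)          # fit on the union, as avito2.py does
print(list(lbl.transform(test_vals)))    # works: [1, 2]
# lbl.fit(train_vals) followed by lbl.transform(test_vals) would raise.
```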
/kaggle/DigitalRecognizer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/DigitalRecognizer/__init__.py
--------------------------------------------------------------------------------
/kaggle/DigitalRecognizer/digital_recognizer.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import pandas as pd
3 | import numpy as np
4 | import keras
5 | from keras.models import Sequential
6 | from keras.layers import Dense, Dropout, Flatten
7 | from keras.layers import Conv2D, MaxPooling2D
8 | from keras import backend as K
9 |
10 | batch_size = 128
11 | num_classes = 10
12 | epochs = 12
13 |
14 | # input image dimensions
15 | img_rows, img_cols = 28, 28
16 |
17 | train = pd.read_csv('C:\\Users\\jowet\\Downloads\\kaggle\\digit_recognizer\\train.csv')
18 | test = pd.read_csv('C:\\Users\\jowet\\Downloads\\kaggle\\digit_recognizer\\test.csv')
19 |
20 | x_train = train.drop(['label'], axis=1).as_matrix()
21 | y_train = train['label'].as_matrix()
22 | x_test = test.as_matrix()
23 |
24 | print(x_train.shape)
25 | print(y_train.shape)
26 | print(x_test.shape)
27 |
28 | if K.image_data_format() == 'channels_first':
29 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
30 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
31 | input_shape = (1, img_rows, img_cols)
32 | else:
33 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
34 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
35 | input_shape = (img_rows, img_cols, 1)
36 |
37 | x_train = x_train.astype('float32')
38 | x_test = x_test.astype('float32')
39 | x_train /= 255
40 | x_test /= 255
41 | print('x_train shape:', x_train.shape)
42 | print(x_train.shape[0], 'train samples')
43 | print(x_test.shape[0], 'test samples')
44 |
45 | # convert class vectors to binary class matrices
46 | y_train = keras.utils.to_categorical(y_train, num_classes)
47 |
48 | model = Sequential()
49 | model.add(Conv2D(32, kernel_size=(3, 3),
50 | activation='relu',
51 | input_shape=input_shape))
52 | model.add(Conv2D(64, (3, 3), activation='relu'))
53 | model.add(MaxPooling2D(pool_size=(2, 2)))
54 | model.add(Dropout(0.25))
55 | model.add(Flatten())
56 | model.add(Dense(128, activation='relu'))
57 | model.add(Dropout(0.5))
58 | model.add(Dense(num_classes, activation='softmax'))
59 |
60 | model.compile(loss=keras.losses.categorical_crossentropy,
61 | optimizer=keras.optimizers.Adadelta(),
62 | metrics=['accuracy'])
63 |
64 | model.fit(x_train, y_train,
65 | batch_size=batch_size,
66 | epochs=epochs,
67 | verbose=1)
68 | result = model.predict(x_test, verbose=0)
69 |
70 | predict = np.argmax(result, axis=1)
71 | sub_df = pd.DataFrame({"ImageId": range(1, len(predict) + 1)})
72 | sub_df["Label"] = predict
73 | sub_df.to_csv("predict.csv", index=False)
74 |
--------------------------------------------------------------------------------
/kaggle/SantanderValuePrediction/santander.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from xgboost import XGBRegressor
4 | import pandas as pd
5 |
6 | train = pd.read_csv("C:\\Users\\jowet\\Downloads\\Santander\\train.csv")
7 | test = pd.read_csv("C:\\Users\\jowet\\Downloads\\Santander\\test.csv")
8 |
9 | train.drop('ID', axis=1, inplace=True)
10 |
11 | y_train = train.pop('target')
12 | pred_index = test.pop('ID')
13 |
14 | reg = XGBRegressor()
15 | reg.fit(train, y_train)
16 | y_pred = reg.predict(test)
17 |
18 | submit = pd.DataFrame()
19 | submit['ID'] = pred_index
20 | submit['target'] = y_pred
21 | submit.to_csv('my_XGB_prediction.csv', index=False)
22 |
--------------------------------------------------------------------------------
/kaggle/TalkingData/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/TalkingData/__init__.py
--------------------------------------------------------------------------------
/kaggle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/__init__.py
--------------------------------------------------------------------------------
/kaggle/titanic/README.md:
--------------------------------------------------------------------------------
1 | [Titanic: Applying Machine Learning](https://limengweb.wordpress.com/2017/09/30/%E6%B3%B0%E5%9D%A6%E5%B0%BC%E5%85%8B%EF%BC%9A%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E5%BA%94%E7%94%A8/)
2 |
3 | ![titanic](titanic.png)
4 |
5 |
--------------------------------------------------------------------------------
/kaggle/titanic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/titanic/__init__.py
--------------------------------------------------------------------------------
/kaggle/titanic/titanic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/titanic/titanic.png
--------------------------------------------------------------------------------
/kaggle/titanic/titanic.py:
--------------------------------------------------------------------------------
1 | # remove warnings
2 | import warnings
3 | import pandas as pd
4 | #from matplotlib import pyplot as plt
5 | import numpy as np
6 |
7 | from sklearn.pipeline import make_pipeline
8 | from sklearn.ensemble import RandomForestClassifier
9 | from sklearn.feature_selection import SelectKBest
10 | from sklearn.cross_validation import StratifiedKFold
11 | from sklearn.grid_search import GridSearchCV
12 | from sklearn.ensemble.gradient_boosting import GradientBoostingClassifier
13 | from sklearn.cross_validation import cross_val_score
14 |
15 | from sklearn.ensemble import ExtraTreesClassifier
16 | from sklearn.feature_selection import SelectFromModel
17 |
18 | warnings.filterwarnings('ignore')
19 | pd.options.display.max_rows = 100
20 |
21 | def get_combined_data():
22 | # reading train data
23 | train = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/train.csv')
24 |
25 | # reading test data
26 | test = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/test.csv')
27 |
28 | # extracting and then removing the targets from the training data
29 | targets = train.Survived
30 | train.drop('Survived',1,inplace=True)
31 |
32 |
33 | # merging train data and test data for future feature engineering
34 | combined = train.append(test)
35 | combined.reset_index(inplace=True)
36 | combined.drop('index',inplace=True,axis=1)
37 |
38 | return combined, targets
39 |
40 |
41 | def create_titles(combined):
42 | # we extract the title from each name
43 | combined['Title'] = combined['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip())
44 |
45 | # a map of more aggregated titles
46 | Title_Dictionary = {
47 | "Capt": "Officer",
48 | "Col": "Officer",
49 | "Major": "Officer",
50 | "Jonkheer": "Royalty",
51 | "Don": "Royalty",
52 | "Sir" : "Royalty",
53 | "Dr": "Officer",
54 | "Rev": "Officer",
55 | "the Countess": "Royalty",
56 | "Dona": "Royalty",
57 | "Mme": "Mrs",
58 | "Mlle": "Miss",
59 | "Ms": "Mrs",
60 | "Mr" : "Mr",
61 | "Mrs" : "Mrs",
62 | "Miss" : "Miss",
63 | "Master" : "Master",
64 | "Lady" : "Royalty"
65 | }
66 |
67 | # we map each title
68 | combined['Title'] = combined.Title.map(Title_Dictionary)
69 |
70 |
71 | def process_age(combined):
72 | # a function that fills the missing values of the Age variable
73 | def fillAges(row):
74 | if row['Sex']=='female' and row['Pclass'] == 1:
75 | if row['Title'] == 'Miss':
76 | return 30
77 | elif row['Title'] == 'Mrs':
78 | return 45
79 | elif row['Title'] == 'Officer':
80 | return 49
81 | elif row['Title'] == 'Royalty':
82 | return 39
83 | elif row['Sex']=='female' and row['Pclass'] == 2:
84 | if row['Title'] == 'Miss':
85 | return 20
86 | elif row['Title'] == 'Mrs':
87 | return 30
88 | elif row['Sex']=='female' and row['Pclass'] == 3:
89 | if row['Title'] == 'Miss':
90 | return 18
91 | elif row['Title'] == 'Mrs':
92 | return 31
93 | elif row['Sex']=='male' and row['Pclass'] == 1:
94 | if row['Title'] == 'Master':
95 | return 6
96 | elif row['Title'] == 'Mr':
97 | return 41.5
98 | elif row['Title'] == 'Officer':
99 | return 52
100 | elif row['Title'] == 'Royalty':
101 | return 40
102 | elif row['Sex']=='male' and row['Pclass'] == 2:
103 | if row['Title'] == 'Master':
104 | return 2
105 | elif row['Title'] == 'Mr':
106 | return 30
107 | elif row['Title'] == 'Officer':
108 | return 41.5
109 | elif row['Sex']=='male' and row['Pclass'] == 3:
110 | if row['Title'] == 'Master':
111 | return 6
112 | elif row['Title'] == 'Mr':
113 | return 26
114 |
115 | combined.Age = combined.apply(
116 | lambda r : fillAges(r) if np.isnan(r['Age']) else r['Age'], axis=1)
117 |
118 |
119 | def process_names(combined):
120 | # we clean the Name variable
121 | combined.drop('Name',axis=1,inplace=True)
122 |
123 | # encoding in dummy variable
124 | titles_dummies = pd.get_dummies(combined['Title'],prefix='Title')
125 | combined = pd.concat([combined,titles_dummies],axis=1)
126 |
127 | # removing the title variable
128 | combined.drop('Title',axis=1,inplace=True)
129 |
130 | return combined
131 |
132 |
133 | def process_fares(combined):
134 | # there's one missing fare value - replacing it with the mean.
135 | combined.Fare.fillna(combined.Fare.mean(),inplace=True)
136 |
137 |
138 | def process_embarked(combined):
139 | # two missing embarked values - filling them with the most frequent one (S)
140 | combined.Embarked.fillna('S',inplace=True)
141 |
142 | # dummy encoding
143 | embarked_dummies = pd.get_dummies(combined['Embarked'],prefix='Embarked')
144 | combined = pd.concat([combined,embarked_dummies],axis=1)
145 | combined.drop('Embarked',axis=1,inplace=True)
146 |
147 | return combined
148 |
149 |
150 | def process_cabin(combined):
151 | # replacing missing cabins with U (for Unknown)
152 | combined.Cabin.fillna('U',inplace=True)
153 |
154 | # mapping each Cabin value with the cabin letter
155 | combined['Cabin'] = combined['Cabin'].map(lambda c : c[0])
156 |
157 | # dummy encoding ...
158 | cabin_dummies = pd.get_dummies(combined['Cabin'],prefix='Cabin')
159 |
160 | combined = pd.concat([combined,cabin_dummies],axis=1)
161 |
162 | combined.drop('Cabin',axis=1,inplace=True)
163 |
164 | return combined
165 |
166 |
167 | def process_sex(combined):
168 | # mapping string values to numerical one
169 | combined['Sex'] = combined['Sex'].map({'male':1,'female':0})
170 |
171 |
172 | def process_pclass(combined):
173 | # encoding into 3 categories:
174 | pclass_dummies = pd.get_dummies(combined['Pclass'],prefix="Pclass")
175 |
176 | # adding dummy variables
177 | combined = pd.concat([combined,pclass_dummies],axis=1)
178 |
179 | # removing "Pclass"
180 |
181 | combined.drop('Pclass',axis=1,inplace=True)
182 |
183 | return combined
184 |
185 |
186 | def process_ticket(combined):
187 | # a function that extracts each prefix of the ticket,
188 | # returns 'XXX' if no prefix (i.e. the ticket is a digit)
189 | def cleanTicket(ticket):
190 | ticket = ticket.replace('.','')
191 | ticket = ticket.replace('/','')
192 | ticket = ticket.split()
193 | ticket = [t.strip() for t in ticket]
194 | ticket = [t for t in ticket if not t.isdigit()]
195 | if len(ticket) > 0:
196 | return ticket[0]
197 | else:
198 | return 'XXX'
199 |
200 | # Extracting dummy variables from tickets:
201 | combined['Ticket'] = combined['Ticket'].map(cleanTicket)
202 | tickets_dummies = pd.get_dummies(combined['Ticket'],prefix='Ticket')
203 | combined = pd.concat([combined, tickets_dummies],axis=1)
204 | combined.drop('Ticket',inplace=True,axis=1)
205 | return combined
206 |
207 |
208 | def process_family(combined):
209 | # introducing a new feature : the size of families (including the passenger)
210 | combined['FamilySize'] = combined['Parch'] + combined['SibSp'] + 1
211 |
212 | # introducing other features based on the family size
213 | combined['Singleton'] = combined['FamilySize'].map(lambda s : 1 if s == 1 else 0)
214 | combined['SmallFamily'] = combined['FamilySize'].map(lambda s : 1 if 2<=s<=4 else 0)
215 | combined['LargeFamily'] = combined['FamilySize'].map(lambda s : 1 if 5<=s else 0)
216 |
217 |
218 | def scale_all_features(combined):
219 | features = list(combined.columns)
220 | features.remove('PassengerId')
221 | combined[features] = combined[features].apply(lambda x: x/x.max(), axis=0)
222 |
223 |
224 | combined, targets = get_combined_data()
225 | create_titles(combined)
226 | process_age(combined)
227 | combined = process_names(combined)
228 | process_fares(combined)
229 | combined = process_embarked(combined)
230 | combined = process_cabin(combined)
231 | process_sex(combined)
232 | combined = process_pclass(combined)
233 | combined = process_ticket(combined)
234 | process_family(combined)
235 | scale_all_features(combined)
236 |
237 |
238 | def compute_score(clf, X, y,scoring='accuracy'):
239 | xval = cross_val_score(clf, X, y, cv = 5,scoring=scoring)
240 | return np.mean(xval)
241 |
242 |
243 | def recover_train_test_target(combined):
244 | train_set = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/train.csv')
245 |
246 | targets = train_set.Survived
247 | train = combined.ix[0:890]
248 | test = combined.ix[891:]
249 |
250 | return train, test, targets
251 |
252 |
253 | train,test,targets = recover_train_test_target(combined)
254 |
255 | clf = ExtraTreesClassifier(n_estimators=200)
256 | clf = clf.fit(train, targets)
257 |
258 | features = pd.DataFrame()
259 | features['feature'] = train.columns
260 | features['importance'] = clf.feature_importances_
261 | print(features.sort_values(['importance'],ascending=False))
262 |
263 | model = SelectFromModel(clf, prefit=True)
264 | train_new = model.transform(train)
265 | print(train_new.shape)
266 |
267 | test_new = model.transform(test)
268 | print(test_new.shape)
269 |
270 | forest = RandomForestClassifier(max_features='sqrt')
271 |
272 | parameter_grid = {
273 | 'max_depth' : [4,5,6,7,8],
274 | 'n_estimators': [200,210,240,250],
275 | 'criterion': ['gini','entropy']
276 | }
277 |
278 | cross_validation = StratifiedKFold(targets, n_folds=5)
279 |
280 | grid_search = GridSearchCV(forest,
281 | param_grid=parameter_grid,
282 | cv=cross_validation)
283 |
284 | grid_search.fit(train_new, targets)
285 |
286 | print('Best score: {}'.format(grid_search.best_score_))
287 | print('Best parameters: {}'.format(grid_search.best_params_))
288 |
289 | output = grid_search.predict(test_new).astype(int)
290 | df_output = pd.DataFrame()
291 | df_output['PassengerId'] = test['PassengerId']
292 | df_output['Survived'] = output
293 | df_output[['PassengerId','Survived']].to_csv('logistic_regression_predictions.csv',index=False)
--------------------------------------------------------------------------------
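The Title extraction in create_titles() leans on the fixed "Surname, Title. Given names" layout of the Name column. A quick standalone check with a sample name from the dataset:

```python
# Same expression as the lambda in create_titles(), applied to one name.
name = "Braund, Mr. Owen Harris"
title = name.split(',')[1].split('.')[0].strip()
print(title)  # -> "Mr"
```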
/kaggle/zillow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/zillow/__init__.py
--------------------------------------------------------------------------------
/kaggle/zillow/location.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | import seaborn as sns
4 |
5 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False)
6 |
7 | sns.jointplot(x=properties_data.latitude.values, y=properties_data.longitude.values, size=10)
8 | plt.ylabel('Longitude', fontsize=12)
9 | plt.xlabel('Latitude', fontsize=12)
10 |
11 | plt.show()
12 |
--------------------------------------------------------------------------------
/kaggle/zillow/log_error.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 |
4 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False)
5 |
6 | plt.figure(figsize=(10, 10))
7 | plt.scatter(range(train_data.shape[0]), train_data.sort_values(by='logerror').logerror)
8 | plt.xlabel('index')
9 | plt.ylabel('logerror')
10 |
11 | plt.show()
12 |
--------------------------------------------------------------------------------
/kaggle/zillow/log_error_hist.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 |
5 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False)
6 |
7 | plt.figure(figsize=(10, 10))
8 | up_limit = np.percentile(train_data.logerror, 99)
9 | low_limit = np.percentile(train_data.logerror, 1)
10 | tmp_data = train_data[(train_data.logerror < up_limit) & (train_data.logerror > low_limit)]
11 | plt.hist(tmp_data.logerror, bins=50)
12 | plt.xlabel('logerror')
13 |
14 | plt.show()
15 |
--------------------------------------------------------------------------------
/kaggle/zillow/missing_data.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 |
4 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False)
5 |
6 | missing_df = properties_data.isnull().sum(axis=0).reset_index()
7 | missing_df.columns = ['column_name', 'missing_count']
8 | missing_df = missing_df[missing_df.missing_count > 0]
9 | missing_df = missing_df.sort_values(by='missing_count').reset_index(drop=True)
10 | missing_df.missing_count.plot(kind='barh')
11 | plt.yticks(range(missing_df.shape[0]), missing_df.column_name.values)
12 |
13 | plt.show()
14 |
--------------------------------------------------------------------------------
/kaggle/zillow/month.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 |
4 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False)
5 |
6 | plt.figure(figsize=(10, 10))
7 | datetime_data = pd.to_datetime(train_data.transactiondate)
8 | datetime_data.dt.month.value_counts().sort_index(axis=0).plot(kind='bar')
9 | plt.xlabel('month')
10 |
11 | plt.show()
12 |
--------------------------------------------------------------------------------
/kaggle/zillow/train_data_shape.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False)
4 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False)
5 |
6 | print(train_data.shape)
7 | print(properties_data.shape)
8 |
--------------------------------------------------------------------------------
/mnist/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Imports mnist tutorial libraries used by tutorial examples."""
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
--------------------------------------------------------------------------------
/mnist/mnist.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Builds the MNIST network.
17 |
18 | Implements the inference/loss/training pattern for model building.
19 |
20 | 1. inference() - Builds the model as far as is required for running the network
21 | forward to make predictions.
22 | 2. loss() - Adds to the inference model the layers required to generate loss.
23 | 3. training() - Adds to the loss model the Ops required to generate and
24 | apply gradients.
25 |
26 | This file is used by the various "fully_connected_*.py" files and not meant to
27 | be run.
28 | """
29 | from __future__ import absolute_import
30 | from __future__ import division
31 | from __future__ import print_function
32 |
33 | import math
34 |
35 | import tensorflow as tf
36 |
37 | # The MNIST dataset has 10 classes, representing the digits 0 through 9.
38 | NUM_CLASSES = 10
39 |
40 | # The MNIST images are always 28x28 pixels.
41 | IMAGE_SIZE = 28
42 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
43 |
44 |
45 | def inference(images, hidden1_units, hidden2_units):
46 | """Build the MNIST model up to where it may be used for inference.
47 |
48 | Args:
49 | images: Images placeholder, from inputs().
50 | hidden1_units: Size of the first hidden layer.
51 | hidden2_units: Size of the second hidden layer.
52 |
53 | Returns:
54 | softmax_linear: Output tensor with the computed logits.
55 | """
56 | # Hidden 1
57 | with tf.name_scope('hidden1'):
58 | weights = tf.Variable(
59 | tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
60 | stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
61 | name='weights')
62 | biases = tf.Variable(tf.zeros([hidden1_units]),
63 | name='biases')
64 | hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
65 | # Hidden 2
66 | with tf.name_scope('hidden2'):
67 | weights = tf.Variable(
68 | tf.truncated_normal([hidden1_units, hidden2_units],
69 | stddev=1.0 / math.sqrt(float(hidden1_units))),
70 | name='weights')
71 | biases = tf.Variable(tf.zeros([hidden2_units]),
72 | name='biases')
73 | hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
74 | # Linear
75 | with tf.name_scope('softmax_linear'):
76 | weights = tf.Variable(
77 | tf.truncated_normal([hidden2_units, NUM_CLASSES],
78 | stddev=1.0 / math.sqrt(float(hidden2_units))),
79 | name='weights')
80 | biases = tf.Variable(tf.zeros([NUM_CLASSES]),
81 | name='biases')
82 | logits = tf.matmul(hidden2, weights) + biases
83 | return logits
84 |
85 |
86 | def loss(logits, labels):
87 | """Calculates the loss from the logits and the labels.
88 |
89 | Args:
90 | logits: Logits tensor, float - [batch_size, NUM_CLASSES].
91 | labels: Labels tensor, int32 - [batch_size].
92 |
93 | Returns:
94 | loss: Loss tensor of type float.
95 | """
96 | labels = tf.to_int64(labels)
97 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
98 | labels=labels, logits=logits, name='xentropy')
99 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
100 | return loss
101 |
102 |
103 | def training(loss, learning_rate):
104 | """Sets up the training Ops.
105 |
106 | Creates a summarizer to track the loss over time in TensorBoard.
107 |
108 | Creates an optimizer and applies the gradients to all trainable variables.
109 |
110 | The Op returned by this function is what must be passed to the
111 | `sess.run()` call to cause the model to train.
112 |
113 | Args:
114 | loss: Loss tensor, from loss().
115 | learning_rate: The learning rate to use for gradient descent.
116 |
117 | Returns:
118 | train_op: The Op for training.
119 | """
120 | # Add a scalar summary for the snapshot loss.
121 | tf.summary.scalar('loss', loss)
122 | # Create the gradient descent optimizer with the given learning rate.
123 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
124 | # Create a variable to track the global step.
125 | global_step = tf.Variable(0, name='global_step', trainable=False)
126 | # Use the optimizer to apply the gradients that minimize the loss
127 | # (and also increment the global step counter) as a single training step.
128 | train_op = optimizer.minimize(loss, global_step=global_step)
129 | return train_op
130 |
131 |
132 | def evaluation(logits, labels):
133 | """Evaluate the quality of the logits at predicting the label.
134 |
135 | Args:
136 | logits: Logits tensor, float - [batch_size, NUM_CLASSES].
137 | labels: Labels tensor, int32 - [batch_size], with values in the
138 | range [0, NUM_CLASSES).
139 |
140 | Returns:
141 | A scalar int32 tensor with the number of examples (out of batch_size)
142 | that were predicted correctly.
143 | """
144 | # For a classifier model, we can use the in_top_k Op.
145 | # It returns a bool tensor with shape [batch_size] that is true for
146 | # the examples where the label is in the top k (here k=1)
147 | # of all logits for that example.
148 | correct = tf.nn.in_top_k(logits, labels, 1)
149 | # Return the number of true entries.
150 | return tf.reduce_sum(tf.cast(correct, tf.int32))
151 |
--------------------------------------------------------------------------------
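For reference, a hedged sketch of how mnist.py's inference/loss/training/evaluation Ops are wired together by a driver (in the spirit of the repo's fully_connected_feed.py, which is not shown here; the import path and layer sizes are assumptions):

```python
import tensorflow as tf
import mnist  # assumes this file is importable as a top-level module

images = tf.placeholder(tf.float32, [None, mnist.IMAGE_PIXELS])
labels = tf.placeholder(tf.int32, [None])

logits = mnist.inference(images, hidden1_units=128, hidden2_units=32)
loss_op = mnist.loss(logits, labels)
train_op = mnist.training(loss_op, learning_rate=0.01)
eval_op = mnist.evaluation(logits, labels)
# A session then repeatedly runs train_op on fed batches, and eval_op on
# held-out data to count correct predictions.
```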
/mnist/mnist_conv.py:
--------------------------------------------------------------------------------
1 | """A convolutional neural network for MNIST classification.
2 | """
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import argparse
8 | import sys
9 |
10 | # Import data
11 | from tensorflow.examples.tutorials.mnist import input_data
12 |
13 | import tensorflow as tf
14 |
15 | FLAGS = None
16 |
17 |
18 | def weight_variable(shape):
19 | initial = tf.truncated_normal(shape, stddev=0.1)
20 | return tf.Variable(initial)
21 |
22 |
23 | def bias_variable(shape):
24 | initial = tf.constant(0.1, shape=shape)
25 | return tf.Variable(initial)
26 |
27 |
28 | def conv2d(x, W):
29 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
30 |
31 |
32 | def max_pool_2x2(x):
33 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
34 | strides=[1, 2, 2, 1], padding='SAME')
35 |
36 |
37 | def main(_):
38 | mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
39 |
40 | # Create the model
41 | x = tf.placeholder(tf.float32, [None, 784])
42 |
43 | # Define loss and optimizer
44 | y_ = tf.placeholder(tf.float32, [None, 10])
45 |
46 | x_image = tf.reshape(x, [-1, 28, 28, 1])
47 |
48 | W_conv1 = weight_variable([5, 5, 1, 32])
49 | b_conv1 = bias_variable([32])
50 |
51 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
52 | h_pool1 = max_pool_2x2(h_conv1)
53 |
54 | W_conv2 = weight_variable([5, 5, 32, 64])
55 | b_conv2 = bias_variable([64])
56 |
57 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
58 | h_pool2 = max_pool_2x2(h_conv2)
59 |
60 | W_fc1 = weight_variable([7 * 7 * 64, 1024])
61 | b_fc1 = bias_variable([1024])
62 |
63 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
64 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
65 |
66 | keep_prob = tf.placeholder(tf.float32)
67 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
68 |
69 | W_fc2 = weight_variable([1024, 10])
70 | b_fc2 = bias_variable([10])
71 |
72 | y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
73 |
74 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
75 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
76 | correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
77 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
78 |
79 | sess = tf.InteractiveSession()
80 | sess.run(tf.global_variables_initializer())
81 |
82 | for i in range(20000):
83 | batch = mnist.train.next_batch(50)
84 | if i % 100 == 0:
85 | validation_accuracy = accuracy.eval(feed_dict={
86 | x: mnist.validation.images, y_: mnist.validation.labels, keep_prob: 1.0})
87 | print("step %d, validation accuracy %g" % (i, validation_accuracy))
88 | train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
89 |
90 | print("test accuracy %g"%accuracy.eval(feed_dict={
91 | x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
92 |
93 | if __name__ == '__main__':
94 | parser = argparse.ArgumentParser()
95 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
96 | help='Directory for storing input data')
97 | FLAGS, unparsed = parser.parse_known_args()
98 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
99 |
--------------------------------------------------------------------------------
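A standalone sanity check of the 7 * 7 * 64 flatten size used above: each SAME-padded 2x2 max-pool halves the spatial size with ceiling division, and mnist_conv.py applies two of them to 28x28 inputs:

```python
size = 28
for _ in range(2):          # two max_pool_2x2 layers
    size = (size + 1) // 2  # SAME padding: ceil(size / 2)
print(size, size * size * 64)  # -> 7 3136, i.e. 7 * 7 * 64
```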
/mnist/mnist_simple.py:
--------------------------------------------------------------------------------
1 | """Builds the MNIST network.
2 |
3 | Simplify the MNIST model building work.
4 |
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import math
11 |
12 | import tensorflow as tf
13 |
14 | # The MNIST dataset has 10 classes, representing the digits 0 through 9.
15 | NUM_CLASSES = 10
16 |
17 | # The MNIST images are always 28x28 pixels.
18 | IMAGE_SIZE = 28
19 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
20 |
21 |
22 | def variable_summaries(var, name):
23 | """Attach a lot of summaries to a Tensor."""
24 | with tf.name_scope('summaries'):
25 | mean = tf.reduce_mean(var)
26 | tf.scalar_summary('mean/' + name, mean)
27 | with tf.name_scope('stddev'):
28 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
29 | tf.scalar_summary('stddev/' + name, stddev)
30 | tf.scalar_summary('max/' + name, tf.reduce_max(var))
31 | tf.scalar_summary('min/' + name, tf.reduce_min(var))
32 | tf.histogram_summary(name, var)
33 |
34 |
35 | def nn_layer(input_tensor, input_dim, output_dim, layer_name):
36 | with tf.name_scope(layer_name):
37 | weights = tf.Variable(
38 | tf.truncated_normal([input_dim, output_dim],
39 | stddev=1.0 / math.sqrt(float(input_dim))),
40 | name='weights')
41 | variable_summaries(weights, layer_name + '/weights')
42 | biases = tf.Variable(tf.zeros([output_dim]), name='biases')
43 | variable_summaries(biases, layer_name + '/biases')
44 | return tf.nn.relu(tf.matmul(input_tensor, weights) + biases)
45 |
46 |
47 | def inference(images, hidden1_units, hidden2_units):
48 | """Build the MNIST model up to where it may be used for inference.
49 |
50 | Args:
51 | images: Images placeholder, from inputs().
52 | hidden1_units: Size of the first hidden layer.
53 | hidden2_units: Size of the second hidden layer.
54 |
55 | Returns:
56 | softmax_linear: Output tensor with the computed logits.
57 | """
58 | hidden1 = nn_layer(images, IMAGE_PIXELS, hidden1_units, 'layer1')
59 | hidden2 = nn_layer(hidden1, hidden1_units, hidden2_units, 'layer2')
60 | logits = nn_layer(hidden2, hidden2_units, NUM_CLASSES, 'layer3')
61 | return logits
62 |
63 |
64 | def loss(logits, labels):
65 | """Calculates the loss from the logits and the labels.
66 |
67 | Args:
68 | logits: Logits tensor, float - [batch_size, NUM_CLASSES].
69 | labels: Labels tensor, int32 - [batch_size].
70 |
71 | Returns:
72 | loss: Loss tensor of type float.
73 | """
74 | labels = tf.to_int64(labels)
75 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
76 | logits, labels, name='xentropy')
77 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
78 | return loss
79 |
80 |
81 | def training(loss, learning_rate):
82 | """Sets up the training Ops.
83 |
84 | Creates a summarizer to track the loss over time in TensorBoard.
85 |
86 | Creates an optimizer and applies the gradients to all trainable variables.
87 |
88 | The Op returned by this function is what must be passed to the
89 | `sess.run()` call to cause the model to train.
90 |
91 | Args:
92 | loss: Loss tensor, from loss().
93 | learning_rate: The learning rate to use for gradient descent.
94 |
95 | Returns:
96 | train_op: The Op for training.
97 | """
98 | # Add a scalar summary for the snapshot loss.
99 | tf.scalar_summary('loss', loss)
100 | # Create the gradient descent optimizer with the given learning rate.
101 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
102 | # Create a variable to track the global step.
103 | global_step = tf.Variable(0, name='global_step', trainable=False)
104 | # Use the optimizer to apply the gradients that minimize the loss
105 | # (and also increment the global step counter) as a single training step.
106 | train_op = optimizer.minimize(loss, global_step=global_step)
107 | return train_op
108 |
109 |
110 | def evaluation(logits, labels):
111 | """Evaluate the quality of the logits at predicting the label.
112 |
113 | Args:
114 | logits: Logits tensor, float - [batch_size, NUM_CLASSES].
115 | labels: Labels tensor, int32 - [batch_size], with values in the
116 | range [0, NUM_CLASSES).
117 |
118 | Returns:
119 | A scalar int32 tensor with the number of examples (out of batch_size)
120 | that were predicted correctly.
121 | """
122 | # For a classifier model, we can use the in_top_k Op.
123 | # It returns a bool tensor with shape [batch_size] that is true for
124 | # the examples where the label is in the top k (here k=1)
125 | # of all logits for that example.
126 | correct = tf.nn.in_top_k(logits, labels, 1)
127 | # Return the number of true entries.
128 | return tf.reduce_sum(tf.cast(correct, tf.int32))
129 |
--------------------------------------------------------------------------------
/mnist/mnist_softmax.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """A very simple MNIST classifier.
17 |
18 | See extensive documentation at
19 | http://tensorflow.org/tutorials/mnist/beginners/index.md
20 | """
21 | from __future__ import absolute_import
22 | from __future__ import division
23 | from __future__ import print_function
24 |
25 | import argparse
26 | import sys
27 |
28 | # Import data
29 | from tensorflow.examples.tutorials.mnist import input_data
30 |
31 | import tensorflow as tf
32 |
33 | FLAGS = None
34 |
35 |
36 | def main(_):
37 | mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
38 |
39 | # Create the model
40 | x = tf.placeholder(tf.float32, [None, 784])
41 | W = tf.Variable(tf.zeros([784, 10]))
42 | b = tf.Variable(tf.zeros([10]))
43 | y = tf.matmul(x, W) + b
44 |
45 | # Define loss and optimizer
46 | y_ = tf.placeholder(tf.float32, [None, 10])
47 |
48 | # The raw formulation of cross-entropy,
49 | #
50 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
51 | # reduction_indices=[1]))
52 | #
53 | # can be numerically unstable.
54 | #
55 | # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
56 | # outputs of 'y', and then average across the batch.
57 |   cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
58 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
59 |
60 | sess = tf.InteractiveSession()
61 | # Train
62 | tf.global_variables_initializer().run()
63 | for _ in range(1000):
64 | batch_xs, batch_ys = mnist.train.next_batch(100)
65 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
66 |
67 | # Test trained model
68 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
69 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
70 | print(sess.run(accuracy, feed_dict={x: mnist.test.images,
71 | y_: mnist.test.labels}))
72 |
73 | if __name__ == '__main__':
74 | parser = argparse.ArgumentParser()
75 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
76 | help='Directory for storing input data')
77 | FLAGS, unparsed = parser.parse_known_args()
78 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
79 |
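Why the raw formulation above is unstable: with saturated logits, softmax underflows to exactly zero, log(0) is -inf, and 0 * -inf is nan, so one bad example can poison the whole loss. softmax_cross_entropy_with_logits folds the log into the softmax (a log-sum-exp) and stays finite. A self-contained check:

```python
import tensorflow as tf

logits = tf.constant([[1000., 0., -1000.]])   # deliberately saturated
labels = tf.constant([[0., 1., 0.]])

naive = tf.reduce_mean(
    -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)),
                   reduction_indices=[1]))
stable = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))

with tf.Session() as sess:
    print(sess.run(naive))   # nan: softmax underflows, log(0) = -inf, 0 * -inf = nan
    print(sess.run(stable))  # 1000.0: computed in log-space, stays finite
```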
--------------------------------------------------------------------------------
/mnist/mnist_with_summaries.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the 'License');
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an 'AS IS' BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """A simple MNIST classifier which displays summaries in TensorBoard.
16 |
17 | This is an unimpressive MNIST model, but it is a good example of using
18 | tf.name_scope to make a graph legible in the TensorBoard graph explorer, and of
19 | naming summary tags so that they are grouped meaningfully in TensorBoard.
20 |
21 | It demonstrates the functionality of every TensorBoard dashboard.
22 | """
23 | from __future__ import absolute_import
24 | from __future__ import division
25 | from __future__ import print_function
26 |
27 | import argparse
28 | import sys
29 |
30 | import tensorflow as tf
31 |
32 | from tensorflow.examples.tutorials.mnist import input_data
33 |
34 | FLAGS = None
35 |
36 |
37 | def train():
38 | # Import data
39 | mnist = input_data.read_data_sets(FLAGS.data_dir,
40 | one_hot=True,
41 | fake_data=FLAGS.fake_data)
42 |
43 | sess = tf.InteractiveSession()
44 | # Create a multilayer model.
45 |
46 | # Input placeholders
47 | with tf.name_scope('input'):
48 | x = tf.placeholder(tf.float32, [None, 784], name='x-input')
49 | y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
50 |
51 | with tf.name_scope('input_reshape'):
52 | image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
53 | tf.summary.image('input', image_shaped_input, 10)
54 |
55 | # We can't initialize these variables to 0 - the network will get stuck.
56 | def weight_variable(shape):
57 | """Create a weight variable with appropriate initialization."""
58 | initial = tf.truncated_normal(shape, stddev=0.1)
59 | return tf.Variable(initial)
60 |
61 | def bias_variable(shape):
62 | """Create a bias variable with appropriate initialization."""
63 | initial = tf.constant(0.1, shape=shape)
64 | return tf.Variable(initial)
65 |
66 | def variable_summaries(var):
67 | """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
68 | with tf.name_scope('summaries'):
69 | mean = tf.reduce_mean(var)
70 | tf.summary.scalar('mean', mean)
71 | with tf.name_scope('stddev'):
72 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
73 | tf.summary.scalar('stddev', stddev)
74 | tf.summary.scalar('max', tf.reduce_max(var))
75 | tf.summary.scalar('min', tf.reduce_min(var))
76 | tf.summary.histogram('histogram', var)
77 |
78 | def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
79 | """Reusable code for making a simple neural net layer.
80 |
81 | It does a matrix multiply, bias add, and then uses relu to nonlinearize.
82 | It also sets up name scoping so that the resultant graph is easy to read,
83 | and adds a number of summary ops.
84 | """
85 | # Adding a name scope ensures logical grouping of the layers in the graph.
86 | with tf.name_scope(layer_name):
87 | # This Variable will hold the state of the weights for the layer
88 | with tf.name_scope('weights'):
89 | weights = weight_variable([input_dim, output_dim])
90 | variable_summaries(weights)
91 | with tf.name_scope('biases'):
92 | biases = bias_variable([output_dim])
93 | variable_summaries(biases)
94 | with tf.name_scope('Wx_plus_b'):
95 | preactivate = tf.matmul(input_tensor, weights) + biases
96 | tf.summary.histogram('pre_activations', preactivate)
97 | activations = act(preactivate, name='activation')
98 | tf.summary.histogram('activations', activations)
99 | return activations
100 |
101 | hidden1 = nn_layer(x, 784, 500, 'layer1')
102 |
103 | with tf.name_scope('dropout'):
104 | keep_prob = tf.placeholder(tf.float32)
105 | tf.summary.scalar('dropout_keep_probability', keep_prob)
106 | dropped = tf.nn.dropout(hidden1, keep_prob)
107 |
108 | # Do not apply softmax activation yet, see below.
109 | y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity)
110 |
111 | with tf.name_scope('cross_entropy'):
112 | # The raw formulation of cross-entropy,
113 | #
114 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)),
115 | # reduction_indices=[1]))
116 | #
117 | # can be numerically unstable.
118 | #
119 | # So here we use tf.nn.softmax_cross_entropy_with_logits on the
120 | # raw outputs of the nn_layer above, and then average across
121 | # the batch.
122 |       diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
123 | with tf.name_scope('total'):
124 | cross_entropy = tf.reduce_mean(diff)
125 | tf.summary.scalar('cross_entropy', cross_entropy)
126 |
127 | with tf.name_scope('train'):
128 | train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
129 | cross_entropy)
130 |
131 | with tf.name_scope('accuracy'):
132 | with tf.name_scope('correct_prediction'):
133 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
134 | with tf.name_scope('accuracy'):
135 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
136 | tf.summary.scalar('accuracy', accuracy)
137 |
138 | # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
139 | merged = tf.summary.merge_all()
140 |   train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train',
141 |                                        sess.graph)
142 |   test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
143 | tf.global_variables_initializer().run()
144 |
145 | # Train the model, and also write summaries.
146 | # Every 10th step, measure test-set accuracy, and write test summaries
147 | # All other steps, run train_step on training data, & add training summaries
148 |
149 | def feed_dict(train):
150 | """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
151 | if train or FLAGS.fake_data:
152 | xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data)
153 | k = FLAGS.dropout
154 | else:
155 | xs, ys = mnist.test.images, mnist.test.labels
156 | k = 1.0
157 | return {x: xs, y_: ys, keep_prob: k}
158 |
159 | for i in range(FLAGS.max_steps):
160 | if i % 10 == 0: # Record summaries and test-set accuracy
161 | summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
162 | test_writer.add_summary(summary, i)
163 | print('Accuracy at step %s: %s' % (i, acc))
164 | else: # Record train set summaries, and train
165 | if i % 100 == 99: # Record execution stats
166 | run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
167 | run_metadata = tf.RunMetadata()
168 | summary, _ = sess.run([merged, train_step],
169 | feed_dict=feed_dict(True),
170 | options=run_options,
171 | run_metadata=run_metadata)
172 | train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
173 | train_writer.add_summary(summary, i)
174 | print('Adding run metadata for', i)
175 | else: # Record a summary
176 | summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
177 | train_writer.add_summary(summary, i)
178 | train_writer.close()
179 | test_writer.close()
180 |
181 |
182 | def main(_):
183 | if tf.gfile.Exists(FLAGS.log_dir):
184 | tf.gfile.DeleteRecursively(FLAGS.log_dir)
185 | tf.gfile.MakeDirs(FLAGS.log_dir)
186 | train()
187 |
188 |
189 | if __name__ == '__main__':
190 | parser = argparse.ArgumentParser()
191 | parser.add_argument('--fake_data', nargs='?', const=True, type=bool,
192 | default=False,
193 | help='If true, uses fake data for unit testing.')
194 | parser.add_argument('--max_steps', type=int, default=1000,
195 | help='Number of steps to run trainer.')
196 | parser.add_argument('--learning_rate', type=float, default=0.001,
197 | help='Initial learning rate')
198 | parser.add_argument('--dropout', type=float, default=0.9,
199 | help='Keep probability for training dropout.')
200 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
201 | help='Directory for storing input data')
202 | parser.add_argument('--log_dir', type=str, default='/tmp/tensorflow/mnist/logs/mnist_with_summaries',
203 | help='Summaries log directory')
204 | FLAGS, unparsed = parser.parse_known_args()
205 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
206 |
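To see the dashboards this script feeds, run it and then point TensorBoard at the log directory (`tensorboard --logdir /tmp/tensorflow/mnist/logs/mnist_with_summaries`). A minimal standalone sketch of the same summary-writing pattern, assuming `/tmp/demo_logs` is writable:

```python
import tensorflow as tf

w = tf.Variable(tf.truncated_normal([784, 500], stddev=0.1))
with tf.name_scope('summaries'):
    tf.summary.scalar('mean', tf.reduce_mean(w))
    tf.summary.histogram('histogram', w)
merged = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter('/tmp/demo_logs', sess.graph)
    writer.add_summary(sess.run(merged), global_step=0)
    writer.close()
```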
--------------------------------------------------------------------------------
/mnist/mnist_with_summary.py:
--------------------------------------------------------------------------------
1 | """Builds the MNIST network.
2 |
3 | Simplify the MNIST model building work.
4 |
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import math
11 |
12 | import tensorflow as tf
13 |
14 | # The MNIST dataset has 10 classes, representing the digits 0 through 9.
15 | NUM_CLASSES = 10
16 |
17 | # The MNIST images are always 28x28 pixels.
18 | IMAGE_SIZE = 28
19 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
20 |
21 |
22 | def variable_summaries(var, name):
23 | """Attach a lot of summaries to a Tensor."""
24 | with tf.name_scope('summaries'):
25 | mean = tf.reduce_mean(var)
26 | tf.scalar_summary('mean/' + name, mean)
27 | with tf.name_scope('stddev'):
28 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
29 | tf.scalar_summary('stddev/' + name, stddev)
30 | tf.scalar_summary('max/' + name, tf.reduce_max(var))
31 | tf.scalar_summary('min/' + name, tf.reduce_min(var))
32 | tf.histogram_summary(name, var)
33 |
34 |
35 | def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
36 | """Reusable code for making a simple neural net layer.
37 |
38 | It does a matrix multiply, bias add, and then uses relu to nonlinearize.
39 | It also sets up name scoping so that the resultant graph is easy to read,
40 | and adds a number of summary ops.
41 | """
42 | # Adding a name scope ensures logical grouping of the layers in the graph.
43 | with tf.name_scope(layer_name):
44 | # This Variable will hold the state of the weights for the layer
45 | with tf.name_scope('weights'):
46 | weights = weight_variable([input_dim, output_dim])
47 | variable_summaries(weights, layer_name + '/weights')
48 | with tf.name_scope('biases'):
49 | biases = bias_variable([output_dim])
50 | variable_summaries(biases, layer_name + '/biases')
51 | with tf.name_scope('Wx_plus_b'):
52 | preactivate = tf.matmul(input_tensor, weights) + biases
53 | tf.histogram_summary(layer_name + '/pre_activations', preactivate)
54 |       activations = act(preactivate, name='activation')
55 | tf.histogram_summary(layer_name + '/activations', activations)
56 | return activations
57 |
58 | hidden1 = nn_layer(x, 784, 500, 'layer1')
59 |
60 | with tf.name_scope('dropout'):
61 | keep_prob = tf.placeholder(tf.float32)
62 | tf.scalar_summary('dropout_keep_probability', keep_prob)
63 | dropped = tf.nn.dropout(hidden1, keep_prob)
64 |
65 | y = nn_layer(dropped, 500, 10, 'layer2', act=tf.nn.softmax)
66 |
67 | with tf.name_scope('cross_entropy'):
68 | diff = y_ * tf.log(y)
69 | with tf.name_scope('total'):
70 |       cross_entropy = -tf.reduce_mean(diff)  # NB: also averages over the class axis, scaling the true cross-entropy by 1/NUM_CLASSES
71 | tf.scalar_summary('cross entropy', cross_entropy)
72 |
73 | with tf.name_scope('train'):
74 | train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cross_entropy)
75 |
76 | with tf.name_scope('accuracy'):
77 | with tf.name_scope('correct_prediction'):
78 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
79 | with tf.name_scope('accuracy'):
80 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
81 | tf.scalar_summary('accuracy', accuracy)
82 |
83 | # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
84 | merged = tf.merge_all_summaries()
85 | train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train',
86 | sess.graph)
87 | test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test')
88 | tf.initialize_all_variables().run()
89 |
--------------------------------------------------------------------------------
/reading/capsnet/drbc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/reading/capsnet/drbc.pdf
--------------------------------------------------------------------------------
/self_driving/README.md:
--------------------------------------------------------------------------------
1 | Machine Learning
2 | ================
3 |
4 | Welcome to my blog [听雨居](https://limengweb.wordpress.com). It contains a detailed description of the code here.
--------------------------------------------------------------------------------
/self_driving/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/__init__.py
--------------------------------------------------------------------------------
/self_driving/lane_detect/README.md:
--------------------------------------------------------------------------------
1 | [Extracting lane line markings with OpenCV](https://limengweb.wordpress.com/2017/08/19/%E5%9F%BA%E4%BA%8Eopencv%E7%9A%84%E8%BD%A6%E9%81%93%E5%88%86%E5%89%B2%E7%BA%BF%E6%8F%90%E5%8F%96/)
2 |
3 | ![lane detect](lane_detect.png "lane detect")
4 |
5 |
--------------------------------------------------------------------------------
/self_driving/lane_detect/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/lane_detect/__init__.py
--------------------------------------------------------------------------------
/self_driving/lane_detect/comma_ai_lane_detect.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import sys
4 | from self_driving.optical_flow.python import video
5 | from scipy import misc
6 |
7 |
8 | def color_seg(img_raw, red_thresh=0, green_thresh=0, blue_thresh=0):
9 | img_color_mask = np.copy(img_raw)
10 | red_mask = img_raw[:,:,0] < red_thresh
11 | green_mask = img_raw[:,:,1] < green_thresh
12 | rgb_mask = np.logical_or(red_mask, green_mask)
13 | img_color_mask[rgb_mask] = [0,0,0]
14 | return img_color_mask
15 |
16 |
17 | def draw_lines(img, lines, color=[255, 0, 0], thickness=2):
18 | for line in lines:
19 | for x1,y1,x2,y2 in line:
20 | cv2.line(img, (x1, y1), (x2, y2), color, thickness)
21 |
22 |
23 | def draw_lines_extrapolate(img, lines, color=[255, 0, 0], thickness=2):
24 | # Assume lines on left and right have opposite signed slopes
25 | left_xs = []
26 | left_ys = []
27 | right_xs = []
28 | right_ys = []
29 | for line in lines:
30 | for x1, y1, x2, y2 in line:
31 | if x2 - x1 == 0: continue; # Infinite slope
32 | slope = float(y2-y1) / float(x2-x1)
33 | if .5 <= abs(slope) < 1.0: # Discard unlikely slopes
34 | if slope > 0:
35 | left_xs.extend([x1, x2])
36 | left_ys.extend([y1, y2])
37 | else:
38 | right_xs.extend([x1, x2])
39 | right_ys.extend([y1, y2])
40 |
41 |     y1 = img.shape[0] - 120  # 120 px above the bottom of the image
42 | y2 = img.shape[0] / 2 + 10 # Middle of view
43 | y1 = int(y1); y2 = int(y2);
44 |
45 | if left_xs and left_ys:
46 | left_fit = np.polyfit(left_xs, left_ys, 1)
47 | x1_left = (y1 - left_fit[1]) / left_fit[0]
48 | x2_left = (y2 - left_fit[1]) / left_fit[0]
49 | x1_left = int(x1_left); x2_left = int(x2_left);
50 | cv2.line(img, (x1_left, y1), (x2_left, y2), color, thickness)
51 |
52 | if right_xs and right_ys:
53 | right_fit = np.polyfit(right_xs, right_ys, 1)
54 | x1_right = (y1 - right_fit[1]) / right_fit[0]
55 | x2_right = (y2 - right_fit[1]) / right_fit[0]
56 | x1_right = int(x1_right); x2_right = int(x2_right);
57 | cv2.line(img, (x1_right, y1), (x2_right, y2), color, thickness)
58 |
59 |
60 | if __name__ == '__main__':
61 | try:
62 | fn = sys.argv[1]
63 | except IndexError:
64 | fn = 0
65 | cam = video.create_capture(fn)
66 | index = 0
67 | while True:
68 | ret, img = cam.read()
69 |
70 | if img is None:
71 | break
72 |
73 | rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
74 |
75 | gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
76 |
77 | # Define a kernel size and apply Gaussian smoothing
78 | kernel_size = 3
79 | blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
80 |
81 | # Define our parameters for Canny and apply
82 | low_threshold = 50
83 | high_threshold = 150
84 | edges = cv2.Canny(blur_gray, low_threshold, high_threshold)
85 |
86 | # Next we'll create a masked edges image using cv2.fillPoly()
87 | mask = np.zeros_like(edges)
88 | ignore_mask_color = 255
89 |
90 | # This time we are defining a four sided polygon to mask
91 | imshape = img.shape
92 | vertices = np.array([[(0, imshape[0] - 120),
93 | (imshape[1] / 2 - 80, imshape[0] / 2 + 10),
94 | (imshape[1] / 2 + 80, imshape[0] / 2 + 10),
95 | (imshape[1], imshape[0] - 120)]],
96 | dtype=np.int32)
97 | cv2.fillPoly(mask, vertices, ignore_mask_color)
98 | masked_edges = cv2.bitwise_and(edges, mask)
99 |
100 | # Define the Hough transform parameters
101 | # Make a blank the same size as our image to draw on
102 | rho = 1 # distance resolution in pixels of the Hough grid
103 | theta = np.pi / 180 # angular resolution in radians of the Hough grid
104 | threshold = 5 # minimum number of votes (intersections in Hough grid cell)
105 | min_line_length = 10 # minimum number of pixels making up a line
106 | max_line_gap = 2 # maximum gap in pixels between connectable line segments
107 | line_image = np.copy(img) * 0 # creating a blank to draw lines on
108 |
109 | # Run Hough on edge detected image
110 | # Output "lines" is an array containing endpoints of detected line segments
111 | lines = cv2.HoughLinesP(masked_edges, rho, theta, threshold, np.array([]),
112 | min_line_length, max_line_gap)
113 |
114 | if lines is None:
115 | continue
116 |
117 | draw_lines_extrapolate(line_image, lines, thickness=8)
118 | #draw_lines(line_image, lines, thickness=8)
119 |
120 | # Create a "color" binary image to combine with line image
121 | color_edges = np.dstack((masked_edges, masked_edges, masked_edges))
122 |
123 | # Draw the lines on the edge image
124 | lines_edges = cv2.addWeighted(rgb, 1, line_image, 1, 0)
125 | misc.imsave(sys.argv[2] + 'frame_%d.png' % index, lines_edges)
126 | index += 1
127 | cv2.destroyAllWindows()
128 |
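The extrapolation in draw_lines_extrapolate is a first-order least-squares fit, inverted to recover x at two chosen rows. A numpy-only illustration with made-up segment endpoints:

```python
import numpy as np

# Endpoints from a few Hough segments on one lane line (made-up values).
xs = [100, 140, 180, 220]
ys = [200, 224, 248, 272]

m, b = np.polyfit(xs, ys, 1)        # least-squares fit of y = m*x + b
y_bottom, y_mid = 360, 250          # rows where the overlay should start/end
x_bottom = int((y_bottom - b) / m)  # invert the fit: x = (y - b) / m
x_mid = int((y_mid - b) / m)
print((x_bottom, y_bottom), (x_mid, y_mid))  # -> (366, 360) (183, 250)
```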
--------------------------------------------------------------------------------
/self_driving/lane_detect/lane_detect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/lane_detect/lane_detect.png
--------------------------------------------------------------------------------
/self_driving/lane_detect/udacity_lane_detect.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import sys
4 | from self_driving.optical_flow.python import video
5 | from scipy import misc
6 |
7 |
8 | def color_seg(img_raw, red_thresh=0, green_thresh=0, blue_thresh=0):
9 | img_color_mask = np.copy(img_raw)
10 | red_mask = img_raw[:,:,0] < red_thresh
11 | green_mask = img_raw[:,:,1] < green_thresh
12 | rgb_mask = np.logical_or(red_mask, green_mask)
13 | img_color_mask[rgb_mask] = [0,0,0]
14 | return img_color_mask
15 |
16 |
17 | def draw_lines_extrapolate(img, lines, color=[255, 0, 0], thickness=2):
18 | # Assume lines on left and right have opposite signed slopes
19 | left_xs = []
20 | left_ys = []
21 | right_xs = []
22 | right_ys = []
23 | for line in lines:
24 | for x1, y1, x2, y2 in line:
25 | if x2 - x1 == 0: continue; # Infinite slope
26 | slope = float(y2-y1) / float(x2-x1)
27 | if .5 <= abs(slope) < 1.0: # Discard unlikely slopes
28 | if slope > 0:
29 | left_xs.extend([x1, x2])
30 | left_ys.extend([y1, y2])
31 | else:
32 | right_xs.extend([x1, x2])
33 | right_ys.extend([y1, y2])
34 |
35 |     left_fit = np.polyfit(left_xs, left_ys, 1)  # NB: raises if no left segments survive the slope filter (comma_ai_lane_detect.py guards this case)
36 | right_fit = np.polyfit(right_xs, right_ys, 1)
37 |
38 | y1 = img.shape[0] # Bottom of image
39 |     y2 = img.shape[0] / 2 + 50  # Middle of view
40 | x1_left = (y1 - left_fit[1]) / left_fit[0]
41 | x2_left = (y2 - left_fit[1]) / left_fit[0]
42 | x1_right = (y1 - right_fit[1]) / right_fit[0]
43 | x2_right = (y2 - right_fit[1]) / right_fit[0]
44 | y1 = int(y1); y2 = int(y2);
45 | x1_left = int(x1_left); x2_left = int(x2_left);
46 | x1_right = int(x1_right); x2_right = int(x2_right);
47 |
48 | cv2.line(img, (x1_left, y1), (x2_left, y2), color, thickness)
49 | cv2.line(img, (x1_right, y1), (x2_right, y2), color, thickness)
50 |
51 |
52 | if __name__ == '__main__':
53 | try:
54 | fn = sys.argv[1]
55 | except IndexError:
56 | fn = 0
57 | cam = video.create_capture(fn)
58 | index = 0
59 | while True:
60 | ret, img = cam.read()
61 |
62 | if img is None:
63 | break
64 |
65 | rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
66 | masked_img = color_seg(rgb, red_thresh=200, green_thresh=150, blue_thresh=0)
67 |
68 | gray = cv2.cvtColor(masked_img, cv2.COLOR_RGB2GRAY)
69 |
70 | # Define a kernel size and apply Gaussian smoothing
71 | kernel_size = 5
72 | blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
73 |
74 | # Define our parameters for Canny and apply
75 | low_threshold = 50
76 | high_threshold = 150
77 | edges = cv2.Canny(blur_gray, low_threshold, high_threshold)
78 |
79 | # Next we'll create a masked edges image using cv2.fillPoly()
80 | mask = np.zeros_like(edges)
81 | ignore_mask_color = 255
82 |
83 | # This time we are defining a four sided polygon to mask
84 | imshape = img.shape
85 | vertices = np.array([[(0 + 120, imshape[0]),
86 | (imshape[1] / 2 - 15, imshape[0] / 2 + 40),
87 | (imshape[1] / 2 + 15, imshape[0] / 2 + 40),
88 | (imshape[1] - 50, imshape[0])]],
89 | dtype=np.int32)
90 | cv2.fillPoly(mask, vertices, ignore_mask_color)
91 | masked_edges = cv2.bitwise_and(edges, mask)
92 |
93 | # Define the Hough transform parameters
94 | # Make a blank the same size as our image to draw on
95 | rho = 1 # distance resolution in pixels of the Hough grid
96 | theta = np.pi / 180 # angular resolution in radians of the Hough grid
97 | threshold = 5 # minimum number of votes (intersections in Hough grid cell)
98 | min_line_length = 10 # minimum number of pixels making up a line
99 | max_line_gap = 2 # maximum gap in pixels between connectable line segments
100 | line_image = np.copy(img) * 0 # creating a blank to draw lines on
101 |
102 | # Run Hough on edge detected image
103 | # Output "lines" is an array containing endpoints of detected line segments
104 | lines = cv2.HoughLinesP(masked_edges, rho, theta, threshold, np.array([]),
105 | min_line_length, max_line_gap)
106 |
107 | if lines is None:
108 | continue
109 |
110 | draw_lines_extrapolate(line_image, lines, thickness=8)
111 |
112 | # Draw the lines on the edge image
113 | lines_edges = cv2.addWeighted(rgb, 1, line_image, 1, 0)
114 | misc.imsave(sys.argv[2] + 'frame_%d.png' % index, lines_edges)
115 | index += 1
116 | cv2.destroyAllWindows()
117 |
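The trapezoidal region-of-interest mask shared by both lane-detect scripts is easy to try in isolation; everything outside the polygon is zeroed before the Hough transform runs. A toy check on a synthetic all-white edge map (the image size is made up):

```python
import numpy as np
import cv2

edges = np.full((480, 640), 255, np.uint8)   # stand-in for a Canny edge map
h, w = edges.shape
vertices = np.array([[(120, h), (w // 2 - 15, h // 2 + 40),
                      (w // 2 + 15, h // 2 + 40), (w - 50, h)]], np.int32)

mask = np.zeros_like(edges)
cv2.fillPoly(mask, vertices, 255)
masked = cv2.bitwise_and(edges, mask)
print(masked.sum() < edges.sum())    # True: only the trapezoid survives
```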
--------------------------------------------------------------------------------
/self_driving/optical_flow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/optical_flow/__init__.py
--------------------------------------------------------------------------------
/self_driving/optical_flow/python/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/optical_flow/python/__init__.py
--------------------------------------------------------------------------------
/self_driving/optical_flow/python/common.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | '''
4 | This module contains some common routines used by other samples.
5 | '''
6 |
7 | # Python 2/3 compatibility
8 | from __future__ import print_function
9 | import sys
10 | PY3 = sys.version_info[0] == 3
11 |
12 | if PY3:
13 | from functools import reduce
14 |
15 | import numpy as np
16 | import cv2
17 |
18 | # built-in modules
19 | import os
20 | import itertools as it
21 | from contextlib import contextmanager
22 |
23 | image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm']
24 |
25 | class Bunch(object):
26 | def __init__(self, **kw):
27 | self.__dict__.update(kw)
28 | def __str__(self):
29 | return str(self.__dict__)
30 |
31 | def splitfn(fn):
32 | path, fn = os.path.split(fn)
33 | name, ext = os.path.splitext(fn)
34 | return path, name, ext
35 |
36 | def anorm2(a):
37 | return (a*a).sum(-1)
38 | def anorm(a):
39 | return np.sqrt( anorm2(a) )
40 |
41 | def homotrans(H, x, y):
42 | xs = H[0, 0]*x + H[0, 1]*y + H[0, 2]
43 | ys = H[1, 0]*x + H[1, 1]*y + H[1, 2]
44 | s = H[2, 0]*x + H[2, 1]*y + H[2, 2]
45 | return xs/s, ys/s
46 |
47 | def to_rect(a):
48 | a = np.ravel(a)
49 | if len(a) == 2:
50 | a = (0, 0, a[0], a[1])
51 | return np.array(a, np.float64).reshape(2, 2)
52 |
53 | def rect2rect_mtx(src, dst):
54 | src, dst = to_rect(src), to_rect(dst)
55 | cx, cy = (dst[1] - dst[0]) / (src[1] - src[0])
56 | tx, ty = dst[0] - src[0] * (cx, cy)
57 | M = np.float64([[ cx, 0, tx],
58 | [ 0, cy, ty],
59 | [ 0, 0, 1]])
60 | return M
61 |
62 |
63 | def lookat(eye, target, up = (0, 0, 1)):
64 | fwd = np.asarray(target, np.float64) - eye
65 | fwd /= anorm(fwd)
66 | right = np.cross(fwd, up)
67 | right /= anorm(right)
68 | down = np.cross(fwd, right)
69 | R = np.float64([right, down, fwd])
70 | tvec = -np.dot(R, eye)
71 | return R, tvec
72 |
73 | def mtx2rvec(R):
74 | w, u, vt = cv2.SVDecomp(R - np.eye(3))
75 | p = vt[0] + u[:,0]*w[0] # same as np.dot(R, vt[0])
76 | c = np.dot(vt[0], p)
77 | s = np.dot(vt[1], p)
78 | axis = np.cross(vt[0], vt[1])
79 | return axis * np.arctan2(s, c)
80 |
81 | def draw_str(dst, target, s):
82 | x, y = target
83 | cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.LINE_AA)
84 | cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.LINE_AA)
85 |
86 | class Sketcher:
87 | def __init__(self, windowname, dests, colors_func):
88 | self.prev_pt = None
89 | self.windowname = windowname
90 | self.dests = dests
91 | self.colors_func = colors_func
92 | self.dirty = False
93 | self.show()
94 | cv2.setMouseCallback(self.windowname, self.on_mouse)
95 |
96 | def show(self):
97 | cv2.imshow(self.windowname, self.dests[0])
98 |
99 | def on_mouse(self, event, x, y, flags, param):
100 | pt = (x, y)
101 | if event == cv2.EVENT_LBUTTONDOWN:
102 | self.prev_pt = pt
103 | elif event == cv2.EVENT_LBUTTONUP:
104 | self.prev_pt = None
105 |
106 | if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON:
107 | for dst, color in zip(self.dests, self.colors_func()):
108 | cv2.line(dst, self.prev_pt, pt, color, 5)
109 | self.dirty = True
110 | self.prev_pt = pt
111 | self.show()
112 |
113 |
114 | # palette data from matplotlib/_cm.py
115 | _jet_data = {'red': ((0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89,1, 1),
116 | (1, 0.5, 0.5)),
117 | 'green': ((0., 0, 0), (0.125,0, 0), (0.375,1, 1), (0.64,1, 1),
118 | (0.91,0,0), (1, 0, 0)),
119 | 'blue': ((0., 0.5, 0.5), (0.11, 1, 1), (0.34, 1, 1), (0.65,0, 0),
120 | (1, 0, 0))}
121 |
122 | cmap_data = { 'jet' : _jet_data }
123 |
124 | def make_cmap(name, n=256):
125 | data = cmap_data[name]
126 | xs = np.linspace(0.0, 1.0, n)
127 | channels = []
128 | eps = 1e-6
129 | for ch_name in ['blue', 'green', 'red']:
130 | ch_data = data[ch_name]
131 | xp, yp = [], []
132 | for x, y1, y2 in ch_data:
133 | xp += [x, x+eps]
134 | yp += [y1, y2]
135 | ch = np.interp(xs, xp, yp)
136 | channels.append(ch)
137 | return np.uint8(np.array(channels).T*255)
138 |
139 | def nothing(*arg, **kw):
140 | pass
141 |
142 | def clock():
143 | return cv2.getTickCount() / cv2.getTickFrequency()
144 |
145 | @contextmanager
146 | def Timer(msg):
147 | print(msg, '...',)
148 | start = clock()
149 | try:
150 | yield
151 | finally:
152 | print("%.2f ms" % ((clock()-start)*1000))
153 |
154 | class StatValue:
155 | def __init__(self, smooth_coef = 0.5):
156 | self.value = None
157 | self.smooth_coef = smooth_coef
158 | def update(self, v):
159 | if self.value is None:
160 | self.value = v
161 | else:
162 | c = self.smooth_coef
163 | self.value = c * self.value + (1.0-c) * v
164 |
165 | class RectSelector:
166 | def __init__(self, win, callback):
167 | self.win = win
168 | self.callback = callback
169 | cv2.setMouseCallback(win, self.onmouse)
170 | self.drag_start = None
171 | self.drag_rect = None
172 | def onmouse(self, event, x, y, flags, param):
173 | x, y = np.int16([x, y]) # BUG
174 | if event == cv2.EVENT_LBUTTONDOWN:
175 | self.drag_start = (x, y)
176 | return
177 | if self.drag_start:
178 | if flags & cv2.EVENT_FLAG_LBUTTON:
179 | xo, yo = self.drag_start
180 | x0, y0 = np.minimum([xo, yo], [x, y])
181 | x1, y1 = np.maximum([xo, yo], [x, y])
182 | self.drag_rect = None
183 | if x1-x0 > 0 and y1-y0 > 0:
184 | self.drag_rect = (x0, y0, x1, y1)
185 | else:
186 | rect = self.drag_rect
187 | self.drag_start = None
188 | self.drag_rect = None
189 | if rect:
190 | self.callback(rect)
191 | def draw(self, vis):
192 | if not self.drag_rect:
193 | return False
194 | x0, y0, x1, y1 = self.drag_rect
195 | cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2)
196 | return True
197 | @property
198 | def dragging(self):
199 | return self.drag_rect is not None
200 |
201 |
202 | def grouper(n, iterable, fillvalue=None):
203 | '''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx'''
204 | args = [iter(iterable)] * n
205 | if PY3:
206 | output = it.zip_longest(fillvalue=fillvalue, *args)
207 | else:
208 | output = it.izip_longest(fillvalue=fillvalue, *args)
209 | return output
210 |
211 | def mosaic(w, imgs):
212 | '''Make a grid from images.
213 |
214 | w -- number of grid columns
215 | imgs -- images (must have same size and format)
216 | '''
217 | imgs = iter(imgs)
218 | if PY3:
219 | img0 = next(imgs)
220 | else:
221 | img0 = imgs.next()
222 | pad = np.zeros_like(img0)
223 | imgs = it.chain([img0], imgs)
224 | rows = grouper(w, imgs, pad)
225 | return np.vstack(map(np.hstack, rows))
226 |
227 | def getsize(img):
228 | h, w = img.shape[:2]
229 | return w, h
230 |
231 | def mdot(*args):
232 | return reduce(np.dot, args)
233 |
234 | def draw_keypoints(vis, keypoints, color = (0, 255, 255)):
235 | for kp in keypoints:
236 | x, y = kp.pt
237 | cv2.circle(vis, (int(x), int(y)), 2, color)
238 |
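Most of these helpers can be sanity-checked directly. For example, mosaic tiles same-sized images into a grid and Timer reports wall-clock time; a sketch assuming this module is importable as common:

```python
import numpy as np
from common import Timer, mosaic

with Timer('building mosaic'):
    tiles = [np.full((32, 32), i * 16, np.uint8) for i in range(9)]
    grid = mosaic(3, tiles)   # 3 columns -> a 3x3 grid
print(grid.shape)             # (96, 96)
```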
--------------------------------------------------------------------------------
/self_driving/optical_flow/python/opt_flow.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Python 2/3 compatibility
4 | from __future__ import print_function
5 |
6 | import numpy as np
7 | import cv2
8 | import video
9 | from scipy import misc
10 |
11 | NUM_FRAME = 20400
12 |
13 |
14 | def draw_flow(img, flow, step=16):
15 | h, w = img.shape[:2]
16 | y, x = np.mgrid[step / 2:h:step, step / 2:w:step].reshape(2, -1).astype(int)
17 | fx, fy = flow[y, x].T
18 | lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
19 | lines = np.int32(lines + 0.5)
20 | vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
21 | cv2.polylines(vis, lines, 0, (0, 255, 0))
22 | for (x1, y1), (x2, y2) in lines:
23 | cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
24 | return vis
25 |
26 |
27 | def draw_hsv(flow):
28 | h, w = flow.shape[:2]
29 | fx, fy = flow[:, :, 0], flow[:, :, 1]
30 | ang = np.arctan2(fy, fx) + np.pi
31 | v = np.sqrt(fx * fx + fy * fy)
32 | hsv = np.zeros((h, w, 3), np.uint8)
33 | hsv[..., 0] = ang * (180 / np.pi / 2)
34 | hsv[..., 1] = 255
35 | hsv[..., 2] = np.minimum(v * 4, 255)
36 | bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
37 | return bgr
38 |
39 |
40 | def warp_flow(img, flow):
41 | h, w = flow.shape[:2]
42 | flow = -flow
43 | flow[:, :, 0] += np.arange(w)
44 | flow[:, :, 1] += np.arange(h)[:, np.newaxis]
45 | res = cv2.remap(img, flow, None, cv2.INTER_LINEAR)
46 | return res
47 |
48 |
49 | if __name__ == '__main__':
50 | import sys
51 |
52 | print(__doc__)
53 | try:
54 | fn = sys.argv[1]
55 | except IndexError:
56 | fn = 0
57 |
58 | fr = NUM_FRAME
59 | cam = video.create_capture(fn)
60 | ret, prev = cam.read()
61 | fr -= 1
62 | prevgray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
63 | show_hsv = False
64 | show_glitch = False
65 | cur_glitch = prev.copy()
66 | index = 0
67 |
68 | while fr > 0:
69 | ret, img = cam.read()
70 | fr -= 1
71 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
72 |         flow = cv2.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
73 | prevgray = gray
74 |
75 | hsv = draw_hsv(flow)[120:420, 70:-70]
76 | misc.imsave('/usr/local/google/home/limeng/Downloads/speed_est/data/train_data/frame_%d.png' % index, hsv)
77 | cv2.imshow('flow HSV', hsv)
78 | index += 1
79 | cv2.destroyAllWindows()
80 |
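A quick way to see what calcOpticalFlowFarneback returns: synthesize two frames that differ by a known shift and read the flow back. Note that OpenCV 3.x requires an explicit None for the initial-flow argument (older 2.x builds omit it):

```python
import numpy as np
import cv2

prev = np.zeros((64, 64), np.uint8)
prev[20:40, 20:40] = 255
nxt = np.roll(prev, 3, axis=1)   # shift the square 3 px to the right

flow = cv2.calcOpticalFlowFarneback(prev, nxt, None,
                                    0.5, 3, 15, 3, 5, 1.2, 0)
print(flow[30, 20], flow[30, 39])  # x-components should be close to 3 at the edges
```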
--------------------------------------------------------------------------------
/self_driving/optical_flow/python/tst_scene_render.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 |
4 | # Python 2/3 compatibility
5 | from __future__ import print_function
6 |
7 | import numpy as np
8 | from numpy import pi, sin, cos
9 |
10 | import cv2
11 |
12 | defaultSize = 512
13 |
14 | class TestSceneRender():
15 |
16 | def __init__(self, bgImg = None, fgImg = None,
17 | deformation = False, speed = 0.25, **params):
18 | self.time = 0.0
19 | self.timeStep = 1.0 / 30.0
20 | self.foreground = fgImg
21 | self.deformation = deformation
22 | self.speed = speed
23 |
24 | if bgImg is not None:
25 | self.sceneBg = bgImg.copy()
26 | else:
27 |             self.sceneBg = np.zeros((defaultSize, defaultSize), np.uint8)
28 |
29 | self.w = self.sceneBg.shape[0]
30 | self.h = self.sceneBg.shape[1]
31 |
32 | if fgImg is not None:
33 | self.foreground = fgImg.copy()
34 | self.center = self.currentCenter = (int(self.w/2 - fgImg.shape[0]/2), int(self.h/2 - fgImg.shape[1]/2))
35 |
36 | self.xAmpl = self.sceneBg.shape[0] - (self.center[0] + fgImg.shape[0])
37 | self.yAmpl = self.sceneBg.shape[1] - (self.center[1] + fgImg.shape[1])
38 |
39 | self.initialRect = np.array([ (self.h/2, self.w/2), (self.h/2, self.w/2 + self.w/10),
40 | (self.h/2 + self.h/10, self.w/2 + self.w/10), (self.h/2 + self.h/10, self.w/2)]).astype(int)
41 | self.currentRect = self.initialRect
42 |
43 | def getXOffset(self, time):
44 | return int( self.xAmpl*cos(time*self.speed))
45 |
46 |
47 | def getYOffset(self, time):
48 | return int(self.yAmpl*sin(time*self.speed))
49 |
50 | def setInitialRect(self, rect):
51 | self.initialRect = rect
52 |
53 | def getRectInTime(self, time):
54 |
55 | if self.foreground is not None:
56 | tmp = np.array(self.center) + np.array((self.getXOffset(time), self.getYOffset(time)))
57 | x0, y0 = tmp
58 | x1, y1 = tmp + self.foreground.shape[0:2]
59 | return np.array([y0, x0, y1, x1])
60 | else:
61 | x0, y0 = self.initialRect[0] + np.array((self.getXOffset(time), self.getYOffset(time)))
62 | x1, y1 = self.initialRect[2] + np.array((self.getXOffset(time), self.getYOffset(time)))
63 | return np.array([y0, x0, y1, x1])
64 |
65 | def getCurrentRect(self):
66 |
67 | if self.foreground is not None:
68 |
69 | x0 = self.currentCenter[0]
70 | y0 = self.currentCenter[1]
71 | x1 = self.currentCenter[0] + self.foreground.shape[0]
72 | y1 = self.currentCenter[1] + self.foreground.shape[1]
73 | return np.array([y0, x0, y1, x1])
74 | else:
75 | x0, y0 = self.currentRect[0]
76 | x1, y1 = self.currentRect[2]
77 | return np.array([x0, y0, x1, y1])
78 |
79 | def getNextFrame(self):
80 | img = self.sceneBg.copy()
81 |
82 | if self.foreground is not None:
83 | self.currentCenter = (self.center[0] + self.getXOffset(self.time), self.center[1] + self.getYOffset(self.time))
84 | img[self.currentCenter[0]:self.currentCenter[0]+self.foreground.shape[0],
85 | self.currentCenter[1]:self.currentCenter[1]+self.foreground.shape[1]] = self.foreground
86 | else:
87 | self.currentRect = self.initialRect + np.int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed))
88 | if self.deformation:
89 | self.currentRect[1:3] += self.h/20*cos(self.time)
90 | cv2.fillConvexPoly(img, self.currentRect, (0, 0, 255))
91 |
92 | self.time += self.timeStep
93 | return img
94 |
95 | def resetTime(self):
96 | self.time = 0.0
97 |
98 |
99 | if __name__ == '__main__':
100 |
101 | backGr = cv2.imread('../data/graf1.png')
102 | fgr = cv2.imread('../data/box.png')
103 |
104 | render = TestSceneRender(backGr, fgr)
105 |
106 | while True:
107 |
108 | img = render.getNextFrame()
109 | cv2.imshow('img', img)
110 |
111 | ch = cv2.waitKey(3)
112 | if ch == 27:
113 | break
114 | #import os
115 | #print (os.environ['PYTHONPATH'])
116 | cv2.destroyAllWindows()
117 |
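TestSceneRender can also be driven headlessly, which is handy for tests; a sketch using synthetic arrays instead of the ../data images:

```python
import numpy as np
from tst_scene_render import TestSceneRender

bg = np.zeros((240, 320, 3), np.uint8)
fg = np.full((40, 40, 3), 255, np.uint8)

render = TestSceneRender(bg, fg, speed=1)
frame = render.getNextFrame()                # background with the square pasted in
print(frame.shape, render.getCurrentRect())  # (240, 320, 3) [y0 x0 y1 x1]
```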
--------------------------------------------------------------------------------
/self_driving/optical_flow/python/video.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | '''
4 | Video capture sample.
5 |
6 | Sample shows how VideoCapture class can be used to acquire video
7 | frames from a camera or a movie file. Also the sample provides
8 | an example of procedural video generation by an object, mimicking
9 | the VideoCapture interface (see Chess class).
10 |
11 | 'create_capture' is a convenience function for capture creation,
12 | falling back to procedural video in case of error.
13 |
14 | Usage:
15 |     video.py [--shotdir <shot path>] [source0] [source1] ...
16 |
17 | sourceN is an
18 | - integer number for camera capture
19 | - name of video file
20 |  - synth:<params> for procedural video
21 |
22 | Synth examples:
23 | synth:bg=../data/lena.jpg:noise=0.1
24 | synth:class=chess:bg=../data/lena.jpg:noise=0.1:size=640x480
25 |
26 | Keys:
27 | ESC - exit
28 | SPACE - save current frame to directory
29 |
30 | '''
31 |
32 | # Python 2/3 compatibility
33 | from __future__ import print_function
34 |
35 | import numpy as np
36 | from numpy import pi, sin, cos
37 |
38 | import cv2
39 |
40 | # built-in modules
41 | from time import clock
42 |
43 | # local modules
44 | from tst_scene_render import TestSceneRender
45 | import common
46 |
47 |
48 | class VideoSynthBase(object):
49 | def __init__(self, size=None, noise=0.0, bg=None, **params):
50 | self.bg = None
51 | self.frame_size = (640, 480)
52 | if bg is not None:
53 | self.bg = cv2.imread(bg, 1)
54 | h, w = self.bg.shape[:2]
55 | self.frame_size = (w, h)
56 |
57 | if size is not None:
58 | w, h = map(int, size.split('x'))
59 | self.frame_size = (w, h)
60 | self.bg = cv2.resize(self.bg, self.frame_size)
61 |
62 | self.noise = float(noise)
63 |
64 | def render(self, dst):
65 | pass
66 |
67 | def read(self, dst=None):
68 | w, h = self.frame_size
69 |
70 | if self.bg is None:
71 | buf = np.zeros((h, w, 3), np.uint8)
72 | else:
73 | buf = self.bg.copy()
74 |
75 | self.render(buf)
76 |
77 | if self.noise > 0.0:
78 | noise = np.zeros((h, w, 3), np.int8)
79 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise)
80 | buf = cv2.add(buf, noise, dtype=cv2.CV_8UC3)
81 | return True, buf
82 |
83 | def isOpened(self):
84 | return True
85 |
86 |
87 | class Book(VideoSynthBase):
88 | def __init__(self, **kw):
89 | super(Book, self).__init__(**kw)
90 | backGr = cv2.imread('../data/graf1.png')
91 | fgr = cv2.imread('../data/box.png')
92 | self.render = TestSceneRender(backGr, fgr, speed=1)
93 |
94 | def read(self, dst=None):
95 | noise = np.zeros(self.render.sceneBg.shape, np.int8)
96 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise)
97 |
98 | return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3)
99 |
100 |
101 | class Cube(VideoSynthBase):
102 | def __init__(self, **kw):
103 | super(Cube, self).__init__(**kw)
104 | self.render = TestSceneRender(cv2.imread('../data/pca_test1.jpg'), deformation=True, speed=1)
105 |
106 | def read(self, dst=None):
107 | noise = np.zeros(self.render.sceneBg.shape, np.int8)
108 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise)
109 |
110 | return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3)
111 |
112 |
113 | class Chess(VideoSynthBase):
114 | def __init__(self, **kw):
115 | super(Chess, self).__init__(**kw)
116 |
117 | w, h = self.frame_size
118 |
119 | self.grid_size = sx, sy = 10, 7
120 | white_quads = []
121 | black_quads = []
122 | for i, j in np.ndindex(sy, sx):
123 | q = [[j, i, 0], [j + 1, i, 0], [j + 1, i + 1, 0], [j, i + 1, 0]]
124 | [white_quads, black_quads][(i + j) % 2].append(q)
125 | self.white_quads = np.float32(white_quads)
126 | self.black_quads = np.float32(black_quads)
127 |
128 | fx = 0.9
129 | self.K = np.float64([[fx * w, 0, 0.5 * (w - 1)],
130 | [0, fx * w, 0.5 * (h - 1)],
131 | [0.0, 0.0, 1.0]])
132 |
133 | self.dist_coef = np.float64([-0.2, 0.1, 0, 0])
134 | self.t = 0
135 |
136 | def draw_quads(self, img, quads, color=(0, 255, 0)):
137 | img_quads = cv2.projectPoints(quads.reshape(-1, 3), self.rvec, self.tvec, self.K, self.dist_coef)[0]
138 | img_quads.shape = quads.shape[:2] + (2,)
139 | for q in img_quads:
140 | cv2.fillConvexPoly(img, np.int32(q * 4), color, cv2.LINE_AA, shift=2)
141 |
142 | def render(self, dst):
143 | t = self.t
144 | self.t += 1.0 / 30.0
145 |
146 | sx, sy = self.grid_size
147 | center = np.array([0.5 * sx, 0.5 * sy, 0.0])
148 | phi = pi / 3 + sin(t * 3) * pi / 8
149 | c, s = cos(phi), sin(phi)
150 | ofs = np.array([sin(1.2 * t), cos(1.8 * t), 0]) * sx * 0.2
151 | eye_pos = center + np.array([cos(t) * c, sin(t) * c, s]) * 15.0 + ofs
152 | target_pos = center + ofs
153 |
154 | R, self.tvec = common.lookat(eye_pos, target_pos)
155 | self.rvec = common.mtx2rvec(R)
156 |
157 | self.draw_quads(dst, self.white_quads, (245, 245, 245))
158 | self.draw_quads(dst, self.black_quads, (10, 10, 10))
159 |
160 |
161 | classes = dict(chess=Chess, book=Book, cube=Cube)
162 |
163 | presets = dict(
164 | empty='synth:',
165 | lena='synth:bg=../data/lena.jpg:noise=0.1',
166 | chess='synth:class=chess:bg=../data/lena.jpg:noise=0.1:size=640x480',
167 | book='synth:class=book:bg=../data/graf1.png:noise=0.1:size=640x480',
168 | cube='synth:class=cube:bg=../data/pca_test1.jpg:noise=0.0:size=640x480'
169 | )
170 |
171 |
172 | def create_capture(source=0, fallback=presets['chess']):
173 |     '''source: <int> or '<filename>' or 'synth [:<param_name>=<value> [:...]]'
174 | '''
175 | source = str(source).strip()
176 | chunks = source.split(':')
177 | # handle drive letter ('c:', ...)
178 | if len(chunks) > 1 and len(chunks[0]) == 1 and chunks[0].isalpha():
179 | chunks[1] = chunks[0] + ':' + chunks[1]
180 | del chunks[0]
181 |
182 | source = chunks[0]
183 | try:
184 | source = int(source)
185 | except ValueError:
186 | pass
187 | params = dict(s.split('=') for s in chunks[1:])
188 |
189 | cap = None
190 | if source == 'synth':
191 | Class = classes.get(params.get('class', None), VideoSynthBase)
192 | try:
193 | cap = Class(**params)
194 | except:
195 | pass
196 | else:
197 | cap = cv2.VideoCapture(source)
198 | if 'size' in params:
199 | w, h = map(int, params['size'].split('x'))
200 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
201 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)
202 | if cap is None or not cap.isOpened():
203 | print('Warning: unable to open video source: ', source)
204 | if fallback is not None:
205 | return create_capture(fallback, None)
206 | return cap
207 |
208 |
209 | if __name__ == '__main__':
210 | import sys
211 | import getopt
212 |
213 | print(__doc__)
214 |
215 | args, sources = getopt.getopt(sys.argv[1:], '', 'shotdir=')
216 | args = dict(args)
217 | shotdir = args.get('--shotdir', '.')
218 | if len(sources) == 0:
219 | sources = [0]
220 |
221 | caps = list(map(create_capture, sources))
222 | shot_idx = 0
223 | while True:
224 | imgs = []
225 | for i, cap in enumerate(caps):
226 | ret, img = cap.read()
227 | imgs.append(img)
228 | cv2.imshow('capture %d' % i, img)
229 | ch = cv2.waitKey(1)
230 | if ch == 27:
231 | break
232 | if ch == ord(' '):
233 | for i, img in enumerate(imgs):
234 | fn = '%s/shot_%d_%03d.bmp' % (shotdir, i, shot_idx)
235 | cv2.imwrite(fn, img)
236 | print(fn, 'saved')
237 | shot_idx += 1
238 | cv2.destroyAllWindows()
239 |
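create_capture understands the synth grammar from the docstring, so the module can be exercised without a camera, a video file, or the ../data images (run from self_driving/optical_flow/python so the local imports resolve):

```python
import video

cap = video.create_capture('synth:class=chess:noise=0.05')
ret, frame = cap.read()
print(ret, frame.shape)   # True (480, 640, 3)
```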
--------------------------------------------------------------------------------
/self_driving/road_seg/README.md:
--------------------------------------------------------------------------------
1 | [Lane detection with a fully convolutional network](https://limengweb.wordpress.com/2017/05/03/%E5%88%A9%E7%94%A8%E5%85%A8%E5%8D%B7%E7%A7%AF%E7%BD%91%E7%BB%9C%E8%BF%9B%E8%A1%8C%E8%BD%A6%E9%81%93%E8%AF%86%E5%88%AB/)
2 |
3 | ![road seg](road_seg.png "road seg")
4 |
5 |
--------------------------------------------------------------------------------
/self_driving/road_seg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/road_seg/__init__.py
--------------------------------------------------------------------------------
/self_driving/road_seg/convnet.py:
--------------------------------------------------------------------------------
1 | """A full convolutional neural network for road segmentation.
2 |
3 | nohup python -u -m self_driving.road_seg.convnet > self_driving/road_seg/output.txt 2>&1 &
4 |
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import copy
11 | import tensorflow as tf
12 | from utils import kitti
13 | from self_driving.road_seg import fcn8_vgg
14 | import scipy as scp
15 | import scipy.misc
16 | import matplotlib as mpl
17 | import matplotlib.cm
18 |
19 | EPOCH = 5000
20 | N_cl = 2
21 | UU_TRAIN_SET_SIZE = 98 - 9
22 | UU_TEST_SET_SIZE = 9
23 |
24 |
25 | def _compute_cross_entropy_mean(labels, softmax):
26 | cross_entropy = -tf.reduce_sum(
27 | tf.multiply(labels * tf.log(softmax), [1, 1]), reduction_indices=[1])
28 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='xentropy_mean')
29 | return cross_entropy_mean
30 |
31 |
32 | def loss(logits, labels):
33 | with tf.name_scope('loss'):
34 | labels = tf.to_float(tf.reshape(labels, (-1, 2)))
35 | logits = tf.reshape(logits, (-1, 2))
36 | epsilon = 1e-9
37 | softmax = tf.nn.softmax(logits) + epsilon
38 |
39 | cross_entropy_mean = _compute_cross_entropy_mean(labels, softmax)
40 |
41 | enc_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
42 | dec_loss = tf.add_n(tf.get_collection('dec_losses'), name='total_loss')
43 | fc_loss = tf.add_n(tf.get_collection('fc_wlosses'), name='total_loss')
44 | weight_loss = enc_loss + dec_loss + fc_loss
45 |
46 | total_loss = cross_entropy_mean + weight_loss
47 |
48 | losses = {}
49 | losses['total_loss'] = total_loss
50 | losses['xentropy'] = cross_entropy_mean
51 | losses['weight_loss'] = weight_loss
52 |
53 | return losses
54 |
55 |
56 | def f1_score(logits, labels):
57 | true_labels = tf.to_float(tf.reshape(labels, (-1, 2)))[:, 1]
58 | pred = tf.to_float(tf.reshape(logits, [-1]))
59 |
60 | true_positives = tf.reduce_sum(pred * true_labels)
61 | false_positives = tf.reduce_sum(pred * (1 - true_labels))
62 |
63 | precision = true_positives / (true_positives + false_positives)
64 |     recall = true_positives / tf.reduce_sum(true_labels)
65 |
66 | f1_score = 2 * precision * recall / (precision + recall)
67 |
68 | return f1_score, precision, recall
69 |
70 |
71 | def learning_rate(global_step):
72 | starter_learning_rate = 1e-5
73 | learning_rate_1 = tf.train.exponential_decay(
74 | starter_learning_rate, global_step, EPOCH * 0.2, 0.1, staircase=True)
75 | learning_rate_2 = tf.train.exponential_decay(
76 | learning_rate_1, global_step, EPOCH * 0.4, 0.5, staircase=True)
77 | decayed_learning_rate = tf.train.exponential_decay(
78 | learning_rate_2, global_step, EPOCH * 0.6, 0.8, staircase=True)
79 | tf.summary.scalar('learning_rate', decayed_learning_rate)
80 | return decayed_learning_rate
81 |
82 |
83 | def color_image(image, num_classes=20):
84 | norm = mpl.colors.Normalize(vmin=0., vmax=num_classes)
85 | mycm = mpl.cm.get_cmap('Set1')
86 | return mycm(norm(image))
87 |
88 |
89 | def save_output(index, training_image, prediction, label):
90 | prediction_label = 1 - prediction[0]
91 | output_image = copy.copy(training_image)
92 | # Save prediction
93 | up_color = color_image(prediction[0], 2)
94 | scp.misc.imsave('output/decision_%d.png' % index, up_color)
95 | # Merge true positive with training images' green channel
96 | true_positive = prediction_label * label[..., 0][0]
97 | merge_green = (1 - true_positive) * training_image[..., 1] + true_positive * 255
98 | output_image[..., 1] = merge_green
99 | # Merge false positive with training images' red channel
100 | false_positive = prediction_label * label[..., 1][0]
101 | merge_red = (1 - false_positive) * training_image[..., 0] + false_positive * 255
102 | output_image[..., 0] = merge_red
103 | # Merge false negative with training images' blue channel
104 | false_negative = (1 - prediction_label) * label[..., 0][0]
105 | merge_blue = (1 - false_negative) * training_image[..., 2] + false_negative * 255
106 | output_image[..., 2] = merge_blue
107 | # Save images
108 | scp.misc.imsave('merge/decision_%d.png' % index, output_image)
109 |
110 |
111 | def main(_):
112 | kitti_data = kitti.Kitti()
113 |
114 | x_image = tf.placeholder(tf.float32, [1, None, None, 3])
115 | y_ = tf.placeholder(tf.float32, [1, None, None, N_cl])
116 |
117 | tf.summary.image("images", x_image, max_outputs=1)
118 |
119 | vgg_fcn = fcn8_vgg.FCN8VGG(vgg16_npy_path="data/vgg16.npy")
120 | vgg_fcn.build(x_image, debug=True, num_classes=N_cl)
121 |
122 | losses = loss(vgg_fcn.upscore32, y_)
123 | f1, precision, recall = f1_score(vgg_fcn.pred_up, y_)
124 | total_loss = losses['total_loss']
125 | tf.summary.scalar("Loss", total_loss)
126 | tf.summary.scalar("F1 Score", f1)
127 | tf.summary.scalar("Precision", precision)
128 | tf.summary.scalar("Recall", recall)
129 |
130 | global_step = tf.Variable(0, trainable=False)
131 | lr = learning_rate(global_step)
132 | optimizer = tf.train.AdamOptimizer(lr)
133 | grads_and_vars = optimizer.compute_gradients(total_loss)
134 |
135 | grads, tvars = zip(*grads_and_vars)
136 | clipped_grads, norm = tf.clip_by_global_norm(grads, 1.0)
137 | grads_and_vars = zip(clipped_grads, tvars)
138 |
139 | train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
140 |
141 | sess = tf.InteractiveSession()
142 | merged = tf.summary.merge_all()
143 | train_writer = tf.summary.FileWriter('train', sess.graph)
144 | sess.run(tf.global_variables_initializer())
145 |
146 | for i in range(EPOCH):
147 | print("step %d" % i)
148 | t_img, t_label = kitti_data.next_batch(i % UU_TRAIN_SET_SIZE)
149 | pred, _ = sess.run([vgg_fcn.pred_up, train_step],
150 | feed_dict={x_image: t_img, y_: t_label})
151 | if i % 5 == 0:
152 | for test_index in range(UU_TEST_SET_SIZE):
153 | test_img, test_label = kitti_data.next_batch(test_index + UU_TRAIN_SET_SIZE)
154 | pred, summary = sess.run([vgg_fcn.pred_up, merged],
155 | feed_dict={x_image: test_img, y_: test_label})
156 | save_output(test_index + UU_TRAIN_SET_SIZE, test_img[0], pred, test_label)
157 | train_writer.add_summary(summary, i)
158 |
159 |
160 | if __name__ == '__main__':
161 | tf.app.run(main=main)
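The three chained exponential_decay ops in learning_rate multiply out to one closed form, since each stage decays the previous stage's output tensor. A plain-Python sketch of the resulting schedule (EPOCH = 5000 as above):

```python
def staged_lr(step, epoch=5000, base=1e-5):
    lr = base
    lr *= 0.1 ** (step // int(epoch * 0.2))   # tenfold drop every 1000 steps
    lr *= 0.5 ** (step // int(epoch * 0.4))   # halve every 2000 steps
    lr *= 0.8 ** (step // int(epoch * 0.6))   # times 0.8 every 3000 steps
    return lr

for s in (0, 999, 1000, 2000, 3000, 4999):
    print(s, staged_lr(s))
```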
--------------------------------------------------------------------------------
/self_driving/road_seg/road_seg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/road_seg/road_seg.png
--------------------------------------------------------------------------------
/self_driving/road_seg/test_fcn8_vgg.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import scipy as scp
4 | import scipy.misc
5 | import matplotlib as mpl
6 | import matplotlib.cm
7 | import logging
8 | import tensorflow as tf
9 | import sys
10 | import fcn8_vgg
11 |
12 |
13 | def main(_):
14 | logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
15 | level=logging.INFO,
16 | stream=sys.stdout)
17 | img1 = scp.misc.imread("/Users/limeng/Downloads/kitti/data_road/training/image_2/uu_000000.png")
18 | with tf.Session() as sess:
19 | images = tf.placeholder("float")
20 | feed_dict = {images: img1}
21 | batch_images = tf.expand_dims(images, 0)
22 |
23 | vgg_fcn = fcn8_vgg.FCN8VGG(vgg16_npy_path="/Users/limeng/Downloads/vgg16.npy")
24 | with tf.name_scope("content_vgg"):
25 | vgg_fcn.build(batch_images, debug=True, num_classes=2)
26 |
27 | print('Finished building Network.')
28 |
29 | logging.warning("Score weights are initialized random.")
30 | logging.warning("Do not expect meaningful results.")
31 |
32 | logging.info("Start Initializing Variabels.")
33 |
34 | init = tf.global_variables_initializer()
35 | sess.run(init)
36 |
37 | print('Running the Network')
38 | tensors = [vgg_fcn.pred, vgg_fcn.pred_up]
39 | down, up = sess.run(tensors, feed_dict=feed_dict)
40 |
41 | down_color = color_image(down[0], 2)
42 | up_color = color_image(up[0], 2)
43 |
44 | scp.misc.imsave('fcn8_downsampled.png', down_color)
45 | scp.misc.imsave('fcn8_upsampled.png', up_color)
46 |
47 |
48 | def color_image(image, num_classes=20):
49 | norm = mpl.colors.Normalize(vmin=0., vmax=num_classes)
50 | mycm = mpl.cm.get_cmap('Set1')
51 | return mycm(norm(image))
52 |
53 |
54 | if __name__ == '__main__':
55 | tf.app.run(main=main)
56 |
--------------------------------------------------------------------------------
/self_driving/road_seg/unet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from keras.models import Model
3 | from keras.layers import Input, merge
4 | from keras.layers.core import Dropout, Lambda
5 | from keras.layers.convolutional import Convolution2D, Deconvolution2D, MaxPooling2D, ZeroPadding2D
6 |
7 |
8 | N_cl = 2
9 | C = 32
10 |
11 |
12 | def get_model():
13 | # KITTI data set.
14 |     main_input = Input(shape=(3, 1242, 375), dtype='float32', name='kitti_data')  # channels-first; batch dim is implicit in Keras
15 |
16 | conv1_1 = ZeroPadding2D((10, 10))(main_input)
17 | conv1_1 = Convolution2D(64, 3, 3, activation='relu')(conv1_1) # 1260 * 393 * 64
18 | conv1_2 = ZeroPadding2D((1, 1))(conv1_1)
19 | conv1_2 = Convolution2D(64, 3, 3, activation='relu')(conv1_2) # 1260 * 393 * 64
20 | pool1 = ZeroPadding2D((0, 1))(conv1_2) # 1260 * 394 * 64
21 | pool1 = MaxPooling2D((2, 2), strides=(2, 2))(pool1) # 630 * 197 * 64
22 |
23 | conv2_1 = ZeroPadding2D((1, 1))(pool1)
24 | conv2_1 = Convolution2D(128, 3, 3, activation='relu')(conv2_1) # 630 * 197 * 128
25 | conv2_2 = ZeroPadding2D((1, 1))(conv2_1)
26 | conv2_2 = Convolution2D(128, 3, 3, activation='relu')(conv2_2) # 630 * 197 * 128
27 | pool2 = ZeroPadding2D((0, 1))(conv2_2) # 630 * 198 * 128
28 | pool2 = MaxPooling2D((2, 2), strides=(2, 2))(pool2) # 315 * 99 * 128
29 |
30 | conv3_1 = ZeroPadding2D((1, 1))(pool2)
31 | conv3_1 = Convolution2D(256, 3, 3, activation='relu')(conv3_1) # 315 * 99 * 256
32 | conv3_2 = ZeroPadding2D((1, 1))(conv3_1)
33 | conv3_2 = Convolution2D(256, 3, 3, activation='relu')(conv3_2) # 315 * 99 * 256
34 | conv3_3 = ZeroPadding2D((1, 1))(conv3_2)
35 | conv3_3 = Convolution2D(256, 3, 3, activation='relu')(conv3_3) # 315 * 99 * 256
36 | pool3 = ZeroPadding2D((1, 1))(conv3_3) # 316 * 100 * 256
37 | pool3 = MaxPooling2D((2, 2), strides=(2, 2))(pool3) # 158 * 50 * 256
38 |
39 | conv4_1 = ZeroPadding2D((1, 1))(pool3)
40 | conv4_1 = Convolution2D(512, 3, 3, activation='relu')(conv4_1) # 158 * 50 * 512
41 | conv4_2 = ZeroPadding2D((1, 1))(conv4_1)
42 | conv4_2 = Convolution2D(512, 3, 3, activation='relu')(conv4_2) # 158 * 50 * 512
43 | conv4_3 = ZeroPadding2D((1, 1))(conv4_2)
44 | conv4_3 = Convolution2D(512, 3, 3, activation='relu')(conv4_3) # 158 * 50 * 512
45 | pool4 = MaxPooling2D((2, 2), strides=(2, 2))(conv4_3) # 79 * 25 * 512
46 |
47 | conv5_1 = ZeroPadding2D((1, 1))(pool4)
48 | conv5_1 = Convolution2D(512, 3, 3, activation='relu')(conv5_1) # 79 * 25 * 512
49 | conv5_2 = ZeroPadding2D((1, 1))(conv5_1)
50 | conv5_2 = Convolution2D(512, 3, 3, activation='relu')(conv5_2) # 79 * 25 * 512
51 | conv5_3 = ZeroPadding2D((1, 1))(conv5_2)
52 | conv5_3 = Convolution2D(512, 3, 3, activation='relu')(conv5_3) # 79 * 25 * 512
53 | pool5 = ZeroPadding2D((1, 1))(conv5_3) # 80 * 26 * 512
54 | pool5 = MaxPooling2D((2, 2), strides=(2, 2))(pool5) # 40 * 13 * 512
55 |
56 | # FC_conv1
57 | fc6 = ZeroPadding2D((1, 1))(pool5)
58 | fc6 = Convolution2D(1024, 3, 3, activation='relu')(fc6) # 40 * 13 * 1024
59 | fc6 = Dropout(0.5)(fc6)
60 | # FC_conv2
61 | fc7 = Convolution2D(1024, 1, 1, activation='relu')(fc6) # 40 * 13 * 1024
62 | fc7 = Dropout(0.5)(fc7)
63 |
64 | score_fc7 = Convolution2D(N_cl, 1, 1, activation='relu')(fc7) # 40 * 13 * N_cl
65 | score_fc7_up = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 80, 26))(score_fc7)
66 |
67 | # scale pool4 skip for compatibility
68 |     scale_pool4 = Lambda(lambda x: x * 0.01)(pool4)  # tf.mul is not a Keras layer; wrap the scaling in Lambda
69 | scale_pool4 = ZeroPadding2D((1, 1))(scale_pool4) # 80 * 26 * 512
70 | score_pool4 = Convolution2D(N_cl, 1, 1, activation='relu')(scale_pool4) # 80 * 26 * N_cl
71 |     fuse_pool4 = merge([score_fc7_up, score_pool4], mode='sum')
72 | score_pool4_up = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 158, 50))(fuse_pool4)
73 |
74 | # scale pool3 skip for compatibility
75 |     scale_pool3 = Lambda(lambda x: x * 0.0001)(pool3)
76 | score_pool3 = Convolution2D(N_cl, 1, 1, activation='relu')(scale_pool3) # 158 * 50 * N_cl
77 |     fuse_pool3 = merge([score_pool4_up, score_pool3], mode='sum')
78 | score = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 1242, 375))(fuse_pool3)
79 |
80 | model = Model(input=main_input, output=score)
81 |
82 | return model
83 |
--------------------------------------------------------------------------------
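The 0.01 and 0.0001 factors on the pool4 and pool3 skips in unet.py above damp
the shallow score maps before fusion, following the FCN-8s skip-fusion recipe.
A minimal sketch of the Lambda/merge pattern used there, on toy tensors (Keras
1.x functional API assumed; layer sizes are illustrative):

    from keras.layers import Input, merge
    from keras.layers.core import Lambda
    from keras.layers.convolutional import Convolution2D
    from keras.models import Model

    inp = Input(shape=(3, 8, 8))                # toy input
    coarse = Convolution2D(2, 1, 1)(inp)        # coarse score branch
    skip = Lambda(lambda x: x * 0.01)(inp)      # damped skip branch
    skip = Convolution2D(2, 1, 1)(skip)
    fused = merge([coarse, skip], mode='sum')   # element-wise fusion, as in unet.py
    model = Model(input=inp, output=fused)
    model.summary()

--------------------------------------------------------------------------------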
/self_driving/segnet/README.md:
--------------------------------------------------------------------------------
1 | [TensorFlow implementation of a segmentation network](https://limengweb.wordpress.com/2017/08/06/%E5%88%86%E5%89%B2%E7%BD%91%E7%BB%9C%E7%9A%84tensorflow%E5%AE%9E%E7%8E%B0/)
2 |
3 | 
4 |
5 |
--------------------------------------------------------------------------------
/self_driving/segnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/segnet/__init__.py
--------------------------------------------------------------------------------
/self_driving/segnet/evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluate SegNet.
2 |
3 | nohup python -u -m self_driving.segnet.evaluate > self_driving/segnet/output.txt 2>&1 &
4 |
5 | """
6 |
7 | import os
8 | import tensorflow as tf
9 | from utils import camvid
10 | from scipy import misc
11 |
12 | LOG_DIR = 'save'
13 | BATCH_SIZE = 4
14 | EPOCH = 25
15 | IMAGE_HEIGHT = 720
16 | IMAGE_WIDTH = 960
17 | IMAGE_CHANNEL = 3
18 | NUM_CLASSES = 32
19 |
20 | test_dir = "/usr/local/google/home/limeng/Downloads/camvid/val.txt"
21 |
22 | colors = [
23 | [64, 128, 64], # Animal
24 | [192, 0, 128], # Archway
25 | [0, 128, 192], # Bicyclist
26 | [0, 128, 64], # Bridge
27 | [128, 0, 0], # Building
28 | [64, 0, 128], # Car
29 | [64, 0, 192], # CartLuggagePram
30 | [192, 128, 64], # Child
31 | [192, 192, 128], # Column_Pole
32 | [64, 64, 128], # Fence
33 | [128, 0, 192], # LaneMkgsDriv
34 | [192, 0, 64], # LaneMkgsNonDriv
35 | [128, 128, 64], # Misc_Text
36 | [192, 0, 192], # MotorcycleScooter
37 | [128, 64, 64], # OtherMoving
38 | [64, 192, 128], # ParkingBlock
39 | [64, 64, 0], # Pedestrian
40 | [128, 64, 128], # Road
41 | [128, 128, 192], # RoadShoulder
42 | [0, 0, 192], # Sidewalk
43 | [192, 128, 128], # SignSymbol
44 | [128, 128, 128], # Sky
45 | [64, 128, 192], # SUVPickupTruck
46 | [0, 0, 64], # TrafficCone
47 | [0, 64, 64], # TrafficLight
48 | [192, 64, 128], # Train
49 | [128, 128, 0], # Tree
50 | [192, 128, 192], # Truck_Bus
51 | [64, 0, 64], # Tunnel
52 | [192, 192, 0], # VegetationMisc
53 | [0, 0, 0], # Void
54 | [64, 192, 0] # Wall
55 | ]
56 |
57 |
58 | def color_mask(tensor, color):
59 | return tf.reduce_all(tf.equal(tensor, color), 3)
60 |
61 |
62 | def one_hot(labels):
63 | color_tensors = tf.unstack(colors)
64 | channel_tensors = list(map(lambda color: color_mask(labels, color), color_tensors))
65 | one_hot_labels = tf.cast(tf.stack(channel_tensors, 3), 'float32')
66 | return one_hot_labels
67 |
68 |
69 | def rgb(logits):
70 | softmax = tf.nn.softmax(logits)
71 | argmax = tf.argmax(softmax, 3)
72 | color_map = tf.constant(colors, dtype=tf.float32)
73 | n = color_map.get_shape().as_list()[0]
74 | one_hot = tf.one_hot(argmax, n, dtype=tf.float32)
75 | one_hot_matrix = tf.reshape(one_hot, [-1, n])
76 | rgb_matrix = tf.matmul(one_hot_matrix, color_map)
77 | rgb_tensor = tf.reshape(rgb_matrix, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 3])
78 | return tf.cast(rgb_tensor, tf.float32)
79 |
80 |
81 | def main(_):
82 | test_image_filenames, test_label_filenames = camvid.get_filename_list(test_dir)
83 | index = 0
84 |
85 | with tf.Graph().as_default():
86 | with tf.device('/cpu:0'):
87 | config = tf.ConfigProto()
88 | config.gpu_options.allocator_type = 'BFC'
89 | sess = tf.InteractiveSession(config=config)
90 |
91 | images, labels = camvid.CamVidInputs(test_image_filenames,
92 | test_label_filenames,
93 | BATCH_SIZE,
94 | shuffle=False)
95 |
96 | saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "segnet.ckpt.meta"))
97 | saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR))
98 |
99 | graph = tf.get_default_graph()
100 | train_data = graph.get_tensor_by_name("train_data:0")
101 | train_label = graph.get_tensor_by_name("train_labels:0")
102 | is_training = graph.get_tensor_by_name("is_training:0")
103 |             logits = tf.get_collection("logits")[0]
104 |             prediction = rgb(logits)  # build the RGB decode op once, outside the eval loop
105 | # Start the queue runners.
106 | coord = tf.train.Coordinator()
107 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
108 |
109 | for i in range(EPOCH):
110 | image_batch, label_batch = sess.run([images, labels])
111 | feed_dict = {
112 | train_data: image_batch,
113 | train_label: label_batch,
114 | is_training: True
115 | }
116 |                 # the decode op was built once above; reuse it here
117 | pred = sess.run([prediction], feed_dict)[0]
118 | for batch in range(BATCH_SIZE):
119 | misc.imsave('output/segnet_camvid/decision_%d.png' % index, pred[batch])
120 | misc.imsave('output/segnet_camvid/train_%d.png' % index, image_batch[batch])
121 | index += 1
122 |
123 | coord.request_stop()
124 | coord.join(threads)
125 |
126 |
127 | if __name__ == '__main__':
128 | tf.app.run(main=main)
129 |
--------------------------------------------------------------------------------
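The rgb() helper in evaluate.py above decodes per-pixel logits to a color image
without a Python loop: argmax over classes, one-hot encode, then one matmul
against the color table. The same trick on a toy 1x2x2, 2-class logits tensor
(TF 1.x graph API assumed; all values are hypothetical):

    import tensorflow as tf

    colors = tf.constant([[255., 0., 255.],    # class 0 color
                          [255., 0., 0.]])     # class 1 color
    logits = tf.constant([[[[2., 1.], [0., 3.]],
                           [[5., 0.], [1., 4.]]]])        # shape (1, 2, 2, 2)
    argmax = tf.argmax(tf.nn.softmax(logits), 3)          # per-pixel class id
    one_hot = tf.reshape(tf.one_hot(argmax, 2), [-1, 2])  # (4, 2)
    img = tf.reshape(tf.matmul(one_hot, colors), [1, 2, 2, 3])
    with tf.Session() as sess:
        print(sess.run(img))  # each pixel holds the color of its argmax class

--------------------------------------------------------------------------------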
/self_driving/segnet/evaluate_kitti.py:
--------------------------------------------------------------------------------
1 | """Evaluate SegNet.
2 |
3 | nohup python -u -m self_driving.segnet.evaluate_kitti > self_driving/segnet/output.txt 2>&1 &
4 |
5 | """
6 |
7 | import os
8 | import tensorflow as tf
9 | from utils import kitti_segnet
10 | from scipy import misc
11 |
12 | LOG_DIR = 'backup/segnet_kitti'
13 | EPOCH = 237
14 | BATCH_SIZE = 1
15 | IMAGE_HEIGHT = 375
16 | IMAGE_WIDTH = 1242
17 | NUM_CLASSES = 2
18 |
19 | test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/test.txt"
20 |
21 | colors = [
22 | [255, 0, 255],
23 | [255, 0, 0],
24 | ]
25 |
26 | def color_mask(tensor, color):
27 | return tf.reduce_all(tf.equal(tensor, color), 3)
28 |
29 |
30 | def one_hot(labels):
31 | color_tensors = tf.unstack(colors)
32 | channel_tensors = list(map(lambda color: color_mask(labels, color), color_tensors))
33 | one_hot_labels = tf.cast(tf.stack(channel_tensors, 3), 'float32')
34 | return one_hot_labels
35 |
36 |
37 | def rgb(logits):
38 | softmax = tf.nn.softmax(logits)
39 | argmax = tf.argmax(softmax, 3)
40 | color_map = tf.constant(colors, dtype=tf.float32)
41 | n = color_map.get_shape().as_list()[0]
42 | one_hot = tf.one_hot(argmax, n, dtype=tf.float32)
43 | one_hot_matrix = tf.reshape(one_hot, [-1, n])
44 | rgb_matrix = tf.matmul(one_hot_matrix, color_map)
45 | rgb_tensor = tf.reshape(rgb_matrix, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 3])
46 | return tf.cast(rgb_tensor, tf.float32)
47 |
48 |
49 | def main(_):
50 | test_image_filenames, test_label_filenames = kitti_segnet.get_filename_list(test_dir)
51 | index = 0
52 |
53 | with tf.Graph().as_default():
54 | with tf.device('/cpu:0'):
55 | config = tf.ConfigProto()
56 | config.gpu_options.allocator_type = 'BFC'
57 | sess = tf.InteractiveSession(config=config)
58 |
59 | images, labels = kitti_segnet.CamVidInputs(test_image_filenames,
60 | test_label_filenames,
61 | BATCH_SIZE,
62 | shuffle=False)
63 |
64 | saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "segnet.ckpt.meta"))
65 | saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR))
66 |
67 | graph = tf.get_default_graph()
68 | train_data = graph.get_tensor_by_name("train_data:0")
69 | train_label = graph.get_tensor_by_name("train_labels:0")
70 | is_training = graph.get_tensor_by_name("is_training:0")
71 |             logits = tf.get_collection("logits")[0]
72 |             prediction = rgb(logits)  # build the RGB decode op once, outside the eval loop
73 | # Start the queue runners.
74 | coord = tf.train.Coordinator()
75 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
76 |
77 | for i in range(EPOCH):
78 | image_batch, label_batch = sess.run([images, labels])
79 | feed_dict = {
80 | train_data: image_batch,
81 | train_label: label_batch,
82 | is_training: True
83 | }
84 |                 # the decode op was built once above; reuse it here
85 | pred = sess.run([prediction], feed_dict)[0]
86 | for batch in range(BATCH_SIZE):
87 | misc.imsave('output/segnet_kitti/decision_%d.png' % index, pred[batch])
88 | misc.imsave('output/segnet_kitti/train_%d.png' % index, image_batch[batch])
89 | index += 1
90 |
91 | coord.request_stop()
92 | coord.join(threads)
93 |
94 |
95 | if __name__ == '__main__':
96 | tf.app.run(main=main)
97 |
--------------------------------------------------------------------------------
/self_driving/segnet/evaluate_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from tensorflow.python.framework import constant_op
6 | from tensorflow.python.platform import test
7 | from self_driving.segnet import evaluate
8 | import tensorflow as tf
9 |
10 |
11 | class EvaluateTest(test.TestCase):
12 |
13 | def testTfArgmax(self):
14 | '''[[[[ 1. 2.]
15 | [ 3. 4.]
16 | [ 5. 6.]]
17 | [[ 8. 7.]
18 | [ 9. 10.]
19 | [ 11. 12.]]
20 | [[ 13. 14.]
21 | [ 16. 15.]
22 | [ 17. 18.]]]]'''
23 | tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 7.0, 9.0,
24 | 10.0, 11.0, 12.0, 13.0, 14.0, 16.0, 15.0, 17.0, 18.0]
25 | with self.test_session(use_gpu=False) as sess:
26 | t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
27 | argmax_op = tf.argmax(t, axis=3)
28 | argmax = sess.run([argmax_op])
29 | self.assertAllEqual(argmax, [[[[1, 1, 1], [0, 1, 1], [1, 0, 1]]]])
30 |
31 |
32 | def testColorImage(self):
33 |         '''[[[[ 1. 2.]
34 | [ 3. 4.]
35 | [ 5. 6.]]
36 | [[ 8. 7.]
37 | [ 9. 10.]
38 | [ 11. 12.]]
39 | [[ 13. 14.]
40 | [ 16. 15.]
41 | [ 17. 18.]]]]'''
42 | tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 7.0, 9.0,
43 | 10.0, 11.0, 12.0, 13.0, 14.0, 16.0, 15.0, 17.0, 18.0]
44 | with self.test_session(use_gpu=False):
45 | t = constant_op.constant(tensor_input, shape=[3, 3, 1, 2])
46 |             argmax_op = tf.argmax(t, axis=3)
47 | up_color = evaluate.color_image(argmax_op.eval(), 1.)
48 | self.assertAllClose(up_color, [[[[0.60000002, 0.60000002, 0.60000002, 1.]],
49 | [[0.60000002, 0.60000002, 0.60000002, 1.]],
50 | [[0.60000002, 0.60000002, 0.60000002, 1.]]],
51 | [[[0.89411765, 0.10196079, 0.10980392, 1.]],
52 | [[0.60000002, 0.60000002, 0.60000002, 1.]],
53 | [[0.60000002, 0.60000002, 0.60000002, 1.]]],
54 | [[[0.60000002, 0.60000002, 0.60000002, 1.]],
55 | [[0.89411765, 0.10196079, 0.10980392, 1.]],
56 | [[0.60000002, 0.60000002, 0.60000002, 1.]]]])
57 |
58 |
59 | if __name__ == "__main__":
60 | test.main()
--------------------------------------------------------------------------------
/self_driving/segnet/merge_output.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | result_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_camvid"
4 | output_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_camvid/result"
5 |
6 | #result_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_kitti"
7 | #output_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_kitti/result"
8 |
9 | echo "Merge output"
10 |
11 | train_file_names=($(ls -v $result_dir/train_*.png))
12 | output_file_names=($(ls -v $result_dir/decision_*.png))
13 |
14 | output_data_size=${#train_file_names[@]}
15 |
16 | for (( i=0; i<${output_data_size}; i++ ));
17 | do
18 | convert ${output_file_names[$i]} ${train_file_names[$i]} +append $output_dir/frame_$i.png
19 | done
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/self_driving/segnet/prepare_camvid.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from scipy import misc
4 |
5 | data_image_dir = "/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full"
6 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full/image_2"
7 |
8 |
9 | IMAGE_HEIGHT = 720
10 | IMAGE_WIDTH = 960
11 | IMAGE_DEPTH = 3
12 |
13 |
14 | color2index = {
15 | (64, 128, 64) : 0, # Animal
16 | (192, 0, 128) : 1, # Archway
17 | (0, 128, 192) : 2, # Bicyclist
18 | (0, 128, 64) : 3, # Bridge
19 | (128, 0, 0) : 4, # Building
20 | (64, 0, 128) : 5, # Car
21 | (64, 0, 192) : 6, # CartLuggagePram
22 | (192, 128, 64) : 7, # Child
23 | (192, 192, 128) : 8, # Column_Pole
24 |     (64, 64, 128) : 9, # Fence
25 | (128, 0, 192) : 10, # LaneMkgsDriv
26 | (192, 0, 64) : 11, # LaneMkgsNonDriv
27 | (128, 128, 64) : 12, # Misc_Text
28 | (192, 0, 192) : 13, # MotorcycleScooter
29 | (128, 64, 64) : 14, # OtherMoving
30 | (64, 192, 128) : 15, # ParkingBlock
31 | (64, 64, 0) : 16, # Pedestrian
32 | (128, 64, 128) : 17, # Road
33 | (128, 128, 192) : 18, # RoadShoulder
34 | (0, 0, 192) : 19, # Sidewalk
35 | (192, 128, 128) : 20, # SignSymbol
36 | (128, 128, 128) : 21, # Sky
37 | (64, 128, 192) : 22, # SUVPickupTruck
38 | (0, 0, 64) : 23, # TrafficCone
39 | (0, 64, 64) : 24, # TrafficLight
40 | (192, 64, 128) : 25, # Train
41 | (128, 128, 0) : 26, # Tree
42 | (192, 128, 192) : 27, # Truck_Bus
43 | (64, 0, 64) : 28, # Tunnel
44 | (192, 192, 0) : 29, # VegetationMisc
45 | (0, 0, 0) : 30, # Void
46 | (64, 192, 0) : 31, # Wall
47 | }
48 |
49 |
50 | def im2index(im):
51 | height, width, ch = im.shape
52 | assert ch == IMAGE_DEPTH
53 | if height != IMAGE_HEIGHT or width != IMAGE_WIDTH:
54 | print("Size: (%d, %d, %d) cannot be used." % (height, width, ch))
55 | return None
56 |     m_label = np.zeros((height, width), dtype=np.uint8)
57 |     for w in range(width):
58 |         for h in range(height):
59 |             r, g, b = im[h, w, :]
60 |             if (r, g, b) in color2index:
61 |                 m_label[h, w] = color2index[(r, g, b)]
62 |             else:
63 |                 m_label[h, w] = 30  # colors outside the table fall back to Void
64 |     return m_label
65 |
66 |
67 | def convert_to_label_data(file_name):
68 | assert os.path.isfile(file_name), 'Cannot find: %s' % file_name
69 | return im2index(misc.imread(file_name, mode='RGB'))
70 |
71 |
72 | def main():
73 | for file in os.listdir(data_image_dir):
74 | if file.endswith(".png"):
75 |             print("Converting %s" % file)
76 | gt_label = convert_to_label_data(os.path.join(data_image_dir, file))
77 | if gt_label is not None:
78 | misc.imsave(os.path.join(image_dir, file), gt_label)
79 |
80 |
81 | if __name__ == '__main__':
82 | main()
83 |
--------------------------------------------------------------------------------
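im2index() in prepare_camvid.py above visits each of the 720x960 pixels in a
Python loop. A vectorized equivalent that packs each RGB triple into a 24-bit
integer and uses a lookup table is sketched below (numpy only; im2index_fast is
an illustrative name, and the loop's fall-back-to-Void behavior is kept via the
default argument):

    import numpy as np

    def im2index_fast(im, color2index, default=30):
        # Pack R, G, B into one integer per pixel so the dict lookup
        # becomes a single vectorized table lookup.
        packed = ((im[..., 0].astype(np.int32) << 16)
                  | (im[..., 1].astype(np.int32) << 8)
                  | im[..., 2].astype(np.int32))
        table = np.full(1 << 24, default, dtype=np.uint8)  # ~16 MB
        for (r, g, b), idx in color2index.items():
            table[(r << 16) | (g << 8) | b] = idx
        return table[packed]

--------------------------------------------------------------------------------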
/self_driving/segnet/prepare_camvid.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | data_dir="/usr/local/google/home/limeng/Downloads/camvid/701_StillsRaw_full"
4 | label_data_dir="/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full/image_2"
5 |
6 | echo "Camvid dataset"
7 |
8 | rm -f train.txt
9 | touch train.txt
10 |
11 | data_file_names=($(ls $data_dir))
12 | label_file_names=($(ls $label_data_dir))
13 | data_size=${#data_file_names[@]}
14 |
15 | for (( i=0; i<${data_size}; i++ ));
16 | do
17 | echo $data_dir/${data_file_names[$i]} $label_data_dir/${label_file_names[$i]} >> train.txt
18 | done
19 |
--------------------------------------------------------------------------------
/self_driving/segnet/prepare_kitti.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from scipy import misc
4 |
5 | data_image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/data_image_2"
6 | image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/image_2"
7 | data_label_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/data_label_2"
8 | label_output_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/gt_image_2"
9 |
10 |
11 | IMAGE_HEIGHT = 375
12 | IMAGE_WIDTH = 1242
13 | IMAGE_DEPTH = 3
14 |
15 |
16 | # R G B
17 | # 255 0 255 road
18 | # 0 0 255 road
19 | # 255 0 0 valid
20 | # 0 0 0 invalid
21 | color2index = {
22 | (255, 0, 255) : 0,
23 | (0, 0, 255) : 0,
24 | (255, 0, 0) : 1,
25 | (0, 0, 0) : 1,
26 | }
27 |
28 |
29 | def im2index(im):
30 | height, width, ch = im.shape
31 | assert ch == IMAGE_DEPTH
32 | if height != IMAGE_HEIGHT or width != IMAGE_WIDTH:
33 | print("Size: (%d, %d, %d) cannot be used." % (height, width, ch))
34 | return None
35 |     m_label = np.zeros((height, width), dtype=np.uint8)
36 |     for w in range(width):
37 |         for h in range(height):
38 |             r, g, b = im[h, w, :]
39 |             m_label[h, w] = color2index.get((r, g, b), 1)  # defensively map unknown colors to "not road"
40 |     return m_label
41 |
42 |
43 | def convert_to_label_data(file_name):
44 | assert os.path.isfile(file_name), 'Cannot find: %s' % file_name
45 | return im2index(misc.imread(file_name, mode='RGB'))
46 |
47 |
48 | def main():
49 | for file in os.listdir(data_image_dir):
50 | if file.endswith(".png"):
51 |             print("Copying %s" % file)
52 | im = misc.imread(os.path.join(data_image_dir, file), mode='RGB')
53 | height, width, ch = im.shape
54 | assert ch == IMAGE_DEPTH
55 | if height == IMAGE_HEIGHT and width == IMAGE_WIDTH and ch == IMAGE_DEPTH:
56 | misc.imsave(os.path.join(image_dir, file), im)
57 | else:
58 | print("Size: (%d, %d, %d) cannot be used." % (height, width, ch))
59 |
60 | for file in os.listdir(data_label_dir):
61 | if file.endswith(".png"):
62 |             print("Converting %s" % file)
63 | gt_label = convert_to_label_data(os.path.join(data_label_dir, file))
64 | if gt_label is not None:
65 | misc.imsave(os.path.join(label_output_dir, file), gt_label)
66 |
67 |
68 | if __name__ == '__main__':
69 | main()
70 |
--------------------------------------------------------------------------------
/self_driving/segnet/prepare_kitti.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | train_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/training"
4 |
5 | echo "KITTI dataset"
6 |
7 | rm -f train.txt
8 | touch train.txt
9 |
10 | append_data_items()
11 | {
12 | train_file_names=($(ls $train_data_dir/image_2/$1))
13 | gt_file_names=($(ls $train_data_dir/gt_image_2/$1))
14 |
15 | train_data_size=${#train_file_names[@]}
16 |
17 | for (( i=0; i<${train_data_size}; i++ ));
18 | do
19 | echo ${train_file_names[$i]} ${gt_file_names[$i]} >> train.txt
20 | done
21 | }
22 |
23 | append_data_items "um_*"
24 | append_data_items "umm_*"
25 | append_data_items "uu_*"
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/self_driving/segnet/prepare_kitti_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | from scipy import misc
3 |
4 | data_test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/data_image_2"
5 | test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/image_2"
6 |
7 |
8 | IMAGE_HEIGHT = 375
9 | IMAGE_WIDTH = 1242
10 | IMAGE_DEPTH = 3
11 |
12 |
13 | def main():
14 | for file in os.listdir(data_test_dir):
15 | if file.endswith(".png"):
16 |             print("Copying %s" % file)
17 | im = misc.imread(os.path.join(data_test_dir, file), mode='RGB')
18 | height, width, ch = im.shape
19 | assert ch == IMAGE_DEPTH
20 | if height == IMAGE_HEIGHT and width == IMAGE_WIDTH and ch == IMAGE_DEPTH:
21 | misc.imsave(os.path.join(test_dir, file), im)
22 | else:
23 | print("Size: (%d, %d, %d) cannot be used." % (height, width, ch))
24 |
25 |
26 | if __name__ == '__main__':
27 | main()
28 |
--------------------------------------------------------------------------------
/self_driving/segnet/prepare_kitti_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | test_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/image_2"
4 | fake_label_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/testing"
5 |
6 | echo "KITTI test dataset"
7 |
8 | rm -f test.txt
9 | touch test.txt
10 |
11 | test_file_names=($(ls $test_data_dir))
12 | test_data_size=${#test_file_names[@]}
13 |
14 | for (( i=0; i<${test_data_size}; i++ ));
15 | do
16 | echo $test_data_dir/${test_file_names[$i]} $fake_label_data_dir/umm_road_000000.png >> test.txt
17 | done
18 |
--------------------------------------------------------------------------------
/self_driving/segnet/segnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/segnet/segnet.png
--------------------------------------------------------------------------------
/self_driving/segnet/segnet_vgg.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 |
8 | VGG16_NPY_PATH = 'vgg16.npy'
9 | WD = 5e-4
10 |
11 | data_dict = np.load(VGG16_NPY_PATH, encoding='latin1').item()
12 |
13 |
14 | def activation_summary(var):
15 | tensor_name = var.op.name
16 | tf.summary.histogram(tensor_name + '/activations', var)
17 | tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(var))
18 |
19 |
20 | def variable_summaries(var):
21 | if not tf.get_variable_scope().reuse:
22 | name = var.op.name
23 | with tf.name_scope('summaries'):
24 | mean = tf.reduce_mean(var)
25 | tf.summary.scalar(name + '/mean', mean)
26 | with tf.name_scope('stddev'):
27 | stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
28 | tf.summary.scalar(name + '/sttdev', stddev)
29 | tf.summary.scalar(name + '/max', tf.reduce_max(var))
30 | tf.summary.scalar(name + '/min', tf.reduce_min(var))
31 | tf.summary.histogram(name, var)
32 |
33 |
34 | def load_conv_filter(name):
35 | init = tf.constant_initializer(value=data_dict[name][0],
36 | dtype=tf.float32)
37 | shape = data_dict[name][0].shape
38 | var = tf.get_variable(name=name + "_weight", initializer=init, shape=shape)
39 | if not tf.get_variable_scope().reuse:
40 | weight_decay = tf.multiply(tf.nn.l2_loss(var), WD, name=name + '_weight_decay')
41 | tf.add_to_collection('losses', weight_decay)
42 | variable_summaries(var)
43 | return var
44 |
45 |
46 | def get_conv_filter(name, shape):
47 | init = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32)
48 | var = tf.get_variable(name=name + "_weight", initializer=init)
49 | weight_decay = tf.multiply(tf.nn.l2_loss(var), WD, name=name + '_weight_decay')
50 | tf.add_to_collection('losses', weight_decay)
51 | variable_summaries(var)
52 | return var
53 |
54 |
55 | def load_conv_bias(name):
56 | bias_wights = data_dict[name][1]
57 | shape = data_dict[name][1].shape
58 | init = tf.constant_initializer(value=bias_wights,
59 | dtype=tf.float32)
60 | var = tf.get_variable(name=name + "_bias", initializer=init, shape=shape)
61 | variable_summaries(var)
62 | return var
63 |
64 |
65 | def get_conv_bias(name, shape):
66 | init = tf.constant(0.0, shape=shape)
67 | var = tf.get_variable(name=name + "_bias", initializer=init)
68 | variable_summaries(var)
69 | return var
70 |
71 |
72 | def conv2d(bottom, weight):
73 | return tf.nn.conv2d(bottom, weight, strides=[1, 1, 1, 1], padding='SAME')
74 |
75 |
76 | def batch_norm_layer(bottom, is_training, scope):
77 | return tf.cond(is_training,
78 | lambda: tf.contrib.layers.batch_norm(bottom,
79 | is_training=True,
80 | center=False,
81 | scope=scope+"_bn"),
82 | lambda: tf.contrib.layers.batch_norm(bottom,
83 | is_training=False,
84 | center=False,
85 | scope=scope+"_bn",
86 | reuse=True))
87 |
88 |
89 | def conv_layer_with_bn(bottom=None, is_training=True, shape=None, name=None):
90 | with tf.variable_scope(name) as scope:
91 | if shape:
92 | weight = get_conv_filter(name, shape)
93 | bias = get_conv_bias(name, [shape[3]])
94 | else:
95 | weight = load_conv_filter(name)
96 | bias = load_conv_bias(name)
97 | conv = tf.nn.bias_add(conv2d(bottom, weight), bias)
98 | conv = batch_norm_layer(conv, is_training, scope.name)
99 | conv = tf.nn.relu(conv, name="relu")
100 | activation_summary(conv)
101 | return conv
102 |
103 |
104 | def max_pool_with_argmax(bottom):
105 | with tf.name_scope('max_pool_arg_max'):
106 | with tf.device('/gpu:0'):
107 | _, indices = tf.nn.max_pool_with_argmax(
108 | bottom,
109 | ksize=[1, 2, 2, 1],
110 | strides=[1, 2, 2, 1],
111 | padding='SAME')
112 | indices = tf.stop_gradient(indices)
113 | bottom = tf.nn.max_pool(bottom,
114 | ksize=[1, 2, 2, 1],
115 | strides=[1, 2, 2, 1],
116 | padding='SAME')
117 | return bottom, indices
118 |
119 |
120 | def max_unpool_with_argmax(bottom, mask, output_shape=None):
121 | with tf.name_scope('max_unpool_with_argmax'):
122 | ksize = [1, 2, 2, 1]
123 | input_shape = bottom.get_shape().as_list()
124 | # calculation new shape
125 | if output_shape is None:
126 | output_shape = (input_shape[0],
127 | input_shape[1] * ksize[1],
128 | input_shape[2] * ksize[2],
129 | input_shape[3])
130 | # calculation indices for batch, height, width and feature maps
131 | one_like_mask = tf.ones_like(mask)
132 | batch_range = tf.reshape(tf.range(output_shape[0],
133 | dtype=tf.int64),
134 | shape=[input_shape[0], 1, 1, 1])
135 | b = one_like_mask * batch_range
136 | y = mask // (output_shape[2] * output_shape[3])
137 | x = mask % (output_shape[2] * output_shape[3]) // output_shape[3]
138 | feature_range = tf.range(output_shape[3], dtype=tf.int64)
139 | f = one_like_mask * feature_range
140 | # transpose indices & reshape update values to one dimension
141 | updates_size = tf.size(bottom)
142 | indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size]))
143 | values = tf.reshape(bottom, [updates_size])
144 | return tf.scatter_nd(indices, values, output_shape)
145 |
146 |
147 | def inference(images, is_training, num_classes):
148 | training = tf.equal(is_training, tf.constant(True))
149 | conv1_1 = conv_layer_with_bn(bottom=images, is_training=training, name="conv1_1")
150 | conv1_2 = conv_layer_with_bn(bottom=conv1_1, is_training=training, name="conv1_2")
151 | pool1, pool1_indices = max_pool_with_argmax(conv1_2)
152 |
153 | print("pool1: ", pool1.shape)
154 |
155 | conv2_1 = conv_layer_with_bn(bottom=pool1, is_training=training, name="conv2_1")
156 | conv2_2 = conv_layer_with_bn(bottom=conv2_1, is_training=training, name="conv2_2")
157 | pool2, pool2_indices = max_pool_with_argmax(conv2_2)
158 |
159 | print("pool2: ", pool2.shape)
160 |
161 | conv3_1 = conv_layer_with_bn(bottom=pool2, is_training=training, name="conv3_1")
162 | conv3_2 = conv_layer_with_bn(bottom=conv3_1, is_training=training, name="conv3_2")
163 | conv3_3 = conv_layer_with_bn(bottom=conv3_2, is_training=training, name="conv3_3")
164 | pool3, pool3_indices = max_pool_with_argmax(conv3_3)
165 |
166 | print("pool3: ", pool3.shape)
167 |
168 | conv4_1 = conv_layer_with_bn(bottom=pool3, is_training=training, name="conv4_1")
169 | conv4_2 = conv_layer_with_bn(bottom=conv4_1, is_training=training, name="conv4_2")
170 | conv4_3 = conv_layer_with_bn(bottom=conv4_2, is_training=training, name="conv4_3")
171 | pool4, pool4_indices = max_pool_with_argmax(conv4_3)
172 |
173 | print("pool4: ", pool4.shape)
174 |
175 | conv5_1 = conv_layer_with_bn(bottom=pool4, is_training=training, name="conv5_1")
176 | conv5_2 = conv_layer_with_bn(bottom=conv5_1, is_training=training, name="conv5_2")
177 | conv5_3 = conv_layer_with_bn(bottom=conv5_2, is_training=training, name="conv5_3")
178 | pool5, pool5_indices = max_pool_with_argmax(conv5_3)
179 |
180 | print("pool5: ", pool5.shape)
181 |
182 | # End of encoders
183 | # start of decoders
184 |
185 | up_sample_5 = max_unpool_with_argmax(pool5,
186 | pool5_indices,
187 | output_shape=conv5_3.shape)
188 | up_conv5 = conv_layer_with_bn(bottom=up_sample_5,
189 | shape=[3, 3, 512, 512],
190 | is_training=training,
191 | name="up_conv5")
192 |
193 | print("up_conv5: ", up_conv5.shape)
194 |
195 | up_sample_4 = max_unpool_with_argmax(up_conv5,
196 | pool4_indices,
197 | output_shape=conv4_3.shape)
198 | up_conv4 = conv_layer_with_bn(bottom=up_sample_4,
199 | shape=[3, 3, 512, 256],
200 | is_training=training,
201 | name="up_conv4")
202 |
203 | print("up_conv4: ", up_conv4.shape)
204 |
205 | up_sample_3 = max_unpool_with_argmax(up_conv4,
206 | pool3_indices,
207 | output_shape=conv3_3.shape)
208 | up_conv3 = conv_layer_with_bn(bottom=up_sample_3,
209 | shape=[3, 3, 256, 128],
210 | is_training=training,
211 | name="up_conv3")
212 |
213 | print("up_conv3: ", up_conv3.shape)
214 |
215 | up_sample_2 = max_unpool_with_argmax(up_conv3,
216 | pool2_indices,
217 | output_shape=conv2_2.shape)
218 | up_conv2 = conv_layer_with_bn(bottom=up_sample_2,
219 | shape=[3, 3, 128, 64],
220 | is_training=training,
221 | name="up_conv2")
222 |
223 | print("up_conv2: ", up_conv2.shape)
224 |
225 | up_sample_1 = max_unpool_with_argmax(up_conv2,
226 | pool1_indices,
227 | output_shape=conv1_2.shape)
228 | logits = conv_layer_with_bn(bottom=up_sample_1,
229 | shape=[3, 3, 64, num_classes],
230 | is_training=training,
231 | name="up_conv1")
232 |
233 | print("logits: ", logits.shape)
234 | tf.add_to_collection("logits", logits)
235 |
236 | return logits
237 |
--------------------------------------------------------------------------------
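The index arithmetic in max_unpool_with_argmax() above inverts the flat indices
that tf.nn.max_pool_with_argmax produces, which are laid out as
((b * height + y) * width + x) * channels + c. A worked check of the y/x/c
recovery for one index, matching the 3x3x2 pooling case in segnet_vgg_test.py:

    # Flat layout ((b * H + y) * W + x) * C + c, with H = W = 3 and C = 2.
    mask = 8                 # argmax index of the 9.0 entry in the pooling test
    H, W, C = 3, 3, 2
    y = mask // (W * C)      # 8 // 6 = 1
    x = mask % (W * C) // C  # 8 % 6 // 2 = 1
    c = mask % C             # 8 % 2 = 0
    print(y, x, c)           # 1 1 0 -> the value is scattered back to [0, 1, 1, 0]

--------------------------------------------------------------------------------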
/self_driving/segnet/segnet_vgg_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from tensorflow.python.framework import constant_op
6 | from tensorflow.python.platform import test
7 | from self_driving.segnet import segnet_vgg
8 | import tensorflow as tf
9 | import numpy as np
10 |
11 | NUM_CLASSES = 11
12 |
13 | class PoolingTest(test.TestCase):
14 |
15 | def testMaxPoolingWithArgmax(self):
16 | # MaxPoolWithArgMax is implemented only on CUDA.
17 | if not test.is_gpu_available(cuda_only=True):
18 | return
19 | '''[[[[ 1. 2.]
20 | [ 3. 4.]
21 | [ 5. 6.]]
22 | [[ 7. 8.]
23 | [ 9. 10.]
24 | [ 11. 12.]]
25 | [[ 13. 14.]
26 | [ 15. 16.]
27 | [ 17. 18.]]]]'''
28 | tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
29 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]
30 | with self.test_session(use_gpu=True) as sess:
31 | t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
32 | out_op, argmax_op = segnet_vgg.max_pool_with_argmax(t)
33 | out, argmax = sess.run([out_op, argmax_op])
34 | self.assertShapeEqual(out, out_op)
35 | self.assertShapeEqual(argmax, argmax_op)
36 | '''[[[9, 10]
37 | [11, 12]]
38 | [[15, 16]
39 | [17, 18]]]'''
40 | self.assertAllClose(out.ravel(), [9., 10., 11., 12., 15., 16., 17., 18.])
41 | self.assertAllEqual(argmax.ravel(), [8, 9, 10, 11, 14, 15, 16, 17])
42 |
43 | def testMaxUnpoolingWithArgmax(self):
44 | '''[[[[ 1. 2.]
45 | [ 3. 4.]
46 | [ 5. 6.]]
47 | [[ 7. 8.]
48 | [ 9. 10.]
49 | [ 11. 12.]]
50 | [[ 13. 14.]
51 | [ 15. 16.]
52 | [ 17. 18.]]]]'''
53 | tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
54 | 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]
55 | with self.test_session(use_gpu=True) as sess:
56 | t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
57 | out_op, argmax_op = segnet_vgg.max_pool_with_argmax(t)
58 | out_op = segnet_vgg.max_unpool_with_argmax(out_op,
59 | argmax_op,
60 | output_shape=np.int64([1, 3, 3, 2]))
61 | out = sess.run([out_op])
62 | self.assertAllClose(out, [[[[[ 0., 0.],
63 | [ 0., 0.],
64 | [ 0., 0.]],
65 | [[ 0., 0.],
66 | [ 9., 10.],
67 | [ 11., 12.]],
68 | [[ 0., 0.],
69 | [ 15., 16.],
70 | [ 17., 18.]]]]])
71 |
72 | def testGetBias(self):
73 | with self.test_session(use_gpu=True) as sess:
74 |             bias = segnet_vgg.load_conv_bias("conv1_1")
75 | sess.run(tf.global_variables_initializer())
76 | self.assertEqual(bias.get_shape(), [64,])
77 | self.assertAllClose(tf.reduce_sum(bias).eval(), 32.08903503417969)
78 |
79 | def testGetConvFilter(self):
80 | with self.test_session(use_gpu=True) as sess:
81 |             weight = segnet_vgg.load_conv_filter("conv1_1")
82 | sess.run(tf.global_variables_initializer())
83 | self.assertEqual(weight.get_shape(), [3, 3, 3, 64])
84 | self.assertAllClose(tf.reduce_sum(weight).eval(), -4.212705612182617)
85 |
86 | def testConvLayerWithBn(self):
87 | config = tf.ConfigProto()
88 | config.gpu_options.allocator_type = 'BFC'
89 | tensor_input = tf.ones([10, 495, 289, 3], tf.float32)
90 | with self.test_session(use_gpu=True, config = config) as sess:
91 |             conv_op = segnet_vgg.conv_layer_with_bn(bottom=tensor_input, is_training=tf.constant(True), name="conv1_1")
92 | sess.run(tf.global_variables_initializer())
93 | conv_out = sess.run([conv_op])
94 | self.assertEqual(np.array(conv_out).shape, (1, 10, 495, 289, 64))
95 |
96 |     def testUpConvLayerWithBn(self):
97 |         # segnet_vgg has no deconv layer; its decoder path is conv_layer_with_bn with an explicit shape.
98 |         config = tf.ConfigProto()
99 |         config.gpu_options.allocator_type = 'BFC'
100 |         tensor_input = tf.ones([10, 495, 289, 3], tf.float32)
101 |         with self.test_session(use_gpu=True, config = config) as sess:
102 |             conv_op = segnet_vgg.conv_layer_with_bn(bottom=tensor_input, shape=[3, 3, 3, 128],
103 |                                                     is_training=tf.constant(True), name="up_conv_test")
104 | sess.run(tf.global_variables_initializer())
105 | conv_out = sess.run([conv_op])
106 | self.assertEqual(np.array(conv_out).shape, (1, 10, 495, 289, 128))
107 |
108 | def testInference(self):
109 | config = tf.ConfigProto()
110 | config.gpu_options.allocator_type = 'BFC'
111 | train_data = tf.ones([10, 495, 289, 3], tf.float32)
112 | with self.test_session(use_gpu=True, config = config) as sess:
113 |             model_op = segnet_vgg.inference(train_data, tf.constant(True), NUM_CLASSES)
114 | sess.run(tf.global_variables_initializer())
115 | model_out = sess.run([model_op])
116 | self.assertEqual(np.array(model_out).shape, (1, 10, 495, 289, NUM_CLASSES))
117 |
118 |
119 | if __name__ == "__main__":
120 | test.main()
121 |
--------------------------------------------------------------------------------
/self_driving/segnet/train.py:
--------------------------------------------------------------------------------
1 | """Train SegNet with camvid dataset.
2 |
3 | nohup python -u -m self_driving.segnet.train > self_driving/segnet/output.txt 2>&1 &
4 |
5 | """
6 |
7 | import os
8 | import tensorflow as tf
9 | from utils import camvid
10 | import segnet_vgg
11 |
12 | LOG_DIR = 'save'
13 | EPOCH = 6000
14 | BATCH_SIZE = 4
15 | IMAGE_HEIGHT = 720
16 | IMAGE_WIDTH = 960
17 | IMAGE_CHANNEL = 3
18 | NUM_CLASSES = 32
19 | INITIAL_LEARNING_RATE = 0.0001
20 |
21 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/train.txt"
22 | val_dir = "/usr/local/google/home/limeng/Downloads/camvid/val.txt"
23 |
24 |
25 | def loss(logits, labels):
26 | logits = tf.reshape(logits, [-1, NUM_CLASSES])
27 | labels = tf.reshape(labels, [-1])
28 |
29 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
30 | logits=logits, labels=labels, name='cross_entropy')
31 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')
32 | tf.add_to_collection('losses', cross_entropy_mean)
33 | return tf.add_n(tf.get_collection('losses'), name='total_loss')
34 |
35 |
36 | def train(total_loss):
37 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
38 | with tf.control_dependencies(update_ops):
39 | global_step = tf.Variable(0, name='global_step', trainable=False)
40 | learning_rate = tf.train.exponential_decay(
41 | INITIAL_LEARNING_RATE, global_step, EPOCH * 0.2, 0.9, staircase=True)
42 | tf.summary.scalar('total_loss', total_loss)
43 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
44 | return optimizer.minimize(total_loss, global_step=global_step)
45 |
46 |
47 | def main(_):
48 | image_filenames, label_filenames = camvid.get_filename_list(image_dir)
49 | val_image_filenames, val_label_filenames = camvid.get_filename_list(val_dir)
50 |
51 | with tf.Graph().as_default():
52 | with tf.device('/cpu:0'):
53 | # config = tf.ConfigProto(device_count = {'GPU': 0})
54 | config = tf.ConfigProto()
55 | config.gpu_options.allocator_type = 'BFC'
56 | sess = tf.InteractiveSession(config=config)
57 |
58 | train_data = tf.placeholder(tf.float32,
59 | shape=[BATCH_SIZE,
60 | IMAGE_HEIGHT,
61 | IMAGE_WIDTH,
62 | IMAGE_CHANNEL],
63 | name='train_data')
64 | train_labels = tf.placeholder(tf.int64,
65 | shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 1],
66 | name='train_labels')
67 | is_training = tf.placeholder(tf.bool, name='is_training')
68 |
69 | images, labels = camvid.CamVidInputs(image_filenames,
70 | label_filenames,
71 | BATCH_SIZE)
72 | val_images, val_labels = camvid.CamVidInputs(val_image_filenames,
73 | val_label_filenames,
74 | BATCH_SIZE)
75 |
76 | logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES)
77 | total_loss = loss(logits, train_labels)
78 | train_op = train(total_loss)
79 | check_op = tf.add_check_numerics_ops()
80 |
81 | merged_summary_op = tf.summary.merge_all()
82 | summary_writer = tf.summary.FileWriter('train', sess.graph)
83 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
84 | if not os.path.exists(LOG_DIR):
85 | os.makedirs(LOG_DIR)
86 | checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt")
87 |
88 | sess.run(tf.global_variables_initializer())
89 |
90 | # Start the queue runners.
91 | coord = tf.train.Coordinator()
92 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
93 |
94 | for i in range(EPOCH):
95 | image_batch, label_batch = sess.run([images, labels])
96 | feed_dict = {
97 | train_data: image_batch,
98 | train_labels: label_batch,
99 | is_training: True
100 | }
101 | _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op],
102 | feed_dict=feed_dict)
103 | if i % 10 == 0:
104 | print("Start validating...")
105 | val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
106 | loss_value = total_loss.eval(feed_dict={train_data: val_images_batch,
107 | train_labels: val_labels_batch,
108 | is_training: True})
109 | print("Epoch: %d, Loss: %g" % (i, loss_value))
110 | saver.save(sess, checkpoint_path)
111 |                 # write logs at every iteration
112 |                 summary_writer.add_summary(summary, i)
113 |
114 | coord.request_stop()
115 | coord.join(threads)
116 |
117 |
118 | if __name__ == '__main__':
119 | tf.app.run(main=main)
120 |
--------------------------------------------------------------------------------
/self_driving/segnet/train_kitti.py:
--------------------------------------------------------------------------------
1 | """Train SegNet with KITTI dataset.
2 |
3 | nohup python -u -m self_driving.segnet.train_kitti > self_driving/segnet/output.txt 2>&1 &
4 |
5 | """
6 |
7 | import os
8 | import tensorflow as tf
9 | from utils import kitti_segnet
10 | import segnet_vgg
11 |
12 | LOG_DIR = 'save'
13 | EPOCH = 4000
14 | BATCH_SIZE = 1
15 | IMAGE_HEIGHT = 375
16 | IMAGE_WIDTH = 1242
17 | IMAGE_CHANNEL = 3
18 | NUM_CLASSES = 2
19 | INITIAL_LEARNING_RATE = 0.0001
20 |
21 | image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/train.txt"
22 | val_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/train.txt"
23 |
24 |
25 | def loss(logits, labels):
26 | logits = tf.reshape(logits, [-1, NUM_CLASSES])
27 | labels = tf.reshape(labels, [-1])
28 |
29 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
30 | logits=logits, labels=labels, name='cross_entropy')
31 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')
32 | tf.add_to_collection('losses', cross_entropy_mean)
33 | return tf.add_n(tf.get_collection('losses'), name='total_loss')
34 |
35 |
36 | def train(total_loss):
37 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
38 | with tf.control_dependencies(update_ops):
39 | global_step = tf.Variable(0, name='global_step', trainable=False)
40 | learning_rate = tf.train.exponential_decay(
41 | INITIAL_LEARNING_RATE, global_step, EPOCH * 0.2, 0.9, staircase=True)
42 | tf.summary.scalar('total_loss', total_loss)
43 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
44 | return optimizer.minimize(total_loss, global_step=global_step)
45 |
46 |
47 | def main(_):
48 | image_filenames, label_filenames = kitti_segnet.get_filename_list(image_dir)
49 | val_image_filenames, val_label_filenames = kitti_segnet.get_filename_list(val_dir)
50 |
51 | with tf.Graph().as_default():
52 | with tf.device('/cpu:0'):
53 | # config = tf.ConfigProto(device_count = {'GPU': 0})
54 | config = tf.ConfigProto()
55 | config.gpu_options.allocator_type = 'BFC'
56 | sess = tf.InteractiveSession(config=config)
57 |
58 | train_data = tf.placeholder(tf.float32,
59 | shape=[BATCH_SIZE,
60 | IMAGE_HEIGHT,
61 | IMAGE_WIDTH,
62 | IMAGE_CHANNEL],
63 | name='train_data')
64 | train_labels = tf.placeholder(tf.int64,
65 | shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 1],
66 | name='train_labels')
67 | is_training = tf.placeholder(tf.bool, name='is_training')
68 |
69 | images, labels = kitti_segnet.CamVidInputs(image_filenames,
70 | label_filenames,
71 | BATCH_SIZE)
72 | val_images, val_labels = kitti_segnet.CamVidInputs(val_image_filenames,
73 | val_label_filenames,
74 | BATCH_SIZE)
75 |
76 | logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES)
77 | total_loss = loss(logits, train_labels)
78 | train_op = train(total_loss)
79 | check_op = tf.add_check_numerics_ops()
80 |
81 | merged_summary_op = tf.summary.merge_all()
82 | summary_writer = tf.summary.FileWriter('train', sess.graph)
83 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
84 | if not os.path.exists(LOG_DIR):
85 | os.makedirs(LOG_DIR)
86 | checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt")
87 |
88 | sess.run(tf.global_variables_initializer())
89 |
90 | # Start the queue runners.
91 | coord = tf.train.Coordinator()
92 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
93 |
94 | for i in range(EPOCH):
95 | image_batch, label_batch = sess.run([images, labels])
96 | feed_dict = {
97 | train_data: image_batch,
98 | train_labels: label_batch,
99 | is_training: True
100 | }
101 | _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op],
102 | feed_dict=feed_dict)
103 | if i % 10 == 0:
104 | print("Start validating...")
105 | val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
106 | loss_value = total_loss.eval(feed_dict={train_data: val_images_batch,
107 | train_labels: val_labels_batch,
108 | is_training: True})
109 | print("Epoch: %d, Loss: %g" % (i, loss_value))
110 | saver.save(sess, checkpoint_path)
111 |                 # write logs at every iteration
112 |                 summary_writer.add_summary(summary, i)
113 |
114 | coord.request_stop()
115 | coord.join(threads)
116 |
117 |
118 | if __name__ == '__main__':
119 | tf.app.run(main=main)
120 |
--------------------------------------------------------------------------------
/self_driving/steering/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/steering/__init__.py
--------------------------------------------------------------------------------
/self_driving/steering/driving_data.py:
--------------------------------------------------------------------------------
1 | import scipy.misc
2 | import random
3 |
4 | xs = []
5 | ys = []
6 |
7 | #points to the end of the last batch
8 | train_batch_pointer = 0
9 | val_batch_pointer = 0
10 |
11 | #read data.txt
12 | with open("/usr/local/google/home/limeng/Downloads/nvida/data.txt") as f:
13 | for line in f:
14 | xs.append("/usr/local/google/home/limeng/Downloads/nvida/driving_dataset/" + line.split()[0])
15 |         # the paper by Nvidia uses the inverse of the turning radius,
16 |         # but steering wheel angle is proportional to the inverse of turning radius,
17 |         # so the steering wheel angle in radians is used as the output
18 | ys.append(float(line.split()[1]) * scipy.pi / 180)
19 |
20 | #get number of images
21 | num_images = len(xs)
22 |
23 | #shuffle list of images
24 | c = list(zip(xs, ys))
25 | random.shuffle(c)
26 | xs, ys = zip(*c)
27 |
28 | train_xs = xs[:int(len(xs) * 0.8)]
29 | train_ys = ys[:int(len(xs) * 0.8)]
30 |
31 | val_xs = xs[-int(len(xs) * 0.2):]
32 | val_ys = ys[-int(len(xs) * 0.2):]
33 |
34 | num_train_images = len(train_xs)
35 | num_val_images = len(val_xs)
36 |
37 |
38 | def load_train_batch(batch_size):
39 | global train_batch_pointer
40 | x_out = []
41 | y_out = []
42 | for i in range(0, batch_size):
43 | x_out.append(
44 | scipy.misc.imresize(
45 | scipy.misc.imread(
46 | train_xs[(train_batch_pointer + i) % num_train_images]), [66, 200]) / 255.0)
47 | y_out.append([train_ys[(train_batch_pointer + i) % num_train_images]])
48 | train_batch_pointer += batch_size
49 | return x_out, y_out
50 |
51 |
52 | def load_val_batch(batch_size):
53 | global val_batch_pointer
54 | x_out = []
55 | y_out = []
56 | for i in range(0, batch_size):
57 | x_out.append(
58 | scipy.misc.imresize(
59 | scipy.misc.imread(
60 | val_xs[(val_batch_pointer + i) % num_val_images]), [66, 200]) / 255.0)
61 | y_out.append([val_ys[(val_batch_pointer + i) % num_val_images]])
62 | val_batch_pointer += batch_size
63 | return x_out, y_out
64 |
--------------------------------------------------------------------------------
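The label conversion in driving_data.py above stores steering angles in radians,
so a "90" in data.txt becomes 90 * pi / 180, roughly 1.5708, in ys. A one-line
check of the same arithmetic (math.pi stands in for scipy.pi; the sample record
is hypothetical):

    import math

    line = "0.jpg 90"  # hypothetical data.txt record: filename, angle in degrees
    print(float(line.split()[1]) * math.pi / 180)  # 1.5707963..., i.e. pi / 2

--------------------------------------------------------------------------------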
/self_driving/steering/evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluate the steering model.
2 | 
3 | nohup python -u -m self_driving.steering.evaluate > self_driving/steering/output.txt 2>&1 &
4 |
5 | """
6 |
7 | import os
8 | import tensorflow as tf
9 | from utils import udacity_data
10 |
11 | LOG_DIR = 'save'
12 | BATCH_SIZE = 128
13 | EPOCH = udacity_data.NUM_VAL_IMAGES // BATCH_SIZE  # integer division so range() gets an int
14 | OUTPUT = "steering_out.txt"
15 |
16 |
17 | def main(_):
18 | udacity_data.read_data(shuffe=False)
19 | with tf.Graph().as_default():
20 | config = tf.ConfigProto()
21 | config.gpu_options.allocator_type = 'BFC'
22 | sess = tf.InteractiveSession(config=config)
23 |
24 | saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "steering.ckpt.meta"))
25 | saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR))
26 |
27 | graph = tf.get_default_graph()
28 | x_image = graph.get_tensor_by_name("x_image:0")
29 | y_label = graph.get_tensor_by_name("y_label:0")
30 | keep_prob = graph.get_tensor_by_name("keep_prob:0")
31 | logits = tf.get_collection("logits")[0]
32 |
33 | if os.path.exists(OUTPUT):
34 | os.remove(OUTPUT)
35 |
36 | for epoch in range(EPOCH):
37 | image_batch, label_batch = udacity_data.load_val_batch(BATCH_SIZE)
38 | feed_dict = {
39 | x_image: image_batch,
40 | y_label: label_batch,
41 |             keep_prob: 1.0  # disable dropout at evaluation time
42 | }
43 | prediction = sess.run([logits], feed_dict)
44 | with open(OUTPUT, 'a') as out:
45 | for batch in range(BATCH_SIZE):
46 | out.write("%s %.10f\n" % (udacity_data.val_xs[epoch * BATCH_SIZE + batch],
47 | prediction[0][batch]))
48 |
49 |
50 | if __name__ == '__main__':
51 | tf.app.run(main=main)
52 |
--------------------------------------------------------------------------------
/self_driving/steering/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def weight_variable(shape):
5 | initial = tf.truncated_normal(shape, stddev=0.1)
6 | return tf.Variable(initial)
7 |
8 |
9 | def bias_variable(shape):
10 | initial = tf.constant(0.1, shape=shape)
11 | return tf.Variable(initial)
12 |
13 |
14 | def conv2d(x, W, stride):
15 | return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='VALID')
16 |
17 |
18 | def inference(x_image, keep_prob, is_training=True):
19 | #first convolutional layer
20 | W_conv1 = weight_variable([5, 5, 3, 24])
21 | b_conv1 = bias_variable([24])
22 |
23 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 2) + b_conv1, 'relu_conv1')
24 | h_conv1_norm = tf.contrib.layers.batch_norm(h_conv1, is_training=is_training, trainable=True)
25 |
26 | #second convolutional layer
27 | W_conv2 = weight_variable([5, 5, 24, 36])
28 | b_conv2 = bias_variable([36])
29 |
30 | h_conv2 = tf.nn.relu(conv2d(h_conv1_norm, W_conv2, 2) + b_conv2, 'relu_conv2')
31 | h_conv2_norm = tf.contrib.layers.batch_norm(h_conv2, is_training=is_training, trainable=True)
32 |
33 | #third convolutional layer
34 | W_conv3 = weight_variable([5, 5, 36, 48])
35 | b_conv3 = bias_variable([48])
36 |
37 | h_conv3 = tf.nn.relu(conv2d(h_conv2_norm, W_conv3, 2) + b_conv3, 'relu_conv3')
38 | h_conv3_norm = tf.contrib.layers.batch_norm(h_conv3, is_training=is_training, trainable=True)
39 |
40 | #fourth convolutional layer
41 | W_conv4 = weight_variable([3, 3, 48, 64])
42 | b_conv4 = bias_variable([64])
43 |
44 | h_conv4 = tf.nn.relu(conv2d(h_conv3_norm, W_conv4, 1) + b_conv4, 'relu_conv4')
45 | h_conv4_norm = tf.contrib.layers.batch_norm(h_conv4, is_training=is_training, trainable=True)
46 |
47 | #fifth convolutional layer
48 | W_conv5 = weight_variable([3, 3, 64, 64])
49 | b_conv5 = bias_variable([64])
50 |
51 | h_conv5 = tf.nn.relu(conv2d(h_conv4_norm, W_conv5, 1) + b_conv5, 'relu_conv5')
52 | h_conv5_norm = tf.contrib.layers.batch_norm(h_conv5, is_training=is_training, trainable=True)
53 |
54 | #FCL 1
55 | W_fc1 = weight_variable([1152, 1164])
56 | b_fc1 = bias_variable([1164])
57 |
58 | h_conv5_flat = tf.reshape(h_conv5_norm, [-1, 1152])
59 | h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1, 'relu_fc1')
60 | h_fc1_norm = tf.contrib.layers.batch_norm(h_fc1, is_training=is_training, trainable=True)
61 | h_fc1_drop = tf.nn.dropout(h_fc1_norm, keep_prob)
62 |
63 | #FCL 2
64 | W_fc2 = weight_variable([1164, 100])
65 | b_fc2 = bias_variable([100])
66 |
67 | h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, 'relu_fc2')
68 | h_fc2_norm = tf.contrib.layers.batch_norm(h_fc2, is_training=is_training, trainable=True)
69 | h_fc2_drop = tf.nn.dropout(h_fc2_norm, keep_prob)
70 |
71 | #FCL 3
72 | W_fc3 = weight_variable([100, 50])
73 | b_fc3 = bias_variable([50])
74 |
75 | h_fc3 = tf.nn.relu(tf.matmul(h_fc2_drop, W_fc3) + b_fc3, 'relu_fc3')
76 | h_fc3_norm = tf.contrib.layers.batch_norm(h_fc3, is_training=is_training, trainable=True)
77 | h_fc3_drop = tf.nn.dropout(h_fc3_norm, keep_prob)
78 |
79 |     #FCL 4
80 | W_fc4 = weight_variable([50, 10])
81 | b_fc4 = bias_variable([10])
82 |
83 | h_fc4 = tf.nn.relu(tf.matmul(h_fc3_drop, W_fc4) + b_fc4, 'relu_fc4')
84 | h_fc4_norm = tf.contrib.layers.batch_norm(h_fc4, is_training=is_training, trainable=True)
85 | h_fc4_drop = tf.nn.dropout(h_fc4_norm, keep_prob)
86 |
87 | #Output
88 | W_fc5 = weight_variable([10, 1])
89 | b_fc5 = bias_variable([1])
90 |
91 | y = tf.multiply(tf.atan(tf.matmul(h_fc4_drop, W_fc5) + b_fc5), 2) #scale the atan output
92 | tf.add_to_collection("logits", y)
93 |
94 | return y
95 |
--------------------------------------------------------------------------------
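The output layer in model.py above squashes the regression head with 2 * atan(x),
which bounds predictions to the open interval (-pi, pi) radians, i.e. at most one
full wheel turn in either direction. A quick check of the bound (the
pre-activation values are hypothetical):

    import math

    for v in (-100.0, 0.0, 100.0):
        print(2 * math.atan(v))  # approaches -pi, equals 0, approaches +pi

--------------------------------------------------------------------------------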
/self_driving/steering/model_resnet50.py:
--------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.model_resnet50 > self_driving/steering/output.txt 2>&1 &
2 |
3 | from keras import applications
4 | from keras import optimizers
5 | from keras.models import Sequential
6 | from keras.models import Model
7 | from keras.layers import Dropout, Flatten, Dense
8 | from utils import my_image
9 | from keras import backend as K
10 | from keras.callbacks import ModelCheckpoint
11 |
12 | # dimensions of our images.
13 | img_width, img_height = 224, 224
14 |
15 | train_data_dir = 'utils/udacity_train.txt'
16 | validation_data_dir = 'utils/udacity_val.txt'
17 | nb_train_samples = 33808
18 | nb_validation_samples = 10558
19 | epochs = 50
20 | batch_size = 32
21 |
22 | # build the resnet50 network
23 | base_model = applications.ResNet50(include_top=False, input_shape=(224, 224, 3))
24 | print('Model loaded.')
25 |
26 | # build a classifier model to put on top of the convolutional model
27 | top_model = Sequential()
28 | top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
29 | top_model.add(Dense(512, activation='relu'))
30 | top_model.add(Dense(256, activation='relu'))
31 | top_model.add(Dense(64, activation='relu'))
32 | top_model.add(Dense(1))
33 |
34 | # add the model on top of the convolutional base
35 | # model.add(top_model)
36 | model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
37 |
38 | # set the first 15 layers (up to the last conv block)
39 | # to non-trainable (weights will not be updated)
40 | for layer in model.layers[:15]:
41 | layer.trainable = False
42 |
43 | def root_mean_squared_error(y_true, y_pred):
44 | return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
45 |
46 | # compile the model with an Adam optimizer
47 | # and a small learning rate.
48 | model.compile(loss=root_mean_squared_error,
49 | optimizer=optimizers.Adam(lr=0.001),
50 | metrics=['accuracy'])
51 |
52 | # prepare data augmentation configuration
53 | train_datagen = my_image.MyImageDataGenerator(rescale=1. / 255)
54 |
55 | test_datagen = my_image.MyImageDataGenerator(rescale=1. / 255)
56 |
57 | train_generator = train_datagen.flow(
58 | train_data_dir,
59 | [img_width, img_height, 3],
60 | shuffle=True)
61 |
62 | validation_generator = test_datagen.flow(
63 | validation_data_dir,
64 | [img_width, img_height, 3],
65 | shuffle=True)
66 |
67 | # checkpoint
68 | filepath="save/steering_resnet50-{epoch:02d}-{val_loss:.4f}.hdf5"
69 | checkpoint = ModelCheckpoint(
70 | filepath,
71 | monitor='val_loss',
72 | save_best_only=True,
73 | mode='min')
74 | callbacks_list = [checkpoint]
75 |
76 | model.summary()
77 |
78 | # fine-tune the model
79 | model.fit_generator(
80 | train_generator,
81 | steps_per_epoch=nb_train_samples // batch_size,
82 | epochs=epochs,
83 | validation_data=validation_generator,
84 | validation_steps=nb_validation_samples // batch_size,
85 | callbacks=callbacks_list)
86 |
--------------------------------------------------------------------------------
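A quick NumPy check (illustrative only, not repo code) of the root_mean_squared_error loss defined above: for targets [3, 4] and an all-zero prediction, the RMSE is sqrt((9 + 16) / 2) ≈ 3.5355.

    import numpy as np

    # Mirrors root_mean_squared_error on a toy example.
    y_true = np.array([3.0, 4.0])
    y_pred = np.zeros_like(y_true)
    rmse = np.sqrt(np.mean(np.square(y_pred - y_true)))
    print(rmse)  # ~3.5355

--------------------------------------------------------------------------------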
/self_driving/steering/model_saliency.py:
--------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.model_saliency > self_driving/steering/output.txt 2>&1 &
2 |
3 | from keras import applications
4 | from keras.models import Sequential
5 | from scipy import misc
6 | from keras.models import Model
7 | from keras.layers import Dropout, Flatten, Dense
8 | from vis.visualization import visualize_saliency, overlay
9 | from vis.utils import utils
10 | import numpy as np
11 | from keras.preprocessing.image import img_to_array
12 | import os
13 |
14 | VAL_DATASET = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/center/"
15 |
16 | # dimensions of our images.
17 | img_width, img_height = 224, 224
18 | model_weights_path = 'save/steering_resnet50-22-0.0603.hdf5'
19 |
20 | # build the resnet50 network
21 | base_model = applications.ResNet50(include_top=False,
22 | input_shape=(224, 224, 3))
23 | print('Model loaded.')
24 |
25 | # build a classifier model to put on top of the convolutional model
26 | top_model = Sequential()
27 | top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
28 | top_model.add(Dense(512, activation='relu'))
29 | top_model.add(Dense(256, activation='relu'))
30 | top_model.add(Dense(64, activation='relu'))
31 | top_model.add(Dense(1))
32 |
33 | model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
34 | model.load_weights(model_weights_path)
35 |
36 | with open("output/steering/steering_val.txt", 'a') as out:
37 | for img in os.listdir(VAL_DATASET):
38 | img_data = utils.load_img(VAL_DATASET + img, target_size=(224, 224))
39 | img_input = np.expand_dims(img_to_array(img_data), axis=0)
40 | out.write("%s %.10f\n" % (img, model.predict(img_input / 255.)[0][0]))
41 | out.flush()
42 | heat_map = visualize_saliency(model,
43 | -2,
44 | filter_indices=None,
45 | seed_input=img_data,
46 | backprop_modifier='guided')
47 | misc.imsave("output/steering/%s" % img, overlay(img_data, heat_map, alpha=0.3))
48 |
--------------------------------------------------------------------------------
/self_driving/steering/split_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Split Nvidia dataset into train data and test data
3 |
4 | src_dir="driving_dataset"
5 | train_dst_dir="train_data"
6 | test_dst_dir="test_data"
7 |
8 | train_data_size=40000
9 | data_size=45568
10 |
11 | echo "Split Nvidia driving dataset into train data and test data"
12 |
13 | rm -rf $train_dst_dir $test_dst_dir
14 | mkdir $train_dst_dir $test_dst_dir
15 |
16 | # train_data
17 | i=0
18 | while [ $i -lt $train_data_size ]
19 | do
20 | cp $src_dir/"$i.jpg" $train_dst_dir/"$i.jpg"
21 | true $(( i++ ))
22 | done
23 |
24 | # test_data
25 | while [ $i -lt $data_size ]
26 | do
27 | cp $src_dir/"$i.jpg" $test_dst_dir/"$i.jpg"
28 | true $(( i++ ))
29 | done
30 |
31 |
--------------------------------------------------------------------------------
/self_driving/steering/train.py:
--------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.train > self_driving/steering/output.txt 2>&1 &
2 |
3 | import os
4 | import tensorflow as tf
5 | from utils import udacity_data
6 | from self_driving.steering import model
7 |
8 | LOG_DIR = 'save'
9 | EPOCH = 32
10 | BATCH_SIZE = 128
11 | LEARNING_RATE = 1e-3
12 | STEP_PER_EPOCH = udacity_data.NUM_TRAIN_IMAGES // BATCH_SIZE
13 |
14 |
15 | def loss(pred, labels):
16 | train_vars = tf.trainable_variables()
17 | norm = tf.add_n([tf.nn.l2_loss(v) for v in train_vars])
18 | # create a summary to monitor L2 norm
19 | tf.summary.scalar('L2 Normalization', norm)
20 | losses = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(pred, labels))))
21 | # create a summary to monitor loss
22 | tf.summary.scalar('Loss', losses)
23 | return norm, losses, losses + norm * 0.0005
24 |
25 |
26 | def train(total_loss):
27 | global_step = tf.Variable(0, name='global_step', trainable=False)
28 | # create a summary to monitor total loss
29 | tf.summary.scalar('Total Loss', total_loss)
30 | optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
31 | return optimizer.minimize(total_loss, global_step=global_step)
32 |
33 |
34 | def main(_):
35 | with tf.Graph().as_default():
36 | config = tf.ConfigProto()
37 | config.gpu_options.allocator_type = 'BFC'
38 | sess = tf.InteractiveSession(config=config)
39 |
40 | x_image = tf.placeholder(tf.float32, shape=[None, 66, 200, 3], name="x_image")
41 | y_label = tf.placeholder(tf.float32, shape=[None, 1], name="y_label")
42 | keep_prob = tf.placeholder(tf.float32, name="keep_prob")
43 |
44 | y_pred = model.inference(x_image, keep_prob)
45 | norm, losses, total_loss = loss(y_pred, y_label)
46 | train_op = train(total_loss)
47 |
48 | merged_summary_op = tf.summary.merge_all()
49 | summary_writer = tf.summary.FileWriter('train', sess.graph)
50 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
51 | if not os.path.exists(LOG_DIR):
52 | os.makedirs(LOG_DIR)
53 | checkpoint_path = os.path.join(LOG_DIR, "steering.ckpt")
54 |
55 | sess.run(tf.global_variables_initializer())
56 |
57 | udacity_data.read_data()
58 |
59 | for epoch in range(EPOCH):
60 | for i in range(STEP_PER_EPOCH):
61 | steps = epoch * STEP_PER_EPOCH + i
62 |
63 | xs, ys = udacity_data.load_train_batch(BATCH_SIZE)
64 |
65 | _, summary = sess.run([train_op, merged_summary_op],
66 | feed_dict={x_image: xs, y_label: ys, keep_prob: 0.7})
67 |
68 | if i % 10 == 0:
69 | xs, ys = udacity_data.load_val_batch(BATCH_SIZE)
70 | loss_value = losses.eval(feed_dict={x_image: xs, y_label: ys, keep_prob: 1.0})
71 | print("Epoch: %d, Step: %d, Loss: %g" % (epoch, steps, loss_value))
72 |
73 | # write logs at every iteration
74 | summary_writer.add_summary(summary, steps)
75 |
76 | if i % 32 == 0:
77 | if not os.path.exists(LOG_DIR):
78 | os.makedirs(LOG_DIR)
79 | saver.save(sess, checkpoint_path)
80 |
81 |
82 | if __name__ == '__main__':
83 | tf.app.run(main=main)
84 |
--------------------------------------------------------------------------------
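The objective built in train.py is RMSE plus weight decay: total_loss = rmse + 0.0005 * sum of tf.nn.l2_loss over the trainable variables, where tf.nn.l2_loss(w) is sum(w**2) / 2. A toy NumPy sketch of the same arithmetic, with made-up values:

    import numpy as np

    pred = np.array([[0.1], [0.3]])
    label = np.array([[0.2], [0.1]])
    weights = [np.array([1.0, -2.0])]              # stand-in for tf.trainable_variables()
    rmse = np.sqrt(np.mean((pred - label) ** 2))   # ~0.1581
    l2 = sum(np.sum(w ** 2) / 2 for w in weights)  # tf.nn.l2_loss convention: 2.5
    print(rmse + 0.0005 * l2)

--------------------------------------------------------------------------------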
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/utils/__init__.py
--------------------------------------------------------------------------------
/utils/camvid.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from tensorflow.python.framework import dtypes
4 |
5 | IMAGE_HEIGHT = 720
6 | IMAGE_WIDTH = 960
7 | IMAGE_DEPTH = 3
8 |
9 | NUM_CLASSES = 32
10 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 580
11 | NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 580
12 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 580
13 |
14 | def _generate_image_and_label_batch(image, label, min_queue_examples,
15 | batch_size, shuffle):
16 | """Construct a queued batch of images and labels.
17 | Args:
18 |       image: 3-D Tensor of [height, width, 3] of type float32.
19 |       label: 3-D Tensor of [height, width, 1] of type int32.
20 | min_queue_examples: int32, minimum number of samples to retain
21 |         in the queue that provides batches of examples.
22 | batch_size: Number of images per batch.
23 | shuffle: boolean indicating whether to use a shuffling queue.
24 | Returns:
25 | images: Images. 4D tensor of [batch_size, height, width, 3] size.
26 | labels: Labels. 3D tensor of [batch_size, height, width ,1] size.
27 | """
28 | # Create a queue that shuffles the examples, and then
29 | # read 'batch_size' images + labels from the example queue.
30 | num_preprocess_threads = 1
31 | if shuffle:
32 | images, label_batch = tf.train.shuffle_batch(
33 | [image, label],
34 | batch_size=batch_size,
35 | num_threads=num_preprocess_threads,
36 | capacity=min_queue_examples + 3 * batch_size,
37 | min_after_dequeue=min_queue_examples)
38 | else:
39 | images, label_batch = tf.train.batch(
40 | [image, label],
41 | batch_size=batch_size,
42 | num_threads=num_preprocess_threads,
43 | capacity=min_queue_examples + 3 * batch_size)
44 |
45 | return images, label_batch
46 |
47 |
48 | def CamVid_reader_seq(filename_queue, seq_length):
49 | image_seq_filenames = tf.split(axis=0,
50 | num_or_size_splits=seq_length,
51 | value=filename_queue[0])
52 | label_seq_filenames = tf.split(axis=0,
53 | num_or_size_splits=seq_length,
54 | value=filename_queue[1])
55 |
56 | image_seq = []
57 | label_seq = []
58 |     for im, la in zip(image_seq_filenames, label_seq_filenames):
59 | imageValue = tf.read_file(tf.squeeze(im))
60 | labelValue = tf.read_file(tf.squeeze(la))
61 | image_bytes = tf.image.decode_png(imageValue)
62 | label_bytes = tf.image.decode_png(labelValue)
63 | image = tf.cast(tf.reshape(image_bytes,
64 | (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH)), tf.float32)
65 | label = tf.cast(tf.reshape(label_bytes,
66 | (IMAGE_HEIGHT, IMAGE_WIDTH, 1)), tf.int64)
67 | image_seq.append(image)
68 | label_seq.append(label)
69 | return image_seq, label_seq
70 |
71 |
72 | def CamVid_reader(filename_queue):
73 | image_filename = filename_queue[0]
74 | label_filename = filename_queue[1]
75 |
76 | imageValue = tf.read_file(image_filename)
77 | labelValue = tf.read_file(label_filename)
78 |
79 | image_bytes = tf.image.decode_png(imageValue)
80 | label_bytes = tf.image.decode_png(labelValue)
81 |
82 | image = tf.reshape(image_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
83 | label = tf.reshape(label_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
84 |
85 | return image, label
86 |
87 |
88 | def get_filename_list(path):
89 | fd = open(path)
90 | image_filenames = []
91 | label_filenames = []
92 | for i in fd:
93 | i = i.strip().split(" ")
94 | image_filenames.append(i[0])
95 | label_filenames.append(i[1])
96 | return image_filenames, label_filenames
97 |
98 |
99 | def CamVidInputs(image_filenames, label_filenames, batch_size, shuffle=True):
100 |
101 | images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
102 | labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)
103 |
104 | filename_queue = tf.train.slice_input_producer([images, labels], shuffle=shuffle)
105 |
106 | image, label = CamVid_reader(filename_queue)
107 | reshaped_image = tf.cast(image, tf.float32)
108 |
109 | min_fraction_of_examples_in_queue = 0.05
110 | min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
111 | min_fraction_of_examples_in_queue)
113 |     print('Filling queue with %d CamVid images before starting to train. '
113 | 'This will take a few minutes.' % min_queue_examples)
114 |
115 | # Generate a batch of images and labels by building up a queue of examples.
116 | return _generate_image_and_label_batch(reshaped_image, label,
117 | min_queue_examples, batch_size,
118 | shuffle=shuffle)
119 |
--------------------------------------------------------------------------------
/utils/camvid_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from tensorflow.python.platform import test
6 |
7 | import camvid
8 | import tensorflow as tf
9 |
10 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/data/train.txt"
11 |
12 |
13 | class CamvidTest(test.TestCase):
14 |
15 | def testGetFileNameList(self):
16 | image_filenames, label_filenames = camvid.get_filename_list(image_dir)
17 | self.assertEqual(len(image_filenames), 367)
18 |
19 | def testCamVidInputs(self):
20 | config = tf.ConfigProto()
21 | config.gpu_options.allocator_type = 'BFC'
22 |         with self.test_session(use_gpu=True, config=config) as sess:
23 | image_filenames, label_filenames = camvid.get_filename_list(image_dir)
24 | images, labels = camvid.CamVidInputs(image_filenames, label_filenames, 32)
25 | # Start the queue runners.
26 | coord = tf.train.Coordinator()
27 | threads = tf.train.start_queue_runners(sess=sess, coord=coord)
28 | images_batch, labels_batch = sess.run([images, labels])
29 |             self.assertEqual(images.get_shape(), [32, camvid.IMAGE_HEIGHT, camvid.IMAGE_WIDTH, 3])
30 |             self.assertEqual(labels.get_shape(), [32, camvid.IMAGE_HEIGHT, camvid.IMAGE_WIDTH, 1])
31 | coord.request_stop()
32 | coord.join(threads)
33 |
34 |
35 | if __name__ == "__main__":
36 | test.main()
37 |
--------------------------------------------------------------------------------
/utils/cifar.py:
--------------------------------------------------------------------------------
1 | """Load data from CIFAR-10 dataset
2 |
3 | The archive contains the files data_batch_1, data_batch_2, ..., data_batch_5,
4 | as well as test_batch. Each of these files is a Python "pickled" object
5 | produced with cPickle. The Unpickle routine at the bottom of this module
6 | opens such a file and returns a dictionary.
7 | """
8 |
9 | import os.path
10 | import pickle
11 | import tarfile
12 | import urllib2
13 |
14 | import numpy
15 | import dataset
16 |
17 | FLAGS = None
18 |
19 | CIFAR10_DOWNLOAD_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
20 | CIFAR10_FILE_NAME = 'cifar-10-python.tar.gz'
21 | CIFAR10_TRAIN_PREFIX = 'cifar-10-batches-py/data_batch_'
22 | CIFAR10_TEST = 'cifar-10-batches-py/test_batch'
23 | CIFAR10_DATA = 'data'
24 | CIFAR10_LABEL = 'labels'
25 |
26 |
27 | class Cifar(object):
28 | def __init__(self):
29 | self.train = dataset.DataSet()
30 | self.test = dataset.DataSet()
31 |
32 | def ReadDataSets(self, data_dir=".", one_hot=False, raw=False):
33 | file_path = os.path.join(data_dir, CIFAR10_FILE_NAME)
34 | if not os.path.isfile(file_path):
35 | _DownloadCifar10(data_dir)
36 |
37 | UnzipTarGzFile(file_path)
38 |
39 | xs = []
40 | ys = []
41 | for j in range(5):
42 |             d = Unpickle(os.path.join(data_dir, CIFAR10_TRAIN_PREFIX + str(j + 1)))
43 | x = d[CIFAR10_DATA]
44 | y = d[CIFAR10_LABEL]
45 | xs.append(x)
46 | ys.append(y)
47 |
48 | d = Unpickle(os.path.join(data_dir, CIFAR10_TEST))
49 | xs.append(d[CIFAR10_DATA])
50 | ys.append(d[CIFAR10_LABEL])
51 |
52 | x = numpy.concatenate(xs) / numpy.float32(255)
53 | y = numpy.concatenate(ys)
54 | if not raw:
55 | x = numpy.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
56 | x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
57 |
58 | # subtract per-pixel mean
59 | pixel_mean = numpy.mean(x[0:50000], axis=0)
60 | x -= pixel_mean
61 |
62 |         # split into train and test sets
63 | if not raw:
64 | self.train.images = x[0:50000, :, :, :]
65 | else:
66 | self.train.images = x[0:50000]
67 | self.train.labels = y[0:50000]
68 |
69 | if not raw:
70 | self.test.images = x[50000:, :, :, :]
71 | else:
72 |             self.test.images = x[50000:]
73 | self.test.labels = y[50000:]
74 |
75 | if one_hot:
76 | train_labels = numpy.zeros((50000, 10), dtype=numpy.float32)
77 | test_labels = numpy.zeros((10000, 10), dtype=numpy.float32)
78 |
79 | for i in range(50000):
80 | train_labels[i, self.train.labels[i]] = 1.
81 | self.train.labels = train_labels
82 |
83 | for j in range(10000):
84 | test_labels[j, self.test.labels[j]] = 1.
85 | self.test.labels = test_labels
86 |
87 |
88 | def _DownloadCifar10(data_dir):
89 | _EnsureDir(data_dir)
90 | cifar10_zip_file = urllib2.urlopen(CIFAR10_DOWNLOAD_URL)
91 | with open(os.path.join(data_dir, CIFAR10_FILE_NAME), 'wb') as output:
92 | output.write(cifar10_zip_file.read())
93 |
94 |
95 | def UnzipTarGzFile(file_path):
96 | with tarfile.open(file_path) as tar:
97 | tar.extractall()
98 | tar.close()
99 |
100 |
101 | def _EnsureDir(directory):
102 | if not os.path.exists(directory):
103 | os.makedirs(directory)
104 |
105 |
106 | def Unpickle(file_path):
107 |     with open(file_path, mode='rb') as f:
108 |         d = pickle.load(f)
109 |     return d
110 |
--------------------------------------------------------------------------------
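Design note (sketch, not repo code): the per-example one-hot loops in ReadDataSets can be vectorized with an identity-matrix lookup, producing the same float32 matrix:

    import numpy

    labels = numpy.array([3, 0, 9])
    one_hot = numpy.eye(10, dtype=numpy.float32)[labels]
    print(one_hot[0])  # 1.0 at index 3, zeros elsewhere

--------------------------------------------------------------------------------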
/utils/cifar_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import cifar
4 |
5 |
6 | class CifarTest(unittest.TestCase):
7 | def setUp(self):
8 | self._cifar = cifar.Cifar()
9 |
10 | def testReadDataSets(self):
11 | self._cifar.ReadDataSets()
12 | self.assertEqual(len(self._cifar.train.images), 50000)
13 | self.assertEqual(len(self._cifar.train.labels), 50000)
14 | self.assertEqual(len(self._cifar.test.images), 10000)
15 | self.assertEqual(len(self._cifar.test.labels), 10000)
16 |
17 | def testReadDataSetsOneHotEnabled(self):
18 | self._cifar.ReadDataSets(one_hot=True)
19 |
20 | self.assertEqual(len(self._cifar.train.images), 50000)
21 | self.assertEqual(len(self._cifar.train.images[0]), 3072)
22 | self.assertEqual(len(self._cifar.train.labels[0]), 10)
23 | self.assertEqual(1, self._cifar.train.labels[0][6])
24 |
25 | self.assertEqual(len(self._cifar.test.images), 10000)
26 | self.assertEqual(len(self._cifar.test.labels[0]), 10)
27 | self.assertEqual(1, self._cifar.test.labels[0][3])
28 |
29 | if __name__ == '__main__':
30 | unittest.main()
31 |
--------------------------------------------------------------------------------
/utils/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy
2 |
3 |
4 | class DataSet(object):
5 | def __init__(self):
6 | self._images = numpy.array([])
7 | self._labels = numpy.array([])
8 | self._index_in_epoch = 0
9 |
10 | @property
11 | def images(self):
12 | return self._images
13 |
14 | @property
15 | def labels(self):
16 | return self._labels
17 |
18 | @labels.setter
19 | def labels(self, value):
20 | self._labels = value
21 |
22 | @images.setter
23 | def images(self, value):
24 | self._images = value
25 |
26 | def appendImage(self, images):
27 | arr = self._images.tolist()
28 | arr.extend(images)
29 | self._images = numpy.array(arr)
30 |
31 | def appendLabel(self, labels):
32 | arr = self._labels.tolist()
33 | arr.extend(labels)
34 | self._labels = numpy.array(arr)
35 |
36 | def next_batch(self, batch_size):
37 | start = self._index_in_epoch
38 | self._index_in_epoch += batch_size
39 | if self._index_in_epoch > len(self._images):
40 | perm = numpy.arange(len(self._images))
41 | numpy.random.shuffle(perm)
42 | self._images = self._images[perm]
43 | self._labels = self._labels[perm]
44 | self._index_in_epoch = batch_size
45 | start = 0
46 | end = self._index_in_epoch
47 | return self._images[start:end], self._labels[start:end]
48 |
--------------------------------------------------------------------------------
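A minimal usage sketch for DataSet.next_batch (assuming this file is importable as dataset): when a batch would run past the end of the data, the arrays are reshuffled and the pointer restarts from zero.

    import numpy
    import dataset

    ds = dataset.DataSet()
    ds.images = numpy.arange(10)
    ds.labels = numpy.arange(10)
    for _ in range(4):
        xs, ys = ds.next_batch(4)  # the third call crosses the epoch boundary
        print(xs, ys)

--------------------------------------------------------------------------------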
/utils/kitti.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | import tensorflow as tf
4 | import scipy as scp
5 | import scipy.misc
6 |
7 | KITTI_TRAIN_DIR_PREFIX = '/usr/local/google/home/limeng/Downloads/kitti/data_road/training/image_2/'
8 | KITTI_GT_DIR_PREFIX = '/usr/local/google/home/limeng/Downloads/kitti/data_road/training/gt_image_2/'
9 |
10 | UM_TRAIN_TEMPLATE = "um_0000%02d.png"
11 | UU_TRAIN_TEMPLATE = "uu_0000%02d.png"
12 | UMM_TRAIN_TEMPLATE = "umm_0000%02d.png"
13 |
14 | UU_GT_ROAD_TEMPLATE = "uu_road_0000%02d.png"
15 | UM_GT_LANE_TEMPLATE = "um_lane_0000%02d.png"
16 | UM_GT_ROAD_TEMPLATE = "um_road_0000%02d.png"
17 | UMM_GT_ROAD_TEMPLATE = "umm_road_0000%02d.png"
18 |
19 |
20 | class Kitti(object):
21 | def __init__(self):
22 | self._images = []
23 | self._labels = []
24 | self._file_count = 0
25 | self._read_datasets()
26 |
27 | def _read_datasets(self,
28 | train_data_dir=KITTI_TRAIN_DIR_PREFIX,
29 | gt_data_dir=KITTI_GT_DIR_PREFIX,
30 | cat='uu'):
31 | assert os.path.isdir(train_data_dir), 'Cannot find: %s' % train_data_dir
32 |
33 | self._file_count = 98
34 | train_file_temp = UU_TRAIN_TEMPLATE
35 | gt_file_temp = UU_GT_ROAD_TEMPLATE
36 | if cat == 'um':
37 | self._file_count = 95
38 | train_file_temp = UM_TRAIN_TEMPLATE
39 | gt_file_temp = UM_GT_ROAD_TEMPLATE
40 | elif cat == 'umm':
41 | self._file_count = 96
42 | train_file_temp = UMM_TRAIN_TEMPLATE
43 | gt_file_temp = UMM_GT_ROAD_TEMPLATE
44 |
45 | for i in range(0, self._file_count):
46 | train_file_name = train_data_dir + train_file_temp % i
47 | gt_file_name = gt_data_dir + gt_file_temp % i
48 | print(train_file_name)
49 | x = get_training_data(train_file_name)
50 | y = get_ground_truth(gt_file_name)
51 |
52 | self._images.append(x)
53 | self._labels.append(y)
54 |
55 | def next_batch(self, batch_id):
56 | return self._images[batch_id], self._labels[batch_id]
57 |
58 |
59 | def get_training_data(file_name):
60 | assert os.path.isfile(file_name), 'Cannot find: %s' % file_name
61 | training_data = scp.misc.imread(file_name, mode='RGB')
62 | return numpy.expand_dims(training_data, axis=0)
63 |
64 |
65 | def get_ground_truth(fileNameGT):
66 | assert os.path.isfile(fileNameGT), 'Cannot find: %s' % fileNameGT
67 | full_gt = scp.misc.imread(fileNameGT, mode='RGB')
68 | roadArea = (full_gt[:, :, 2] > 0)
69 | notRoadArea = (full_gt[:, :, 2] == 0)
70 | gt_data = numpy.dstack((roadArea, notRoadArea))
71 | return numpy.expand_dims(gt_data, axis=0)
72 |
73 |
74 | def main(_):
75 | kitti = Kitti()
76 | for i in range(0, 20):
77 |         img, label = kitti.next_batch(i)
78 |         print("images")
79 |         print(img.shape)
80 |         print("labels")
81 |         print(label.shape)
82 |
83 |
84 | if __name__ == '__main__':
85 | tf.app.run(main=main)
86 |
--------------------------------------------------------------------------------
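A sketch (toy data, not repo code) of the label encoding in get_ground_truth: in the KITTI road ground truth the blue channel marks road pixels, so thresholding it yields a two-channel (road, not-road) mask.

    import numpy

    full_gt = numpy.array([[[255, 0, 255], [255, 0, 0]]])  # 1x2 RGB "image"
    road = full_gt[:, :, 2] > 0
    gt = numpy.dstack((road, ~road))
    print(gt[0, 0], gt[0, 1])  # [ True False] (road), [False  True] (not road)

--------------------------------------------------------------------------------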
/utils/kitti_segnet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from tensorflow.python.framework import dtypes
4 |
5 | IMAGE_HEIGHT = 375
6 | IMAGE_WIDTH = 1242
7 | IMAGE_DEPTH = 3
8 |
9 | NUM_CLASSES = 3
10 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 100
11 | NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 100
12 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 100
13 |
14 | def _generate_image_and_label_batch(image, label, min_queue_examples,
15 | batch_size, shuffle):
16 | """Construct a queued batch of images and labels.
17 | Args:
18 |       image: 3-D Tensor of [height, width, 3] of type float32.
19 |       label: 3-D Tensor of [height, width, 1] of type int32.
20 | min_queue_examples: int32, minimum number of samples to retain
21 |         in the queue that provides batches of examples.
22 | batch_size: Number of images per batch.
23 | shuffle: boolean indicating whether to use a shuffling queue.
24 | Returns:
25 | images: Images. 4D tensor of [batch_size, height, width, 3] size.
26 | labels: Labels. 3D tensor of [batch_size, height, width ,1] size.
27 | """
28 | # Create a queue that shuffles the examples, and then
29 | # read 'batch_size' images + labels from the example queue.
30 | num_preprocess_threads = 1
31 | if shuffle:
32 | images, label_batch = tf.train.shuffle_batch(
33 | [image, label],
34 | batch_size=batch_size,
35 | num_threads=num_preprocess_threads,
36 | capacity=min_queue_examples + 3 * batch_size,
37 | min_after_dequeue=min_queue_examples)
38 | else:
39 | images, label_batch = tf.train.batch(
40 | [image, label],
41 | batch_size=batch_size,
42 | num_threads=num_preprocess_threads,
43 | capacity=min_queue_examples + 3 * batch_size)
44 |
45 | return images, label_batch
46 |
47 |
48 | def CamVid_reader_seq(filename_queue, seq_length):
49 | image_seq_filenames = tf.split(axis=0,
50 | num_or_size_splits=seq_length,
51 | value=filename_queue[0])
52 | label_seq_filenames = tf.split(axis=0,
53 | num_or_size_splits=seq_length,
54 | value=filename_queue[1])
55 |
56 | image_seq = []
57 | label_seq = []
58 |     for im, la in zip(image_seq_filenames, label_seq_filenames):
59 | imageValue = tf.read_file(tf.squeeze(im))
60 | labelValue = tf.read_file(tf.squeeze(la))
61 | image_bytes = tf.image.decode_png(imageValue)
62 | label_bytes = tf.image.decode_png(labelValue)
63 | image = tf.cast(tf.reshape(image_bytes,
64 | (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH)), tf.float32)
65 | label = tf.cast(tf.reshape(label_bytes,
66 | (IMAGE_HEIGHT, IMAGE_WIDTH, 1)), tf.int64)
67 | image_seq.append(image)
68 | label_seq.append(label)
69 | return image_seq, label_seq
70 |
71 |
72 | def CamVid_reader(filename_queue):
73 | image_filename = filename_queue[0]
74 | label_filename = filename_queue[1]
75 |
76 | imageValue = tf.read_file(image_filename)
77 | labelValue = tf.read_file(label_filename)
78 |
79 | image_bytes = tf.image.decode_png(imageValue)
80 | label_bytes = tf.image.decode_png(labelValue)
81 |
82 | image = tf.reshape(image_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
83 | label = tf.reshape(label_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
84 |
85 | return image, label
86 |
87 |
88 | def get_filename_list(path):
89 | fd = open(path)
90 | image_filenames = []
91 | label_filenames = []
92 | for i in fd:
93 | i = i.strip().split(" ")
94 | image_filenames.append(i[0])
95 | label_filenames.append(i[1])
96 | return image_filenames, label_filenames
97 |
98 |
99 | def CamVidInputs(image_filenames, label_filenames, batch_size, shuffle=True):
100 |
101 | images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
102 | labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)
103 |
104 | filename_queue = tf.train.slice_input_producer([images, labels], shuffle=shuffle)
105 |
106 | image, label = CamVid_reader(filename_queue)
107 | reshaped_image = tf.cast(image, tf.float32)
108 |
109 | min_fraction_of_examples_in_queue = 0.05
110 | min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
111 | min_fraction_of_examples_in_queue)
113 |     print('Filling queue with %d CamVid images before starting to train. '
113 | 'This will take a few minutes.' % min_queue_examples)
114 |
115 | # Generate a batch of images and labels by building up a queue of examples.
116 | return _generate_image_and_label_batch(reshaped_image, label,
117 | min_queue_examples, batch_size,
118 | shuffle=shuffle)
119 |
--------------------------------------------------------------------------------
/utils/my_image.py:
--------------------------------------------------------------------------------
1 | import scipy.misc
2 | import numpy as np
3 | import os
4 |
5 | from keras.preprocessing.image import ImageDataGenerator
6 | from keras.preprocessing.image import Iterator
7 | from keras import backend as K
8 | from keras.preprocessing import image
9 |
10 | class MyImageDataGenerator(ImageDataGenerator):
11 |
12 | def flow(self, file, image_size, batch_size=32, shuffle=True, seed=None,
13 | save_to_dir=None, save_prefix='', save_format='png'):
14 | return FileIterator(
15 | file, image_size, self,
16 | batch_size=batch_size,
17 | shuffle=shuffle,
18 | seed=seed,
19 | data_format=self.data_format,
20 | save_to_dir=save_to_dir,
21 | save_prefix=save_prefix,
22 | save_format=save_format)
23 |
24 |
25 | class FileIterator(Iterator):
26 | """Iterator yielding data from a file.
27 |
28 | The file should be in the following format:
29 |
30 |
31 | ...
32 |
33 |
34 | # Arguments
35 | file: Path to the file to read the image list and label data.
36 | image_size: Image size, [height, width, channel]
37 | image_data_generator: Instance of `ImageDataGenerator`
38 | to use for random transformations and normalization.
39 | batch_size: Integer, size of a batch.
40 | shuffle: Boolean, whether to shuffle the data between epochs.
41 | seed: Random seed for data shuffling.
42 | data_format: String, one of `channels_first`, `channels_last`.
43 | save_to_dir: Optional directory where to save the pictures
44 | being yielded, in a viewable format. This is useful
45 | for visualizing the random transformations being
46 | applied, for debugging purposes.
47 | save_prefix: String prefix to use for saving sample
48 | images (if `save_to_dir` is set).
49 | save_format: Format to use for saving sample images
50 | (if `save_to_dir` is set).
51 | """
52 |
53 | def __init__(self, file, image_size, image_data_generator,
54 | batch_size=32, shuffle=False, seed=None,
55 | data_format=None,
56 | save_to_dir=None, save_prefix='', save_format='png'):
57 | if not os.path.exists(file):
58 | raise ValueError('Cannot find file: %s' % file)
59 |
60 | if data_format is None:
61 | data_format = K.image_data_format()
62 |
63 | split_lines = [line.rstrip('\n').split(' ') for line in open(file, 'r')]
64 | self.x = np.asarray([e[0] for e in split_lines])
65 | self.y = np.asarray([float(e[1]) for e in split_lines])
66 | self.image_size = image_size
67 | self.image_data_generator = image_data_generator
68 | self.data_format = data_format
69 | self.save_to_dir = save_to_dir
70 | self.save_prefix = save_prefix
71 | self.save_format = save_format
72 | super(FileIterator, self).__init__(self.x.shape[0], batch_size, shuffle, seed)
73 |
74 | def next(self):
75 | # Keeps under lock only the mechanism which advances
76 | # the indexing of each batch.
77 | with self.lock:
78 | index_array, current_index, current_batch_size = next(self.index_generator)
79 | # The transformation of images is not under thread lock
80 | # so it can be done in parallel
81 | batch_x = np.zeros(tuple([current_batch_size] + list(self.image_size)), dtype=K.floatx())
82 | for i, j in enumerate(index_array):
83 | x = scipy.misc.imread(self.x[j])
84 | x = scipy.misc.imresize(x, self.image_size)
85 | x = self.image_data_generator.random_transform(x.astype(K.floatx()))
86 | x = self.image_data_generator.standardize(x)
87 | batch_x[i] = x
88 | if self.save_to_dir:
89 | for i in range(current_batch_size):
90 | img = image.array_to_img(batch_x[i], self.data_format, scale=True)
91 | fname = '{prefix}_{index}_{hash}.{format}'.format(prefix=self.save_prefix,
92 | index=current_index + i,
93 | hash=np.random.randint(1e4),
94 | format=self.save_format)
95 | img.save(os.path.join(self.save_to_dir, fname))
96 | batch_y = self.y[index_array]
97 | return batch_x, batch_y
--------------------------------------------------------------------------------
/utils/my_image_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from tensorflow.python.platform import test
6 | import my_image
7 |
8 | class MyImageTest(test.TestCase):
9 |
10 | def testReadData(self):
11 | myImageDataGenerator = my_image.MyImageDataGenerator()
12 | generator = myImageDataGenerator.flow("udacity_train.txt",
13 | [224, 224, 3],
14 | shuffle=False,
15 | save_to_dir='test')
16 | images, labels = generator.next()
17 | self.assertAllEqual(images.shape, [32, 224, 224, 3])
18 | self.assertAllEqual(labels.shape, [32])
19 | self.assertAllClose(labels[0], 0.0490969472)
20 |
21 |
22 | if __name__ == "__main__":
23 | test.main()
--------------------------------------------------------------------------------
/utils/svhn.py:
--------------------------------------------------------------------------------
1 | """Load data from SVHN dataset
2 | """
3 |
4 | import os.path
5 | import dataset
6 | import numpy
7 | import scipy.io
8 |
9 | FLAGS = None
10 |
11 | SVHN_TRAIN_FILE_NAME = 'train_32x32.mat'
12 | SVHN_TEST_FILE_NAME = 'test_32x32.mat'
13 | SVHN_DATA = 'X'
14 | SVHN_LABEL = 'y'
15 |
16 |
17 | class SVHN(object):
18 | def __init__(self):
19 | self.train = dataset.DataSet()
20 | self.test = dataset.DataSet()
21 |
22 | def ReadDataSets(self, data_dir=".", one_hot=False):
23 | train_file_path = os.path.join(data_dir, SVHN_TRAIN_FILE_NAME)
24 | test_file_path = os.path.join(data_dir, SVHN_TEST_FILE_NAME)
25 | if not os.path.isfile(train_file_path) and not os.path.isfile(test_file_path):
26 | print("SVHN dataset not found.")
27 | return
28 |
29 | read_input = scipy.io.loadmat(train_file_path)
30 | self.train.images = read_input[SVHN_DATA]
31 | self.train.labels = read_input[SVHN_LABEL]
32 |
33 | read_input = scipy.io.loadmat(test_file_path)
34 | self.test.images = read_input[SVHN_DATA]
35 | self.test.labels = read_input[SVHN_LABEL]
36 |
37 | self.train.images = numpy.swapaxes(self.train.images, 0, 3)
38 | self.train.images = numpy.swapaxes(self.train.images, 1, 2)
39 | self.train.images = numpy.swapaxes(self.train.images, 2, 3)
40 |
41 | self.train.images = self.train.images.reshape((73257, -1))
42 |
43 | self.test.images = numpy.swapaxes(self.test.images, 0, 3)
44 | self.test.images = numpy.swapaxes(self.test.images, 1, 2)
45 | self.test.images = numpy.swapaxes(self.test.images, 2, 3)
46 |
47 | self.test.images = self.test.images.reshape((26032, -1))
48 |
49 | self.train.images = self.train.images / numpy.float32(255)
50 | self.test.images = self.test.images / numpy.float32(255)
51 |
52 | if one_hot:
53 | train_labels = numpy.zeros((73257, 10), dtype=numpy.float32)
54 | test_labels = numpy.zeros((26032, 10), dtype=numpy.float32)
55 |
56 | for i in range(73257):
57 | train_labels[i, self.train.labels[i] - 1] = 1.
58 | self.train.labels = train_labels
59 |
60 | for j in range(26032):
61 | test_labels[j, self.test.labels[j] - 1] = 1.
62 | self.test.labels = test_labels
63 |
--------------------------------------------------------------------------------
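Illustrative note: the three swapaxes calls in ReadDataSets reorder the .mat layout (height, width, channels, N) into (N, channels, height, width) before flattening; a single numpy.transpose expresses the same permutation.

    import numpy

    x = numpy.arange(32 * 32 * 3 * 2).reshape((32, 32, 3, 2))  # toy stand-in
    a = numpy.swapaxes(numpy.swapaxes(numpy.swapaxes(x, 0, 3), 1, 2), 2, 3)
    b = numpy.transpose(x, (3, 2, 0, 1))
    assert (a == b).all() and a.shape == (2, 3, 32, 32)

--------------------------------------------------------------------------------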
/utils/udacity_data.py:
--------------------------------------------------------------------------------
1 | import scipy.misc
2 | import random
3 | import pandas as pd
4 | import tensorflow as tf
5 |
6 | #points to the end of the last batch
7 | train_batch_pointer = 0
8 | val_batch_pointer = 0
9 |
10 | train_xs = []
11 | train_ys = []
12 | val_xs = []
13 | val_ys = []
14 |
15 | TRAIN_IMG_PREFIX = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_%s/"
16 | TRAIN_CSV = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_%s/interpolated.csv"
17 | VAL_IMG_PREFIX = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/"
18 | VAL_CSV = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/interpolated.csv"
19 |
20 | NUM_TRAIN_IMAGES = 33808
21 | NUM_VAL_IMAGES = 5279
22 |
23 |
24 | def read_csv(csv_file_name, img_prefix):
25 | x_out = []
26 | data_csv = pd.read_csv(csv_file_name)
27 | data = data_csv[[x.startswith("center") for x in data_csv["filename"]]]
28 | for file_name in data["filename"]:
29 | x_out.append(img_prefix + file_name)
30 | return x_out, data["angle"]
31 |
32 |
33 | def read_data(shuffle=True):
34 | global train_xs
35 | global train_ys
36 | global val_xs
37 | global val_ys
38 |
39 | # Read train set
40 | for idx in range(1, 7):
41 | if idx == 3:
42 | continue
43 | x_out, y_out = read_csv(TRAIN_CSV % idx, TRAIN_IMG_PREFIX % idx)
44 | train_xs.extend(x_out)
45 | train_ys.extend(y_out)
46 | # Read val set
47 | val_xs, val_ys = read_csv(VAL_CSV, VAL_IMG_PREFIX)
48 |
49 | #shuffle train set
50 | c = list(zip(train_xs, train_ys))
51 |     if shuffle:
52 | random.shuffle(c)
53 | # with open("train.txt", 'a') as out:
54 | # for item in c:
55 | # out.write("%s %.10f\n" % (item[0], item[1]))
56 | train_xs, train_ys = zip(*c)
57 | #shuffle val set
58 | c = list(zip(val_xs, val_ys))
59 | # with open("val.txt", 'a') as out:
60 | # for item in c:
61 | # out.write("%s %.10f\n" % (item[0], item[1]))
62 |     if shuffle:
63 | random.shuffle(c)
64 | val_xs, val_ys = zip(*c)
65 |
66 |
67 | def load_train_batch(batch_size):
68 | global train_batch_pointer
69 | global train_xs
70 | global train_ys
71 |
72 | x_out = []
73 | y_out = []
74 | for i in range(0, batch_size):
75 | image = scipy.misc.imread(train_xs[(train_batch_pointer + i) % NUM_TRAIN_IMAGES], mode="RGB")
76 | x_out.append(scipy.misc.imresize(image[-300:], [66, 200]) / 255.0)
77 | y_out.append([train_ys[(train_batch_pointer + i) % NUM_TRAIN_IMAGES]])
78 | train_batch_pointer += batch_size
79 | return x_out, y_out
80 |
81 |
82 | def load_val_batch(batch_size):
83 | global val_batch_pointer
84 | global val_xs
85 | global val_ys
86 |
87 | x_out = []
88 | y_out = []
89 | for i in range(0, batch_size):
90 | image = scipy.misc.imread(val_xs[(val_batch_pointer + i) % NUM_VAL_IMAGES], mode="RGB")
91 | x_out.append(scipy.misc.imresize(image[-300:], [66, 200]) / 255.0)
92 | y_out.append([val_ys[(val_batch_pointer + i) % NUM_VAL_IMAGES]])
93 | val_batch_pointer += batch_size
94 | return x_out, y_out
95 |
96 |
97 | def main(_):
98 | read_data()
99 |
100 | if __name__ == '__main__':
101 | tf.app.run(main=main)
--------------------------------------------------------------------------------
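The preprocessing in load_train_batch/load_val_batch keeps only the bottom 300 rows of each frame (cropping away most of the sky) and resizes to the 66x200 input the steering model expects. A toy sketch of the same steps (the 480x640 frame size is an assumption):

    import numpy as np
    from scipy import misc

    image = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in camera frame
    crop = image[-300:]                              # bottom 300 rows only
    resized = misc.imresize(crop, [66, 200]) / 255.0
    print(resized.shape)                             # (66, 200, 3)

--------------------------------------------------------------------------------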
/utils/udacity_data_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from tensorflow.python.platform import test
6 | from scipy import misc
7 | import udacity_data
8 |
9 | IMG_TRAIN = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_1/center/1479424215880976321.png"
10 | IMG_VAL = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/center/1479425441182877835.png"
11 |
12 | class UdacityDataTest(test.TestCase):
13 |
14 | def testReadData(self):
15 | udacity_data.read_data()
16 | self.assertAllEqual(len(udacity_data.train_xs), 33808)
17 | self.assertAllEqual(len(udacity_data.train_ys), 33808)
18 | self.assertAllEqual(len(udacity_data.val_xs), 5279)
19 | self.assertAllEqual(len(udacity_data.val_ys), 5279)
20 | self.assertTrue(IMG_TRAIN in udacity_data.train_xs)
21 | self.assertAllClose(udacity_data.train_ys[udacity_data.train_xs.index(IMG_TRAIN)], 0.0010389391)
22 | self.assertTrue(IMG_VAL in udacity_data.val_xs)
23 | self.assertAllClose(udacity_data.val_ys[udacity_data.val_xs.index(IMG_VAL)], -0.0169280299)
24 |
25 |     def testLoadValBatch(self):
26 | udacity_data.read_data()
27 | x_out, y_out = udacity_data.load_val_batch(64)
28 | misc.imsave('test.png', x_out[0])
29 |
30 |
31 | if __name__ == "__main__":
32 | test.main()
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | def put_kernels_on_grid(kernel, grid, pad=1):
4 |     '''Visualize conv. features as an image (mostly for the 1st layer).
5 |     Place kernels into a grid, with some padding between adjacent filters.
6 |     Args:
7 |       kernel: tensor of shape [Y, X, NumChannels, NumKernels]
8 |       grid: (grid_Y, grid_X), shape of the grid. Require: NumKernels == grid_Y * grid_X.
9 |         The user is responsible for how to break NumKernels into two multiples.
10 |       pad: number of black pixels around each filter (between them)
11 |
12 | Return:
13 | Tensor of shape [(Y+pad)*grid_Y, (X+pad)*grid_X, NumChannels, 1].
14 | '''
15 |     grid_Y, grid_X = grid
16 |     x1 = tf.pad(kernel, tf.constant([[pad, 0], [pad, 0], [0, 0], [0, 0]]))  # pad X and Y
17 |
18 | # X and Y dimensions, w.r.t. padding
19 | Y = kernel.get_shape()[0] + pad
20 | X = kernel.get_shape()[1] + pad
21 | ch = kernel.get_shape()[2]
22 |
23 | # put NumKernels to the 1st dimension
24 | x2 = tf.transpose(x1, (3, 0, 1, 2))
25 | # organize grid on Y axis
26 |     x3 = tf.reshape(x2, tf.stack([grid_X, Y * grid_Y, X, ch]))
27 |
28 | # switch X and Y axes
29 | x4 = tf.transpose(x3, (0, 2, 1, 3))
30 | # organize grid on X axis
31 |     x5 = tf.reshape(x4, tf.stack([1, X * grid_X, Y * grid_Y, ch]))
32 |
33 | # back to normal order (not combining with the next step for clarity)
34 | x6 = tf.transpose(x5, (2, 1, 3, 0))
35 |
36 |     # to tf.summary.image order [batch_size, height, width, channels],
37 | # where in this case batch_size == 1
38 | x7 = tf.transpose(x6, (3, 0, 1, 2))
39 |
40 | # scale to [0, 1]
41 | x_min = tf.reduce_min(x7)
42 | x_max = tf.reduce_max(x7)
43 | x8 = (x7 - x_min) / (x_max - x_min)
44 |
45 | return x8
46 |
--------------------------------------------------------------------------------
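A minimal usage sketch for put_kernels_on_grid (assumptions: TF 1.x, a 5x5 first-layer kernel with 3 input channels and 32 filters laid out on a 4x8 grid; the import path depends on where utils.py sits):

    import tensorflow as tf
    from utils.utils import put_kernels_on_grid

    kernel = tf.get_variable("conv1_w", shape=[5, 5, 3, 32])
    grid = put_kernels_on_grid(kernel, (4, 8))  # requires 4 * 8 == 32 kernels
    tf.summary.image("conv1/kernels", grid, max_outputs=1)

--------------------------------------------------------------------------------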
/vae/README.md:
--------------------------------------------------------------------------------
1 | [Understanding Variational Inference in Depth](https://limengweb.wordpress.com/2017/11/13/%E6%B7%B1%E5%85%A5%E7%90%86%E8%A7%A3%E5%8F%98%E5%88%86%E6%8E%A8%E6%96%AD/)
2 |
3 | ![vae_mnist](vae_mnist.png)
4 |
5 |
--------------------------------------------------------------------------------
/vae/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/vae/__init__.py
--------------------------------------------------------------------------------
/vae/vae_mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/vae/vae_mnist.png
--------------------------------------------------------------------------------
/vae/vae_mnist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 |
4 | """A Variational Autoencoders for MNIST.
5 | """
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from keras.layers import Input, Dense, Lambda, Conv2D, Conv2DTranspose, \
12 | Flatten, Reshape
13 | from keras.models import Model
14 | from keras import backend as K
15 | from keras.datasets import mnist
16 | from keras import metrics
17 | import tensorflow as tf
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 | from scipy.stats import norm
21 |
22 | EPOCH = 5
23 | INPUT_DIM = 784
24 | BATCH_SIZE = 64
25 | HIDDEN_VAR_DIM = 7 * 7 * 32
26 | LATENT_VAR_DIM = 2
27 |
28 | # input image dimensions
29 |
30 | (img_rows, img_cols, img_chns) = (28, 28, 1)
31 |
32 | if K.image_data_format() == 'channels_first':
33 | original_img_size = (img_chns, img_rows, img_cols)
34 | output_shape = (BATCH_SIZE, 32, 7, 7)
35 | else:
36 | original_img_size = (img_rows, img_cols, img_chns)
37 | output_shape = (BATCH_SIZE, 7, 7, 32)
38 |
39 |
40 | def sampling(args):
41 | (z_mean, z_var) = args
42 | epsilon = K.random_normal(shape=(K.shape(z_mean)[0],
43 | LATENT_VAR_DIM), mean=0., stddev=1.)
44 | return z_mean + z_var * epsilon
45 |
46 |
47 | def encode(x):
48 | input_reshape = Reshape(original_img_size)(x)
49 | conv1 = Conv2D(16, 5, strides=(2, 2), padding='same',
50 | activation='relu')(input_reshape)
51 | conv2 = Conv2D(32, 5, strides=(2, 2), padding='same',
52 | activation='relu')(conv1)
53 | hidden = Flatten()(conv2)
54 | z_mean = Dense(LATENT_VAR_DIM, activation='relu')(hidden)
55 | z_var = Dense(LATENT_VAR_DIM, activation='relu')(hidden)
56 | return (z_mean, z_var)
57 |
58 |
59 | def decode(z):
60 | hidden = Dense(HIDDEN_VAR_DIM, activation='relu')(z)
61 | hidden_reshape = Reshape(output_shape[1:])(hidden)
62 | deconv1 = Conv2DTranspose(16, 5, strides=(2, 2), padding='same',
63 | activation='relu')(hidden_reshape)
64 | deconv2 = Conv2DTranspose(1, 5, strides=(2, 2), padding='same',
65 | activation='sigmoid')(deconv1)
66 | return Flatten()(deconv2)
67 |
68 |
69 | def main(_):
70 | x = Input(shape=(INPUT_DIM, ))
71 | (z_mean, z_var) = encode(x)
72 | z = Lambda(sampling)([z_mean, z_var])
73 | x_decoded = decode(z)
74 | model = Model(inputs=x, outputs=x_decoded)
75 |
76 | def vae_loss(y_true, y_pred):
77 | generation_loss = img_rows * img_cols \
78 | * metrics.binary_crossentropy(x, x_decoded)
79 | kl_loss = 0.5 * tf.reduce_sum(K.square(z_mean)
80 | + K.square(z_var) - K.log(K.square(z_var + 1e-8)) - 1,
81 | axis=1)
82 | return tf.reduce_mean(generation_loss + kl_loss)
83 |
84 | model.compile(optimizer='rmsprop', loss=vae_loss)
85 |
86 | # train the VAE on MNIST digits
87 |
88 | ((x_train, y_train), (x_test, y_test)) = mnist.load_data()
89 |
90 | x_train = x_train.astype('float32') / 255.
91 | x_test = x_test.astype('float32') / 255.
92 | x_train = x_train.reshape((len(x_train),
93 | np.prod(x_train.shape[1:])))
94 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
95 |
96 | print(model.summary())
97 |
98 | model.fit(
99 | x_train,
100 |         x_train,  # the VAE reconstructs its input; vae_loss ignores these targets anyway
101 | shuffle=True,
102 | epochs=EPOCH,
103 | batch_size=BATCH_SIZE,
104 |         validation_data=(x_test, x_test),
105 | )
106 |
107 | generator = K.function([model.layers[8].input],
108 | [model.layers[12].output])
109 |
110 | # display a 2D manifold of the digits
111 |
112 | n = 15 # figure with 15x15 digits
113 | digit_size = 28
114 | figure = np.zeros((digit_size * n, digit_size * n))
115 |
116 | # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
117 | # to produce values of the latent variables z, since the prior of the latent space is Gaussian
118 |
119 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
120 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
121 |
122 | for (i, yi) in enumerate(grid_x):
123 | for (j, xi) in enumerate(grid_y):
124 | z_sample = np.array([[xi, yi]])
125 | z_sample = np.tile(z_sample,
126 | BATCH_SIZE).reshape(BATCH_SIZE, 2)
127 | x_decoded = generator([z_sample])[0]
128 | digit = x_decoded[0].reshape(digit_size, digit_size)
129 |
130 | figure[i * digit_size:(i + 1) * digit_size, j * digit_size:
131 | (j + 1) * digit_size] = digit
132 |
133 | plt.figure(figsize=(10, 10))
134 | plt.imshow(figure, cmap='Greys_r')
135 | plt.show()
136 |
137 |
138 | if __name__ == '__main__':
139 | tf.app.run(main=main)
140 |
--------------------------------------------------------------------------------
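For reference, the kl_loss inside vae_loss above is the closed-form KL divergence between the approximate posterior and the standard normal prior, with z_var playing the role of sigma (the 1e-8 in the code is only a numerical guard for the log):

    D_{\mathrm{KL}}\big(\mathcal{N}(\mu, \sigma^2)\,\|\,\mathcal{N}(0, I)\big)
      = \tfrac{1}{2}\sum_{i=1}^{d}\big(\mu_i^2 + \sigma_i^2 - \log \sigma_i^2 - 1\big)

which matches the code's 0.5 * sum(z_mean**2 + z_var**2 - log(z_var**2) - 1).

--------------------------------------------------------------------------------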