├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── capnet └── capsnet.py ├── cifar ├── README.md ├── __init__.py ├── cifar10.png └── cifar_conv.py ├── gan ├── README.md ├── gan.png └── gan.py ├── kaggle ├── Avito │ ├── __init__.py │ ├── avito.py │ ├── avito2.py │ └── baseline_lgb.csv ├── CostaRicanHouseholdPovertyLevelPrediction │ └── kernel │ │ └── kernel.ipynb ├── DigitalRecognizer │ ├── __init__.py │ ├── digital_recognizer.py │ └── predict.csv ├── SantanderValuePrediction │ ├── SantanderPredict.ipynb │ ├── pipline.py │ └── santander.py ├── TalkingData │ ├── __init__.py │ └── talking_data.py ├── __init__.py ├── titanic │ ├── README.md │ ├── __init__.py │ ├── titanic.png │ └── titanic.py └── zillow │ ├── __init__.py │ ├── location.py │ ├── log_error.py │ ├── log_error_hist.py │ ├── missing_data.py │ ├── month.py │ └── train_data_shape.py ├── mnist ├── __init__.py ├── fully_connected_feed.py ├── fully_connected_feed_simple.py ├── mnist.py ├── mnist_conv.py ├── mnist_simple.py ├── mnist_softmax.py ├── mnist_with_summaries.py └── mnist_with_summary.py ├── reading └── capsnet │ └── drbc.pdf ├── self_driving ├── README.md ├── __init__.py ├── lane_detect │ ├── README.md │ ├── __init__.py │ ├── comma_ai_lane_detect.py │ ├── lane_detect.png │ └── udacity_lane_detect.py ├── optical_flow │ ├── __init__.py │ └── python │ │ ├── __init__.py │ │ ├── common.py │ │ ├── opt_flow.py │ │ ├── tst_scene_render.py │ │ └── video.py ├── road_seg │ ├── README.md │ ├── __init__.py │ ├── convnet.py │ ├── fcn8_vgg.py │ ├── road_seg.png │ ├── test_fcn8_vgg.py │ └── unet.py ├── segnet │ ├── README.md │ ├── __init__.py │ ├── evaluate.py │ ├── evaluate_kitti.py │ ├── evaluate_test.py │ ├── merge_output.sh │ ├── prepare_camvid.py │ ├── prepare_camvid.sh │ ├── prepare_kitti.py │ ├── prepare_kitti.sh │ ├── prepare_kitti_test.py │ ├── prepare_kitti_test.sh │ ├── segnet.png │ ├── segnet_vgg.py │ ├── segnet_vgg_test.py │ ├── train.py │ └── train_kitti.py └── steering │ ├── __init__.py │ ├── driving_data.py │ ├── evaluate.py │ ├── model.py │ ├── model_resnet50.py │ ├── model_saliency.py │ ├── split_data.sh │ └── train.py ├── utils ├── __init__.py ├── camvid.py ├── camvid_test.py ├── cifar.py ├── cifar_test.py ├── dataset.py ├── kitti.py ├── kitti_segnet.py ├── my_image.py ├── my_image_test.py ├── svhn.py ├── udacity_data.py ├── udacity_data_test.py ├── udacity_train.txt ├── udacity_val.txt └── utils.py └── vae ├── README.md ├── __init__.py ├── vae_mnist.png ├── vae_mnist.py ├── vaegan_cifar.py └── vaegan_svhn.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Built application files 2 | *.apk 3 | *.ap_ 4 | 5 | # Files for the Dalvik VM 6 | *.dex 7 | 8 | # Java class files 9 | *.class 10 | 11 | # Generated files 12 | bin/ 13 | gen/ 14 | .idea/ 15 | 16 | # Gradle files 17 | .gradle/ 18 | build/ 19 | 20 | # Local configuration file (sdk path, etc) 21 | local.properties 22 | 23 | # Proguard folder generated by Eclipse 24 | proguard/ 25 | 26 | # Log Files 27 | *.log 28 | 29 | .DS_Store 30 | 31 | *.pyc 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Machine Learning 2 | ================ 3 | 4 | Welcome to my blog [听雨居](https://limengweb.wordpress.com). It contains detailed descriptions of the code here.
-------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/__init__.py -------------------------------------------------------------------------------- /capnet/capsnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, unicode_literals 2 | 3 | # %matplotlib inline  -- notebook-only magic; it is a syntax error in a plain .py script 4 | import matplotlib 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | tf.reset_default_graph() 10 | np.random.seed(42) 11 | tf.set_random_seed(42) 12 | 13 | from tensorflow.examples.tutorials.mnist import input_data 14 | 15 | mnist = input_data.read_data_sets("/tmp/data/") 16 | 17 | n_samples = 5 18 | 19 | plt.figure(figsize=(n_samples * 2, 3)) 20 | for index in range(n_samples): 21 | plt.subplot(1, n_samples, index + 1) 22 | sample_image = mnist.train.images[index].reshape(28, 28) 23 | plt.imshow(sample_image, cmap="binary") 24 | plt.axis("off") 25 | 26 | plt.show() 27 |
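capsnet.py currently stops after loading and plotting MNIST samples; no capsule layers are implemented yet. As a hedged sketch of the machinery the file is presumably building toward (an assumption based on the file name and the "Dynamic Routing Between Capsules" paper kept under reading/capsnet), the squash nonlinearity from that paper could look like this in the same TF 1.x style; `squash` and its arguments are illustrative, not part of the repository:

```python
import tensorflow as tf

def squash(s, axis=-1, epsilon=1e-7, name=None):
    # Shrinks short vectors toward zero and long vectors toward unit
    # length while preserving direction, as in Sabour et al. (2017).
    with tf.name_scope(name, default_name="squash"):
        squared_norm = tf.reduce_sum(tf.square(s), axis=axis, keep_dims=True)
        safe_norm = tf.sqrt(squared_norm + epsilon)  # avoids NaN gradients at 0
        return (squared_norm / (1. + squared_norm)) * (s / safe_norm)
```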
-------------------------------------------------------------------------------- /cifar/README.md: -------------------------------------------------------------------------------- 1 | [Recognizing CIFAR-10 with Convolutional Neural Networks](https://limengweb.wordpress.com/2016/12/31/%E5%88%A9%E7%94%A8%E5%8D%B7%E7%A7%AF%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C%E8%AF%86%E5%88%ABcifar-10/) 2 | 3 | ![cifar10](cifar10.png) 4 |
5 | -------------------------------------------------------------------------------- /cifar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/cifar/__init__.py -------------------------------------------------------------------------------- /cifar/cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/cifar/cifar10.png -------------------------------------------------------------------------------- /cifar/cifar_conv.py: -------------------------------------------------------------------------------- 1 | """A convolutional neural network for CIFAR-10 classification. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import tensorflow as tf 8 | from utils import cifar 9 | from utils.utils import put_kernels_on_grid 10 | 11 | EPOCH = 36000 12 | BATCH_SIZE = 128 13 | 14 | 15 | def weight_variable_with_decay(shape, wd): 16 | initial = tf.truncated_normal(shape, stddev=0.05, dtype=tf.float32) 17 | var = tf.Variable(initial, name='weights') 18 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss') 19 | tf.add_to_collection('losses', weight_decay) 20 | return var 21 | 22 | 23 | def bias_variable(shape): 24 | initial = tf.constant(0.0, shape=shape, dtype=tf.float32) 25 | return tf.Variable(initial, name='biases') 26 | 27 | 28 | def conv2d(x, W): 29 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 30 | 31 | 32 | def max_pool_2x2(x): 33 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], 34 | strides=[1, 2, 2, 1], padding='SAME') 35 | 36 | 37 | def conv_layer(layer_name, input, in_dim, in_ch, out_dim, out_size, summary_conv=False): 38 | with tf.name_scope(layer_name): 39 | # Initialize weights and bias 40 | W_conv = weight_variable_with_decay([in_dim, in_dim, in_ch, out_dim], 0.004) 41 | b_conv = bias_variable([out_dim]) 42 | 43 | # Log weights and bias 44 | tf.summary.histogram("weights", W_conv) 45 | tf.summary.histogram("biases", b_conv) 46 | 47 | # Draw weights in 8x8 grid for the first conv layer 48 | if summary_conv: 49 | kernel_grid = put_kernels_on_grid(W_conv, (8, 8)) 50 | tf.summary.image("kernel", kernel_grid, max_outputs=1) 51 | 52 | # Draw conv activation in 8x8 grid 53 | activation = tf.nn.bias_add(conv2d(input, W_conv), b_conv) 54 | # Only draw the activation for the first image in a batch 55 | activation_sample = tf.slice(activation, [0, 0, 0, 0], [1, out_size, out_size, out_dim]) 56 | activation_grid = put_kernels_on_grid(tf.transpose(activation_sample, [1, 2, 0, 3]), (8, 8)) 57 | tf.summary.image("conv/activations", activation_grid, max_outputs=1) 58 | 59 | # Draw relu activation in 8x8 grid 60 | activation = tf.nn.relu(activation) 61 | # Only draw the activation for the first image in a batch 62 | activation_sample = tf.slice(activation, [0, 0, 0, 0], [1, out_size, out_size, out_dim]) 63 | activation_grid = put_kernels_on_grid(tf.transpose(activation_sample, [1, 2, 0, 3]), (8, 8)) 64 | tf.summary.image("relu/activations", activation_grid, max_outputs=1) 65 | 66 | # 2x2 max pooling 67 | pool = max_pool_2x2(activation) 68 | 69 | return tf.nn.lrn(pool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm') 70 | 71 | 72 | def fc_layer(layer_name, input, in_dim, out_dim, activation=True): 73 | with tf.name_scope(layer_name):
74 | # Initialize weights and bias 75 | W_fc = weight_variable_with_decay([in_dim, out_dim], 0.004) 76 | b_fc = bias_variable([out_dim]) 77 | 78 | # Log weights and bias 79 | tf.summary.histogram("weights", W_fc) 80 | tf.summary.histogram("biases", b_fc) 81 | 82 | # Don't apply the activation function to the last fc layer 83 | if activation: 84 | return tf.nn.relu(tf.nn.bias_add(tf.matmul(input, W_fc), b_fc)) 85 | else: 86 | return tf.nn.bias_add(tf.matmul(input, W_fc), b_fc) 87 | 88 | 89 | def loss(logits, labels): 90 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)) 91 | tf.add_to_collection('losses', cross_entropy) 92 | total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss') 93 | tf.summary.scalar('loss', total_loss) 94 | return total_loss 95 | 96 | 97 | def learning_rate(global_step): 98 | starter_learning_rate = 0.001 99 | learning_rate_1 = tf.train.exponential_decay( 100 | starter_learning_rate, global_step, EPOCH * 0.2, 0.1, staircase=True) 101 | learning_rate_2 = tf.train.exponential_decay( 102 | learning_rate_1, global_step, EPOCH * 0.4, 0.5, staircase=True) 103 | decayed_learning_rate = tf.train.exponential_decay( 104 | learning_rate_2, global_step, EPOCH * 0.6, 0.8, staircase=True) 105 | tf.summary.scalar('learning_rate', decayed_learning_rate) 106 | return decayed_learning_rate 107 | 108 | 109 | def main(_): 110 | cifar10 = cifar.Cifar() 111 | cifar10.ReadDataSets(one_hot=True) 112 | 113 | keep_prob = tf.placeholder(tf.float32) 114 | 115 | # Create the model 116 | x = tf.placeholder(tf.float32, [None, 3, 32, 32]) 117 | 118 | # Define loss and optimizer 119 | y_ = tf.placeholder(tf.float32, [None, 10]) 120 | 121 | x_image = tf.transpose(x, [0, 2, 3, 1]) 122 | 123 | tf.summary.image("images", x_image, max_outputs=1) 124 | 125 | h_pool1 = conv_layer("conv_layer1", x_image, 5, 3, 64, 32, summary_conv=True) 126 | h_pool2 = conv_layer("conv_layer2", h_pool1, 5, 64, 64, 16) 127 | 128 | h_conv3_flat = tf.reshape(h_pool2, [-1, 8 * 8 * 64]) 129 | 130 | h_fc1 = fc_layer('fc_layer1', h_conv3_flat, 8 * 8 * 64, 384, activation=True) 131 | h_fc2 = fc_layer('fc_layer2', h_fc1, 384, 192, activation=True) 132 | y_conv = fc_layer('fc_layer3', h_fc2, 192, 10, activation=False) 133 | 134 | global_step = tf.Variable(0, trainable=False) 135 | lr = learning_rate(global_step) 136 | 137 | total_loss = loss(y_conv, y_) 138 | optimizer = tf.train.AdamOptimizer(lr) 139 | grads_and_vars = optimizer.compute_gradients(total_loss) 140 | with tf.name_scope("conv_layer1_grad"): 141 | kernel_grad_grid = put_kernels_on_grid(grads_and_vars[0][0], (8, 8)) 142 | tf.summary.image("weight_grad", kernel_grad_grid, max_outputs=1) 143 | 144 | train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 145 | correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) 146 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 147 | 148 | sess = tf.InteractiveSession() 149 | 150 | merged = tf.summary.merge_all() 151 | train_writer = tf.summary.FileWriter('train', sess.graph) 152 | 153 | sess.run(tf.global_variables_initializer()) 154 | 155 | for i in range(EPOCH): 156 | batch = cifar10.train.next_batch(BATCH_SIZE) 157 | if i % 100 == 0: 158 | test_accuracy = accuracy.eval(feed_dict={x: cifar10.test.images, y_: cifar10.test.labels}) 159 | print("step %d, test accuracy %g" % (i, test_accuracy)) 160 | summary, _ = sess.run([merged, train_step], feed_dict={x: batch[0], y_: batch[1]})
161 | train_writer.add_summary(summary, i) 162 | 163 | print("test accuracy %g" % accuracy.eval(feed_dict={ 164 | x: cifar10.test.images, y_: cifar10.test.labels})) 165 | 166 | 167 | if __name__ == '__main__': 168 | tf.app.run(main=main) 169 |
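The three chained `tf.train.exponential_decay` ops in `learning_rate()` above compose: with `staircase=True`, each contributes `decay_rate ** (step // decay_steps)`, so the effective schedule is the product of all three factors. A small sketch (plain Python, derived from the constants above) that makes the resulting schedule explicit:

```python
# Closed form of cifar_conv.py's learning_rate() with EPOCH = 36000:
#   lr(s) = 0.001 * 0.1**(s // 7200) * 0.5**(s // 14400) * 0.8**(s // 21600)
def lr_schedule(step):
    return 0.001 * 0.1 ** (step // 7200) * 0.5 ** (step // 14400) * 0.8 ** (step // 21600)

for s in [0, 7200, 14400, 21600, 28800, 35999]:
    print(s, lr_schedule(s))  # prints the boundaries of the staircase
```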
-------------------------------------------------------------------------------- /gan/README.md: -------------------------------------------------------------------------------- 1 | [A Brief Analysis of Generative Adversarial Networks](https://limengweb.wordpress.com/2017/02/19/%E6%B5%85%E6%9E%90%E7%94%9F%E6%88%90%E5%AF%B9%E6%8A%97%E7%BD%91%E7%BB%9C/) 2 | 3 | ![gan](gan.png) 4 |
5 | -------------------------------------------------------------------------------- /gan/gan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/gan/gan.png -------------------------------------------------------------------------------- /gan/gan.py: -------------------------------------------------------------------------------- 1 | """Generative Adversarial Networks 2 | 3 | An example of distribution approximation using Generative Adversarial Networks in TensorFlow. 4 | """ 5 | import os 6 | 7 | os.environ["KERAS_BACKEND"] = "tensorflow" 8 | 9 | from keras.layers import Dense 10 | from keras.models import Sequential 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | import tensorflow as tf 14 | import seaborn as sns 15 | 16 | sns.set(color_codes=True) 17 | np.random.seed(688) 18 | 19 | RANDOM_PORTION = 0.01 20 | HIDDEN_SIZE = 16 21 | BATCH_SIZE = 256 22 | EPOCH = 15000 23 | SAMPLE_RATE = 50 24 | 25 | 26 | class DataDistribution(object): 27 | def __init__(self): 28 | self.mu = 4 29 | self.sigma = .5 30 | 31 | def sample(self, N): 32 | samples = np.random.normal(self.mu, self.sigma, N) 33 | samples.sort() 34 | return samples 35 | 36 | 37 | class GeneratorDistribution(object): 38 | def __init__(self, low, high): 39 | self._low = low 40 | self._high = high 41 | 42 | def sample(self, N): 43 | return np.linspace(self._low, self._high, N) + np.random.random(N) * RANDOM_PORTION 44 | 45 | 46 | def generator(hidden_size): 47 | model = Sequential() 48 | 49 | model.add(Dense(hidden_size, activation='softplus', batch_input_shape=(BATCH_SIZE, 1), init='normal', name="g0")) 50 | model.add(Dense(1, init='normal', name="g1")) 51 | 52 | return model 53 | 54 | 55 | def discriminator(hidden_size): 56 | model = Sequential() 57 | 58 | model.add(Dense(hidden_size * 2, activation='tanh', batch_input_shape=(BATCH_SIZE, 1), init='normal', name="d0")) 59 | model.add(Dense(hidden_size * 2, activation='tanh', init='normal', name="d1")) 60 | model.add(Dense(hidden_size * 2, activation='tanh', init='normal', name="d2")) 61 | model.add(Dense(1, activation='sigmoid', init='normal', name="d3")) 62 | 63 | return model 64 | 65 | 66 | def optimizer(loss, var_list): 67 | initial_learning_rate = 0.005 68 | decay = 0.95 69 | num_decay_steps = 150 70 | batch = tf.Variable(0) 71 | learning_rate = tf.train.exponential_decay( 72 | initial_learning_rate, 73 | batch, 74 | num_decay_steps, 75 | decay, 76 | staircase=True 77 | ) 78 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize( 79 | loss, 80 | global_step=batch, 81 | var_list=var_list 82 | ) 83 | return optimizer 84 | 85 | 86 | def export_animation(anim_frames): 87 | i = 0 88 | for t_data, g_data in anim_frames: 89 | f, ax = plt.subplots(figsize=(12, 8)) 90 | f.suptitle('Generative Adversarial Network', fontsize=15) 91 | plt.xlabel('Data values') 92 | plt.ylabel('Probability density') 93 | ax.set_xlim(-2, 10) 94 | ax.set_ylim(0, 1.2) 95 | sns.distplot(t_data, hist=False, rug=True, color='r', label='Target Data', ax=ax) 96 | sns.distplot(g_data, hist=False, rug=True, color='g', label='Generated Data', ax=ax) 97 | f.savefig("images/frame_" + str(i) + ".png") 98 | print("Frame index: ", i * SAMPLE_RATE) 99 | f.clf() 100 | plt.close() 101 | i += 1 102 | 103 | # Generate mp4 from images: 104 | # avconv -r 10 -i frame_%d.png -b:v 1000k gan.mp4 105 | # convert -delay 20 -loop 0 output/decision_*.png myimage.gif 106 |
107 | def train(_): 108 | anim_frames = [] 109 | with tf.variable_scope('GAN'): 110 | G = generator(HIDDEN_SIZE) 111 | D = discriminator(HIDDEN_SIZE) 112 | 113 | Z = G.input 114 | X = D.input 115 | tf.summary.histogram("target", X) 116 | 117 | D1 = D(X) 118 | G_train = G(Z) 119 | tf.summary.histogram("generated", G_train) 120 | D2 = D(G_train) 121 | 122 | loss_d = tf.reduce_mean(-tf.log(D1) - tf.log(1 - D2)) 123 | loss_g = tf.reduce_mean(-tf.log(D2)) 124 | 125 | tf.summary.scalar("loss_d", loss_d) 126 | tf.summary.scalar("loss_g", loss_g) 127 | 128 | g_params = G.trainable_weights 129 | d_params = D.trainable_weights 130 | 131 | opt_g = optimizer(loss_g, g_params) 132 | opt_d = optimizer(loss_d, d_params) 133 | 134 | with tf.Session() as session: 135 | merged = tf.summary.merge_all() 136 | train_writer = tf.summary.FileWriter('train', session.graph) 137 | 138 | session.run(tf.global_variables_initializer()) 139 | 140 | for step in range(EPOCH): 141 | # update discriminator 142 | x = DataDistribution().sample(BATCH_SIZE) 143 | gen = GeneratorDistribution(-2, 10) 144 | z = gen.sample(BATCH_SIZE) 145 | _, _, summary = session.run([loss_d, opt_d, merged], { 146 | X: np.reshape(x, (BATCH_SIZE, 1)), 147 | Z: np.reshape(z, (BATCH_SIZE, 1)) 148 | }) 149 | 150 | # update generator 151 | z = gen.sample(BATCH_SIZE) 152 | _, _, summary = session.run([loss_g, opt_g, merged], { 153 | X: np.reshape(x, (BATCH_SIZE, 1)), 154 | Z: np.reshape(z, (BATCH_SIZE, 1)) 155 | }) 156 | 157 | G_gen = session.run([G_train], { 158 | X: np.reshape(x, (BATCH_SIZE, 1)), 159 | Z: np.reshape(z, (BATCH_SIZE, 1)) 160 | }) 161 | 162 | train_writer.add_summary(summary, step) 163 | 164 | if step % SAMPLE_RATE == 0: 165 | anim_frames.append((x, G_gen)) 166 | print("step:", step) 167 | 168 | export_animation(anim_frames) 169 | 170 | 171 | if __name__ == "__main__": 172 | tf.app.run(main=train) 173 | -------------------------------------------------------------------------------- /kaggle/Avito/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/Avito/__init__.py
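gan.py takes `tf.log` of the discriminator's sigmoid outputs, which yields NaNs once D saturates at exactly 0 or 1. A numerically safer variant (a sketch of an alternative, not what gan.py does) keeps the pre-sigmoid logits, i.e. the discriminator's last Dense layer would drop its sigmoid activation, and uses `tf.nn.sigmoid_cross_entropy_with_logits`:

```python
import tensorflow as tf

# Sketch: stable GAN losses computed from raw discriminator logits.
def gan_losses(d_logits_real, d_logits_fake):
    bce = tf.nn.sigmoid_cross_entropy_with_logits
    loss_d = tf.reduce_mean(
        bce(labels=tf.ones_like(d_logits_real), logits=d_logits_real) +
        bce(labels=tf.zeros_like(d_logits_fake), logits=d_logits_fake))
    # Non-saturating generator loss, same intent as gan.py's -log D(G(z))
    loss_g = tf.reduce_mean(
        bce(labels=tf.ones_like(d_logits_fake), logits=d_logits_fake))
    return loss_d, loss_g
```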
-------------------------------------------------------------------------------- /kaggle/Avito/avito2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv) 2 | import matplotlib.pyplot as plt 3 | from sklearn import preprocessing, model_selection, metrics 4 | import lightgbm as lgb 5 | 6 | pd.options.mode.chained_assignment = None 7 | pd.options.display.max_columns = 999 8 | 9 | train_df = pd.read_csv("C:\\Users\\jowet\\Downloads\\kaggle\\avito\\train.csv", parse_dates=["activation_date"]) 10 | test_df = pd.read_csv("C:\\Users\\jowet\\Downloads\\kaggle\\avito\\test.csv", parse_dates=["activation_date"]) 11 | print("Train file rows and columns are : ", train_df.shape) 12 | print("Test file rows and columns are : ", test_df.shape) 13 | 14 | # Target and ID variables # 15 | train_y = train_df["deal_probability"].values 16 | test_id = test_df["item_id"].values 17 | 18 | # New variable on weekday # 19 | train_df["activation_weekday"] = train_df["activation_date"].dt.weekday 20 | test_df["activation_weekday"] = test_df["activation_date"].dt.weekday 21 | 22 | # Label encode the categorical variables # 23 | cat_vars = ["region", "city", "parent_category_name", "category_name", "user_type", "param_1", "param_2", "param_3"] 24 | for col in cat_vars: 25 | lbl = preprocessing.LabelEncoder() 26 | lbl.fit(list(train_df[col].values.astype('str')) + list(test_df[col].values.astype('str'))) 27 | train_df[col] = lbl.transform(list(train_df[col].values.astype('str'))) 28 | test_df[col] = lbl.transform(list(test_df[col].values.astype('str'))) 29 | 30 | cols_to_drop = ["item_id", "user_id", "title", "description", "activation_date", "image"] 31 | train_X = train_df.drop(cols_to_drop + ["deal_probability"], axis=1) 32 | test_X = test_df.drop(cols_to_drop, axis=1) 33 | 34 | 35 | def run_lgb(train_X, train_y, val_X, val_y, test_X): 36 | params = { 37 | "objective": "regression", 38 | "metric": "rmse", 39 | "num_leaves": 30, 40 | "learning_rate": 0.1, 41 | "bagging_fraction": 0.7, 42 | "feature_fraction": 0.7, 43 | "bagging_freq": 5, 44 | "bagging_seed": 2018, 45 | "verbosity": -1 46 | } 47 | 48 | lgtrain = lgb.Dataset(train_X, label=train_y) 49 | lgval = lgb.Dataset(val_X, label=val_y) 50 | evals_result = {} 51 | model = lgb.train(params, lgtrain, 10000, valid_sets=[lgval], early_stopping_rounds=100, verbose_eval=20, 52 | evals_result=evals_result) 53 | 54 | pred_test_y = model.predict(test_X, num_iteration=model.best_iteration) 55 | return pred_test_y, model, evals_result 56 | 57 | 58 | # Splitting the data for model training# 59 | dev_X = train_X.iloc[:-200000, :] 60 | val_X = train_X.iloc[-200000:, :] 61 | dev_y = train_y[:-200000] 62 | val_y = train_y[-200000:] 63 | print(dev_X.shape, val_X.shape, test_X.shape) 64 | 65 | # Training the model # 66 | pred_test, model, evals_result = run_lgb(dev_X, dev_y, val_X, val_y, test_X) 67 | 68 | # Making a submission file # 69 | pred_test[pred_test > 1] = 1 70 | pred_test[pred_test < 0] = 0 71 | sub_df = pd.DataFrame({"item_id": test_id}) 72 | sub_df["deal_probability"] = pred_test 73 | sub_df.to_csv("baseline_lgb.csv", index=False) 74 | 75 | fig, ax = plt.subplots(figsize=(12, 18)) 76 | lgb.plot_importance(model, max_num_features=50, height=0.8, ax=ax) 77 | ax.grid(False) 78 | plt.title("LightGBM - Feature Importance", fontsize=15) 79 | plt.show() 80 |
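run_lgb above captures `evals_result` but the script never reads it. A short sketch of how that history can be inspected after training (names taken from avito2.py; `'valid_0'` is LightGBM's default name for the first validation set):

```python
# Sketch: inspect the eval history captured by run_lgb.
rmse_curve = evals_result['valid_0']['rmse']  # one value per boosting round
print("best iteration:", model.best_iteration)
print("best validation RMSE:", min(rmse_curve))
```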
-------------------------------------------------------------------------------- /kaggle/DigitalRecognizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/DigitalRecognizer/__init__.py -------------------------------------------------------------------------------- /kaggle/DigitalRecognizer/digital_recognizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pandas as pd 3 | import numpy as np 4 | import keras 5 | from keras.models import Sequential 6 | from keras.layers import Dense, Dropout, Flatten 7 | from keras.layers import Conv2D, MaxPooling2D 8 | from keras import backend as K 9 | 10 | batch_size = 128 11 | num_classes = 10 12 | epochs = 12 13 | 14 | # input image dimensions 15 | img_rows, img_cols = 28, 28 16 | 17 | train = pd.read_csv('C:\\Users\\jowet\\Downloads\\kaggle\\digit_recognizer\\train.csv') 18 | test = pd.read_csv('C:\\Users\\jowet\\Downloads\\kaggle\\digit_recognizer\\test.csv') 19 | 20 | x_train = train.drop(['label'], axis=1).values 21 | y_train = train['label'].values 22 | x_test = test.values 23 | 24 | print(x_train.shape) 25 | print(y_train.shape) 26 | print(x_test.shape) 27 | 28 | if K.image_data_format() == 'channels_first': 29 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 30 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 31 | input_shape = (1, img_rows, img_cols) 32 | else: 33 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 34 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 35 | input_shape = (img_rows, img_cols, 1) 36 | 37 | x_train = x_train.astype('float32') 38 | x_test = x_test.astype('float32') 39 | x_train /= 255 40 | x_test /= 255 41 | print('x_train shape:', x_train.shape) 42 | print(x_train.shape[0], 'train samples') 43 | print(x_test.shape[0], 'test samples') 44 | 45 | # convert class vectors to binary class matrices 46 | y_train = keras.utils.to_categorical(y_train, num_classes) 47 | 48 | model = Sequential() 49 | model.add(Conv2D(32, kernel_size=(3, 3), 50 | activation='relu', 51 | input_shape=input_shape)) 52 | model.add(Conv2D(64, (3, 3), activation='relu')) 53 | model.add(MaxPooling2D(pool_size=(2, 2))) 54 | model.add(Dropout(0.25)) 55 | model.add(Flatten()) 56 | model.add(Dense(128, activation='relu')) 57 | model.add(Dropout(0.5)) 58 | model.add(Dense(num_classes, activation='softmax')) 59 | 60 | model.compile(loss=keras.losses.categorical_crossentropy, 61 | optimizer=keras.optimizers.Adadelta(), 62 | metrics=['accuracy']) 63 | 64 | model.fit(x_train, y_train, 65 | batch_size=batch_size, 66 | epochs=epochs, 67 | verbose=1) 68 | result = model.predict(x_test, verbose=0) 69 | 70 | predict = np.argmax(result, axis=1) 71 | sub_df = pd.DataFrame({"ImageId": range(1, len(predict) + 1)}) 72 | sub_df["Label"] = predict 73 | sub_df.to_csv("predict.csv", index=False) 74 | -------------------------------------------------------------------------------- /kaggle/SantanderValuePrediction/santander.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from xgboost import XGBRegressor 4 | import pandas as pd 5 | 6 | train = pd.read_csv("C:\\Users\\jowet\\Downloads\\Santander\\train.csv") 7 | test = pd.read_csv("C:\\Users\\jowet\\Downloads\\Santander\\test.csv") 8 | 9 | train.drop('ID', axis=1, inplace=True) 10 | 11 | y_train = train.pop('target') 12 | pred_index = test.pop('ID') 13 | 14 | reg = XGBRegressor() 15 | reg.fit(train, y_train) 16 | y_pred = reg.predict(test) 17 | 18 | submit = pd.DataFrame() 19 | submit['ID'] = pred_index 20 | submit['target'] = y_pred 21 | submit.to_csv('my_XGB_prediction.csv', index=False) 22 |
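santander.py fits XGBRegressor on the raw target. Assuming the Santander Value Prediction competition's RMSLE metric, a common refinement (a sketch, not what the script does) is to train in log space and invert the transform for the submission:

```python
import numpy as np
from xgboost import XGBRegressor

# Sketch: fit on log1p(target) when the metric is log-based (RMSLE).
reg = XGBRegressor()
reg.fit(train, np.log1p(y_train))     # train/y_train as defined in santander.py
y_pred = np.expm1(reg.predict(test))  # back to the original scale
```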
-------------------------------------------------------------------------------- /kaggle/TalkingData/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/TalkingData/__init__.py -------------------------------------------------------------------------------- /kaggle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/__init__.py -------------------------------------------------------------------------------- /kaggle/titanic/README.md: -------------------------------------------------------------------------------- 1 | [Titanic: A Machine Learning Application](https://limengweb.wordpress.com/2017/09/30/%E6%B3%B0%E5%9D%A6%E5%B0%BC%E5%85%8B%EF%BC%9A%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E5%BA%94%E7%94%A8/) 2 |
3 | ![titanic](titanic.png) 4 |
5 | -------------------------------------------------------------------------------- /kaggle/titanic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/titanic/__init__.py -------------------------------------------------------------------------------- /kaggle/titanic/titanic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/titanic/titanic.png -------------------------------------------------------------------------------- /kaggle/titanic/titanic.py: -------------------------------------------------------------------------------- 1 | # remove warnings 2 | import warnings 3 | import pandas as pd 4 | #from matplotlib import pyplot as plt 5 | import numpy as np 6 | 7 | from sklearn.pipeline import make_pipeline 8 | from sklearn.ensemble import RandomForestClassifier 9 | from sklearn.feature_selection import SelectKBest 10 | from sklearn.cross_validation import StratifiedKFold 11 | from sklearn.grid_search import GridSearchCV 12 | from sklearn.ensemble.gradient_boosting import GradientBoostingClassifier 13 | from sklearn.cross_validation import cross_val_score 14 | 15 | from sklearn.ensemble import ExtraTreesClassifier 16 | from sklearn.feature_selection import SelectFromModel 17 | 18 | warnings.filterwarnings('ignore') 19 | pd.options.display.max_rows = 100 20 | 21 | def get_combined_data(): 22 | # reading train data 23 | train = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/train.csv') 24 | 25 | # reading test data 26 | test = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/test.csv') 27 | 28 | # extracting and then removing the targets from the training data 29 | targets = train.Survived 30 | train.drop('Survived',1,inplace=True) 31 | 32 | 33 | # merging train data and test data for future feature engineering 34 | combined = train.append(test) 35 | combined.reset_index(inplace=True) 36 | combined.drop('index',inplace=True,axis=1) 37 | 38 | return combined, targets 39 | 40 | 41 | def create_titles(combined): 42 | # we extract the title from each name 43 | combined['Title'] = combined['Name'].map(lambda name:name.split(',')[1].split('.')[0].strip()) 44 | 45 | # a map of more aggregated titles 46 | Title_Dictionary = { 47 | "Capt": "Officer", 48 | "Col": "Officer", 49 | "Major": "Officer", 50 | "Jonkheer": "Royalty", 51 | "Don": "Royalty", 52 | "Sir" : "Royalty", 53 | "Dr": "Officer", 54 | "Rev": "Officer", 55 | "the Countess": "Royalty", 56 | "Dona": "Royalty", 57 | "Mme": "Mrs", 58 | "Mlle": "Miss", 59 | "Ms": "Mrs", 60 | "Mr" : "Mr", 61 | "Mrs" : "Mrs", 62 | "Miss" : "Miss", 63 | "Master" : "Master", 64 | "Lady" : "Royalty" 65 | } 66 | 67 | # we map each title 68 | combined['Title'] = combined.Title.map(Title_Dictionary) 69 | 70 | 71 | def process_age(combined): 72 | # a function that fills the missing values of the Age variable 73 | def fillAges(row): 74 | if row['Sex']=='female' and row['Pclass'] == 1: 75 | if row['Title'] == 'Miss': 76 | return 30 77 | elif row['Title'] == 'Mrs': 78 | return 45 79 | elif row['Title'] == 'Officer': 80 | return 49 81 | elif row['Title'] == 'Royalty': 82 | return 39 83 | elif row['Sex']=='female' and row['Pclass'] == 2: 84 | if row['Title'] == 'Miss': 85 | return 20 86 | elif row['Title'] == 'Mrs': 87 | return 30 88 | elif 
row['Sex']=='female' and row['Pclass'] == 3: 89 | if row['Title'] == 'Miss': 90 | return 18 91 | elif row['Title'] == 'Mrs': 92 | return 31 93 | elif row['Sex']=='male' and row['Pclass'] == 1: 94 | if row['Title'] == 'Master': 95 | return 6 96 | elif row['Title'] == 'Mr': 97 | return 41.5 98 | elif row['Title'] == 'Officer': 99 | return 52 100 | elif row['Title'] == 'Royalty': 101 | return 40 102 | elif row['Sex']=='male' and row['Pclass'] == 2: 103 | if row['Title'] == 'Master': 104 | return 2 105 | elif row['Title'] == 'Mr': 106 | return 30 107 | elif row['Title'] == 'Officer': 108 | return 41.5 109 | elif row['Sex']=='male' and row['Pclass'] == 3: 110 | if row['Title'] == 'Master': 111 | return 6 112 | elif row['Title'] == 'Mr': 113 | return 26 114 | 115 | combined.Age = combined.apply( 116 | lambda r : fillAges(r) if np.isnan(r['Age']) else r['Age'], axis=1) 117 | 118 | 119 | def process_names(combined): 120 | # we clean the Name variable 121 | combined.drop('Name',axis=1,inplace=True) 122 | 123 | # encoding in dummy variable 124 | titles_dummies = pd.get_dummies(combined['Title'],prefix='Title') 125 | combined = pd.concat([combined,titles_dummies],axis=1) 126 | 127 | # removing the title variable 128 | combined.drop('Title',axis=1,inplace=True) 129 | 130 | return combined 131 | 132 | 133 | def process_fares(combined): 134 | # there's one missing fare value - replacing it with the mean. 135 | combined.Fare.fillna(combined.Fare.mean(),inplace=True) 136 | 137 | 138 | def process_embarked(combined): 139 | # two missing embarked values - filling them with the most frequent one (S) 140 | combined.Embarked.fillna('S',inplace=True) 141 | 142 | # dummy encoding 143 | embarked_dummies = pd.get_dummies(combined['Embarked'],prefix='Embarked') 144 | combined = pd.concat([combined,embarked_dummies],axis=1) 145 | combined.drop('Embarked',axis=1,inplace=True) 146 | 147 | return combined 148 | 149 | 150 | def process_cabin(combined): 151 | # replacing missing cabins with U (for Uknown) 152 | combined.Cabin.fillna('U',inplace=True) 153 | 154 | # mapping each Cabin value with the cabin letter 155 | combined['Cabin'] = combined['Cabin'].map(lambda c : c[0]) 156 | 157 | # dummy encoding ... 
158 | cabin_dummies = pd.get_dummies(combined['Cabin'],prefix='Cabin') 159 | 160 | combined = pd.concat([combined,cabin_dummies],axis=1) 161 | 162 | combined.drop('Cabin',axis=1,inplace=True) 163 | 164 | return combined 165 | 166 | 167 | def process_sex(combined): 168 | # mapping string values to numerical one 169 | combined['Sex'] = combined['Sex'].map({'male':1,'female':0}) 170 | 171 | 172 | def process_pclass(combined): 173 | # encoding into 3 categories: 174 | pclass_dummies = pd.get_dummies(combined['Pclass'],prefix="Pclass") 175 | 176 | # adding dummy variables 177 | combined = pd.concat([combined,pclass_dummies],axis=1) 178 | 179 | # removing "Pclass" 180 | 181 | combined.drop('Pclass',axis=1,inplace=True) 182 | 183 | return combined 184 | 185 | 186 | def process_ticket(combined): 187 | # a function that extracts each prefix of the ticket, 188 | # returns 'XXX' if no prefix (i.e the ticket is a digit) 189 | def cleanTicket(ticket): 190 | ticket = ticket.replace('.','') 191 | ticket = ticket.replace('/','') 192 | ticket = ticket.split() 193 | ticket = list(map(lambda t: t.strip(), ticket)) 194 | ticket = list(filter(lambda t: not t.isdigit(), ticket)) 195 | if len(ticket) > 0: 196 | return ticket[0] 197 | else: 198 | return 'XXX' 199 | 200 | # Extracting dummy variables from tickets: 201 | combined['Ticket'] = combined['Ticket'].map(cleanTicket) 202 | tickets_dummies = pd.get_dummies(combined['Ticket'],prefix='Ticket') 203 | combined = pd.concat([combined, tickets_dummies],axis=1) 204 | combined.drop('Ticket',inplace=True,axis=1) 205 | return combined 206 | 207 | 208 | def process_family(combined): 209 | # introducing a new feature : the size of families (including the passenger) 210 | combined['FamilySize'] = combined['Parch'] + combined['SibSp'] + 1 211 | 212 | # introducing other features based on the family size 213 | combined['Singleton'] = combined['FamilySize'].map(lambda s : 1 if s == 1 else 0) 214 | combined['SmallFamily'] = combined['FamilySize'].map(lambda s : 1 if 2<=s<=4 else 0) 215 | combined['LargeFamily'] = combined['FamilySize'].map(lambda s : 1 if 5<=s else 0) 216 | 217 | 218 | def scale_all_features(combined): 219 | features = list(combined.columns) 220 | features.remove('PassengerId') 221 | combined[features] = combined[features].apply(lambda x: x/x.max(), axis=0) 222 | 223 | 224 | combined, targets = get_combined_data() 225 | create_titles(combined) 226 | process_age(combined) 227 | combined = process_names(combined) 228 | process_fares(combined) 229 | combined = process_embarked(combined) 230 | combined = process_cabin(combined) 231 | process_sex(combined) 232 | combined = process_pclass(combined) 233 | combined = process_ticket(combined) 234 | process_family(combined) 235 | scale_all_features(combined) 236 |
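scale_all_features above normalizes each column by dividing by its maximum. A sketch of the same step done with sklearn's MinMaxScaler (an alternative, not what titanic.py does; it additionally shifts by each column's minimum):

```python
from sklearn.preprocessing import MinMaxScaler

# Sketch: column-wise scaling with sklearn instead of x / x.max().
def scale_all_features_sklearn(combined):
    features = [c for c in combined.columns if c != 'PassengerId']
    combined[features] = MinMaxScaler().fit_transform(combined[features])
```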
237 | 238 | def compute_score(clf, X, y, scoring='accuracy'): 239 | xval = cross_val_score(clf, X, y, cv=5, scoring=scoring) 240 | return np.mean(xval) 241 | 242 | 243 | def recover_train_test_target(combined): 244 | train_set = pd.read_csv('/usr/local/google/home/limeng/Downloads/kaggle/titanic/train.csv') 245 | 246 | targets = train_set.Survived 247 | train = combined.loc[0:890] 248 | test = combined.loc[891:] 249 | 250 | return train, test, targets 251 | 252 | 253 | train, test, targets = recover_train_test_target(combined) 254 | 255 | clf = ExtraTreesClassifier(n_estimators=200) 256 | clf = clf.fit(train, targets) 257 | 258 | features = pd.DataFrame() 259 | features['feature'] = train.columns 260 | features['importance'] = clf.feature_importances_ 261 | print(features.sort_values(['importance'], ascending=False)) 262 | 263 | model = SelectFromModel(clf, prefit=True) 264 | train_new = model.transform(train) 265 | print(train_new.shape) 266 | 267 | test_new = model.transform(test) 268 | print(test_new.shape) 269 | 270 | forest = RandomForestClassifier(max_features='sqrt') 271 | 272 | parameter_grid = { 273 | 'max_depth': [4, 5, 6, 7, 8], 274 | 'n_estimators': [200, 210, 240, 250], 275 | 'criterion': ['gini', 'entropy'] 276 | } 277 | 278 | cross_validation = StratifiedKFold(targets, n_folds=5) 279 | 280 | grid_search = GridSearchCV(forest, 281 | param_grid=parameter_grid, 282 | cv=cross_validation) 283 | 284 | grid_search.fit(train_new, targets) 285 | 286 | print('Best score: {}'.format(grid_search.best_score_)) 287 | print('Best parameters: {}'.format(grid_search.best_params_)) 288 | 289 | output = grid_search.predict(test_new).astype(int) 290 | df_output = pd.DataFrame() 291 | df_output['PassengerId'] = test['PassengerId'] 292 | df_output['Survived'] = output 293 | df_output[['PassengerId', 'Survived']].to_csv('logistic_regression_predictions.csv', index=False) -------------------------------------------------------------------------------- /kaggle/zillow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/kaggle/zillow/__init__.py -------------------------------------------------------------------------------- /kaggle/zillow/location.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | 5 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False) 6 | 7 | sns.jointplot(x=properties_data.latitude.values, y=properties_data.longitude.values, size=10) 8 | plt.ylabel('Longitude', fontsize=12) 9 | plt.xlabel('Latitude', fontsize=12) 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /kaggle/zillow/log_error.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 5 | 6 | plt.figure(figsize=(10, 10)) 7 | plt.scatter(range(train_data.shape[0]), train_data.sort_values(by='logerror').logerror) 8 | plt.xlabel('index') 9 | plt.ylabel('logerror') 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /kaggle/zillow/log_error_hist.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 6 | 7 | plt.figure(figsize=(10, 10)) 8 | up_limit = np.percentile(train_data.logerror, 99) 9 | low_limit = np.percentile(train_data.logerror, 1) 10 | tmp_data = train_data[(train_data.logerror < up_limit) & (train_data.logerror > low_limit)] 11 | plt.hist(tmp_data.logerror, bins=50) 12 | plt.xlabel('logerror') 13 | 14 | plt.show() 15 | -------------------------------------------------------------------------------- /kaggle/zillow/missing_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import 
matplotlib.pyplot as plt 3 | 4 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False) 5 | 6 | missing_df = properties_data.isnull().sum(axis=0).reset_index() 7 | missing_df.columns = ['column_name', 'missing_count'] 8 | missing_df = missing_df[missing_df.missing_count > 0] 9 | missing_df = missing_df.sort_values(by='missing_count') 10 | missing_df.plot(kind='barh') 11 | plt.yticks(range(missing_df.shape[0]), missing_df.column_name.values) 12 | 13 | plt.show() 14 | -------------------------------------------------------------------------------- /kaggle/zillow/month.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 5 | 6 | plt.figure(figsize=(10, 10)) 7 | datetime_data = pd.to_datetime(train_data.transactiondate) 8 | datetime_data.dt.month.value_counts().sort_index(axis=0).plot(kind='bar') 9 | plt.xlabel('month') 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /kaggle/zillow/train_data_shape.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | train_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\train_2017.csv', low_memory=False) 4 | properties_data = pd.read_csv('C:\\Users\\jowet\\Downloads\\zillow\\properties_2017.csv', low_memory=False) 5 | 6 | print(train_data.shape) 7 | print(properties_data.shape) 8 | -------------------------------------------------------------------------------- /mnist/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Imports mnist tutorial libraries used by tutorial examples.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | -------------------------------------------------------------------------------- /mnist/mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Builds the MNIST network. 17 | 18 | Implements the inference/loss/training pattern for model building. 19 | 20 | 1. inference() - Builds the model as far as is required for running the network 21 | forward to make predictions. 22 | 2. loss() - Adds to the inference model the layers required to generate loss. 23 | 3. training() - Adds to the loss model the Ops required to generate and 24 | apply gradients. 25 | 26 | This file is used by the various "fully_connected_*.py" files and not meant to 27 | be run. 28 | """ 29 | from __future__ import absolute_import 30 | from __future__ import division 31 | from __future__ import print_function 32 | 33 | import math 34 | 35 | import tensorflow as tf 36 | 37 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 38 | NUM_CLASSES = 10 39 | 40 | # The MNIST images are always 28x28 pixels. 41 | IMAGE_SIZE = 28 42 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 43 | 44 | 45 | def inference(images, hidden1_units, hidden2_units): 46 | """Build the MNIST model up to where it may be used for inference. 47 | 48 | Args: 49 | images: Images placeholder, from inputs(). 50 | hidden1_units: Size of the first hidden layer. 51 | hidden2_units: Size of the second hidden layer. 52 | 53 | Returns: 54 | softmax_linear: Output tensor with the computed logits. 55 | """ 56 | # Hidden 1 57 | with tf.name_scope('hidden1'): 58 | weights = tf.Variable( 59 | tf.truncated_normal([IMAGE_PIXELS, hidden1_units], 60 | stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))), 61 | name='weights') 62 | biases = tf.Variable(tf.zeros([hidden1_units]), 63 | name='biases') 64 | hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases) 65 | # Hidden 2 66 | with tf.name_scope('hidden2'): 67 | weights = tf.Variable( 68 | tf.truncated_normal([hidden1_units, hidden2_units], 69 | stddev=1.0 / math.sqrt(float(hidden1_units))), 70 | name='weights') 71 | biases = tf.Variable(tf.zeros([hidden2_units]), 72 | name='biases') 73 | hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases) 74 | # Linear 75 | with tf.name_scope('softmax_linear'): 76 | weights = tf.Variable( 77 | tf.truncated_normal([hidden2_units, NUM_CLASSES], 78 | stddev=1.0 / math.sqrt(float(hidden2_units))), 79 | name='weights') 80 | biases = tf.Variable(tf.zeros([NUM_CLASSES]), 81 | name='biases') 82 | logits = tf.matmul(hidden2, weights) + biases 83 | return logits 84 | 85 | 86 | def loss(logits, labels): 87 | """Calculates the loss from the logits and the labels. 88 | 89 | Args: 90 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 91 | labels: Labels tensor, int32 - [batch_size]. 92 | 93 | Returns: 94 | loss: Loss tensor of type float. 95 | """ 96 | labels = tf.to_int64(labels) 97 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 98 | labels=labels, logits=logits, name='xentropy') 99 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') 100 | return loss 101 | 102 | 103 | def training(loss, learning_rate): 104 | """Sets up the training Ops. 105 | 106 | Creates a summarizer to track the loss over time in TensorBoard. 107 | 108 | Creates an optimizer and applies the gradients to all trainable variables. 109 | 110 | The Op returned by this function is what must be passed to the 111 | `sess.run()` call to cause the model to train. 112 | 113 | Args: 114 | loss: Loss tensor, from loss().
115 | learning_rate: The learning rate to use for gradient descent. 116 | 117 | Returns: 118 | train_op: The Op for training. 119 | """ 120 | # Add a scalar summary for the snapshot loss. 121 | tf.summary.scalar('loss', loss) 122 | # Create the gradient descent optimizer with the given learning rate. 123 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 124 | # Create a variable to track the global step. 125 | global_step = tf.Variable(0, name='global_step', trainable=False) 126 | # Use the optimizer to apply the gradients that minimize the loss 127 | # (and also increment the global step counter) as a single training step. 128 | train_op = optimizer.minimize(loss, global_step=global_step) 129 | return train_op 130 | 131 | 132 | def evaluation(logits, labels): 133 | """Evaluate the quality of the logits at predicting the label. 134 | 135 | Args: 136 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 137 | labels: Labels tensor, int32 - [batch_size], with values in the 138 | range [0, NUM_CLASSES). 139 | 140 | Returns: 141 | A scalar int32 tensor with the number of examples (out of batch_size) 142 | that were predicted correctly. 143 | """ 144 | # For a classifier model, we can use the in_top_k Op. 145 | # It returns a bool tensor with shape [batch_size] that is true for 146 | # the examples where the label is in the top k (here k=1) 147 | # of all logits for that example. 148 | correct = tf.nn.in_top_k(logits, labels, 1) 149 | # Return the number of true entries. 150 | return tf.reduce_sum(tf.cast(correct, tf.int32)) 151 | -------------------------------------------------------------------------------- /mnist/mnist_conv.py: -------------------------------------------------------------------------------- 1 | """A convolutional neural network for MNIST classification. 
2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import sys 9 | 10 | # Import data 11 | from tensorflow.examples.tutorials.mnist import input_data 12 | 13 | import tensorflow as tf 14 | 15 | FLAGS = None 16 | 17 | 18 | def weight_variable(shape): 19 | initial = tf.truncated_normal(shape, stddev=0.1) 20 | return tf.Variable(initial) 21 | 22 | 23 | def bias_variable(shape): 24 | initial = tf.constant(0.1, shape=shape) 25 | return tf.Variable(initial) 26 | 27 | 28 | def conv2d(x, W): 29 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 30 | 31 | 32 | def max_pool_2x2(x): 33 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], 34 | strides=[1, 2, 2, 1], padding='SAME') 35 | 36 | 37 | def main(_): 38 | mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) 39 | 40 | # Create the model 41 | x = tf.placeholder(tf.float32, [None, 784]) 42 | 43 | # Define loss and optimizer 44 | y_ = tf.placeholder(tf.float32, [None, 10]) 45 | 46 | x_image = tf.reshape(x, [-1, 28, 28, 1]) 47 | 48 | W_conv1 = weight_variable([5, 5, 1, 32]) 49 | b_conv1 = bias_variable([32]) 50 | 51 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) 52 | h_pool1 = max_pool_2x2(h_conv1) 53 | 54 | W_conv2 = weight_variable([5, 5, 32, 64]) 55 | b_conv2 = bias_variable([64]) 56 | 57 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 58 | h_pool2 = max_pool_2x2(h_conv2) 59 | 60 | W_fc1 = weight_variable([7 * 7 * 64, 1024]) 61 | b_fc1 = bias_variable([1024]) 62 | 63 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) 64 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) 65 | 66 | keep_prob = tf.placeholder(tf.float32) 67 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) 68 | 69 | W_fc2 = weight_variable([1024, 10]) 70 | b_fc2 = bias_variable([10]) 71 | 72 | y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 73 | 74 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)) 75 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 76 | correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) 77 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 78 | 79 | sess = tf.InteractiveSession() 80 | sess.run(tf.global_variables_initializer()) 81 | 82 | for i in range(20000): 83 | batch = mnist.train.next_batch(50) 84 | if i % 100 == 0: 85 | train_accuracy = accuracy.eval(feed_dict={ 86 | x: mnist.validation.images, y_: mnist.validation.labels, keep_prob: 1.0}) 87 | print("step %d, training accuracy %g" % (i, train_accuracy)) 88 | train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5}) 89 | 90 | print("test accuracy %g" % accuracy.eval(feed_dict={ 91 | x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})) 92 | 93 | if __name__ == '__main__': 94 | parser = argparse.ArgumentParser() 95 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', 96 | help='Directory for storing input data') 97 | FLAGS, unparsed = parser.parse_known_args() 98 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 99 |
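mnist_conv.py evaluates accuracy with a single feed of the full test set, which can exhaust GPU memory for larger models. A sketch of batched evaluation, reusing `x`, `y_`, `keep_prob`, and `correct_prediction` from `main` above (the helper name is illustrative):

```python
# Sketch: sum correct predictions over test batches instead of one big feed.
num_correct = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

def batched_test_accuracy(sess, mnist, batch=1000):
    total, n = 0.0, mnist.test.num_examples
    for i in range(0, n, batch):
        total += sess.run(num_correct, feed_dict={
            x: mnist.test.images[i:i + batch],
            y_: mnist.test.labels[i:i + batch],
            keep_prob: 1.0})
    return total / n
```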
-------------------------------------------------------------------------------- /mnist/mnist_simple.py: -------------------------------------------------------------------------------- 1 | """Builds the MNIST network. 2 | 3 | Simplifies the MNIST model-building work. 4 | 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import math 11 | 12 | import tensorflow as tf 13 | 14 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 15 | NUM_CLASSES = 10 16 | 17 | # The MNIST images are always 28x28 pixels. 18 | IMAGE_SIZE = 28 19 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 20 | 21 | 22 | def variable_summaries(var, name): 23 | """Attach a lot of summaries to a Tensor.""" 24 | with tf.name_scope('summaries'): 25 | mean = tf.reduce_mean(var) 26 | tf.summary.scalar('mean/' + name, mean) 27 | with tf.name_scope('stddev'): 28 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 29 | tf.summary.scalar('stddev/' + name, stddev) 30 | tf.summary.scalar('max/' + name, tf.reduce_max(var)) 31 | tf.summary.scalar('min/' + name, tf.reduce_min(var)) 32 | tf.summary.histogram(name, var) 33 | 34 | 35 | def nn_layer(input_tensor, input_dim, output_dim, layer_name): 36 | with tf.name_scope(layer_name): 37 | weights = tf.Variable( 38 | tf.truncated_normal([input_dim, output_dim], 39 | stddev=1.0 / math.sqrt(float(input_dim))), 40 | name='weights') 41 | variable_summaries(weights, layer_name + '/weights') 42 | biases = tf.Variable(tf.zeros([output_dim]), name='biases') 43 | variable_summaries(biases, layer_name + '/biases') 44 | return tf.nn.relu(tf.matmul(input_tensor, weights) + biases) 45 | 46 | 47 | def inference(images, hidden1_units, hidden2_units): 48 | """Build the MNIST model up to where it may be used for inference. 49 | 50 | Args: 51 | images: Images placeholder, from inputs(). 52 | hidden1_units: Size of the first hidden layer. 53 | hidden2_units: Size of the second hidden layer. 54 | 55 | Returns: 56 | softmax_linear: Output tensor with the computed logits. 57 | """ 58 | hidden1 = nn_layer(images, IMAGE_PIXELS, hidden1_units, 'layer1') 59 | hidden2 = nn_layer(hidden1, hidden1_units, hidden2_units, 'layer2') 60 | logits = nn_layer(hidden2, hidden2_units, NUM_CLASSES, 'layer3') 61 | return logits 62 | 63 | 64 | def loss(logits, labels): 65 | """Calculates the loss from the logits and the labels. 66 | 67 | Args: 68 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 69 | labels: Labels tensor, int32 - [batch_size]. 70 | 71 | Returns: 72 | loss: Loss tensor of type float. 73 | """ 74 | labels = tf.to_int64(labels) 75 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 76 | labels=labels, logits=logits, name='xentropy') 77 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') 78 | return loss 79 | 80 | 81 | def training(loss, learning_rate): 82 | """Sets up the training Ops. 83 | 84 | Creates a summarizer to track the loss over time in TensorBoard. 85 | 86 | Creates an optimizer and applies the gradients to all trainable variables. 87 | 88 | The Op returned by this function is what must be passed to the 89 | `sess.run()` call to cause the model to train. 90 | 91 | Args: 92 | loss: Loss tensor, from loss(). 93 | learning_rate: The learning rate to use for gradient descent. 94 | 95 | Returns: 96 | train_op: The Op for training. 97 | """ 98 | # Add a scalar summary for the snapshot loss. 99 | tf.summary.scalar('loss', loss) 100 | # Create the gradient descent optimizer with the given learning rate. 101 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 102 | # Create a variable to track the global step.
103 | global_step = tf.Variable(0, name='global_step', trainable=False) 104 | # Use the optimizer to apply the gradients that minimize the loss 105 | # (and also increment the global step counter) as a single training step. 106 | train_op = optimizer.minimize(loss, global_step=global_step) 107 | return train_op 108 | 109 | 110 | def evaluation(logits, labels): 111 | """Evaluate the quality of the logits at predicting the label. 112 | 113 | Args: 114 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 115 | labels: Labels tensor, int32 - [batch_size], with values in the 116 | range [0, NUM_CLASSES). 117 | 118 | Returns: 119 | A scalar int32 tensor with the number of examples (out of batch_size) 120 | that were predicted correctly. 121 | """ 122 | # For a classifier model, we can use the in_top_k Op. 123 | # It returns a bool tensor with shape [batch_size] that is true for 124 | # the examples where the label is in the top k (here k=1) 125 | # of all logits for that example. 126 | correct = tf.nn.in_top_k(logits, labels, 1) 127 | # Return the number of true entries. 128 | return tf.reduce_sum(tf.cast(correct, tf.int32)) 129 | -------------------------------------------------------------------------------- /mnist/mnist_softmax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A very simple MNIST classifier. 17 | 18 | See extensive documentation at 19 | http://tensorflow.org/tutorials/mnist/beginners/index.md 20 | """ 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import argparse 26 | import sys 27 | 28 | # Import data 29 | from tensorflow.examples.tutorials.mnist import input_data 30 | 31 | import tensorflow as tf 32 | 33 | FLAGS = None 34 | 35 | 36 | def main(_): 37 | mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) 38 | 39 | # Create the model 40 | x = tf.placeholder(tf.float32, [None, 784]) 41 | W = tf.Variable(tf.zeros([784, 10])) 42 | b = tf.Variable(tf.zeros([10])) 43 | y = tf.matmul(x, W) + b 44 | 45 | # Define loss and optimizer 46 | y_ = tf.placeholder(tf.float32, [None, 10]) 47 | 48 | # The raw formulation of cross-entropy, 49 | # 50 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)), 51 | # reduction_indices=[1])) 52 | # 53 | # can be numerically unstable. 54 | # 55 | # So here we use tf.nn.softmax_cross_entropy_with_logits on the raw 56 | # outputs of 'y', and then average across the batch. 
57 | cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_)) 58 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 59 | 60 | sess = tf.InteractiveSession() 61 | # Train 62 | tf.global_variables_initializer().run() 63 | for _ in range(1000): 64 | batch_xs, batch_ys = mnist.train.next_batch(100) 65 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) 66 | 67 | # Test trained model 68 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 69 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 70 | print(sess.run(accuracy, feed_dict={x: mnist.test.images, 71 | y_: mnist.test.labels})) 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', 76 | help='Directory for storing input data') 77 | FLAGS, unparsed = parser.parse_known_args() 78 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 79 | -------------------------------------------------------------------------------- /mnist/mnist_with_summaries.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the 'License'); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an 'AS IS' BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A simple MNIST classifier which displays summaries in TensorBoard. 16 | 17 | This is an unimpressive MNIST model, but it is a good example of using 18 | tf.name_scope to make a graph legible in the TensorBoard graph explorer, and of 19 | naming summary tags so that they are grouped meaningfully in TensorBoard. 20 | 21 | It demonstrates the functionality of every TensorBoard dashboard. 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import argparse 28 | import sys 29 | 30 | import tensorflow as tf 31 | 32 | from tensorflow.examples.tutorials.mnist import input_data 33 | 34 | FLAGS = None 35 | 36 | 37 | def train(): 38 | # Import data 39 | mnist = input_data.read_data_sets(FLAGS.data_dir, 40 | one_hot=True, 41 | fake_data=FLAGS.fake_data) 42 | 43 | sess = tf.InteractiveSession() 44 | # Create a multilayer model. 45 | 46 | # Input placeholders 47 | with tf.name_scope('input'): 48 | x = tf.placeholder(tf.float32, [None, 784], name='x-input') 49 | y_ = tf.placeholder(tf.float32, [None, 10], name='y-input') 50 | 51 | with tf.name_scope('input_reshape'): 52 | image_shaped_input = tf.reshape(x, [-1, 28, 28, 1]) 53 | tf.summary.image('input', image_shaped_input, 10) 54 | 55 | # We can't initialize these variables to 0 - the network will get stuck. 
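# Why zero initialization gets stuck: with identical (all-zero) weights, every
# hidden unit in a layer computes the same activation and therefore receives
# the same gradient update, so the units remain identical forever and the
# layer collapses to a single effective neuron. Drawing the initial weights
# from a truncated normal, as below, breaks this symmetry; biases can safely
# start at a small constant.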
56 | def weight_variable(shape): 57 | """Create a weight variable with appropriate initialization.""" 58 | initial = tf.truncated_normal(shape, stddev=0.1) 59 | return tf.Variable(initial) 60 | 61 | def bias_variable(shape): 62 | """Create a bias variable with appropriate initialization.""" 63 | initial = tf.constant(0.1, shape=shape) 64 | return tf.Variable(initial) 65 | 66 | def variable_summaries(var): 67 | """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" 68 | with tf.name_scope('summaries'): 69 | mean = tf.reduce_mean(var) 70 | tf.summary.scalar('mean', mean) 71 | with tf.name_scope('stddev'): 72 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 73 | tf.summary.scalar('stddev', stddev) 74 | tf.summary.scalar('max', tf.reduce_max(var)) 75 | tf.summary.scalar('min', tf.reduce_min(var)) 76 | tf.summary.histogram('histogram', var) 77 | 78 | def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu): 79 | """Reusable code for making a simple neural net layer. 80 | 81 | It does a matrix multiply, bias add, and then uses relu to nonlinearize. 82 | It also sets up name scoping so that the resultant graph is easy to read, 83 | and adds a number of summary ops. 84 | """ 85 | # Adding a name scope ensures logical grouping of the layers in the graph. 86 | with tf.name_scope(layer_name): 87 | # This Variable will hold the state of the weights for the layer 88 | with tf.name_scope('weights'): 89 | weights = weight_variable([input_dim, output_dim]) 90 | variable_summaries(weights) 91 | with tf.name_scope('biases'): 92 | biases = bias_variable([output_dim]) 93 | variable_summaries(biases) 94 | with tf.name_scope('Wx_plus_b'): 95 | preactivate = tf.matmul(input_tensor, weights) + biases 96 | tf.summary.histogram('pre_activations', preactivate) 97 | activations = act(preactivate, name='activation') 98 | tf.summary.histogram('activations', activations) 99 | return activations 100 | 101 | hidden1 = nn_layer(x, 784, 500, 'layer1') 102 | 103 | with tf.name_scope('dropout'): 104 | keep_prob = tf.placeholder(tf.float32) 105 | tf.summary.scalar('dropout_keep_probability', keep_prob) 106 | dropped = tf.nn.dropout(hidden1, keep_prob) 107 | 108 | # Do not apply softmax activation yet, see below. 109 | y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity) 110 | 111 | with tf.name_scope('cross_entropy'): 112 | # The raw formulation of cross-entropy, 113 | # 114 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)), 115 | # reduction_indices=[1])) 116 | # 117 | # can be numerically unstable. 118 | # 119 | # So here we use tf.nn.softmax_cross_entropy_with_logits on the 120 | # raw outputs of the nn_layer above, and then average across 121 | # the batch. 
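# Compatibility note for the call below: TensorFlow 1.x made labels and logits
# keyword-only, so on 1.x the line must read
#   diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
# The positional (y, y_) form only works on the 0.x releases.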
122 | diff = tf.nn.softmax_cross_entropy_with_logits(y, y_) 123 | with tf.name_scope('total'): 124 | cross_entropy = tf.reduce_mean(diff) 125 | tf.summary.scalar('cross_entropy', cross_entropy) 126 | 127 | with tf.name_scope('train'): 128 | train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize( 129 | cross_entropy) 130 | 131 | with tf.name_scope('accuracy'): 132 | with tf.name_scope('correct_prediction'): 133 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 134 | with tf.name_scope('accuracy'): 135 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 136 | tf.summary.scalar('accuracy', accuracy) 137 | 138 | # Merge all the summaries and write them out to /tmp/mnist_logs (by default) 139 | merged = tf.summary.merge_all() 140 | train_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/train', 141 | sess.graph) 142 | test_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/test') 143 | tf.global_variables_initializer().run() 144 | 145 | # Train the model, and also write summaries. 146 | # Every 10th step, measure test-set accuracy, and write test summaries 147 | # All other steps, run train_step on training data, & add training summaries 148 | 149 | def feed_dict(train): 150 | """Make a TensorFlow feed_dict: maps data onto Tensor placeholders.""" 151 | if train or FLAGS.fake_data: 152 | xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data) 153 | k = FLAGS.dropout 154 | else: 155 | xs, ys = mnist.test.images, mnist.test.labels 156 | k = 1.0 157 | return {x: xs, y_: ys, keep_prob: k} 158 | 159 | for i in range(FLAGS.max_steps): 160 | if i % 10 == 0: # Record summaries and test-set accuracy 161 | summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False)) 162 | test_writer.add_summary(summary, i) 163 | print('Accuracy at step %s: %s' % (i, acc)) 164 | else: # Record train set summaries, and train 165 | if i % 100 == 99: # Record execution stats 166 | run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) 167 | run_metadata = tf.RunMetadata() 168 | summary, _ = sess.run([merged, train_step], 169 | feed_dict=feed_dict(True), 170 | options=run_options, 171 | run_metadata=run_metadata) 172 | train_writer.add_run_metadata(run_metadata, 'step%03d' % i) 173 | train_writer.add_summary(summary, i) 174 | print('Adding run metadata for', i) 175 | else: # Record a summary 176 | summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True)) 177 | train_writer.add_summary(summary, i) 178 | train_writer.close() 179 | test_writer.close() 180 | 181 | 182 | def main(_): 183 | if tf.gfile.Exists(FLAGS.log_dir): 184 | tf.gfile.DeleteRecursively(FLAGS.log_dir) 185 | tf.gfile.MakeDirs(FLAGS.log_dir) 186 | train() 187 | 188 | 189 | if __name__ == '__main__': 190 | parser = argparse.ArgumentParser() 191 | parser.add_argument('--fake_data', nargs='?', const=True, type=bool, 192 | default=False, 193 | help='If true, uses fake data for unit testing.') 194 | parser.add_argument('--max_steps', type=int, default=1000, 195 | help='Number of steps to run trainer.') 196 | parser.add_argument('--learning_rate', type=float, default=0.001, 197 | help='Initial learning rate') 198 | parser.add_argument('--dropout', type=float, default=0.9, 199 | help='Keep probability for training dropout.') 200 | parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', 201 | help='Directory for storing input data') 202 | parser.add_argument('--log_dir', type=str, default='/tmp/tensorflow/mnist/logs/mnist_with_summaries', 203 | 
help='Summaries log directory') 204 | FLAGS, unparsed = parser.parse_known_args() 205 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 206 | -------------------------------------------------------------------------------- /mnist/mnist_with_summary.py: -------------------------------------------------------------------------------- 1 | """Builds the MNIST network. 2 | 3 | Simplify the MNIST model building work. 4 | 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import math 11 | 12 | import tensorflow as tf 13 | 14 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 15 | NUM_CLASSES = 10 16 | 17 | # The MNIST images are always 28x28 pixels. 18 | IMAGE_SIZE = 28 19 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 20 | 21 | 22 | def variable_summaries(var, name): 23 | """Attach a lot of summaries to a Tensor.""" 24 | with tf.name_scope('summaries'): 25 | mean = tf.reduce_mean(var) 26 | tf.scalar_summary('mean/' + name, mean) 27 | with tf.name_scope('stddev'): 28 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 29 | tf.scalar_summary('stddev/' + name, stddev) 30 | tf.scalar_summary('max/' + name, tf.reduce_max(var)) 31 | tf.scalar_summary('min/' + name, tf.reduce_min(var)) 32 | tf.histogram_summary(name, var) 33 | 34 | 35 | def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu): 36 | """Reusable code for making a simple neural net layer. 37 | 38 | It does a matrix multiply, bias add, and then uses relu to nonlinearize. 39 | It also sets up name scoping so that the resultant graph is easy to read, 40 | and adds a number of summary ops. 41 | """ 42 | # Adding a name scope ensures logical grouping of the layers in the graph. 
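# Note: this module is a pared-down sketch rather than a runnable script --
# weight_variable and bias_variable are assumed to be the initializer helpers
# defined in mnist_with_summaries.py above, and x, y_, sess and FLAGS are
# assumed to be supplied by a surrounding training script; none of them are
# defined in this file.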
43 | with tf.name_scope(layer_name): 44 | # This Variable will hold the state of the weights for the layer 45 | with tf.name_scope('weights'): 46 | weights = weight_variable([input_dim, output_dim]) 47 | variable_summaries(weights, layer_name + '/weights') 48 | with tf.name_scope('biases'): 49 | biases = bias_variable([output_dim]) 50 | variable_summaries(biases, layer_name + '/biases') 51 | with tf.name_scope('Wx_plus_b'): 52 | preactivate = tf.matmul(input_tensor, weights) + biases 53 | tf.histogram_summary(layer_name + '/pre_activations', preactivate) 54 | activations = act(preactivate, 'activation') 55 | tf.histogram_summary(layer_name + '/activations', activations) 56 | return activations 57 | 58 | hidden1 = nn_layer(x, 784, 500, 'layer1') 59 | 60 | with tf.name_scope('dropout'): 61 | keep_prob = tf.placeholder(tf.float32) 62 | tf.scalar_summary('dropout_keep_probability', keep_prob) 63 | dropped = tf.nn.dropout(hidden1, keep_prob) 64 | 65 | y = nn_layer(dropped, 500, 10, 'layer2', act=tf.nn.softmax) 66 | 67 | with tf.name_scope('cross_entropy'): 68 | diff = y_ * tf.log(y) 69 | with tf.name_scope('total'): 70 | cross_entropy = -tf.reduce_mean(diff) 71 | tf.scalar_summary('cross entropy', cross_entropy) 72 | 73 | with tf.name_scope('train'): 74 | train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cross_entropy) 75 | 76 | with tf.name_scope('accuracy'): 77 | with tf.name_scope('correct_prediction'): 78 | correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) 79 | with tf.name_scope('accuracy'): 80 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 81 | tf.scalar_summary('accuracy', accuracy) 82 | 83 | # Merge all the summaries and write them out to /tmp/mnist_logs (by default) 84 | merged = tf.merge_all_summaries() 85 | train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train', 86 | sess.graph) 87 | test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test') 88 | tf.initialize_all_variables().run() 89 | -------------------------------------------------------------------------------- /reading/capsnet/drbc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/reading/capsnet/drbc.pdf -------------------------------------------------------------------------------- /self_driving/README.md: -------------------------------------------------------------------------------- 1 | Machine Learning 2 | ================ 3 | 4 | Welcome to my blog [听雨居](https://limengweb.wordpress.com). It contains detailed description of the code here. -------------------------------------------------------------------------------- /self_driving/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/__init__.py -------------------------------------------------------------------------------- /self_driving/lane_detect/README.md: -------------------------------------------------------------------------------- 1 | [基于OpenCV的车道分割线提取](https://limengweb.wordpress.com/2017/08/19/%E5%9F%BA%E4%BA%8Eopencv%E7%9A%84%E8%BD%A6%E9%81%93%E5%88%86%E5%89%B2%E7%BA%BF%E6%8F%90%E5%8F%96/) 2 |
3 | <img src="lane_detect.png"/>
4 |
5 | -------------------------------------------------------------------------------- /self_driving/lane_detect/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/lane_detect/__init__.py -------------------------------------------------------------------------------- /self_driving/lane_detect/comma_ai_lane_detect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | from self_driving.optical_flow.python import video 5 | from scipy import misc 6 | 7 | 8 | def color_seg(img_raw, red_thresh=0, green_thresh=0, blue_thresh=0): 9 | img_color_mask = np.copy(img_raw) 10 | red_mask = img_raw[:,:,0] < red_thresh 11 | green_mask = img_raw[:,:,1] < green_thresh 12 | rgb_mask = np.logical_or(red_mask, green_mask) 13 | img_color_mask[rgb_mask] = [0,0,0] 14 | return img_color_mask 15 | 16 | 17 | def draw_lines(img, lines, color=[255, 0, 0], thickness=2): 18 | for line in lines: 19 | for x1,y1,x2,y2 in line: 20 | cv2.line(img, (x1, y1), (x2, y2), color, thickness) 21 | 22 | 23 | def draw_lines_extrapolate(img, lines, color=[255, 0, 0], thickness=2): 24 | # Assume lines on left and right have opposite signed slopes 25 | left_xs = [] 26 | left_ys = [] 27 | right_xs = [] 28 | right_ys = [] 29 | for line in lines: 30 | for x1, y1, x2, y2 in line: 31 | if x2 - x1 == 0: continue; # Infinite slope 32 | slope = float(y2-y1) / float(x2-x1) 33 | if .5 <= abs(slope) < 1.0: # Discard unlikely slopes 34 | if slope > 0: 35 | left_xs.extend([x1, x2]) 36 | left_ys.extend([y1, y2]) 37 | else: 38 | right_xs.extend([x1, x2]) 39 | right_ys.extend([y1, y2]) 40 | 41 | y1 = img.shape[0] - 120 # Bottom of image 42 | y2 = img.shape[0] / 2 + 10 # Middle of view 43 | y1 = int(y1); y2 = int(y2); 44 | 45 | if left_xs and left_ys: 46 | left_fit = np.polyfit(left_xs, left_ys, 1) 47 | x1_left = (y1 - left_fit[1]) / left_fit[0] 48 | x2_left = (y2 - left_fit[1]) / left_fit[0] 49 | x1_left = int(x1_left); x2_left = int(x2_left); 50 | cv2.line(img, (x1_left, y1), (x2_left, y2), color, thickness) 51 | 52 | if right_xs and right_ys: 53 | right_fit = np.polyfit(right_xs, right_ys, 1) 54 | x1_right = (y1 - right_fit[1]) / right_fit[0] 55 | x2_right = (y2 - right_fit[1]) / right_fit[0] 56 | x1_right = int(x1_right); x2_right = int(x2_right); 57 | cv2.line(img, (x1_right, y1), (x2_right, y2), color, thickness) 58 | 59 | 60 | if __name__ == '__main__': 61 | try: 62 | fn = sys.argv[1] 63 | except IndexError: 64 | fn = 0 65 | cam = video.create_capture(fn) 66 | index = 0 67 | while True: 68 | ret, img = cam.read() 69 | 70 | if img is None: 71 | break 72 | 73 | rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 74 | 75 | gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY) 76 | 77 | # Define a kernel size and apply Gaussian smoothing 78 | kernel_size = 3 79 | blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0) 80 | 81 | # Define our parameters for Canny and apply 82 | low_threshold = 50 83 | high_threshold = 150 84 | edges = cv2.Canny(blur_gray, low_threshold, high_threshold) 85 | 86 | # Next we'll create a masked edges image using cv2.fillPoly() 87 | mask = np.zeros_like(edges) 88 | ignore_mask_color = 255 89 | 90 | # This time we are defining a four sided polygon to mask 91 | imshape = img.shape 92 | vertices = np.array([[(0, imshape[0] - 120), 93 | (imshape[1] / 2 - 80, imshape[0] / 2 + 10), 94 | (imshape[1] / 2 
+ 80, imshape[0] / 2 + 10), 95 | (imshape[1], imshape[0] - 120)]], 96 | dtype=np.int32) 97 | cv2.fillPoly(mask, vertices, ignore_mask_color) 98 | masked_edges = cv2.bitwise_and(edges, mask) 99 | 100 | # Define the Hough transform parameters 101 | # Make a blank the same size as our image to draw on 102 | rho = 1 # distance resolution in pixels of the Hough grid 103 | theta = np.pi / 180 # angular resolution in radians of the Hough grid 104 | threshold = 5 # minimum number of votes (intersections in Hough grid cell) 105 | min_line_length = 10 # minimum number of pixels making up a line 106 | max_line_gap = 2 # maximum gap in pixels between connectable line segments 107 | line_image = np.copy(img) * 0 # creating a blank to draw lines on 108 | 109 | # Run Hough on edge detected image 110 | # Output "lines" is an array containing endpoints of detected line segments 111 | lines = cv2.HoughLinesP(masked_edges, rho, theta, threshold, np.array([]), 112 | min_line_length, max_line_gap) 113 | 114 | if lines is None: 115 | continue 116 | 117 | draw_lines_extrapolate(line_image, lines, thickness=8) 118 | #draw_lines(line_image, lines, thickness=8) 119 | 120 | # Create a "color" binary image to combine with line image 121 | color_edges = np.dstack((masked_edges, masked_edges, masked_edges)) 122 | 123 | # Draw the lines on the edge image 124 | lines_edges = cv2.addWeighted(rgb, 1, line_image, 1, 0) 125 | misc.imsave(sys.argv[2] + 'frame_%d.png' % index, lines_edges) 126 | index += 1 127 | cv2.destroyAllWindows() 128 | -------------------------------------------------------------------------------- /self_driving/lane_detect/lane_detect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/lane_detect/lane_detect.png -------------------------------------------------------------------------------- /self_driving/lane_detect/udacity_lane_detect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import sys 4 | from self_driving.optical_flow.python import video 5 | from scipy import misc 6 | 7 | 8 | def color_seg(img_raw, red_thresh=0, green_thresh=0, blue_thresh=0): 9 | img_color_mask = np.copy(img_raw) 10 | red_mask = img_raw[:,:,0] < red_thresh 11 | green_mask = img_raw[:,:,1] < green_thresh 12 | rgb_mask = np.logical_or(red_mask, green_mask) 13 | img_color_mask[rgb_mask] = [0,0,0] 14 | return img_color_mask 15 | 16 | 17 | def draw_lines_extrapolate(img, lines, color=[255, 0, 0], thickness=2): 18 | # Assume lines on left and right have opposite signed slopes 19 | left_xs = [] 20 | left_ys = [] 21 | right_xs = [] 22 | right_ys = [] 23 | for line in lines: 24 | for x1, y1, x2, y2 in line: 25 | if x2 - x1 == 0: continue; # Infinite slope 26 | slope = float(y2-y1) / float(x2-x1) 27 | if .5 <= abs(slope) < 1.0: # Discard unlikely slopes 28 | if slope > 0: 29 | left_xs.extend([x1, x2]) 30 | left_ys.extend([y1, y2]) 31 | else: 32 | right_xs.extend([x1, x2]) 33 | right_ys.extend([y1, y2]) 34 | 35 | left_fit = np.polyfit(left_xs, left_ys, 1) 36 | right_fit = np.polyfit(right_xs, right_ys, 1) 37 | 38 | y1 = img.shape[0] # Bottom of image 39 | y2 = img.shape[0] / 2+ 50 # Middle of view 40 | x1_left = (y1 - left_fit[1]) / left_fit[0] 41 | x2_left = (y2 - left_fit[1]) / left_fit[0] 42 | x1_right = (y1 - right_fit[1]) / right_fit[0] 43 | x2_right = (y2 - right_fit[1]) / right_fit[0] 44 | y1 = int(y1); y2 = 
int(y2); 45 | x1_left = int(x1_left); x2_left = int(x2_left); 46 | x1_right = int(x1_right); x2_right = int(x2_right); 47 | 48 | cv2.line(img, (x1_left, y1), (x2_left, y2), color, thickness) 49 | cv2.line(img, (x1_right, y1), (x2_right, y2), color, thickness) 50 | 51 | 52 | if __name__ == '__main__': 53 | try: 54 | fn = sys.argv[1] 55 | except IndexError: 56 | fn = 0 57 | cam = video.create_capture(fn) 58 | index = 0 59 | while True: 60 | ret, img = cam.read() 61 | 62 | if img is None: 63 | break 64 | 65 | rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 66 | masked_img = color_seg(rgb, red_thresh=200, green_thresh=150, blue_thresh=0) 67 | 68 | gray = cv2.cvtColor(masked_img, cv2.COLOR_RGB2GRAY) 69 | 70 | # Define a kernel size and apply Gaussian smoothing 71 | kernel_size = 5 72 | blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0) 73 | 74 | # Define our parameters for Canny and apply 75 | low_threshold = 50 76 | high_threshold = 150 77 | edges = cv2.Canny(blur_gray, low_threshold, high_threshold) 78 | 79 | # Next we'll create a masked edges image using cv2.fillPoly() 80 | mask = np.zeros_like(edges) 81 | ignore_mask_color = 255 82 | 83 | # This time we are defining a four sided polygon to mask 84 | imshape = img.shape 85 | vertices = np.array([[(0 + 120, imshape[0]), 86 | (imshape[1] / 2 - 15, imshape[0] / 2 + 40), 87 | (imshape[1] / 2 + 15, imshape[0] / 2 + 40), 88 | (imshape[1] - 50, imshape[0])]], 89 | dtype=np.int32) 90 | cv2.fillPoly(mask, vertices, ignore_mask_color) 91 | masked_edges = cv2.bitwise_and(edges, mask) 92 | 93 | # Define the Hough transform parameters 94 | # Make a blank the same size as our image to draw on 95 | rho = 1 # distance resolution in pixels of the Hough grid 96 | theta = np.pi / 180 # angular resolution in radians of the Hough grid 97 | threshold = 5 # minimum number of votes (intersections in Hough grid cell) 98 | min_line_length = 10 # minimum number of pixels making up a line 99 | max_line_gap = 2 # maximum gap in pixels between connectable line segments 100 | line_image = np.copy(img) * 0 # creating a blank to draw lines on 101 | 102 | # Run Hough on edge detected image 103 | # Output "lines" is an array containing endpoints of detected line segments 104 | lines = cv2.HoughLinesP(masked_edges, rho, theta, threshold, np.array([]), 105 | min_line_length, max_line_gap) 106 | 107 | if lines is None: 108 | continue 109 | 110 | draw_lines_extrapolate(line_image, lines, thickness=8) 111 | 112 | # Draw the lines on the edge image 113 | lines_edges = cv2.addWeighted(rgb, 1, line_image, 1, 0) 114 | misc.imsave(sys.argv[2] + 'frame_%d.png' % index, lines_edges) 115 | index += 1 116 | cv2.destroyAllWindows() 117 | -------------------------------------------------------------------------------- /self_driving/optical_flow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/optical_flow/__init__.py -------------------------------------------------------------------------------- /self_driving/optical_flow/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/optical_flow/python/__init__.py -------------------------------------------------------------------------------- /self_driving/optical_flow/python/common.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | This module contains some common routines used by other samples. 5 | ''' 6 | 7 | # Python 2/3 compatibility 8 | from __future__ import print_function 9 | import sys 10 | PY3 = sys.version_info[0] == 3 11 | 12 | if PY3: 13 | from functools import reduce 14 | 15 | import numpy as np 16 | import cv2 17 | 18 | # built-in modules 19 | import os 20 | import itertools as it 21 | from contextlib import contextmanager 22 | 23 | image_extensions = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.pbm', '.pgm', '.ppm'] 24 | 25 | class Bunch(object): 26 | def __init__(self, **kw): 27 | self.__dict__.update(kw) 28 | def __str__(self): 29 | return str(self.__dict__) 30 | 31 | def splitfn(fn): 32 | path, fn = os.path.split(fn) 33 | name, ext = os.path.splitext(fn) 34 | return path, name, ext 35 | 36 | def anorm2(a): 37 | return (a*a).sum(-1) 38 | def anorm(a): 39 | return np.sqrt( anorm2(a) ) 40 | 41 | def homotrans(H, x, y): 42 | xs = H[0, 0]*x + H[0, 1]*y + H[0, 2] 43 | ys = H[1, 0]*x + H[1, 1]*y + H[1, 2] 44 | s = H[2, 0]*x + H[2, 1]*y + H[2, 2] 45 | return xs/s, ys/s 46 | 47 | def to_rect(a): 48 | a = np.ravel(a) 49 | if len(a) == 2: 50 | a = (0, 0, a[0], a[1]) 51 | return np.array(a, np.float64).reshape(2, 2) 52 | 53 | def rect2rect_mtx(src, dst): 54 | src, dst = to_rect(src), to_rect(dst) 55 | cx, cy = (dst[1] - dst[0]) / (src[1] - src[0]) 56 | tx, ty = dst[0] - src[0] * (cx, cy) 57 | M = np.float64([[ cx, 0, tx], 58 | [ 0, cy, ty], 59 | [ 0, 0, 1]]) 60 | return M 61 | 62 | 63 | def lookat(eye, target, up = (0, 0, 1)): 64 | fwd = np.asarray(target, np.float64) - eye 65 | fwd /= anorm(fwd) 66 | right = np.cross(fwd, up) 67 | right /= anorm(right) 68 | down = np.cross(fwd, right) 69 | R = np.float64([right, down, fwd]) 70 | tvec = -np.dot(R, eye) 71 | return R, tvec 72 | 73 | def mtx2rvec(R): 74 | w, u, vt = cv2.SVDecomp(R - np.eye(3)) 75 | p = vt[0] + u[:,0]*w[0] # same as np.dot(R, vt[0]) 76 | c = np.dot(vt[0], p) 77 | s = np.dot(vt[1], p) 78 | axis = np.cross(vt[0], vt[1]) 79 | return axis * np.arctan2(s, c) 80 | 81 | def draw_str(dst, target, s): 82 | x, y = target 83 | cv2.putText(dst, s, (x+1, y+1), cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 0), thickness = 2, lineType=cv2.LINE_AA) 84 | cv2.putText(dst, s, (x, y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), lineType=cv2.LINE_AA) 85 | 86 | class Sketcher: 87 | def __init__(self, windowname, dests, colors_func): 88 | self.prev_pt = None 89 | self.windowname = windowname 90 | self.dests = dests 91 | self.colors_func = colors_func 92 | self.dirty = False 93 | self.show() 94 | cv2.setMouseCallback(self.windowname, self.on_mouse) 95 | 96 | def show(self): 97 | cv2.imshow(self.windowname, self.dests[0]) 98 | 99 | def on_mouse(self, event, x, y, flags, param): 100 | pt = (x, y) 101 | if event == cv2.EVENT_LBUTTONDOWN: 102 | self.prev_pt = pt 103 | elif event == cv2.EVENT_LBUTTONUP: 104 | self.prev_pt = None 105 | 106 | if self.prev_pt and flags & cv2.EVENT_FLAG_LBUTTON: 107 | for dst, color in zip(self.dests, self.colors_func()): 108 | cv2.line(dst, self.prev_pt, pt, color, 5) 109 | self.dirty = True 110 | self.prev_pt = pt 111 | self.show() 112 | 113 | 114 | # palette data from matplotlib/_cm.py 115 | _jet_data = {'red': ((0., 0, 0), (0.35, 0, 0), (0.66, 1, 1), (0.89,1, 1), 116 | (1, 0.5, 0.5)), 117 | 'green': ((0., 0, 0), (0.125,0, 0), (0.375,1, 1), (0.64,1, 1), 118 | (0.91,0,0), (1, 0, 0)), 119 | 'blue': ((0., 0.5, 0.5), (0.11, 1, 
1), (0.34, 1, 1), (0.65,0, 0), 120 | (1, 0, 0))} 121 | 122 | cmap_data = { 'jet' : _jet_data } 123 | 124 | def make_cmap(name, n=256): 125 | data = cmap_data[name] 126 | xs = np.linspace(0.0, 1.0, n) 127 | channels = [] 128 | eps = 1e-6 129 | for ch_name in ['blue', 'green', 'red']: 130 | ch_data = data[ch_name] 131 | xp, yp = [], [] 132 | for x, y1, y2 in ch_data: 133 | xp += [x, x+eps] 134 | yp += [y1, y2] 135 | ch = np.interp(xs, xp, yp) 136 | channels.append(ch) 137 | return np.uint8(np.array(channels).T*255) 138 | 139 | def nothing(*arg, **kw): 140 | pass 141 | 142 | def clock(): 143 | return cv2.getTickCount() / cv2.getTickFrequency() 144 | 145 | @contextmanager 146 | def Timer(msg): 147 | print(msg, '...',) 148 | start = clock() 149 | try: 150 | yield 151 | finally: 152 | print("%.2f ms" % ((clock()-start)*1000)) 153 | 154 | class StatValue: 155 | def __init__(self, smooth_coef = 0.5): 156 | self.value = None 157 | self.smooth_coef = smooth_coef 158 | def update(self, v): 159 | if self.value is None: 160 | self.value = v 161 | else: 162 | c = self.smooth_coef 163 | self.value = c * self.value + (1.0-c) * v 164 | 165 | class RectSelector: 166 | def __init__(self, win, callback): 167 | self.win = win 168 | self.callback = callback 169 | cv2.setMouseCallback(win, self.onmouse) 170 | self.drag_start = None 171 | self.drag_rect = None 172 | def onmouse(self, event, x, y, flags, param): 173 | x, y = np.int16([x, y]) # BUG 174 | if event == cv2.EVENT_LBUTTONDOWN: 175 | self.drag_start = (x, y) 176 | return 177 | if self.drag_start: 178 | if flags & cv2.EVENT_FLAG_LBUTTON: 179 | xo, yo = self.drag_start 180 | x0, y0 = np.minimum([xo, yo], [x, y]) 181 | x1, y1 = np.maximum([xo, yo], [x, y]) 182 | self.drag_rect = None 183 | if x1-x0 > 0 and y1-y0 > 0: 184 | self.drag_rect = (x0, y0, x1, y1) 185 | else: 186 | rect = self.drag_rect 187 | self.drag_start = None 188 | self.drag_rect = None 189 | if rect: 190 | self.callback(rect) 191 | def draw(self, vis): 192 | if not self.drag_rect: 193 | return False 194 | x0, y0, x1, y1 = self.drag_rect 195 | cv2.rectangle(vis, (x0, y0), (x1, y1), (0, 255, 0), 2) 196 | return True 197 | @property 198 | def dragging(self): 199 | return self.drag_rect is not None 200 | 201 | 202 | def grouper(n, iterable, fillvalue=None): 203 | '''grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx''' 204 | args = [iter(iterable)] * n 205 | if PY3: 206 | output = it.zip_longest(fillvalue=fillvalue, *args) 207 | else: 208 | output = it.izip_longest(fillvalue=fillvalue, *args) 209 | return output 210 | 211 | def mosaic(w, imgs): 212 | '''Make a grid from images. 
213 | 214 | w -- number of grid columns 215 | imgs -- images (must have same size and format) 216 | ''' 217 | imgs = iter(imgs) 218 | if PY3: 219 | img0 = next(imgs) 220 | else: 221 | img0 = imgs.next() 222 | pad = np.zeros_like(img0) 223 | imgs = it.chain([img0], imgs) 224 | rows = grouper(w, imgs, pad) 225 | return np.vstack(map(np.hstack, rows)) 226 | 227 | def getsize(img): 228 | h, w = img.shape[:2] 229 | return w, h 230 | 231 | def mdot(*args): 232 | return reduce(np.dot, args) 233 | 234 | def draw_keypoints(vis, keypoints, color = (0, 255, 255)): 235 | for kp in keypoints: 236 | x, y = kp.pt 237 | cv2.circle(vis, (int(x), int(y)), 2, color) 238 | -------------------------------------------------------------------------------- /self_driving/optical_flow/python/opt_flow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Python 2/3 compatibility 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import cv2 8 | import video 9 | from scipy import misc 10 | 11 | NUM_FRAME = 20400 12 | 13 | 14 | def draw_flow(img, flow, step=16): 15 | h, w = img.shape[:2] 16 | y, x = np.mgrid[step / 2:h:step, step / 2:w:step].reshape(2, -1).astype(int) 17 | fx, fy = flow[y, x].T 18 | lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2) 19 | lines = np.int32(lines + 0.5) 20 | vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 21 | cv2.polylines(vis, lines, 0, (0, 255, 0)) 22 | for (x1, y1), (x2, y2) in lines: 23 | cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1) 24 | return vis 25 | 26 | 27 | def draw_hsv(flow): 28 | h, w = flow.shape[:2] 29 | fx, fy = flow[:, :, 0], flow[:, :, 1] 30 | ang = np.arctan2(fy, fx) + np.pi 31 | v = np.sqrt(fx * fx + fy * fy) 32 | hsv = np.zeros((h, w, 3), np.uint8) 33 | hsv[..., 0] = ang * (180 / np.pi / 2) 34 | hsv[..., 1] = 255 35 | hsv[..., 2] = np.minimum(v * 4, 255) 36 | bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) 37 | return bgr 38 | 39 | 40 | def warp_flow(img, flow): 41 | h, w = flow.shape[:2] 42 | flow = -flow 43 | flow[:, :, 0] += np.arange(w) 44 | flow[:, :, 1] += np.arange(h)[:, np.newaxis] 45 | res = cv2.remap(img, flow, None, cv2.INTER_LINEAR) 46 | return res 47 | 48 | 49 | if __name__ == '__main__': 50 | import sys 51 | 52 | print(__doc__) 53 | try: 54 | fn = sys.argv[1] 55 | except IndexError: 56 | fn = 0 57 | 58 | fr = NUM_FRAME 59 | cam = video.create_capture(fn) 60 | ret, prev = cam.read() 61 | fr -= 1 62 | prevgray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY) 63 | show_hsv = False 64 | show_glitch = False 65 | cur_glitch = prev.copy() 66 | index = 0 67 | 68 | while fr > 0: 69 | ret, img = cam.read() 70 | fr -= 1 71 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 72 | flow = cv2.calcOpticalFlowFarneback(prevgray, gray, 0.5, 3, 15, 3, 5, 1.2, 0) 73 | prevgray = gray 74 | 75 | hsv = draw_hsv(flow)[120:420, 70:-70] 76 | misc.imsave('/usr/local/google/home/limeng/Downloads/speed_est/data/train_data/frame_%d.png' % index, hsv) 77 | cv2.imshow('flow HSV', hsv) 78 | index += 1 79 | cv2.destroyAllWindows() 80 | -------------------------------------------------------------------------------- /self_driving/optical_flow/python/tst_scene_render.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Python 2/3 compatibility 5 | from __future__ import print_function 6 | 7 | import numpy as np 8 | from numpy import pi, sin, cos 9 | 10 | import cv2 11 | 12 | defaultSize = 512 13 | 14 | class TestSceneRender(): 15 | 16 | def 
__init__(self, bgImg = None, fgImg = None, 17 | deformation = False, speed = 0.25, **params): 18 | self.time = 0.0 19 | self.timeStep = 1.0 / 30.0 20 | self.foreground = fgImg 21 | self.deformation = deformation 22 | self.speed = speed 23 | 24 | if bgImg is not None: 25 | self.sceneBg = bgImg.copy() 26 | else: 27 | self.sceneBg = np.zeros(defaultSize, defaultSize, np.uint8) 28 | 29 | self.w = self.sceneBg.shape[0] 30 | self.h = self.sceneBg.shape[1] 31 | 32 | if fgImg is not None: 33 | self.foreground = fgImg.copy() 34 | self.center = self.currentCenter = (int(self.w/2 - fgImg.shape[0]/2), int(self.h/2 - fgImg.shape[1]/2)) 35 | 36 | self.xAmpl = self.sceneBg.shape[0] - (self.center[0] + fgImg.shape[0]) 37 | self.yAmpl = self.sceneBg.shape[1] - (self.center[1] + fgImg.shape[1]) 38 | 39 | self.initialRect = np.array([ (self.h/2, self.w/2), (self.h/2, self.w/2 + self.w/10), 40 | (self.h/2 + self.h/10, self.w/2 + self.w/10), (self.h/2 + self.h/10, self.w/2)]).astype(int) 41 | self.currentRect = self.initialRect 42 | 43 | def getXOffset(self, time): 44 | return int( self.xAmpl*cos(time*self.speed)) 45 | 46 | 47 | def getYOffset(self, time): 48 | return int(self.yAmpl*sin(time*self.speed)) 49 | 50 | def setInitialRect(self, rect): 51 | self.initialRect = rect 52 | 53 | def getRectInTime(self, time): 54 | 55 | if self.foreground is not None: 56 | tmp = np.array(self.center) + np.array((self.getXOffset(time), self.getYOffset(time))) 57 | x0, y0 = tmp 58 | x1, y1 = tmp + self.foreground.shape[0:2] 59 | return np.array([y0, x0, y1, x1]) 60 | else: 61 | x0, y0 = self.initialRect[0] + np.array((self.getXOffset(time), self.getYOffset(time))) 62 | x1, y1 = self.initialRect[2] + np.array((self.getXOffset(time), self.getYOffset(time))) 63 | return np.array([y0, x0, y1, x1]) 64 | 65 | def getCurrentRect(self): 66 | 67 | if self.foreground is not None: 68 | 69 | x0 = self.currentCenter[0] 70 | y0 = self.currentCenter[1] 71 | x1 = self.currentCenter[0] + self.foreground.shape[0] 72 | y1 = self.currentCenter[1] + self.foreground.shape[1] 73 | return np.array([y0, x0, y1, x1]) 74 | else: 75 | x0, y0 = self.currentRect[0] 76 | x1, y1 = self.currentRect[2] 77 | return np.array([x0, y0, x1, y1]) 78 | 79 | def getNextFrame(self): 80 | img = self.sceneBg.copy() 81 | 82 | if self.foreground is not None: 83 | self.currentCenter = (self.center[0] + self.getXOffset(self.time), self.center[1] + self.getYOffset(self.time)) 84 | img[self.currentCenter[0]:self.currentCenter[0]+self.foreground.shape[0], 85 | self.currentCenter[1]:self.currentCenter[1]+self.foreground.shape[1]] = self.foreground 86 | else: 87 | self.currentRect = self.initialRect + np.int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) 88 | if self.deformation: 89 | self.currentRect[1:3] += self.h/20*cos(self.time) 90 | cv2.fillConvexPoly(img, self.currentRect, (0, 0, 255)) 91 | 92 | self.time += self.timeStep 93 | return img 94 | 95 | def resetTime(self): 96 | self.time = 0.0 97 | 98 | 99 | if __name__ == '__main__': 100 | 101 | backGr = cv2.imread('../data/graf1.png') 102 | fgr = cv2.imread('../data/box.png') 103 | 104 | render = TestSceneRender(backGr, fgr) 105 | 106 | while True: 107 | 108 | img = render.getNextFrame() 109 | cv2.imshow('img', img) 110 | 111 | ch = cv2.waitKey(3) 112 | if ch == 27: 113 | break 114 | #import os 115 | #print (os.environ['PYTHONPATH']) 116 | cv2.destroyAllWindows() 117 | -------------------------------------------------------------------------------- /self_driving/optical_flow/python/video.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Video capture sample. 5 | 6 | Sample shows how VideoCapture class can be used to acquire video 7 | frames from a camera of a movie file. Also the sample provides 8 | an example of procedural video generation by an object, mimicking 9 | the VideoCapture interface (see Chess class). 10 | 11 | 'create_capture' is a convinience function for capture creation, 12 | falling back to procedural video in case of error. 13 | 14 | Usage: 15 | video.py [--shotdir ] [source0] [source1] ...' 16 | 17 | sourceN is an 18 | - integer number for camera capture 19 | - name of video file 20 | - synth: for procedural video 21 | 22 | Synth examples: 23 | synth:bg=../data/lena.jpg:noise=0.1 24 | synth:class=chess:bg=../data/lena.jpg:noise=0.1:size=640x480 25 | 26 | Keys: 27 | ESC - exit 28 | SPACE - save current frame to directory 29 | 30 | ''' 31 | 32 | # Python 2/3 compatibility 33 | from __future__ import print_function 34 | 35 | import numpy as np 36 | from numpy import pi, sin, cos 37 | 38 | import cv2 39 | 40 | # built-in modules 41 | from time import clock 42 | 43 | # local modules 44 | from tst_scene_render import TestSceneRender 45 | import common 46 | 47 | 48 | class VideoSynthBase(object): 49 | def __init__(self, size=None, noise=0.0, bg=None, **params): 50 | self.bg = None 51 | self.frame_size = (640, 480) 52 | if bg is not None: 53 | self.bg = cv2.imread(bg, 1) 54 | h, w = self.bg.shape[:2] 55 | self.frame_size = (w, h) 56 | 57 | if size is not None: 58 | w, h = map(int, size.split('x')) 59 | self.frame_size = (w, h) 60 | self.bg = cv2.resize(self.bg, self.frame_size) 61 | 62 | self.noise = float(noise) 63 | 64 | def render(self, dst): 65 | pass 66 | 67 | def read(self, dst=None): 68 | w, h = self.frame_size 69 | 70 | if self.bg is None: 71 | buf = np.zeros((h, w, 3), np.uint8) 72 | else: 73 | buf = self.bg.copy() 74 | 75 | self.render(buf) 76 | 77 | if self.noise > 0.0: 78 | noise = np.zeros((h, w, 3), np.int8) 79 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise) 80 | buf = cv2.add(buf, noise, dtype=cv2.CV_8UC3) 81 | return True, buf 82 | 83 | def isOpened(self): 84 | return True 85 | 86 | 87 | class Book(VideoSynthBase): 88 | def __init__(self, **kw): 89 | super(Book, self).__init__(**kw) 90 | backGr = cv2.imread('../data/graf1.png') 91 | fgr = cv2.imread('../data/box.png') 92 | self.render = TestSceneRender(backGr, fgr, speed=1) 93 | 94 | def read(self, dst=None): 95 | noise = np.zeros(self.render.sceneBg.shape, np.int8) 96 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise) 97 | 98 | return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3) 99 | 100 | 101 | class Cube(VideoSynthBase): 102 | def __init__(self, **kw): 103 | super(Cube, self).__init__(**kw) 104 | self.render = TestSceneRender(cv2.imread('../data/pca_test1.jpg'), deformation=True, speed=1) 105 | 106 | def read(self, dst=None): 107 | noise = np.zeros(self.render.sceneBg.shape, np.int8) 108 | cv2.randn(noise, np.zeros(3), np.ones(3) * 255 * self.noise) 109 | 110 | return True, cv2.add(self.render.getNextFrame(), noise, dtype=cv2.CV_8UC3) 111 | 112 | 113 | class Chess(VideoSynthBase): 114 | def __init__(self, **kw): 115 | super(Chess, self).__init__(**kw) 116 | 117 | w, h = self.frame_size 118 | 119 | self.grid_size = sx, sy = 10, 7 120 | white_quads = [] 121 | black_quads = [] 122 | for i, j in np.ndindex(sy, sx): 123 | q = [[j, i, 0], [j + 1, i, 0], [j + 1, i + 1, 0], [j, i 
+ 1, 0]] 124 | [white_quads, black_quads][(i + j) % 2].append(q) 125 | self.white_quads = np.float32(white_quads) 126 | self.black_quads = np.float32(black_quads) 127 | 128 | fx = 0.9 129 | self.K = np.float64([[fx * w, 0, 0.5 * (w - 1)], 130 | [0, fx * w, 0.5 * (h - 1)], 131 | [0.0, 0.0, 1.0]]) 132 | 133 | self.dist_coef = np.float64([-0.2, 0.1, 0, 0]) 134 | self.t = 0 135 | 136 | def draw_quads(self, img, quads, color=(0, 255, 0)): 137 | img_quads = cv2.projectPoints(quads.reshape(-1, 3), self.rvec, self.tvec, self.K, self.dist_coef)[0] 138 | img_quads.shape = quads.shape[:2] + (2,) 139 | for q in img_quads: 140 | cv2.fillConvexPoly(img, np.int32(q * 4), color, cv2.LINE_AA, shift=2) 141 | 142 | def render(self, dst): 143 | t = self.t 144 | self.t += 1.0 / 30.0 145 | 146 | sx, sy = self.grid_size 147 | center = np.array([0.5 * sx, 0.5 * sy, 0.0]) 148 | phi = pi / 3 + sin(t * 3) * pi / 8 149 | c, s = cos(phi), sin(phi) 150 | ofs = np.array([sin(1.2 * t), cos(1.8 * t), 0]) * sx * 0.2 151 | eye_pos = center + np.array([cos(t) * c, sin(t) * c, s]) * 15.0 + ofs 152 | target_pos = center + ofs 153 | 154 | R, self.tvec = common.lookat(eye_pos, target_pos) 155 | self.rvec = common.mtx2rvec(R) 156 | 157 | self.draw_quads(dst, self.white_quads, (245, 245, 245)) 158 | self.draw_quads(dst, self.black_quads, (10, 10, 10)) 159 | 160 | 161 | classes = dict(chess=Chess, book=Book, cube=Cube) 162 | 163 | presets = dict( 164 | empty='synth:', 165 | lena='synth:bg=../data/lena.jpg:noise=0.1', 166 | chess='synth:class=chess:bg=../data/lena.jpg:noise=0.1:size=640x480', 167 | book='synth:class=book:bg=../data/graf1.png:noise=0.1:size=640x480', 168 | cube='synth:class=cube:bg=../data/pca_test1.jpg:noise=0.0:size=640x480' 169 | ) 170 | 171 | 172 | def create_capture(source=0, fallback=presets['chess']): 173 | '''source: or '||synth [:= [:...]]' 174 | ''' 175 | source = str(source).strip() 176 | chunks = source.split(':') 177 | # handle drive letter ('c:', ...) 
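# e.g. source 'c:/video/clip.avi' splits on ':' into ['c', '/video/clip.avi'];
# the branch below re-joins a single alphabetic chunk with the next one so a
# Windows drive letter is not mistaken for a 'synth:'-style scheme prefix.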
178 | if len(chunks) > 1 and len(chunks[0]) == 1 and chunks[0].isalpha(): 179 | chunks[1] = chunks[0] + ':' + chunks[1] 180 | del chunks[0] 181 | 182 | source = chunks[0] 183 | try: 184 | source = int(source) 185 | except ValueError: 186 | pass 187 | params = dict(s.split('=') for s in chunks[1:]) 188 | 189 | cap = None 190 | if source == 'synth': 191 | Class = classes.get(params.get('class', None), VideoSynthBase) 192 | try: 193 | cap = Class(**params) 194 | except: 195 | pass 196 | else: 197 | cap = cv2.VideoCapture(source) 198 | if 'size' in params: 199 | w, h = map(int, params['size'].split('x')) 200 | cap.set(cv2.CAP_PROP_FRAME_WIDTH, w) 201 | cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h) 202 | if cap is None or not cap.isOpened(): 203 | print('Warning: unable to open video source: ', source) 204 | if fallback is not None: 205 | return create_capture(fallback, None) 206 | return cap 207 | 208 | 209 | if __name__ == '__main__': 210 | import sys 211 | import getopt 212 | 213 | print(__doc__) 214 | 215 | args, sources = getopt.getopt(sys.argv[1:], '', 'shotdir=') 216 | args = dict(args) 217 | shotdir = args.get('--shotdir', '.') 218 | if len(sources) == 0: 219 | sources = [0] 220 | 221 | caps = list(map(create_capture, sources)) 222 | shot_idx = 0 223 | while True: 224 | imgs = [] 225 | for i, cap in enumerate(caps): 226 | ret, img = cap.read() 227 | imgs.append(img) 228 | cv2.imshow('capture %d' % i, img) 229 | ch = cv2.waitKey(1) 230 | if ch == 27: 231 | break 232 | if ch == ord(' '): 233 | for i, img in enumerate(imgs): 234 | fn = '%s/shot_%d_%03d.bmp' % (shotdir, i, shot_idx) 235 | cv2.imwrite(fn, img) 236 | print(fn, 'saved') 237 | shot_idx += 1 238 | cv2.destroyAllWindows() 239 | -------------------------------------------------------------------------------- /self_driving/road_seg/README.md: -------------------------------------------------------------------------------- 1 | [利用全卷积网络进行车道识别](https://limengweb.wordpress.com/2017/05/03/%E5%88%A9%E7%94%A8%E5%85%A8%E5%8D%B7%E7%A7%AF%E7%BD%91%E7%BB%9C%E8%BF%9B%E8%A1%8C%E8%BD%A6%E9%81%93%E8%AF%86%E5%88%AB/) 2 |
3 | <img src="road_seg.png"/>
4 |
5 | -------------------------------------------------------------------------------- /self_driving/road_seg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/road_seg/__init__.py -------------------------------------------------------------------------------- /self_driving/road_seg/convnet.py: -------------------------------------------------------------------------------- 1 | """A full convolutional neural network for road segmentation. 2 | 3 | nohup python -u -m self_driving.road_seg.convnet > self_driving/road_seg/output.txt 2>&1 & 4 | 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import copy 11 | import tensorflow as tf 12 | from utils import kitti 13 | from self_driving.road_seg import fcn8_vgg 14 | import scipy as scp 15 | import scipy.misc 16 | import matplotlib as mpl 17 | import matplotlib.cm 18 | 19 | EPOCH = 5000 20 | N_cl = 2 21 | UU_TRAIN_SET_SIZE = 98 - 9 22 | UU_TEST_SET_SIZE = 9 23 | 24 | 25 | def _compute_cross_entropy_mean(labels, softmax): 26 | cross_entropy = -tf.reduce_sum( 27 | tf.multiply(labels * tf.log(softmax), [1, 1]), reduction_indices=[1]) 28 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='xentropy_mean') 29 | return cross_entropy_mean 30 | 31 | 32 | def loss(logits, labels): 33 | with tf.name_scope('loss'): 34 | labels = tf.to_float(tf.reshape(labels, (-1, 2))) 35 | logits = tf.reshape(logits, (-1, 2)) 36 | epsilon = 1e-9 37 | softmax = tf.nn.softmax(logits) + epsilon 38 | 39 | cross_entropy_mean = _compute_cross_entropy_mean(labels, softmax) 40 | 41 | enc_loss = tf.add_n(tf.get_collection('losses'), name='total_loss') 42 | dec_loss = tf.add_n(tf.get_collection('dec_losses'), name='total_loss') 43 | fc_loss = tf.add_n(tf.get_collection('fc_wlosses'), name='total_loss') 44 | weight_loss = enc_loss + dec_loss + fc_loss 45 | 46 | total_loss = cross_entropy_mean + weight_loss 47 | 48 | losses = {} 49 | losses['total_loss'] = total_loss 50 | losses['xentropy'] = cross_entropy_mean 51 | losses['weight_loss'] = weight_loss 52 | 53 | return losses 54 | 55 | 56 | def f1_score(logits, labels): 57 | true_labels = tf.to_float(tf.reshape(labels, (-1, 2)))[:, 1] 58 | pred = tf.to_float(tf.reshape(logits, [-1])) 59 | 60 | true_positives = tf.reduce_sum(pred * true_labels) 61 | false_positives = tf.reduce_sum(pred * (1 - true_labels)) 62 | 63 | precision = true_positives / (true_positives + false_positives) 64 | recall = true_positives / tf.reduce_sum(labels) 65 | 66 | f1_score = 2 * precision * recall / (precision + recall) 67 | 68 | return f1_score, precision, recall 69 | 70 | 71 | def learning_rate(global_step): 72 | starter_learning_rate = 1e-5 73 | learning_rate_1 = tf.train.exponential_decay( 74 | starter_learning_rate, global_step, EPOCH * 0.2, 0.1, staircase=True) 75 | learning_rate_2 = tf.train.exponential_decay( 76 | learning_rate_1, global_step, EPOCH * 0.4, 0.5, staircase=True) 77 | decayed_learning_rate = tf.train.exponential_decay( 78 | learning_rate_2, global_step, EPOCH * 0.6, 0.8, staircase=True) 79 | tf.summary.scalar('learning_rate', decayed_learning_rate) 80 | return decayed_learning_rate 81 | 82 | 83 | def color_image(image, num_classes=20): 84 | norm = mpl.colors.Normalize(vmin=0., vmax=num_classes) 85 | mycm = mpl.cm.get_cmap('Set1') 86 | return mycm(norm(image)) 87 | 88 | 89 | def save_output(index, 
training_image, prediction, label): 90 | prediction_label = 1 - prediction[0] 91 | output_image = copy.copy(training_image) 92 | # Save prediction 93 | up_color = color_image(prediction[0], 2) 94 | scp.misc.imsave('output/decision_%d.png' % index, up_color) 95 | # Merge true positive with training images' green channel 96 | true_positive = prediction_label * label[..., 0][0] 97 | merge_green = (1 - true_positive) * training_image[..., 1] + true_positive * 255 98 | output_image[..., 1] = merge_green 99 | # Merge false positive with training images' red channel 100 | false_positive = prediction_label * label[..., 1][0] 101 | merge_red = (1 - false_positive) * training_image[..., 0] + false_positive * 255 102 | output_image[..., 0] = merge_red 103 | # Merge false negative with training images' blue channel 104 | false_negative = (1 - prediction_label) * label[..., 0][0] 105 | merge_blue = (1 - false_negative) * training_image[..., 2] + false_negative * 255 106 | output_image[..., 2] = merge_blue 107 | # Save images 108 | scp.misc.imsave('merge/decision_%d.png' % index, output_image) 109 | 110 | 111 | def main(_): 112 | kitti_data = kitti.Kitti() 113 | 114 | x_image = tf.placeholder(tf.float32, [1, None, None, 3]) 115 | y_ = tf.placeholder(tf.float32, [1, None, None, N_cl]) 116 | 117 | tf.summary.image("images", x_image, max_outputs=1) 118 | 119 | vgg_fcn = fcn8_vgg.FCN8VGG(vgg16_npy_path="data/vgg16.npy") 120 | vgg_fcn.build(x_image, debug=True, num_classes=N_cl) 121 | 122 | losses = loss(vgg_fcn.upscore32, y_) 123 | f1, precision, recall = f1_score(vgg_fcn.pred_up, y_) 124 | total_loss = losses['total_loss'] 125 | tf.summary.scalar("Loss", total_loss) 126 | tf.summary.scalar("F1 Score", f1) 127 | tf.summary.scalar("Precision", precision) 128 | tf.summary.scalar("Recall", recall) 129 | 130 | global_step = tf.Variable(0, trainable=False) 131 | lr = learning_rate(global_step) 132 | optimizer = tf.train.AdamOptimizer(lr) 133 | grads_and_vars = optimizer.compute_gradients(total_loss) 134 | 135 | grads, tvars = zip(*grads_and_vars) 136 | clipped_grads, norm = tf.clip_by_global_norm(grads, 1.0) 137 | grads_and_vars = zip(clipped_grads, tvars) 138 | 139 | train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step) 140 | 141 | sess = tf.InteractiveSession() 142 | merged = tf.summary.merge_all() 143 | train_writer = tf.summary.FileWriter('train', sess.graph) 144 | sess.run(tf.global_variables_initializer()) 145 | 146 | for i in range(EPOCH): 147 | print("step %d" % i) 148 | t_img, t_label = kitti_data.next_batch(i % UU_TRAIN_SET_SIZE) 149 | pred, _ = sess.run([vgg_fcn.pred_up, train_step], 150 | feed_dict={x_image: t_img, y_: t_label}) 151 | if i % 5 == 0: 152 | for test_index in range(UU_TEST_SET_SIZE): 153 | test_img, test_label = kitti_data.next_batch(test_index + UU_TRAIN_SET_SIZE) 154 | pred, summary = sess.run([vgg_fcn.pred_up, merged], 155 | feed_dict={x_image: test_img, y_: test_label}) 156 | save_output(test_index + UU_TRAIN_SET_SIZE, test_img[0], pred, test_label) 157 | train_writer.add_summary(summary, i) 158 | 159 | 160 | if __name__ == '__main__': 161 | tf.app.run(main=main) -------------------------------------------------------------------------------- /self_driving/road_seg/road_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/road_seg/road_seg.png 
-------------------------------------------------------------------------------- /self_driving/road_seg/test_fcn8_vgg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import scipy as scp 4 | import scipy.misc 5 | import matplotlib as mpl 6 | import matplotlib.cm 7 | import logging 8 | import tensorflow as tf 9 | import sys 10 | import fcn8_vgg 11 | 12 | 13 | def main(_): 14 | logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', 15 | level=logging.INFO, 16 | stream=sys.stdout) 17 | img1 = scp.misc.imread("/Users/limeng/Downloads/kitti/data_road/training/image_2/uu_000000.png") 18 | with tf.Session() as sess: 19 | images = tf.placeholder("float") 20 | feed_dict = {images: img1} 21 | batch_images = tf.expand_dims(images, 0) 22 | 23 | vgg_fcn = fcn8_vgg.FCN8VGG(vgg16_npy_path="/Users/limeng/Downloads/vgg16.npy") 24 | with tf.name_scope("content_vgg"): 25 | vgg_fcn.build(batch_images, debug=True, num_classes=2) 26 | 27 | print('Finished building Network.') 28 | 29 | logging.warning("Score weights are initialized random.") 30 | logging.warning("Do not expect meaningful results.") 31 | 32 | logging.info("Start Initializing Variabels.") 33 | 34 | init = tf.global_variables_initializer() 35 | sess.run(init) 36 | 37 | print('Running the Network') 38 | tensors = [vgg_fcn.pred, vgg_fcn.pred_up] 39 | down, up = sess.run(tensors, feed_dict=feed_dict) 40 | 41 | down_color = color_image(down[0], 2) 42 | up_color = color_image(up[0], 2) 43 | 44 | scp.misc.imsave('fcn8_downsampled.png', down_color) 45 | scp.misc.imsave('fcn8_upsampled.png', up_color) 46 | 47 | 48 | def color_image(image, num_classes=20): 49 | norm = mpl.colors.Normalize(vmin=0., vmax=num_classes) 50 | mycm = mpl.cm.get_cmap('Set1') 51 | return mycm(norm(image)) 52 | 53 | 54 | if __name__ == '__main__': 55 | tf.app.run(main=main) 56 | -------------------------------------------------------------------------------- /self_driving/road_seg/unet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from keras.models import Model 3 | from keras.layers import Input 4 | from keras.layers.core import Dropout 5 | from keras.layers.convolutional import Convolution2D, Deconvolution2D, MaxPooling2D, ZeroPadding2D 6 | 7 | 8 | N_cl = 2 9 | C = 32 10 | 11 | 12 | def get_model(): 13 | # KITTI data set. 
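# Note on the Input() call below: Keras' Input(shape=...) expects the
# per-sample shape without the batch axis (the batch dimension is prepended
# automatically), so the leading None makes this a 5-D tensor. For
# channels-first KITTI frames the intended shape is presumably
# (3, 375, 1242), i.e. (channels, rows, cols).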
14 |     # Keras' Input shape excludes the batch dimension (channels-first layout).
15 |     main_input = Input(shape=(3, 1242, 375), dtype='float32', name='kitti_data')
16 | 
17 |     conv1_1 = ZeroPadding2D((10, 10))(main_input)
18 |     conv1_1 = Convolution2D(64, 3, 3, activation='relu')(conv1_1)  # 1260 * 393 * 64
19 |     conv1_2 = ZeroPadding2D((1, 1))(conv1_1)
20 |     conv1_2 = Convolution2D(64, 3, 3, activation='relu')(conv1_2)  # 1260 * 393 * 64
21 |     pool1 = ZeroPadding2D((0, 1))(conv1_2)  # 1260 * 394 * 64
22 |     pool1 = MaxPooling2D((2, 2), strides=(2, 2))(pool1)  # 630 * 197 * 64
23 | 
24 |     conv2_1 = ZeroPadding2D((1, 1))(pool1)
25 |     conv2_1 = Convolution2D(128, 3, 3, activation='relu')(conv2_1)  # 630 * 197 * 128
26 |     conv2_2 = ZeroPadding2D((1, 1))(conv2_1)
27 |     conv2_2 = Convolution2D(128, 3, 3, activation='relu')(conv2_2)  # 630 * 197 * 128
28 |     pool2 = ZeroPadding2D((0, 1))(conv2_2)  # 630 * 198 * 128
29 |     pool2 = MaxPooling2D((2, 2), strides=(2, 2))(pool2)  # 315 * 99 * 128
30 | 
31 |     conv3_1 = ZeroPadding2D((1, 1))(pool2)
32 |     conv3_1 = Convolution2D(256, 3, 3, activation='relu')(conv3_1)  # 315 * 99 * 256
33 |     conv3_2 = ZeroPadding2D((1, 1))(conv3_1)
34 |     conv3_2 = Convolution2D(256, 3, 3, activation='relu')(conv3_2)  # 315 * 99 * 256
35 |     conv3_3 = ZeroPadding2D((1, 1))(conv3_2)
36 |     conv3_3 = Convolution2D(256, 3, 3, activation='relu')(conv3_3)  # 315 * 99 * 256
37 |     pool3 = ZeroPadding2D((1, 1))(conv3_3)  # 316 * 100 * 256
38 |     pool3 = MaxPooling2D((2, 2), strides=(2, 2))(pool3)  # 158 * 50 * 256
39 | 
40 |     conv4_1 = ZeroPadding2D((1, 1))(pool3)
41 |     conv4_1 = Convolution2D(512, 3, 3, activation='relu')(conv4_1)  # 158 * 50 * 512
42 |     conv4_2 = ZeroPadding2D((1, 1))(conv4_1)
43 |     conv4_2 = Convolution2D(512, 3, 3, activation='relu')(conv4_2)  # 158 * 50 * 512
44 |     conv4_3 = ZeroPadding2D((1, 1))(conv4_2)
45 |     conv4_3 = Convolution2D(512, 3, 3, activation='relu')(conv4_3)  # 158 * 50 * 512
46 |     pool4 = MaxPooling2D((2, 2), strides=(2, 2))(conv4_3)  # 79 * 25 * 512
47 | 
48 |     conv5_1 = ZeroPadding2D((1, 1))(pool4)
49 |     conv5_1 = Convolution2D(512, 3, 3, activation='relu')(conv5_1)  # 79 * 25 * 512
50 |     conv5_2 = ZeroPadding2D((1, 1))(conv5_1)
51 |     conv5_2 = Convolution2D(512, 3, 3, activation='relu')(conv5_2)  # 79 * 25 * 512
52 |     conv5_3 = ZeroPadding2D((1, 1))(conv5_2)
53 |     conv5_3 = Convolution2D(512, 3, 3, activation='relu')(conv5_3)  # 79 * 25 * 512
54 |     pool5 = ZeroPadding2D((1, 1))(conv5_3)  # 80 * 26 * 512
55 |     pool5 = MaxPooling2D((2, 2), strides=(2, 2))(pool5)  # 40 * 13 * 512
56 | 
57 |     # FC_conv1
58 |     fc6 = ZeroPadding2D((1, 1))(pool5)
59 |     fc6 = Convolution2D(1024, 3, 3, activation='relu')(fc6)  # 40 * 13 * 1024
60 |     fc6 = Dropout(0.5)(fc6)
61 |     # FC_conv2
62 |     fc7 = Convolution2D(1024, 1, 1, activation='relu')(fc6)  # 40 * 13 * 1024
63 |     fc7 = Dropout(0.5)(fc7)
64 | 
65 |     score_fc7 = Convolution2D(N_cl, 1, 1, activation='relu')(fc7)  # 40 * 13 * N_cl
66 |     score_fc7_up = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 80, 26))(score_fc7)
67 | 
68 |     # Scale the pool4 skip connection for compatibility. The arithmetic is
69 |     # wrapped in Lambda/merge layers so the Keras functional graph stays
70 |     # traceable (raw tf ops cannot be fed back into Keras layers).
71 |     scale_pool4 = Lambda(lambda t: t * 0.01)(pool4)
72 |     scale_pool4 = ZeroPadding2D((1, 1))(scale_pool4)  # 80 * 26 * 512
73 |     score_pool4 = Convolution2D(N_cl, 1, 1, activation='relu')(scale_pool4)  # 80 * 26 * N_cl
74 |     fuse_pool4 = merge([score_fc7_up, score_pool4], mode='sum')
75 |     score_pool4_up = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 158, 50))(fuse_pool4)
76 | 
77 |     # Scale the pool3 skip connection for compatibility.
78 |     scale_pool3 = Lambda(lambda t: t * 0.0001)(pool3)
79 |     score_pool3 = Convolution2D(N_cl, 1, 1, activation='relu')(scale_pool3)  # 158 * 50 * N_cl
80 |     fuse_pool3 = merge([score_pool4_up, score_pool3], mode='sum')
81 |     score = Deconvolution2D(N_cl, 3, 3, output_shape=(None, N_cl, 1242, 375))(fuse_pool3)
82 | 
83 |     model = Model(input=main_input, output=score)
84 | 
85 |     return model
86 | 
-------------------------------------------------------------------------------- /self_driving/segnet/README.md: --------------------------------------------------------------------------------
1 | [TensorFlow implementation of segmentation networks](https://limengweb.wordpress.com/2017/08/06/%E5%88%86%E5%89%B2%E7%BD%91%E7%BB%9C%E7%9A%84tensorflow%E5%AE%9E%E7%8E%B0/)
2 | 
3 |

4 |
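Training and evaluation entry points (taken from the module docstrings in this directory):

    nohup python -u -m self_driving.segnet.train > self_driving/segnet/output.txt 2>&1 &
    nohup python -u -m self_driving.segnet.evaluate > self_driving/segnet/output.txt 2>&1 &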
5 | -------------------------------------------------------------------------------- /self_driving/segnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/segnet/__init__.py -------------------------------------------------------------------------------- /self_driving/segnet/evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluate SegNet. 2 | 3 | nohup python -u -m self_driving.segnet.evaluate > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import camvid 10 | from scipy import misc 11 | 12 | LOG_DIR = 'save' 13 | BATCH_SIZE = 4 14 | EPOCH = 25 15 | IMAGE_HEIGHT = 720 16 | IMAGE_WIDTH = 960 17 | IMAGE_CHANNEL = 3 18 | NUM_CLASSES = 32 19 | 20 | test_dir = "/usr/local/google/home/limeng/Downloads/camvid/val.txt" 21 | 22 | colors = [ 23 | [64, 128, 64], # Animal 24 | [192, 0, 128], # Archway 25 | [0, 128, 192], # Bicyclist 26 | [0, 128, 64], # Bridge 27 | [128, 0, 0], # Building 28 | [64, 0, 128], # Car 29 | [64, 0, 192], # CartLuggagePram 30 | [192, 128, 64], # Child 31 | [192, 192, 128], # Column_Pole 32 | [64, 64, 128], # Fence 33 | [128, 0, 192], # LaneMkgsDriv 34 | [192, 0, 64], # LaneMkgsNonDriv 35 | [128, 128, 64], # Misc_Text 36 | [192, 0, 192], # MotorcycleScooter 37 | [128, 64, 64], # OtherMoving 38 | [64, 192, 128], # ParkingBlock 39 | [64, 64, 0], # Pedestrian 40 | [128, 64, 128], # Road 41 | [128, 128, 192], # RoadShoulder 42 | [0, 0, 192], # Sidewalk 43 | [192, 128, 128], # SignSymbol 44 | [128, 128, 128], # Sky 45 | [64, 128, 192], # SUVPickupTruck 46 | [0, 0, 64], # TrafficCone 47 | [0, 64, 64], # TrafficLight 48 | [192, 64, 128], # Train 49 | [128, 128, 0], # Tree 50 | [192, 128, 192], # Truck_Bus 51 | [64, 0, 64], # Tunnel 52 | [192, 192, 0], # VegetationMisc 53 | [0, 0, 0], # Void 54 | [64, 192, 0] # Wall 55 | ] 56 | 57 | 58 | def color_mask(tensor, color): 59 | return tf.reduce_all(tf.equal(tensor, color), 3) 60 | 61 | 62 | def one_hot(labels): 63 | color_tensors = tf.unstack(colors) 64 | channel_tensors = list(map(lambda color: color_mask(labels, color), color_tensors)) 65 | one_hot_labels = tf.cast(tf.stack(channel_tensors, 3), 'float32') 66 | return one_hot_labels 67 | 68 | 69 | def rgb(logits): 70 | softmax = tf.nn.softmax(logits) 71 | argmax = tf.argmax(softmax, 3) 72 | color_map = tf.constant(colors, dtype=tf.float32) 73 | n = color_map.get_shape().as_list()[0] 74 | one_hot = tf.one_hot(argmax, n, dtype=tf.float32) 75 | one_hot_matrix = tf.reshape(one_hot, [-1, n]) 76 | rgb_matrix = tf.matmul(one_hot_matrix, color_map) 77 | rgb_tensor = tf.reshape(rgb_matrix, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 3]) 78 | return tf.cast(rgb_tensor, tf.float32) 79 | 80 | 81 | def main(_): 82 | test_image_filenames, test_label_filenames = camvid.get_filename_list(test_dir) 83 | index = 0 84 | 85 | with tf.Graph().as_default(): 86 | with tf.device('/cpu:0'): 87 | config = tf.ConfigProto() 88 | config.gpu_options.allocator_type = 'BFC' 89 | sess = tf.InteractiveSession(config=config) 90 | 91 | images, labels = camvid.CamVidInputs(test_image_filenames, 92 | test_label_filenames, 93 | BATCH_SIZE, 94 | shuffle=False) 95 | 96 | saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "segnet.ckpt.meta")) 97 | saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR)) 98 | 99 | graph = tf.get_default_graph() 100 | train_data = 
graph.get_tensor_by_name("train_data:0") 101 | train_label = graph.get_tensor_by_name("train_labels:0") 102 | is_training = graph.get_tensor_by_name("is_training:0") 103 | logits = tf.get_collection("logits")[0] 104 | 105 | # Start the queue runners. 106 | coord = tf.train.Coordinator() 107 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 108 | 109 | for i in range(EPOCH): 110 | image_batch, label_batch = sess.run([images, labels]) 111 | feed_dict = { 112 | train_data: image_batch, 113 | train_label: label_batch, 114 | is_training: True 115 | } 116 | prediction = rgb(logits) 117 | pred = sess.run([prediction], feed_dict)[0] 118 | for batch in range(BATCH_SIZE): 119 | misc.imsave('output/segnet_camvid/decision_%d.png' % index, pred[batch]) 120 | misc.imsave('output/segnet_camvid/train_%d.png' % index, image_batch[batch]) 121 | index += 1 122 | 123 | coord.request_stop() 124 | coord.join(threads) 125 | 126 | 127 | if __name__ == '__main__': 128 | tf.app.run(main=main) 129 | -------------------------------------------------------------------------------- /self_driving/segnet/evaluate_kitti.py: -------------------------------------------------------------------------------- 1 | """Evaluate SegNet. 2 | 3 | nohup python -u -m self_driving.segnet.evaluate_kitti > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import kitti_segnet 10 | from scipy import misc 11 | 12 | LOG_DIR = 'backup/segnet_kitti' 13 | EPOCH = 237 14 | BATCH_SIZE = 1 15 | IMAGE_HEIGHT = 375 16 | IMAGE_WIDTH = 1242 17 | NUM_CLASSES = 2 18 | 19 | test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/test.txt" 20 | 21 | colors = [ 22 | [255, 0, 255], 23 | [255, 0, 0], 24 | ] 25 | 26 | def color_mask(tensor, color): 27 | return tf.reduce_all(tf.equal(tensor, color), 3) 28 | 29 | 30 | def one_hot(labels): 31 | color_tensors = tf.unstack(colors) 32 | channel_tensors = list(map(lambda color: color_mask(labels, color), color_tensors)) 33 | one_hot_labels = tf.cast(tf.stack(channel_tensors, 3), 'float32') 34 | return one_hot_labels 35 | 36 | 37 | def rgb(logits): 38 | softmax = tf.nn.softmax(logits) 39 | argmax = tf.argmax(softmax, 3) 40 | color_map = tf.constant(colors, dtype=tf.float32) 41 | n = color_map.get_shape().as_list()[0] 42 | one_hot = tf.one_hot(argmax, n, dtype=tf.float32) 43 | one_hot_matrix = tf.reshape(one_hot, [-1, n]) 44 | rgb_matrix = tf.matmul(one_hot_matrix, color_map) 45 | rgb_tensor = tf.reshape(rgb_matrix, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 3]) 46 | return tf.cast(rgb_tensor, tf.float32) 47 | 48 | 49 | def main(_): 50 | test_image_filenames, test_label_filenames = kitti_segnet.get_filename_list(test_dir) 51 | index = 0 52 | 53 | with tf.Graph().as_default(): 54 | with tf.device('/cpu:0'): 55 | config = tf.ConfigProto() 56 | config.gpu_options.allocator_type = 'BFC' 57 | sess = tf.InteractiveSession(config=config) 58 | 59 | images, labels = kitti_segnet.CamVidInputs(test_image_filenames, 60 | test_label_filenames, 61 | BATCH_SIZE, 62 | shuffle=False) 63 | 64 | saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "segnet.ckpt.meta")) 65 | saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR)) 66 | 67 | graph = tf.get_default_graph() 68 | train_data = graph.get_tensor_by_name("train_data:0") 69 | train_label = graph.get_tensor_by_name("train_labels:0") 70 | is_training = graph.get_tensor_by_name("is_training:0") 71 | logits = tf.get_collection("logits")[0] 72 | 73 | # Start the queue runners. 
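            # (start_queue_runners launches the background threads that feed
            # the CamVidInputs pipeline; without them, sess.run on the batch
            # tensors below would block indefinitely.)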
74 |             coord = tf.train.Coordinator()
75 |             threads = tf.train.start_queue_runners(sess=sess, coord=coord)
76 | 
77 |             for i in range(EPOCH):
78 |                 image_batch, label_batch = sess.run([images, labels])
79 |                 feed_dict = {
80 |                     train_data: image_batch,
81 |                     train_label: label_batch,
82 |                     is_training: True
83 |                 }
84 |                 prediction = rgb(logits)
85 |                 pred = sess.run([prediction], feed_dict)[0]
86 |                 for batch in range(BATCH_SIZE):
87 |                     misc.imsave('output/segnet_kitti/decision_%d.png' % index, pred[batch])
88 |                     misc.imsave('output/segnet_kitti/train_%d.png' % index, image_batch[batch])
89 |                     index += 1
90 | 
91 |             coord.request_stop()
92 |             coord.join(threads)
93 | 
94 | 
95 | if __name__ == '__main__':
96 |     tf.app.run(main=main)
97 | 
-------------------------------------------------------------------------------- /self_driving/segnet/evaluate_test.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.framework import constant_op
6 | from tensorflow.python.platform import test
7 | from self_driving.segnet import evaluate
8 | import tensorflow as tf
9 | 
10 | 
11 | class EvaluateTest(test.TestCase):
12 | 
13 |     def testTfArgmax(self):
14 |         '''[[[[  1.   2.]
15 |             [  3.   4.]
16 |             [  5.   6.]]
17 |            [[  8.   7.]
18 |             [  9.  10.]
19 |             [ 11.  12.]]
20 |            [[ 13.  14.]
21 |             [ 16.  15.]
22 |             [ 17.  18.]]]]'''
23 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 7.0, 9.0,
24 |                         10.0, 11.0, 12.0, 13.0, 14.0, 16.0, 15.0, 17.0, 18.0]
25 |         with self.test_session(use_gpu=False) as sess:
26 |             t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
27 |             argmax_op = tf.argmax(t, axis=3)
28 |             argmax = sess.run([argmax_op])
29 |             self.assertAllEqual(argmax, [[[[1, 1, 1], [0, 1, 1], [1, 0, 1]]]])
30 | 
31 | 
32 |     def testColorImage(self):
33 |         '''[[[[  1.   2.]
34 |             [  3.   4.]
35 |             [  5.   6.]]
36 |            [[  8.   7.]
37 |             [  9.  10.]
38 |             [ 11.  12.]]
39 |            [[ 13.  14.]
40 |             [ 16.  15.]
41 |             [ 17.  18.]]]]'''
42 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 7.0, 9.0,
43 |                         10.0, 11.0, 12.0, 13.0, 14.0, 16.0, 15.0, 17.0, 18.0]
44 |         with self.test_session(use_gpu=False):
45 |             t = constant_op.constant(tensor_input, shape=[3, 3, 1, 2])
46 |             argmax_op = tf.argmax(t, axis=3)
47 |             up_color = evaluate.color_image(argmax_op.eval(), 1.)
48 | self.assertAllClose(up_color, [[[[0.60000002, 0.60000002, 0.60000002, 1.]], 49 | [[0.60000002, 0.60000002, 0.60000002, 1.]], 50 | [[0.60000002, 0.60000002, 0.60000002, 1.]]], 51 | [[[0.89411765, 0.10196079, 0.10980392, 1.]], 52 | [[0.60000002, 0.60000002, 0.60000002, 1.]], 53 | [[0.60000002, 0.60000002, 0.60000002, 1.]]], 54 | [[[0.60000002, 0.60000002, 0.60000002, 1.]], 55 | [[0.89411765, 0.10196079, 0.10980392, 1.]], 56 | [[0.60000002, 0.60000002, 0.60000002, 1.]]]]) 57 | 58 | 59 | if __name__ == "__main__": 60 | test.main() -------------------------------------------------------------------------------- /self_driving/segnet/merge_output.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | result_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_camvid" 4 | output_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_camvid/result" 5 | 6 | #result_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_kitti" 7 | #output_dir="/usr/local/google/home/limeng/githome/tensorflow/output/segnet_kitti/result" 8 | 9 | echo "Merge output" 10 | 11 | train_file_names=($(ls -v $result_dir/train_*.png)) 12 | output_file_names=($(ls -v $result_dir/decision_*.png)) 13 | 14 | output_data_size=${#train_file_names[@]} 15 | 16 | for (( i=0; i<${output_data_size}; i++ )); 17 | do 18 | convert ${output_file_names[$i]} ${train_file_names[$i]} +append $output_dir/frame_$i.png 19 | done 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_camvid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from scipy import misc 4 | 5 | data_image_dir = "/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full" 6 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full/image_2" 7 | 8 | 9 | IMAGE_HEIGHT = 720 10 | IMAGE_WIDTH = 960 11 | IMAGE_DEPTH = 3 12 | 13 | 14 | color2index = { 15 | (64, 128, 64) : 0, # Animal 16 | (192, 0, 128) : 1, # Archway 17 | (0, 128, 192) : 2, # Bicyclist 18 | (0, 128, 64) : 3, # Bridge 19 | (128, 0, 0) : 4, # Building 20 | (64, 0, 128) : 5, # Car 21 | (64, 0, 192) : 6, # CartLuggagePram 22 | (192, 128, 64) : 7, # Child 23 | (192, 192, 128) : 8, # Column_Pole 24 | (64, 64, 128) :9, # Fence 25 | (128, 0, 192) : 10, # LaneMkgsDriv 26 | (192, 0, 64) : 11, # LaneMkgsNonDriv 27 | (128, 128, 64) : 12, # Misc_Text 28 | (192, 0, 192) : 13, # MotorcycleScooter 29 | (128, 64, 64) : 14, # OtherMoving 30 | (64, 192, 128) : 15, # ParkingBlock 31 | (64, 64, 0) : 16, # Pedestrian 32 | (128, 64, 128) : 17, # Road 33 | (128, 128, 192) : 18, # RoadShoulder 34 | (0, 0, 192) : 19, # Sidewalk 35 | (192, 128, 128) : 20, # SignSymbol 36 | (128, 128, 128) : 21, # Sky 37 | (64, 128, 192) : 22, # SUVPickupTruck 38 | (0, 0, 64) : 23, # TrafficCone 39 | (0, 64, 64) : 24, # TrafficLight 40 | (192, 64, 128) : 25, # Train 41 | (128, 128, 0) : 26, # Tree 42 | (192, 128, 192) : 27, # Truck_Bus 43 | (64, 0, 64) : 28, # Tunnel 44 | (192, 192, 0) : 29, # VegetationMisc 45 | (0, 0, 0) : 30, # Void 46 | (64, 192, 0) : 31, # Wall 47 | } 48 | 49 | 50 | def im2index(im): 51 | height, width, ch = im.shape 52 | assert ch == IMAGE_DEPTH 53 | if height != IMAGE_HEIGHT or width != IMAGE_WIDTH: 54 | print("Size: (%d, %d, %d) cannot be used." 
% (height, width, ch)) 55 | return None 56 | m_lable = np.zeros((height, width), dtype=np.uint8) 57 | for w in range(width): 58 | for h in range(height): 59 | r, g, b = im[h, w, :] 60 | if (r, g, b) in color2index: 61 | m_lable[h, w] = color2index[(r, g, b)] 62 | else: 63 | m_lable[h, w] = 30 64 | return m_lable 65 | 66 | 67 | def convert_to_label_data(file_name): 68 | assert os.path.isfile(file_name), 'Cannot find: %s' % file_name 69 | return im2index(misc.imread(file_name, mode='RGB')) 70 | 71 | 72 | def main(): 73 | for file in os.listdir(data_image_dir): 74 | if file.endswith(".png"): 75 | print("Try to converting %s" % file) 76 | gt_label = convert_to_label_data(os.path.join(data_image_dir, file)) 77 | if gt_label is not None: 78 | misc.imsave(os.path.join(image_dir, file), gt_label) 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_camvid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | data_dir="/usr/local/google/home/limeng/Downloads/camvid/701_StillsRaw_full" 4 | label_data_dir="/usr/local/google/home/limeng/Downloads/camvid/LabeledApproved_full/image_2" 5 | 6 | echo "Camvid dataset" 7 | 8 | rm -f train.txt 9 | touch train.txt 10 | 11 | data_file_names=($(ls $data_dir)) 12 | label_file_names=($(ls $label_data_dir)) 13 | data_size=${#data_file_names[@]} 14 | 15 | for (( i=0; i<${data_size}; i++ )); 16 | do 17 | echo $data_dir/${data_file_names[$i]} $label_data_dir/${label_file_names[$i]} >> train.txt 18 | done 19 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from scipy import misc 4 | 5 | data_image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/data_image_2" 6 | image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/image_2" 7 | data_label_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/data_label_2" 8 | label_output_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/gt_image_2" 9 | 10 | 11 | IMAGE_HEIGHT = 375 12 | IMAGE_WIDTH = 1242 13 | IMAGE_DEPTH = 3 14 | 15 | 16 | # R G B 17 | # 255 0 255 road 18 | # 0 0 255 road 19 | # 255 0 0 valid 20 | # 0 0 0 invalid 21 | color2index = { 22 | (255, 0, 255) : 0, 23 | (0, 0, 255) : 0, 24 | (255, 0, 0) : 1, 25 | (0, 0, 0) : 1, 26 | } 27 | 28 | 29 | def im2index(im): 30 | height, width, ch = im.shape 31 | assert ch == IMAGE_DEPTH 32 | if height != IMAGE_HEIGHT or width != IMAGE_WIDTH: 33 | print("Size: (%d, %d, %d) cannot be used." 
% (height, width, ch)) 34 | return None 35 | m_lable = np.zeros((height, width), dtype=np.uint8) 36 | for w in range(width): 37 | for h in range(height): 38 | r, g, b = im[h, w, :] 39 | m_lable[h, w] = color2index[(r, g, b)] 40 | return m_lable 41 | 42 | 43 | def convert_to_label_data(file_name): 44 | assert os.path.isfile(file_name), 'Cannot find: %s' % file_name 45 | return im2index(misc.imread(file_name, mode='RGB')) 46 | 47 | 48 | def main(): 49 | for file in os.listdir(data_image_dir): 50 | if file.endswith(".png"): 51 | print("Try to copy %s" % file) 52 | im = misc.imread(os.path.join(data_image_dir, file), mode='RGB') 53 | height, width, ch = im.shape 54 | assert ch == IMAGE_DEPTH 55 | if height == IMAGE_HEIGHT and width == IMAGE_WIDTH and ch == IMAGE_DEPTH: 56 | misc.imsave(os.path.join(image_dir, file), im) 57 | else: 58 | print("Size: (%d, %d, %d) cannot be used." % (height, width, ch)) 59 | 60 | for file in os.listdir(data_label_dir): 61 | if file.endswith(".png"): 62 | print("Try to converting %s" % file) 63 | gt_label = convert_to_label_data(os.path.join(data_label_dir, file)) 64 | if gt_label is not None: 65 | misc.imsave(os.path.join(label_output_dir, file), gt_label) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | train_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/training" 4 | 5 | echo "KITTI dataset" 6 | 7 | rm -f train.txt 8 | touch train.txt 9 | 10 | append_data_items() 11 | { 12 | train_file_names=($(ls $train_data_dir/image_2/$1)) 13 | gt_file_names=($(ls $train_data_dir/gt_image_2/$1)) 14 | 15 | train_data_size=${#train_file_names[@]} 16 | 17 | for (( i=0; i<${train_data_size}; i++ )); 18 | do 19 | echo ${train_file_names[$i]} ${gt_file_names[$i]} >> train.txt 20 | done 21 | } 22 | 23 | append_data_items "um_*" 24 | append_data_items "umm_*" 25 | append_data_items "uu_*" 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from scipy import misc 3 | 4 | data_test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/data_image_2" 5 | test_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/image_2" 6 | 7 | 8 | IMAGE_HEIGHT = 375 9 | IMAGE_WIDTH = 1242 10 | IMAGE_DEPTH = 3 11 | 12 | 13 | def main(): 14 | for file in os.listdir(data_test_dir): 15 | if file.endswith(".png"): 16 | print("Try to copy %s" % file) 17 | im = misc.imread(os.path.join(data_test_dir, file), mode='RGB') 18 | height, width, ch = im.shape 19 | assert ch == IMAGE_DEPTH 20 | if height == IMAGE_HEIGHT and width == IMAGE_WIDTH and ch == IMAGE_DEPTH: 21 | misc.imsave(os.path.join(test_dir, file), im) 22 | else: 23 | print("Size: (%d, %d, %d) cannot be used." 
% (height, width, ch)) 24 | 25 | 26 | if __name__ == '__main__': 27 | main() 28 | -------------------------------------------------------------------------------- /self_driving/segnet/prepare_kitti_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | test_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/testing/image_2" 4 | fake_label_data_dir="/usr/local/google/home/limeng/Downloads/kitti/data_road/testing" 5 | 6 | echo "KITTI test dataset" 7 | 8 | rm -f test.txt 9 | touch test.txt 10 | 11 | test_file_names=($(ls $test_data_dir)) 12 | test_data_size=${#test_file_names[@]} 13 | 14 | for (( i=0; i<${test_data_size}; i++ )); 15 | do 16 | echo $test_data_dir/${test_file_names[$i]} $fake_label_data_dir/umm_road_000000.png >> test.txt 17 | done 18 | -------------------------------------------------------------------------------- /self_driving/segnet/segnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/segnet/segnet.png -------------------------------------------------------------------------------- /self_driving/segnet/segnet_vgg.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | VGG16_NPY_PATH = 'vgg16.npy' 9 | WD = 5e-4 10 | 11 | data_dict = np.load(VGG16_NPY_PATH, encoding='latin1').item() 12 | 13 | 14 | def activation_summary(var): 15 | tensor_name = var.op.name 16 | tf.summary.histogram(tensor_name + '/activations', var) 17 | tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(var)) 18 | 19 | 20 | def variable_summaries(var): 21 | if not tf.get_variable_scope().reuse: 22 | name = var.op.name 23 | with tf.name_scope('summaries'): 24 | mean = tf.reduce_mean(var) 25 | tf.summary.scalar(name + '/mean', mean) 26 | with tf.name_scope('stddev'): 27 | stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean))) 28 | tf.summary.scalar(name + '/sttdev', stddev) 29 | tf.summary.scalar(name + '/max', tf.reduce_max(var)) 30 | tf.summary.scalar(name + '/min', tf.reduce_min(var)) 31 | tf.summary.histogram(name, var) 32 | 33 | 34 | def load_conv_filter(name): 35 | init = tf.constant_initializer(value=data_dict[name][0], 36 | dtype=tf.float32) 37 | shape = data_dict[name][0].shape 38 | var = tf.get_variable(name=name + "_weight", initializer=init, shape=shape) 39 | if not tf.get_variable_scope().reuse: 40 | weight_decay = tf.multiply(tf.nn.l2_loss(var), WD, name=name + '_weight_decay') 41 | tf.add_to_collection('losses', weight_decay) 42 | variable_summaries(var) 43 | return var 44 | 45 | 46 | def get_conv_filter(name, shape): 47 | init = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32) 48 | var = tf.get_variable(name=name + "_weight", initializer=init) 49 | weight_decay = tf.multiply(tf.nn.l2_loss(var), WD, name=name + '_weight_decay') 50 | tf.add_to_collection('losses', weight_decay) 51 | variable_summaries(var) 52 | return var 53 | 54 | 55 | def load_conv_bias(name): 56 | bias_wights = data_dict[name][1] 57 | shape = data_dict[name][1].shape 58 | init = tf.constant_initializer(value=bias_wights, 59 | dtype=tf.float32) 60 | var = tf.get_variable(name=name + "_bias", initializer=init, shape=shape) 61 | variable_summaries(var) 62 | return var 63 | 64 | 
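# How the 'losses' collection assembled by the filter loaders above is
# consumed downstream (a sketch; the loss() in train.py does exactly this):
#
#     tf.add_to_collection('losses', cross_entropy_mean)
#     total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
#
# so every weight-decay term contributes to the optimized objective.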
65 | def get_conv_bias(name, shape): 66 | init = tf.constant(0.0, shape=shape) 67 | var = tf.get_variable(name=name + "_bias", initializer=init) 68 | variable_summaries(var) 69 | return var 70 | 71 | 72 | def conv2d(bottom, weight): 73 | return tf.nn.conv2d(bottom, weight, strides=[1, 1, 1, 1], padding='SAME') 74 | 75 | 76 | def batch_norm_layer(bottom, is_training, scope): 77 | return tf.cond(is_training, 78 | lambda: tf.contrib.layers.batch_norm(bottom, 79 | is_training=True, 80 | center=False, 81 | scope=scope+"_bn"), 82 | lambda: tf.contrib.layers.batch_norm(bottom, 83 | is_training=False, 84 | center=False, 85 | scope=scope+"_bn", 86 | reuse=True)) 87 | 88 | 89 | def conv_layer_with_bn(bottom=None, is_training=True, shape=None, name=None): 90 | with tf.variable_scope(name) as scope: 91 | if shape: 92 | weight = get_conv_filter(name, shape) 93 | bias = get_conv_bias(name, [shape[3]]) 94 | else: 95 | weight = load_conv_filter(name) 96 | bias = load_conv_bias(name) 97 | conv = tf.nn.bias_add(conv2d(bottom, weight), bias) 98 | conv = batch_norm_layer(conv, is_training, scope.name) 99 | conv = tf.nn.relu(conv, name="relu") 100 | activation_summary(conv) 101 | return conv 102 | 103 | 104 | def max_pool_with_argmax(bottom): 105 | with tf.name_scope('max_pool_arg_max'): 106 | with tf.device('/gpu:0'): 107 | _, indices = tf.nn.max_pool_with_argmax( 108 | bottom, 109 | ksize=[1, 2, 2, 1], 110 | strides=[1, 2, 2, 1], 111 | padding='SAME') 112 | indices = tf.stop_gradient(indices) 113 | bottom = tf.nn.max_pool(bottom, 114 | ksize=[1, 2, 2, 1], 115 | strides=[1, 2, 2, 1], 116 | padding='SAME') 117 | return bottom, indices 118 | 119 | 120 | def max_unpool_with_argmax(bottom, mask, output_shape=None): 121 | with tf.name_scope('max_unpool_with_argmax'): 122 | ksize = [1, 2, 2, 1] 123 | input_shape = bottom.get_shape().as_list() 124 | # calculation new shape 125 | if output_shape is None: 126 | output_shape = (input_shape[0], 127 | input_shape[1] * ksize[1], 128 | input_shape[2] * ksize[2], 129 | input_shape[3]) 130 | # calculation indices for batch, height, width and feature maps 131 | one_like_mask = tf.ones_like(mask) 132 | batch_range = tf.reshape(tf.range(output_shape[0], 133 | dtype=tf.int64), 134 | shape=[input_shape[0], 1, 1, 1]) 135 | b = one_like_mask * batch_range 136 | y = mask // (output_shape[2] * output_shape[3]) 137 | x = mask % (output_shape[2] * output_shape[3]) // output_shape[3] 138 | feature_range = tf.range(output_shape[3], dtype=tf.int64) 139 | f = one_like_mask * feature_range 140 | # transpose indices & reshape update values to one dimension 141 | updates_size = tf.size(bottom) 142 | indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size])) 143 | values = tf.reshape(bottom, [updates_size]) 144 | return tf.scatter_nd(indices, values, output_shape) 145 | 146 | 147 | def inference(images, is_training, num_classes): 148 | training = tf.equal(is_training, tf.constant(True)) 149 | conv1_1 = conv_layer_with_bn(bottom=images, is_training=training, name="conv1_1") 150 | conv1_2 = conv_layer_with_bn(bottom=conv1_1, is_training=training, name="conv1_2") 151 | pool1, pool1_indices = max_pool_with_argmax(conv1_2) 152 | 153 | print("pool1: ", pool1.shape) 154 | 155 | conv2_1 = conv_layer_with_bn(bottom=pool1, is_training=training, name="conv2_1") 156 | conv2_2 = conv_layer_with_bn(bottom=conv2_1, is_training=training, name="conv2_2") 157 | pool2, pool2_indices = max_pool_with_argmax(conv2_2) 158 | 159 | print("pool2: ", pool2.shape) 160 | 161 | conv3_1 = 
conv_layer_with_bn(bottom=pool2, is_training=training, name="conv3_1") 162 | conv3_2 = conv_layer_with_bn(bottom=conv3_1, is_training=training, name="conv3_2") 163 | conv3_3 = conv_layer_with_bn(bottom=conv3_2, is_training=training, name="conv3_3") 164 | pool3, pool3_indices = max_pool_with_argmax(conv3_3) 165 | 166 | print("pool3: ", pool3.shape) 167 | 168 | conv4_1 = conv_layer_with_bn(bottom=pool3, is_training=training, name="conv4_1") 169 | conv4_2 = conv_layer_with_bn(bottom=conv4_1, is_training=training, name="conv4_2") 170 | conv4_3 = conv_layer_with_bn(bottom=conv4_2, is_training=training, name="conv4_3") 171 | pool4, pool4_indices = max_pool_with_argmax(conv4_3) 172 | 173 | print("pool4: ", pool4.shape) 174 | 175 | conv5_1 = conv_layer_with_bn(bottom=pool4, is_training=training, name="conv5_1") 176 | conv5_2 = conv_layer_with_bn(bottom=conv5_1, is_training=training, name="conv5_2") 177 | conv5_3 = conv_layer_with_bn(bottom=conv5_2, is_training=training, name="conv5_3") 178 | pool5, pool5_indices = max_pool_with_argmax(conv5_3) 179 | 180 | print("pool5: ", pool5.shape) 181 | 182 | # End of encoders 183 | # start of decoders 184 | 185 | up_sample_5 = max_unpool_with_argmax(pool5, 186 | pool5_indices, 187 | output_shape=conv5_3.shape) 188 | up_conv5 = conv_layer_with_bn(bottom=up_sample_5, 189 | shape=[3, 3, 512, 512], 190 | is_training=training, 191 | name="up_conv5") 192 | 193 | print("up_conv5: ", up_conv5.shape) 194 | 195 | up_sample_4 = max_unpool_with_argmax(up_conv5, 196 | pool4_indices, 197 | output_shape=conv4_3.shape) 198 | up_conv4 = conv_layer_with_bn(bottom=up_sample_4, 199 | shape=[3, 3, 512, 256], 200 | is_training=training, 201 | name="up_conv4") 202 | 203 | print("up_conv4: ", up_conv4.shape) 204 | 205 | up_sample_3 = max_unpool_with_argmax(up_conv4, 206 | pool3_indices, 207 | output_shape=conv3_3.shape) 208 | up_conv3 = conv_layer_with_bn(bottom=up_sample_3, 209 | shape=[3, 3, 256, 128], 210 | is_training=training, 211 | name="up_conv3") 212 | 213 | print("up_conv3: ", up_conv3.shape) 214 | 215 | up_sample_2 = max_unpool_with_argmax(up_conv3, 216 | pool2_indices, 217 | output_shape=conv2_2.shape) 218 | up_conv2 = conv_layer_with_bn(bottom=up_sample_2, 219 | shape=[3, 3, 128, 64], 220 | is_training=training, 221 | name="up_conv2") 222 | 223 | print("up_conv2: ", up_conv2.shape) 224 | 225 | up_sample_1 = max_unpool_with_argmax(up_conv2, 226 | pool1_indices, 227 | output_shape=conv1_2.shape) 228 | logits = conv_layer_with_bn(bottom=up_sample_1, 229 | shape=[3, 3, 64, num_classes], 230 | is_training=training, 231 | name="up_conv1") 232 | 233 | print("logits: ", logits.shape) 234 | tf.add_to_collection("logits", logits) 235 | 236 | return logits 237 | -------------------------------------------------------------------------------- /self_driving/segnet/segnet_vgg_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from tensorflow.python.framework import constant_op 6 | from tensorflow.python.platform import test 7 | from self_driving.segnet import segnet_vgg 8 | import tensorflow as tf 9 | import numpy as np 10 | 11 | NUM_CLASSES = 11 12 | 13 | class PoolingTest(test.TestCase): 14 | 15 | def testMaxPoolingWithArgmax(self): 16 | # MaxPoolWithArgMax is implemented only on CUDA. 17 | if not test.is_gpu_available(cuda_only=True): 18 | return 19 | '''[[[[ 1. 2.] 20 | [ 3. 4.] 21 | [ 5. 6.]] 22 | [[ 7. 8.] 
23 |             [  9.  10.]
24 |             [ 11.  12.]]
25 |            [[ 13.  14.]
26 |             [ 15.  16.]
27 |             [ 17.  18.]]]]'''
28 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
29 |                         10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]
30 |         with self.test_session(use_gpu=True) as sess:
31 |             t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
32 |             out_op, argmax_op = segnet_vgg.max_pool_with_argmax(t)
33 |             out, argmax = sess.run([out_op, argmax_op])
34 |             self.assertShapeEqual(out, out_op)
35 |             self.assertShapeEqual(argmax, argmax_op)
36 |             '''[[[9, 10]
37 |                 [11, 12]]
38 |                [[15, 16]
39 |                 [17, 18]]]'''
40 |             self.assertAllClose(out.ravel(), [9., 10., 11., 12., 15., 16., 17., 18.])
41 |             self.assertAllEqual(argmax.ravel(), [8, 9, 10, 11, 14, 15, 16, 17])
42 | 
43 |     def testMaxUnpoolingWithArgmax(self):
44 |         '''[[[[  1.   2.]
45 |             [  3.   4.]
46 |             [  5.   6.]]
47 |            [[  7.   8.]
48 |             [  9.  10.]
49 |             [ 11.  12.]]
50 |            [[ 13.  14.]
51 |             [ 15.  16.]
52 |             [ 17.  18.]]]]'''
53 |         tensor_input = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
54 |                         10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0]
55 |         with self.test_session(use_gpu=True) as sess:
56 |             t = constant_op.constant(tensor_input, shape=[1, 3, 3, 2])
57 |             out_op, argmax_op = segnet_vgg.max_pool_with_argmax(t)
58 |             out_op = segnet_vgg.max_unpool_with_argmax(out_op,
59 |                                                        argmax_op,
60 |                                                        output_shape=np.int64([1, 3, 3, 2]))
61 |             out = sess.run([out_op])
62 |             self.assertAllClose(out, [[[[[  0.,   0.],
63 |                                          [  0.,   0.],
64 |                                          [  0.,   0.]],
65 |                                         [[  0.,   0.],
66 |                                          [  9.,  10.],
67 |                                          [ 11.,  12.]],
68 |                                         [[  0.,   0.],
69 |                                          [ 15.,  16.],
70 |                                          [ 17.,  18.]]]]])
71 | 
72 |     def testLoadConvBias(self):
73 |         with self.test_session(use_gpu=True) as sess:
74 |             bias = segnet_vgg.load_conv_bias("conv1_1")
75 |             sess.run(tf.global_variables_initializer())
76 |             self.assertEqual(bias.get_shape(), [64,])
77 |             self.assertAllClose(tf.reduce_sum(bias).eval(), 32.08903503417969)
78 | 
79 |     def testLoadConvFilter(self):
80 |         with self.test_session(use_gpu=True) as sess:
81 |             weight = segnet_vgg.load_conv_filter("conv1_1")
82 |             sess.run(tf.global_variables_initializer())
83 |             self.assertEqual(weight.get_shape(), [3, 3, 3, 64])
84 |             self.assertAllClose(tf.reduce_sum(weight).eval(), -4.212705612182617)
85 | 
86 |     def testConvLayerWithBn(self):
87 |         config = tf.ConfigProto()
88 |         config.gpu_options.allocator_type = 'BFC'
89 |         tensor_input = tf.ones([10, 495, 289, 3], tf.float32)
90 |         with self.test_session(use_gpu=True, config=config) as sess:
91 |             conv_op = segnet_vgg.conv_layer_with_bn(bottom=tensor_input, is_training=tf.constant(True), name="conv1_1")
92 |             sess.run(tf.global_variables_initializer())
93 |             conv_out = sess.run([conv_op])
94 |             self.assertEqual(np.array(conv_out).shape, (1, 10, 495, 289, 64))
95 | 
96 |     def testDecoderConvLayerWithBn(self):
97 |         config = tf.ConfigProto()
98 |         config.gpu_options.allocator_type = 'BFC'
99 |         tensor_input = tf.ones([10, 495, 289, 3], tf.float32)
100 |         with self.test_session(use_gpu=True, config=config) as sess:
101 |             conv_op = segnet_vgg.conv_layer_with_bn(bottom=tensor_input,
102 |                                                     shape=[3, 3, 3, 128],  # decoder convs pass an explicit shape
103 |                                                     is_training=tf.constant(True), name="up_conv1")
104 |             sess.run(tf.global_variables_initializer())
105 |             conv_out = sess.run([conv_op])
106 |             self.assertEqual(np.array(conv_out).shape, (1, 10, 495, 289, 128))
107 | 
108 |     def testInference(self):
109 |         config = tf.ConfigProto()
110 |         config.gpu_options.allocator_type = 'BFC'
111 |         train_data = tf.ones([10, 495, 289, 3], tf.float32)
112 |         with self.test_session(use_gpu=True, config=config) as sess:
113 |             model_op = segnet_vgg.inference(train_data, tf.constant(True), NUM_CLASSES)
114 |             sess.run(tf.global_variables_initializer())
115 | model_out = sess.run([model_op]) 116 | self.assertEqual(np.array(model_out).shape, (1, 10, 495, 289, NUM_CLASSES)) 117 | 118 | 119 | if __name__ == "__main__": 120 | test.main() 121 | -------------------------------------------------------------------------------- /self_driving/segnet/train.py: -------------------------------------------------------------------------------- 1 | """Train SegNet with camvid dataset. 2 | 3 | nohup python -u -m self_driving.segnet.train > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import camvid 10 | import segnet_vgg 11 | 12 | LOG_DIR = 'save' 13 | EPOCH = 6000 14 | BATCH_SIZE = 4 15 | IMAGE_HEIGHT = 720 16 | IMAGE_WIDTH = 960 17 | IMAGE_CHANNEL = 3 18 | NUM_CLASSES = 32 19 | INITIAL_LEARNING_RATE = 0.0001 20 | 21 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/train.txt" 22 | val_dir = "/usr/local/google/home/limeng/Downloads/camvid/val.txt" 23 | 24 | 25 | def loss(logits, labels): 26 | logits = tf.reshape(logits, [-1, NUM_CLASSES]) 27 | labels = tf.reshape(labels, [-1]) 28 | 29 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 30 | logits=logits, labels=labels, name='cross_entropy') 31 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 32 | tf.add_to_collection('losses', cross_entropy_mean) 33 | return tf.add_n(tf.get_collection('losses'), name='total_loss') 34 | 35 | 36 | def train(total_loss): 37 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 38 | with tf.control_dependencies(update_ops): 39 | global_step = tf.Variable(0, name='global_step', trainable=False) 40 | learning_rate = tf.train.exponential_decay( 41 | INITIAL_LEARNING_RATE, global_step, EPOCH * 0.2, 0.9, staircase=True) 42 | tf.summary.scalar('total_loss', total_loss) 43 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 44 | return optimizer.minimize(total_loss, global_step=global_step) 45 | 46 | 47 | def main(_): 48 | image_filenames, label_filenames = camvid.get_filename_list(image_dir) 49 | val_image_filenames, val_label_filenames = camvid.get_filename_list(val_dir) 50 | 51 | with tf.Graph().as_default(): 52 | with tf.device('/cpu:0'): 53 | # config = tf.ConfigProto(device_count = {'GPU': 0}) 54 | config = tf.ConfigProto() 55 | config.gpu_options.allocator_type = 'BFC' 56 | sess = tf.InteractiveSession(config=config) 57 | 58 | train_data = tf.placeholder(tf.float32, 59 | shape=[BATCH_SIZE, 60 | IMAGE_HEIGHT, 61 | IMAGE_WIDTH, 62 | IMAGE_CHANNEL], 63 | name='train_data') 64 | train_labels = tf.placeholder(tf.int64, 65 | shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 1], 66 | name='train_labels') 67 | is_training = tf.placeholder(tf.bool, name='is_training') 68 | 69 | images, labels = camvid.CamVidInputs(image_filenames, 70 | label_filenames, 71 | BATCH_SIZE) 72 | val_images, val_labels = camvid.CamVidInputs(val_image_filenames, 73 | val_label_filenames, 74 | BATCH_SIZE) 75 | 76 | logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES) 77 | total_loss = loss(logits, train_labels) 78 | train_op = train(total_loss) 79 | check_op = tf.add_check_numerics_ops() 80 | 81 | merged_summary_op = tf.summary.merge_all() 82 | summary_writer = tf.summary.FileWriter('train', sess.graph) 83 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) 84 | if not os.path.exists(LOG_DIR): 85 | os.makedirs(LOG_DIR) 86 | checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt") 87 | 88 | sess.run(tf.global_variables_initializer()) 89 | 90 | # Start 
the queue runners. 91 | coord = tf.train.Coordinator() 92 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 93 | 94 | for i in range(EPOCH): 95 | image_batch, label_batch = sess.run([images, labels]) 96 | feed_dict = { 97 | train_data: image_batch, 98 | train_labels: label_batch, 99 | is_training: True 100 | } 101 | _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op], 102 | feed_dict=feed_dict) 103 | if i % 10 == 0: 104 | print("Start validating...") 105 | val_images_batch, val_labels_batch = sess.run([val_images, val_labels]) 106 | loss_value = total_loss.eval(feed_dict={train_data: val_images_batch, 107 | train_labels: val_labels_batch, 108 | is_training: True}) 109 | print("Epoch: %d, Loss: %g" % (i, loss_value)) 110 | saver.save(sess, checkpoint_path) 111 | # write logs at every iteration 112 | summary_writer.add_summary(summary, i) 113 | 114 | coord.request_stop() 115 | coord.join(threads) 116 | 117 | 118 | if __name__ == '__main__': 119 | tf.app.run(main=main) 120 | -------------------------------------------------------------------------------- /self_driving/segnet/train_kitti.py: -------------------------------------------------------------------------------- 1 | """Train SegNet with KITTI dataset. 2 | 3 | nohup python -u -m self_driving.segnet.train_kitti > self_driving/segnet/output.txt 2>&1 & 4 | 5 | """ 6 | 7 | import os 8 | import tensorflow as tf 9 | from utils import kitti_segnet 10 | import segnet_vgg 11 | 12 | LOG_DIR = 'save' 13 | EPOCH = 4000 14 | BATCH_SIZE = 1 15 | IMAGE_HEIGHT = 375 16 | IMAGE_WIDTH = 1242 17 | IMAGE_CHANNEL = 3 18 | NUM_CLASSES = 2 19 | INITIAL_LEARNING_RATE = 0.0001 20 | 21 | image_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/train.txt" 22 | val_dir = "/usr/local/google/home/limeng/Downloads/kitti/data_road/training/train.txt" 23 | 24 | 25 | def loss(logits, labels): 26 | logits = tf.reshape(logits, [-1, NUM_CLASSES]) 27 | labels = tf.reshape(labels, [-1]) 28 | 29 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 30 | logits=logits, labels=labels, name='cross_entropy') 31 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean') 32 | tf.add_to_collection('losses', cross_entropy_mean) 33 | return tf.add_n(tf.get_collection('losses'), name='total_loss') 34 | 35 | 36 | def train(total_loss): 37 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 38 | with tf.control_dependencies(update_ops): 39 | global_step = tf.Variable(0, name='global_step', trainable=False) 40 | learning_rate = tf.train.exponential_decay( 41 | INITIAL_LEARNING_RATE, global_step, EPOCH * 0.2, 0.9, staircase=True) 42 | tf.summary.scalar('total_loss', total_loss) 43 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 44 | return optimizer.minimize(total_loss, global_step=global_step) 45 | 46 | 47 | def main(_): 48 | image_filenames, label_filenames = kitti_segnet.get_filename_list(image_dir) 49 | val_image_filenames, val_label_filenames = kitti_segnet.get_filename_list(val_dir) 50 | 51 | with tf.Graph().as_default(): 52 | with tf.device('/cpu:0'): 53 | # config = tf.ConfigProto(device_count = {'GPU': 0}) 54 | config = tf.ConfigProto() 55 | config.gpu_options.allocator_type = 'BFC' 56 | sess = tf.InteractiveSession(config=config) 57 | 58 | train_data = tf.placeholder(tf.float32, 59 | shape=[BATCH_SIZE, 60 | IMAGE_HEIGHT, 61 | IMAGE_WIDTH, 62 | IMAGE_CHANNEL], 63 | name='train_data') 64 | train_labels = tf.placeholder(tf.int64, 65 | shape=[BATCH_SIZE, 
IMAGE_HEIGHT, IMAGE_WIDTH, 1], 66 | name='train_labels') 67 | is_training = tf.placeholder(tf.bool, name='is_training') 68 | 69 | images, labels = kitti_segnet.CamVidInputs(image_filenames, 70 | label_filenames, 71 | BATCH_SIZE) 72 | val_images, val_labels = kitti_segnet.CamVidInputs(val_image_filenames, 73 | val_label_filenames, 74 | BATCH_SIZE) 75 | 76 | logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES) 77 | total_loss = loss(logits, train_labels) 78 | train_op = train(total_loss) 79 | check_op = tf.add_check_numerics_ops() 80 | 81 | merged_summary_op = tf.summary.merge_all() 82 | summary_writer = tf.summary.FileWriter('train', sess.graph) 83 | saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) 84 | if not os.path.exists(LOG_DIR): 85 | os.makedirs(LOG_DIR) 86 | checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt") 87 | 88 | sess.run(tf.global_variables_initializer()) 89 | 90 | # Start the queue runners. 91 | coord = tf.train.Coordinator() 92 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 93 | 94 | for i in range(EPOCH): 95 | image_batch, label_batch = sess.run([images, labels]) 96 | feed_dict = { 97 | train_data: image_batch, 98 | train_labels: label_batch, 99 | is_training: True 100 | } 101 | _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op], 102 | feed_dict=feed_dict) 103 | if i % 10 == 0: 104 | print("Start validating...") 105 | val_images_batch, val_labels_batch = sess.run([val_images, val_labels]) 106 | loss_value = total_loss.eval(feed_dict={train_data: val_images_batch, 107 | train_labels: val_labels_batch, 108 | is_training: True}) 109 | print("Epoch: %d, Loss: %g" % (i, loss_value)) 110 | saver.save(sess, checkpoint_path) 111 | # write logs at every iteration 112 | summary_writer.add_summary(summary, i) 113 | 114 | coord.request_stop() 115 | coord.join(threads) 116 | 117 | 118 | if __name__ == '__main__': 119 | tf.app.run(main=main) 120 | -------------------------------------------------------------------------------- /self_driving/steering/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/self_driving/steering/__init__.py -------------------------------------------------------------------------------- /self_driving/steering/driving_data.py: -------------------------------------------------------------------------------- 1 | import scipy.misc 2 | import random 3 | 4 | xs = [] 5 | ys = [] 6 | 7 | #points to the end of the last batch 8 | train_batch_pointer = 0 9 | val_batch_pointer = 0 10 | 11 | #read data.txt 12 | with open("/usr/local/google/home/limeng/Downloads/nvida/data.txt") as f: 13 | for line in f: 14 | xs.append("/usr/local/google/home/limeng/Downloads/nvida/driving_dataset/" + line.split()[0]) 15 | #the paper by Nvidia uses the inverse of the turning radius, 16 | #but steering wheel angle is proportional to the inverse of turning radius 17 | #so the steering wheel angle in radians is used as the output 18 | ys.append(float(line.split()[1]) * scipy.pi / 180) 19 | 20 | #get number of images 21 | num_images = len(xs) 22 | 23 | #shuffle list of images 24 | c = list(zip(xs, ys)) 25 | random.shuffle(c) 26 | xs, ys = zip(*c) 27 | 28 | train_xs = xs[:int(len(xs) * 0.8)] 29 | train_ys = ys[:int(len(xs) * 0.8)] 30 | 31 | val_xs = xs[-int(len(xs) * 0.2):] 32 | val_ys = ys[-int(len(xs) * 0.2):] 33 | 34 | num_train_images = len(train_xs) 35 | num_val_images = 
len(val_xs)
36 | 
37 | 
38 | def load_train_batch(batch_size):
39 |     global train_batch_pointer
40 |     x_out = []
41 |     y_out = []
42 |     for i in range(0, batch_size):
43 |         x_out.append(
44 |             scipy.misc.imresize(
45 |                 scipy.misc.imread(
46 |                     train_xs[(train_batch_pointer + i) % num_train_images]), [66, 200]) / 255.0)
47 |         y_out.append([train_ys[(train_batch_pointer + i) % num_train_images]])
48 |     train_batch_pointer += batch_size
49 |     return x_out, y_out
50 | 
51 | 
52 | def load_val_batch(batch_size):
53 |     global val_batch_pointer
54 |     x_out = []
55 |     y_out = []
56 |     for i in range(0, batch_size):
57 |         x_out.append(
58 |             scipy.misc.imresize(
59 |                 scipy.misc.imread(
60 |                     val_xs[(val_batch_pointer + i) % num_val_images]), [66, 200]) / 255.0)
61 |         y_out.append([val_ys[(val_batch_pointer + i) % num_val_images]])
62 |     val_batch_pointer += batch_size
63 |     return x_out, y_out
64 | 
-------------------------------------------------------------------------------- /self_driving/steering/evaluate.py: --------------------------------------------------------------------------------
1 | """Evaluate the steering model.
2 | 
3 | nohup python -u -m self_driving.steering.evaluate > self_driving/steering/output.txt 2>&1 &
4 | 
5 | """
6 | 
7 | import os
8 | import tensorflow as tf
9 | from utils import udacity_data
10 | 
11 | LOG_DIR = 'save'
12 | BATCH_SIZE = 128
13 | EPOCH = udacity_data.NUM_VAL_IMAGES // BATCH_SIZE
14 | OUTPUT = "steering_out.txt"
15 | 
16 | 
17 | def main(_):
18 |     udacity_data.read_data(shuffe=False)
19 |     with tf.Graph().as_default():
20 |         config = tf.ConfigProto()
21 |         config.gpu_options.allocator_type = 'BFC'
22 |         sess = tf.InteractiveSession(config=config)
23 | 
24 |         saver = tf.train.import_meta_graph(os.path.join(LOG_DIR, "steering.ckpt.meta"))
25 |         saver.restore(sess, tf.train.latest_checkpoint(LOG_DIR))
26 | 
27 |         graph = tf.get_default_graph()
28 |         x_image = graph.get_tensor_by_name("x_image:0")
29 |         y_label = graph.get_tensor_by_name("y_label:0")
30 |         keep_prob = graph.get_tensor_by_name("keep_prob:0")
31 |         logits = tf.get_collection("logits")[0]
32 | 
33 |         if os.path.exists(OUTPUT):
34 |             os.remove(OUTPUT)
35 | 
36 |         for epoch in range(EPOCH):
37 |             image_batch, label_batch = udacity_data.load_val_batch(BATCH_SIZE)
38 |             feed_dict = {
39 |                 x_image: image_batch,
40 |                 y_label: label_batch,
41 |                 keep_prob: 1.0  # dropout is disabled at evaluation time
42 |             }
43 |             prediction = sess.run([logits], feed_dict)
44 |             with open(OUTPUT, 'a') as out:
45 |                 for batch in range(BATCH_SIZE):
46 |                     out.write("%s %.10f\n" % (udacity_data.val_xs[epoch * BATCH_SIZE + batch],
47 |                                               prediction[0][batch]))
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     tf.app.run(main=main)
52 | 
-------------------------------------------------------------------------------- /self_driving/steering/model.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | def weight_variable(shape):
5 |     initial = tf.truncated_normal(shape, stddev=0.1)
6 |     return tf.Variable(initial)
7 | 
8 | 
9 | def bias_variable(shape):
10 |     initial = tf.constant(0.1, shape=shape)
11 |     return tf.Variable(initial)
12 | 
13 | 
14 | def conv2d(x, W, stride):
15 |     return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='VALID')
16 | 
17 | 
18 | def inference(x_image, keep_prob, is_training=True):
19 |     #first convolutional layer
20 |     W_conv1 = weight_variable([5, 5, 3, 24])
21 |     b_conv1 = bias_variable([24])
22 | 
23 |     h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 2) + b_conv1, 'relu_conv1')
24 |     h_conv1_norm = tf.contrib.layers.batch_norm(h_conv1, is_training=is_training, trainable=True)
25 | 
26 |     #second convolutional layer
27 |     W_conv2 = weight_variable([5, 5, 24, 36])
28 |     b_conv2 = bias_variable([36])
29 | 
30 |     h_conv2 = tf.nn.relu(conv2d(h_conv1_norm, W_conv2, 2) + b_conv2, 'relu_conv2')
31 |     h_conv2_norm = tf.contrib.layers.batch_norm(h_conv2, is_training=is_training, trainable=True)
32 | 
33 |     #third convolutional layer
34 |     W_conv3 = weight_variable([5, 5, 36, 48])
35 |     b_conv3 = bias_variable([48])
36 | 
37 |     h_conv3 = tf.nn.relu(conv2d(h_conv2_norm, W_conv3, 2) + b_conv3, 'relu_conv3')
38 |     h_conv3_norm = tf.contrib.layers.batch_norm(h_conv3, is_training=is_training, trainable=True)
39 | 
40 |     #fourth convolutional layer
41 |     W_conv4 = weight_variable([3, 3, 48, 64])
42 |     b_conv4 = bias_variable([64])
43 | 
44 |     h_conv4 = tf.nn.relu(conv2d(h_conv3_norm, W_conv4, 1) + b_conv4, 'relu_conv4')
45 |     h_conv4_norm = tf.contrib.layers.batch_norm(h_conv4, is_training=is_training, trainable=True)
46 | 
47 |     #fifth convolutional layer
48 |     W_conv5 = weight_variable([3, 3, 64, 64])
49 |     b_conv5 = bias_variable([64])
50 | 
51 |     h_conv5 = tf.nn.relu(conv2d(h_conv4_norm, W_conv5, 1) + b_conv5, 'relu_conv5')
52 |     h_conv5_norm = tf.contrib.layers.batch_norm(h_conv5, is_training=is_training, trainable=True)
53 | 
54 |     #FCL 1
55 |     W_fc1 = weight_variable([1152, 1164])
56 |     b_fc1 = bias_variable([1164])
57 | 
58 |     h_conv5_flat = tf.reshape(h_conv5_norm, [-1, 1152])
59 |     h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1, 'relu_fc1')
60 |     h_fc1_norm = tf.contrib.layers.batch_norm(h_fc1, is_training=is_training, trainable=True)
61 |     h_fc1_drop = tf.nn.dropout(h_fc1_norm, keep_prob)
62 | 
63 |     #FCL 2
64 |     W_fc2 = weight_variable([1164, 100])
65 |     b_fc2 = bias_variable([100])
66 | 
67 |     h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, 'relu_fc2')
68 |     h_fc2_norm = tf.contrib.layers.batch_norm(h_fc2, is_training=is_training, trainable=True)
69 |     h_fc2_drop = tf.nn.dropout(h_fc2_norm, keep_prob)
70 | 
71 |     #FCL 3
72 |     W_fc3 = weight_variable([100, 50])
73 |     b_fc3 = bias_variable([50])
74 | 
75 |     h_fc3 = tf.nn.relu(tf.matmul(h_fc2_drop, W_fc3) + b_fc3, 'relu_fc3')
76 |     h_fc3_norm = tf.contrib.layers.batch_norm(h_fc3, is_training=is_training, trainable=True)
77 |     h_fc3_drop = tf.nn.dropout(h_fc3_norm, keep_prob)
78 | 
79 |     #FCL 4
80 |     W_fc4 = weight_variable([50, 10])
81 |     b_fc4 = bias_variable([10])
82 | 
83 |     h_fc4 = tf.nn.relu(tf.matmul(h_fc3_drop, W_fc4) + b_fc4, 'relu_fc4')
84 |     h_fc4_norm = tf.contrib.layers.batch_norm(h_fc4, is_training=is_training, trainable=True)
85 |     h_fc4_drop = tf.nn.dropout(h_fc4_norm, keep_prob)
86 | 
87 |     #Output
88 |     W_fc5 = weight_variable([10, 1])
89 |     b_fc5 = bias_variable([1])
90 | 
91 |     y = tf.multiply(tf.atan(tf.matmul(h_fc4_drop, W_fc5) + b_fc5), 2) #scale the atan output
92 |     tf.add_to_collection("logits", y)
93 | 
94 |     return y
95 | 
-------------------------------------------------------------------------------- /self_driving/steering/model_resnet50.py: --------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.model_resnet50 > self_driving/steering/output.txt 2>&1 &
2 | 
3 | from keras import applications
4 | from keras import optimizers
5 | from keras.models import Sequential
6 | from keras.models import Model
7 | from keras.layers import Dropout, Flatten, Dense
8 | from utils import my_image
9 | from keras import backend as K
10 | from keras.callbacks import ModelCheckpoint
11 | 
12 | # dimensions of our images.
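# (224x224 RGB is the canonical ResNet50 input size; the steering angle is
# regressed as a single scalar by the Dense(1) head built below.)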
13 | img_width, img_height = 224, 224
14 | 
15 | train_data_dir = 'utils/udacity_train.txt'
16 | validation_data_dir = 'utils/udacity_val.txt'
17 | nb_train_samples = 33808
18 | nb_validation_samples = 10558
19 | epochs = 50
20 | batch_size = 32
21 | 
22 | # build the resnet50 network
23 | base_model = applications.ResNet50(include_top=False, input_shape=(224, 224, 3))
24 | print('Model loaded.')
25 | 
26 | # build a classifier model to put on top of the convolutional model
27 | top_model = Sequential()
28 | top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
29 | top_model.add(Dense(512, activation='relu'))
30 | top_model.add(Dense(256, activation='relu'))
31 | top_model.add(Dense(64, activation='relu'))
32 | top_model.add(Dense(1))
33 | 
34 | # add the model on top of the convolutional base
35 | # model.add(top_model)
36 | model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
37 | 
38 | # set the first 15 layers to non-trainable
39 | # (their weights will not be updated during fine-tuning)
40 | for layer in model.layers[:15]:
41 |     layer.trainable = False
42 | 
43 | def root_mean_squared_error(y_true, y_pred):
44 |     return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
45 | 
46 | # compile the model with an Adam optimizer
47 | # and an RMSE loss for the regression target.
48 | model.compile(loss=root_mean_squared_error,
49 |               optimizer=optimizers.Adam(lr=0.001),
50 |               metrics=['accuracy'])
51 | 
52 | # prepare data augmentation configuration
53 | train_datagen = my_image.MyImageDataGenerator(rescale=1. / 255)
54 | 
55 | test_datagen = my_image.MyImageDataGenerator(rescale=1. / 255)
56 | 
57 | train_generator = train_datagen.flow(
58 |     train_data_dir,
59 |     [img_width, img_height, 3],
60 |     shuffle=True)
61 | 
62 | validation_generator = test_datagen.flow(
63 |     validation_data_dir,
64 |     [img_width, img_height, 3],
65 |     shuffle=True)
66 | 
67 | # checkpoint
68 | filepath = "save/steering_resnet50-{epoch:02d}-{val_loss:.4f}.hdf5"
69 | checkpoint = ModelCheckpoint(
70 |     filepath,
71 |     monitor='val_loss',
72 |     save_best_only=True,
73 |     mode='min')
74 | callbacks_list = [checkpoint]
75 | 
76 | model.summary()
77 | 
78 | # fine-tune the model
79 | model.fit_generator(
80 |     train_generator,
81 |     steps_per_epoch=nb_train_samples // batch_size,
82 |     epochs=epochs,
83 |     validation_data=validation_generator,
84 |     validation_steps=nb_validation_samples // batch_size,
85 |     callbacks=callbacks_list)
86 | 
-------------------------------------------------------------------------------- /self_driving/steering/model_saliency.py: --------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.model_saliency > self_driving/steering/output.txt 2>&1 &
2 | 
3 | from keras import applications
4 | from keras.models import Sequential
5 | from scipy import misc
6 | from keras.models import Model
7 | from keras.layers import Dropout, Flatten, Dense
8 | from vis.visualization import visualize_saliency, overlay
9 | from vis.utils import utils
10 | import numpy as np
11 | from keras.preprocessing.image import img_to_array
12 | import os
13 | 
14 | VAL_DATASET = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/center/"
15 | 
16 | # dimensions of our images.
17 | img_width, img_height = 224, 224
18 | model_weights_path = 'save/steering_resnet50-22-0.0603.hdf5'
19 | 
20 | # build the resnet50 network
21 | base_model = applications.ResNet50(include_top=False,
22 |                                    input_shape=(224, 224, 3))
23 | print('Model loaded.')
24 | 
25 | # build a classifier model to put on top of the convolutional model
26 | top_model = Sequential()
27 | top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
28 | top_model.add(Dense(512, activation='relu'))
29 | top_model.add(Dense(256, activation='relu'))
30 | top_model.add(Dense(64, activation='relu'))
31 | top_model.add(Dense(1))
32 | 
33 | model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
34 | model.load_weights(model_weights_path)
35 | 
36 | with open("output/steering/steering_val.txt", 'a') as out:
37 |     for img in os.listdir(VAL_DATASET):
38 |         img_data = utils.load_img(VAL_DATASET + img, target_size=(224, 224))
39 |         img_input = np.expand_dims(img_to_array(img_data), axis=0)
40 |         out.write("%s %.10f\n" % (img, model.predict(img_input / 255.)[0][0]))
41 |         out.flush()
42 |         heat_map = visualize_saliency(model,
43 |                                       -2,
44 |                                       filter_indices=None,
45 |                                       seed_input=img_data,
46 |                                       backprop_modifier='guided')
47 |         misc.imsave("output/steering/%s" % img, overlay(img_data, heat_map, alpha=0.3))
48 | 
-------------------------------------------------------------------------------- /self_driving/steering/split_data.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Split the Nvidia dataset into train data and test data
3 | 
4 | src_dir="driving_dataset"
5 | train_dst_dir="train_data"
6 | test_dst_dir="test_data"
7 | 
8 | train_data_size=40000
9 | data_size=45568
10 | 
11 | echo "Split Nvidia driving dataset into train data and test data"
12 | 
13 | rm -rf $train_dst_dir $test_dst_dir
14 | mkdir $train_dst_dir $test_dst_dir
15 | 
16 | # train_data
17 | i=0
18 | while [ $i -lt $train_data_size ]
19 | do
20 |     cp $src_dir/"$i.jpg" $train_dst_dir/"$i.jpg"
21 |     true $(( i++ ))
22 | done
23 | 
24 | # test_data
25 | while [ $i -lt $data_size ]
26 | do
27 |     cp $src_dir/"$i.jpg" $test_dst_dir/"$i.jpg"
28 |     true $(( i++ ))
29 | done
30 | 
31 | 
-------------------------------------------------------------------------------- /self_driving/steering/train.py: --------------------------------------------------------------------------------
1 | # nohup python -u -m self_driving.steering.train > self_driving/steering/output.txt 2>&1 &
2 | 
3 | import os
4 | import tensorflow as tf
5 | from utils import udacity_data
6 | import model
7 | 
8 | LOG_DIR = 'save'
9 | EPOCH = 32
10 | BATCH_SIZE = 128
11 | LEARNING_RATE = 1e-3
12 | STEP_PER_EPOCH = udacity_data.NUM_TRAIN_IMAGES // BATCH_SIZE
13 | 
14 | 
15 | def loss(pred, labels):
16 |     train_vars = tf.trainable_variables()
17 |     norm = tf.add_n([tf.nn.l2_loss(v) for v in train_vars])
18 |     # create a summary to monitor L2 norm
19 |     tf.summary.scalar('L2 Normalization', norm)
20 |     losses = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(pred, labels))))
21 |     # create a summary to monitor loss
22 |     tf.summary.scalar('Loss', losses)
23 |     return norm, losses, losses + norm * 0.0005
24 | 
25 | 
26 | def train(total_loss):
27 |     global_step = tf.Variable(0, name='global_step', trainable=False)
28 |     # create a summary to monitor total loss
29 |     tf.summary.scalar('Total Loss', total_loss)
30 |     optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
31 |     return optimizer.minimize(total_loss, global_step=global_step)
32 | 
33 | 
34 | def main(_):
35 |     with tf.Graph().as_default():
36 |         config = tf.ConfigProto()
37 |         config.gpu_options.allocator_type = 'BFC'
38 |         sess = tf.InteractiveSession(config=config)
39 | 
40 |         x_image = tf.placeholder(tf.float32, shape=[None, 66, 200, 3], name="x_image")
41 |         y_label = tf.placeholder(tf.float32, shape=[None, 1], name="y_label")
42 |         keep_prob = tf.placeholder(tf.float32, name="keep_prob")
43 | 
44 |         y_pred = model.inference(x_image, keep_prob)
45 |         norm, losses, total_loss = loss(y_pred, y_label)
46 |         train_op = train(total_loss)
47 | 
48 |         merged_summary_op = tf.summary.merge_all()
49 |         summary_writer = tf.summary.FileWriter('train', sess.graph)
50 |         saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
51 |         if not os.path.exists(LOG_DIR):
52 |             os.makedirs(LOG_DIR)
53 |         checkpoint_path = os.path.join(LOG_DIR, "steering.ckpt")
54 | 
55 |         sess.run(tf.global_variables_initializer())
56 | 
57 |         udacity_data.read_data()
58 | 
59 |         for epoch in range(EPOCH):
60 |             for i in range(STEP_PER_EPOCH):
61 |                 steps = epoch * STEP_PER_EPOCH + i
62 | 
63 |                 xs, ys = udacity_data.load_train_batch(BATCH_SIZE)
64 | 
65 |                 _, summary = sess.run([train_op, merged_summary_op],
66 |                                       feed_dict={x_image: xs, y_label: ys, keep_prob: 0.7})
67 | 
68 |                 if i % 10 == 0:
69 |                     xs, ys = udacity_data.load_val_batch(BATCH_SIZE)
70 |                     loss_value = losses.eval(feed_dict={x_image: xs, y_label: ys, keep_prob: 1.0})
71 |                     print("Epoch: %d, Step: %d, Loss: %g" % (epoch, steps, loss_value))
72 | 
73 |                 # write logs at every iteration
74 |                 summary_writer.add_summary(summary, steps)
75 | 
76 |                 if i % 32 == 0:
77 |                     if not os.path.exists(LOG_DIR):
78 |                         os.makedirs(LOG_DIR)
79 |                     saver.save(sess, checkpoint_path)
80 | 
81 | 
82 | if __name__ == '__main__':
83 |     tf.app.run(main=main)
84 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/utils/__init__.py
--------------------------------------------------------------------------------
/utils/camvid.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from tensorflow.python.framework import dtypes
4 | 
5 | IMAGE_HEIGHT = 720
6 | IMAGE_WIDTH = 960
7 | IMAGE_DEPTH = 3
8 | 
9 | NUM_CLASSES = 32
10 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 580
11 | NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 580
12 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 580
13 | 
14 | def _generate_image_and_label_batch(image, label, min_queue_examples,
15 |                                     batch_size, shuffle):
16 |     """Construct a queued batch of images and labels.
17 |     Args:
18 |         image: 3-D Tensor of [height, width, 3] of type float32.
19 |         label: 3-D Tensor of [height, width, 1] of type int32.
20 |         min_queue_examples: int32, minimum number of samples to retain
21 |             in the queue that provides batches of examples.
22 |         batch_size: Number of images per batch.
23 |         shuffle: boolean indicating whether to use a shuffling queue.
24 |     Returns:
25 |         images: Images. 4D tensor of [batch_size, height, width, 3] size.
26 |         labels: Labels. 3D tensor of [batch_size, height, width, 1] size.
27 |     """
28 |     # Create a queue that shuffles the examples, and then
29 |     # read 'batch_size' images + labels from the example queue.
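    # On the magic numbers below (the usual TF input-queue heuristic, noted here
    # for clarity): shuffle_batch dequeues uniformly at random from a buffer that
    # always holds at least `min_after_dequeue` elements, so a larger buffer
    # shuffles better, while `capacity` bounds memory; capacity is conventionally
    # min_after_dequeue plus a few batches of headroom, hence the
    # `min_queue_examples + 3 * batch_size` used here.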
30 |     num_preprocess_threads = 1
31 |     if shuffle:
32 |         images, label_batch = tf.train.shuffle_batch(
33 |             [image, label],
34 |             batch_size=batch_size,
35 |             num_threads=num_preprocess_threads,
36 |             capacity=min_queue_examples + 3 * batch_size,
37 |             min_after_dequeue=min_queue_examples)
38 |     else:
39 |         images, label_batch = tf.train.batch(
40 |             [image, label],
41 |             batch_size=batch_size,
42 |             num_threads=num_preprocess_threads,
43 |             capacity=min_queue_examples + 3 * batch_size)
44 | 
45 |     return images, label_batch
46 | 
47 | 
48 | def CamVid_reader_seq(filename_queue, seq_length):
49 |     image_seq_filenames = tf.split(axis=0,
50 |                                    num_or_size_splits=seq_length,
51 |                                    value=filename_queue[0])
52 |     label_seq_filenames = tf.split(axis=0,
53 |                                    num_or_size_splits=seq_length,
54 |                                    value=filename_queue[1])
55 | 
56 |     image_seq = []
57 |     label_seq = []
58 |     for im, la in zip(image_seq_filenames, label_seq_filenames):
59 |         imageValue = tf.read_file(tf.squeeze(im))
60 |         labelValue = tf.read_file(tf.squeeze(la))
61 |         image_bytes = tf.image.decode_png(imageValue)
62 |         label_bytes = tf.image.decode_png(labelValue)
63 |         image = tf.cast(tf.reshape(image_bytes,
64 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH)), tf.float32)
65 |         label = tf.cast(tf.reshape(label_bytes,
66 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, 1)), tf.int64)
67 |         image_seq.append(image)
68 |         label_seq.append(label)
69 |     return image_seq, label_seq
70 | 
71 | 
72 | def CamVid_reader(filename_queue):
73 |     image_filename = filename_queue[0]
74 |     label_filename = filename_queue[1]
75 | 
76 |     imageValue = tf.read_file(image_filename)
77 |     labelValue = tf.read_file(label_filename)
78 | 
79 |     image_bytes = tf.image.decode_png(imageValue)
80 |     label_bytes = tf.image.decode_png(labelValue)
81 | 
82 |     image = tf.reshape(image_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
83 |     label = tf.reshape(label_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
84 | 
85 |     return image, label
86 | 
87 | 
88 | def get_filename_list(path):
89 |     fd = open(path)
90 |     image_filenames = []
91 |     label_filenames = []
92 |     for i in fd:
93 |         i = i.strip().split(" ")
94 |         image_filenames.append(i[0])
95 |         label_filenames.append(i[1])
96 |     return image_filenames, label_filenames
97 | 
98 | 
99 | def CamVidInputs(image_filenames, label_filenames, batch_size, shuffle=True):
100 | 
101 |     images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
102 |     labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)
103 | 
104 |     filename_queue = tf.train.slice_input_producer([images, labels], shuffle=shuffle)
105 | 
106 |     image, label = CamVid_reader(filename_queue)
107 |     reshaped_image = tf.cast(image, tf.float32)
108 | 
109 |     min_fraction_of_examples_in_queue = 0.05
110 |     min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
111 |                              min_fraction_of_examples_in_queue)
112 |     print('Filling queue with %d CamVid images before starting to train. '
113 |           'This will take a few minutes.' % min_queue_examples)
114 | 
115 |     # Generate a batch of images and labels by building up a queue of examples.
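    # A usage sketch (assumed here; camvid_test.py below does the same): the
    # returned tensors are queue-backed, so a session must start the queue
    # runners before evaluating them, e.g.
    #     coord = tf.train.Coordinator()
    #     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    #     image_batch, label_batch = sess.run([images, labels])
    #     coord.request_stop(); coord.join(threads)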
116 |     return _generate_image_and_label_batch(reshaped_image, label,
117 |                                             min_queue_examples, batch_size,
118 |                                             shuffle=shuffle)
119 | 
--------------------------------------------------------------------------------
/utils/camvid_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.platform import test
6 | 
7 | import camvid
8 | import tensorflow as tf
9 | 
10 | image_dir = "/usr/local/google/home/limeng/Downloads/camvid/data/train.txt"
11 | 
12 | 
13 | class CamvidTest(test.TestCase):
14 | 
15 |     def testGetFileNameList(self):
16 |         image_filenames, label_filenames = camvid.get_filename_list(image_dir)
17 |         self.assertEqual(len(image_filenames), 367)
18 | 
19 |     def testCamVidInputs(self):
20 |         config = tf.ConfigProto()
21 |         config.gpu_options.allocator_type = 'BFC'
22 |         with self.test_session(use_gpu=True, config=config) as sess:
23 |             image_filenames, label_filenames = camvid.get_filename_list(image_dir)
24 |             images, labels = camvid.CamVidInputs(image_filenames, label_filenames, 32)
25 |             # Start the queue runners.
26 |             coord = tf.train.Coordinator()
27 |             threads = tf.train.start_queue_runners(sess=sess, coord=coord)
28 |             images_batch, labels_batch = sess.run([images, labels])
29 |             self.assertEqual(images.get_shape(), [32, 360, 480, 3])
30 |             self.assertEqual(labels.get_shape(), [32, 360, 480, 1])
31 |             coord.request_stop()
32 |             coord.join(threads)
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     test.main()
37 | 
--------------------------------------------------------------------------------
/utils/cifar.py:
--------------------------------------------------------------------------------
1 | """Load data from CIFAR-10 dataset
2 | 
3 | The archive contains the files data_batch_1, data_batch_2, ..., data_batch_5,
4 | as well as test_batch. Each of these files is a Python "pickled" object
5 | produced with cPickle. Here is a Python routine which will open such a file
6 | and return a dictionary:
7 | """
8 | 
9 | import os.path
10 | import pickle
11 | import tarfile
12 | import urllib2
13 | 
14 | import numpy
15 | import dataset
16 | 
17 | FLAGS = None
18 | 
19 | CIFAR10_DOWNLOAD_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
20 | CIFAR10_FILE_NAME = 'cifar-10-python.tar.gz'
21 | CIFAR10_TRAIN_PREFIX = 'cifar-10-batches-py/data_batch_'
22 | CIFAR10_TEST = 'cifar-10-batches-py/test_batch'
23 | CIFAR10_DATA = 'data'
24 | CIFAR10_LABEL = 'labels'
25 | 
26 | 
27 | class Cifar(object):
28 |     def __init__(self):
29 |         self.train = dataset.DataSet()
30 |         self.test = dataset.DataSet()
31 | 
32 |     def ReadDataSets(self, data_dir=".", one_hot=False, raw=False):
33 |         file_path = os.path.join(data_dir, CIFAR10_FILE_NAME)
34 |         if not os.path.isfile(file_path):
35 |             _DownloadCifar10(data_dir)
36 | 
37 |         UnzipTarGzFile(file_path)
38 | 
39 |         xs = []
40 |         ys = []
41 |         for j in range(5):
42 |             d = Unpickle(os.path.join(data_dir, CIFAR10_TRAIN_PREFIX + str(j + 1)))
43 |             x = d[CIFAR10_DATA]
44 |             y = d[CIFAR10_LABEL]
45 |             xs.append(x)
46 |             ys.append(y)
47 | 
48 |         d = Unpickle(os.path.join(data_dir, CIFAR10_TEST))
49 |         xs.append(d[CIFAR10_DATA])
50 |         ys.append(d[CIFAR10_LABEL])
51 | 
52 |         x = numpy.concatenate(xs) / numpy.float32(255)
53 |         y = numpy.concatenate(ys)
54 |         if not raw:
55 |             x = numpy.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
56 |             x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
57 | 
58 |         # subtract per-pixel mean
59 |         pixel_mean = numpy.mean(x[0:50000], axis=0)
60 |         x -= pixel_mean
61 | 
62 |         # split into train and test sets
63 |         if not raw:
64 |             self.train.images = x[0:50000, :, :, :]
65 |         else:
66 |             self.train.images = x[0:50000]
67 |         self.train.labels = y[0:50000]
68 | 
69 |         if not raw:
70 |             self.test.images = x[50000:, :, :, :]
71 |         else:
72 |             self.test.images = x[50000:]  # fixed: previously overwrote train.images and left test.images empty
73 |         self.test.labels = y[50000:]
74 | 
75 |         if one_hot:
76 |             train_labels = numpy.zeros((50000, 10), dtype=numpy.float32)
77 |             test_labels = numpy.zeros((10000, 10), dtype=numpy.float32)
78 | 
79 |             for i in range(50000):
80 |                 train_labels[i, self.train.labels[i]] = 1.
81 |             self.train.labels = train_labels
82 | 
83 |             for j in range(10000):
84 |                 test_labels[j, self.test.labels[j]] = 1.
85 |             self.test.labels = test_labels
86 | 
87 | 
88 | def _DownloadCifar10(data_dir):
89 |     _EnsureDir(data_dir)
90 |     cifar10_zip_file = urllib2.urlopen(CIFAR10_DOWNLOAD_URL)
91 |     with open(os.path.join(data_dir, CIFAR10_FILE_NAME), 'wb') as output:
92 |         output.write(cifar10_zip_file.read())
93 | 
94 | 
95 | def UnzipTarGzFile(file_path):
96 |     with tarfile.open(file_path) as tar:
97 |         tar.extractall()
98 | 
99 | 
100 | 
101 | def _EnsureDir(directory):
102 |     if not os.path.exists(directory):
103 |         os.makedirs(directory)
104 | 
105 | 
106 | def Unpickle(file_path):
107 |     with open(file_path, mode='rb') as f:
108 |         data = pickle.load(f)
109 |         return data
110 | 
--------------------------------------------------------------------------------
/utils/cifar_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import cifar  # the module lives alongside this test in utils/
4 | 
5 | 
6 | class CifarTest(unittest.TestCase):
7 |     def setUp(self):
8 |         self._cifar = cifar.Cifar()
9 | 
10 |     def testReadDataSets(self):
11 |         self._cifar.ReadDataSets()
12 |         self.assertEqual(len(self._cifar.train.images), 50000)
13 |         self.assertEqual(len(self._cifar.train.labels), 50000)
14 |         self.assertEqual(len(self._cifar.test.images), 10000)
15 |         self.assertEqual(len(self._cifar.test.labels), 10000)
16 | 
17 |     def testReadDataSetsOneHotEnabled(self):
18 |         self._cifar.ReadDataSets(one_hot=True)
19 | 
20 |         self.assertEqual(len(self._cifar.train.images), 50000)
21 |         self.assertEqual(len(self._cifar.train.images[0]), 3072)
22 |         self.assertEqual(len(self._cifar.train.labels[0]), 10)
23 |         self.assertEqual(1, self._cifar.train.labels[0][6])
24 | 
25 |         self.assertEqual(len(self._cifar.test.images), 10000)
26 |         self.assertEqual(len(self._cifar.test.labels[0]), 10)
27 |         self.assertEqual(1, self._cifar.test.labels[0][3])
28 | 
29 | if __name__ == '__main__':
30 |     unittest.main()
31 | 
--------------------------------------------------------------------------------
/utils/dataset.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | 
3 | 
4 | class DataSet(object):
5 |     def __init__(self):
6 |         self._images = numpy.array([])
7 |         self._labels = numpy.array([])
8 |         self._index_in_epoch = 0
9 | 
10 |     @property
11 |     def images(self):
12 |         return self._images
13 | 
14 |     @property
15 |     def labels(self):
16 |         return self._labels
17 | 
18 |     @labels.setter
19 |     def labels(self, value):
20 |         self._labels = value
21 | 
22 |     @images.setter
23 |     def images(self, value):
24 |         self._images = value
25 | 
26 |     def appendImage(self, images):
27 |         arr = self._images.tolist()
28 |         arr.extend(images)
29 |         self._images = numpy.array(arr)
30 | 
31 |     def appendLabel(self, labels):
32 |         arr = self._labels.tolist()
33 |         arr.extend(labels)
34 |         self._labels = numpy.array(arr)
35 | 
36 |     def next_batch(self, batch_size):
37 |         start = self._index_in_epoch
38 |         self._index_in_epoch += batch_size
39 |         if self._index_in_epoch > len(self._images):
40 |             perm = numpy.arange(len(self._images))
41 |             numpy.random.shuffle(perm)
42 |             self._images = self._images[perm]
43 |             self._labels = self._labels[perm]
44 |             self._index_in_epoch = batch_size
45 |             start = 0
46 |         end = self._index_in_epoch
47 |         return self._images[start:end], self._labels[start:end]
48 | 
--------------------------------------------------------------------------------
/utils/kitti.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | import tensorflow as tf
4 | import scipy as scp
5 | import scipy.misc
6 | 
7 | KITTI_TRAIN_DIR_PREFIX = '/usr/local/google/home/limeng/Downloads/kitti/data_road/training/image_2/'
8 | KITTI_GT_DIR_PREFIX = '/usr/local/google/home/limeng/Downloads/kitti/data_road/training/gt_image_2/'
9 | 
10 | UM_TRAIN_TEMPLATE = "um_0000%02d.png"
11 | UU_TRAIN_TEMPLATE = "uu_0000%02d.png"
12 | UMM_TRAIN_TEMPLATE = "umm_0000%02d.png"
13 | 
14 | UU_GT_ROAD_TEMPLATE = "uu_road_0000%02d.png"
15 | UM_GT_LANE_TEMPLATE = "um_lane_0000%02d.png"
16 | UM_GT_ROAD_TEMPLATE = "um_road_0000%02d.png"
17 | UMM_GT_ROAD_TEMPLATE = "umm_road_0000%02d.png"
18 | 
19 | 
20 | class Kitti(object):
21 |     def __init__(self):
22 |         self._images = []
23 |         self._labels = []
24 |         self._file_count = 0
25 |         self._read_datasets()
26 | 
27 |     def _read_datasets(self,
28 |                        train_data_dir=KITTI_TRAIN_DIR_PREFIX,
29 |                        gt_data_dir=KITTI_GT_DIR_PREFIX,
30 |                        cat='uu'):
31 |         assert os.path.isdir(train_data_dir), 'Cannot find: %s' % train_data_dir
32 | 
33 |         self._file_count = 98
34 |         train_file_temp = UU_TRAIN_TEMPLATE
35 |         gt_file_temp = UU_GT_ROAD_TEMPLATE
36 |         if cat == 'um':
37 |             self._file_count = 95
38 |             train_file_temp = UM_TRAIN_TEMPLATE
39 |             gt_file_temp = UM_GT_ROAD_TEMPLATE
40 |         elif cat == 'umm':
41 |             self._file_count = 96
42 |             train_file_temp = UMM_TRAIN_TEMPLATE
43 |             gt_file_temp = UMM_GT_ROAD_TEMPLATE
44 | 
45 |         for i in range(0, self._file_count):
46 |             train_file_name = train_data_dir + train_file_temp % i
47 |             gt_file_name = gt_data_dir + gt_file_temp % i
48 |             print(train_file_name)
49 |             x = get_training_data(train_file_name)
50 |             y = get_ground_truth(gt_file_name)
51 | 
52 |             self._images.append(x)
53 |             self._labels.append(y)
54 | 
55 |     def next_batch(self, batch_id):
56 |         return self._images[batch_id], self._labels[batch_id]
57 | 
58 | 
59 | def get_training_data(file_name):
60 |     assert os.path.isfile(file_name), 'Cannot find: %s' % file_name
61 |     training_data = scp.misc.imread(file_name, mode='RGB')
62 |     return numpy.expand_dims(training_data, axis=0)
63 | 
64 | 
65 | def get_ground_truth(fileNameGT):
66 |     assert os.path.isfile(fileNameGT), 'Cannot find: %s' % fileNameGT
67 |     full_gt = scp.misc.imread(fileNameGT, mode='RGB')
68 |     roadArea = (full_gt[:, :, 2] > 0)
69 |     notRoadArea = (full_gt[:, :, 2] == 0)
70 |     gt_data = numpy.dstack((roadArea, notRoadArea))
71 |     return numpy.expand_dims(gt_data, axis=0)
72 | 
73 | 
74 | def main(_):
75 |     kitti = Kitti()
76 |     for i in range(0, 20):
77 |         img, label = kitti.next_batch(i)  # fixed: next_batch requires a batch_id
78 |         print("images")
79 |         print(img.shape)
80 |         print("labels")
81 |         print(label.shape)
82 | 
83 | 
84 | if __name__ == '__main__':
85 |     tf.app.run(main=main)
86 | 
--------------------------------------------------------------------------------
/utils/kitti_segnet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | from tensorflow.python.framework import dtypes
4 | 
5 | IMAGE_HEIGHT = 375
6 | IMAGE_WIDTH = 1242
7 | IMAGE_DEPTH = 3
8 | 
9 | NUM_CLASSES = 3
10 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 100
11 | NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 100
12 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 100
13 | 
14 | def _generate_image_and_label_batch(image, label, min_queue_examples,
15 |                                     batch_size, shuffle):
16 |     """Construct a queued batch of images and labels.
17 |     Args:
18 |         image: 3-D Tensor of [height, width, 3] of type float32.
19 |         label: 3-D Tensor of [height, width, 1] of type int32.
20 |         min_queue_examples: int32, minimum number of samples to retain
21 |             in the queue that provides batches of examples.
22 |         batch_size: Number of images per batch.
23 |         shuffle: boolean indicating whether to use a shuffling queue.
24 |     Returns:
25 |         images: Images. 4D tensor of [batch_size, height, width, 3] size.
26 |         labels: Labels. 3D tensor of [batch_size, height, width, 1] size.
27 |     """
28 |     # Create a queue that shuffles the examples, and then
29 |     # read 'batch_size' images + labels from the example queue.
30 |     num_preprocess_threads = 1
31 |     if shuffle:
32 |         images, label_batch = tf.train.shuffle_batch(
33 |             [image, label],
34 |             batch_size=batch_size,
35 |             num_threads=num_preprocess_threads,
36 |             capacity=min_queue_examples + 3 * batch_size,
37 |             min_after_dequeue=min_queue_examples)
38 |     else:
39 |         images, label_batch = tf.train.batch(
40 |             [image, label],
41 |             batch_size=batch_size,
42 |             num_threads=num_preprocess_threads,
43 |             capacity=min_queue_examples + 3 * batch_size)
44 | 
45 |     return images, label_batch
46 | 
47 | 
48 | def CamVid_reader_seq(filename_queue, seq_length):
49 |     image_seq_filenames = tf.split(axis=0,
50 |                                    num_or_size_splits=seq_length,
51 |                                    value=filename_queue[0])
52 |     label_seq_filenames = tf.split(axis=0,
53 |                                    num_or_size_splits=seq_length,
54 |                                    value=filename_queue[1])
55 | 
56 |     image_seq = []
57 |     label_seq = []
58 |     for im, la in zip(image_seq_filenames, label_seq_filenames):
59 |         imageValue = tf.read_file(tf.squeeze(im))
60 |         labelValue = tf.read_file(tf.squeeze(la))
61 |         image_bytes = tf.image.decode_png(imageValue)
62 |         label_bytes = tf.image.decode_png(labelValue)
63 |         image = tf.cast(tf.reshape(image_bytes,
64 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH)), tf.float32)
65 |         label = tf.cast(tf.reshape(label_bytes,
66 |                                    (IMAGE_HEIGHT, IMAGE_WIDTH, 1)), tf.int64)
67 |         image_seq.append(image)
68 |         label_seq.append(label)
69 |     return image_seq, label_seq
70 | 
71 | 
72 | def CamVid_reader(filename_queue):
73 |     image_filename = filename_queue[0]
74 |     label_filename = filename_queue[1]
75 | 
76 |     imageValue = tf.read_file(image_filename)
77 |     labelValue = tf.read_file(label_filename)
78 | 
79 |     image_bytes = tf.image.decode_png(imageValue)
80 |     label_bytes = tf.image.decode_png(labelValue)
81 | 
82 |     image = tf.reshape(image_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
83 |     label = tf.reshape(label_bytes, (IMAGE_HEIGHT, IMAGE_WIDTH, 1))
84 | 
85 |     return image, label
86 | 
87 | 
88 | def get_filename_list(path):
89 |     fd = open(path)
90 |     image_filenames = []
91 |     label_filenames = []
92 |     for i in fd:
93 |         i = i.strip().split(" ")
94 |         image_filenames.append(i[0])
95 |         label_filenames.append(i[1])
96 |     return image_filenames, label_filenames
97 | 
98 | 
99 | def CamVidInputs(image_filenames, label_filenames, batch_size, shuffle=True):
100 | 
101 |     images = ops.convert_to_tensor(image_filenames, dtype=dtypes.string)
102 |     labels = ops.convert_to_tensor(label_filenames, dtype=dtypes.string)
103 | 
104 |     filename_queue = tf.train.slice_input_producer([images, labels], shuffle=shuffle)
105 | 
106 |     image, label = CamVid_reader(filename_queue)
107 |     reshaped_image = tf.cast(image, tf.float32)
108 | 
109 |     min_fraction_of_examples_in_queue = 0.05
110 |     min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
111 |                              min_fraction_of_examples_in_queue)
112 |     print('Filling queue with %d CamVid images before starting to train. '
113 |           'This will take a few minutes.' % min_queue_examples)
114 | 
115 |     # Generate a batch of images and labels by building up a queue of examples.
116 |     return _generate_image_and_label_batch(reshaped_image, label,
117 |                                            min_queue_examples, batch_size,
118 |                                            shuffle=shuffle)
119 | 
--------------------------------------------------------------------------------
/utils/my_image.py:
--------------------------------------------------------------------------------
1 | import scipy.misc
2 | import numpy as np
3 | import os
4 | 
5 | from keras.preprocessing.image import ImageDataGenerator
6 | from keras.preprocessing.image import Iterator
7 | from keras import backend as K
8 | from keras.preprocessing import image
9 | 
10 | class MyImageDataGenerator(ImageDataGenerator):
11 | 
12 |     def flow(self, file, image_size, batch_size=32, shuffle=True, seed=None,
13 |              save_to_dir=None, save_prefix='', save_format='png'):
14 |         return FileIterator(
15 |             file, image_size, self,
16 |             batch_size=batch_size,
17 |             shuffle=shuffle,
18 |             seed=seed,
19 |             data_format=self.data_format,
20 |             save_to_dir=save_to_dir,
21 |             save_prefix=save_prefix,
22 |             save_format=save_format)
23 | 
24 | 
25 | class FileIterator(Iterator):
26 |     """Iterator yielding data from a file.
27 | 
28 |     The file should be in the following format:
29 | 
30 |         <image path> <label>
31 |         ...
32 |         <image path> <label>
33 | 
34 |     # Arguments
35 |         file: Path to the file to read the image list and label data.
36 |         image_size: Image size, [height, width, channel]
37 |         image_data_generator: Instance of `ImageDataGenerator`
38 |             to use for random transformations and normalization.
39 |         batch_size: Integer, size of a batch.
40 |         shuffle: Boolean, whether to shuffle the data between epochs.
41 |         seed: Random seed for data shuffling.
42 |         data_format: String, one of `channels_first`, `channels_last`.
43 |         save_to_dir: Optional directory where to save the pictures
44 |             being yielded, in a viewable format. This is useful
45 |             for visualizing the random transformations being
46 |             applied, for debugging purposes.
47 |         save_prefix: String prefix to use for saving sample
48 |             images (if `save_to_dir` is set).
49 |         save_format: Format to use for saving sample images
50 |             (if `save_to_dir` is set).
51 |     """
52 | 
53 |     def __init__(self, file, image_size, image_data_generator,
54 |                  batch_size=32, shuffle=False, seed=None,
55 |                  data_format=None,
56 |                  save_to_dir=None, save_prefix='', save_format='png'):
57 |         if not os.path.exists(file):
58 |             raise ValueError('Cannot find file: %s' % file)
59 | 
60 |         if data_format is None:
61 |             data_format = K.image_data_format()
62 | 
63 |         split_lines = [line.rstrip('\n').split(' ') for line in open(file, 'r')]
64 |         self.x = np.asarray([e[0] for e in split_lines])
65 |         self.y = np.asarray([float(e[1]) for e in split_lines])
66 |         self.image_size = image_size
67 |         self.image_data_generator = image_data_generator
68 |         self.data_format = data_format
69 |         self.save_to_dir = save_to_dir
70 |         self.save_prefix = save_prefix
71 |         self.save_format = save_format
72 |         super(FileIterator, self).__init__(self.x.shape[0], batch_size, shuffle, seed)
73 | 
74 |     def next(self):
75 |         # Keeps under lock only the mechanism which advances
76 |         # the indexing of each batch.
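        # (Only the shared index generator is mutated across worker threads;
        # the image loading and augmentation below operate on per-batch locals,
        # so several workers can decode and transform images concurrently.)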
77 |         with self.lock:
78 |             index_array, current_index, current_batch_size = next(self.index_generator)
79 |         # The transformation of images is not under thread lock
80 |         # so it can be done in parallel
81 |         batch_x = np.zeros(tuple([current_batch_size] + list(self.image_size)), dtype=K.floatx())
82 |         for i, j in enumerate(index_array):
83 |             x = scipy.misc.imread(self.x[j])
84 |             x = scipy.misc.imresize(x, self.image_size)
85 |             x = self.image_data_generator.random_transform(x.astype(K.floatx()))
86 |             x = self.image_data_generator.standardize(x)
87 |             batch_x[i] = x
88 |         if self.save_to_dir:
89 |             for i in range(current_batch_size):
90 |                 img = image.array_to_img(batch_x[i], self.data_format, scale=True)
91 |                 fname = '{prefix}_{index}_{hash}.{format}'.format(prefix=self.save_prefix,
92 |                                                                   index=current_index + i,
93 |                                                                   hash=np.random.randint(1e4),
94 |                                                                   format=self.save_format)
95 |                 img.save(os.path.join(self.save_to_dir, fname))
96 |         batch_y = self.y[index_array]
97 |         return batch_x, batch_y
--------------------------------------------------------------------------------
/utils/my_image_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.platform import test
6 | import my_image
7 | 
8 | class MyImageTest(test.TestCase):
9 | 
10 |     def testReadData(self):
11 |         myImageDataGenerator = my_image.MyImageDataGenerator()
12 |         generator = myImageDataGenerator.flow("udacity_train.txt",
13 |                                               [224, 224, 3],
14 |                                               shuffle=False,
15 |                                               save_to_dir='test')
16 |         images, labels = generator.next()
17 |         self.assertAllEqual(images.shape, [32, 224, 224, 3])
18 |         self.assertAllEqual(labels.shape, [32])
19 |         self.assertAllClose(labels[0], 0.0490969472)
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     test.main()
--------------------------------------------------------------------------------
/utils/svhn.py:
--------------------------------------------------------------------------------
1 | """Load data from SVHN dataset
2 | """
3 | 
4 | import os.path
5 | import dataset
6 | import numpy
7 | import scipy.io
8 | 
9 | FLAGS = None
10 | 
11 | SVHN_TRAIN_FILE_NAME = 'train_32x32.mat'
12 | SVHN_TEST_FILE_NAME = 'test_32x32.mat'
13 | SVHN_DATA = 'X'
14 | SVHN_LABEL = 'y'
15 | 
16 | 
17 | class SVHN(object):
18 |     def __init__(self):
19 |         self.train = dataset.DataSet()
20 |         self.test = dataset.DataSet()
21 | 
22 |     def ReadDataSets(self, data_dir=".", one_hot=False):
23 |         train_file_path = os.path.join(data_dir, SVHN_TRAIN_FILE_NAME)
24 |         test_file_path = os.path.join(data_dir, SVHN_TEST_FILE_NAME)
25 |         if not os.path.isfile(train_file_path) or not os.path.isfile(test_file_path):
26 |             print("SVHN dataset not found.")  # fixed: bail out if either file is missing, not only if both are
27 |             return
28 | 
29 |         read_input = scipy.io.loadmat(train_file_path)
30 |         self.train.images = read_input[SVHN_DATA]
31 |         self.train.labels = read_input[SVHN_LABEL]
32 | 
33 |         read_input = scipy.io.loadmat(test_file_path)
34 |         self.test.images = read_input[SVHN_DATA]
35 |         self.test.labels = read_input[SVHN_LABEL]
36 | 
37 |         self.train.images = numpy.swapaxes(self.train.images, 0, 3)
38 |         self.train.images = numpy.swapaxes(self.train.images, 1, 2)
39 |         self.train.images = numpy.swapaxes(self.train.images, 2, 3)
40 | 
41 |         self.train.images = self.train.images.reshape((73257, -1))
42 | 
43 |         self.test.images = numpy.swapaxes(self.test.images, 0, 3)
44 |         self.test.images = numpy.swapaxes(self.test.images, 1, 2)
45 |         self.test.images = numpy.swapaxes(self.test.images, 2, 3)
46 | 
47 |         self.test.images = self.test.images.reshape((26032, -1))
48 | 
49 |         self.train.images = self.train.images / numpy.float32(255)
50 |         self.test.images = self.test.images / numpy.float32(255)
51 | 
52 |         if one_hot:
53 |             train_labels = numpy.zeros((73257, 10), dtype=numpy.float32)
54 |             test_labels = numpy.zeros((26032, 10), dtype=numpy.float32)
55 | 
56 |             for i in range(73257):
57 |                 train_labels[i, self.train.labels[i] - 1] = 1.
58 |             self.train.labels = train_labels
59 | 
60 |             for j in range(26032):
61 |                 test_labels[j, self.test.labels[j] - 1] = 1.
62 |             self.test.labels = test_labels
63 | 
--------------------------------------------------------------------------------
/utils/udacity_data.py:
--------------------------------------------------------------------------------
1 | import scipy.misc
2 | import random
3 | import pandas as pd
4 | import tensorflow as tf
5 | 
6 | # points to the end of the last batch
7 | train_batch_pointer = 0
8 | val_batch_pointer = 0
9 | 
10 | train_xs = []
11 | train_ys = []
12 | val_xs = []
13 | val_ys = []
14 | 
15 | TRAIN_IMG_PREFIX = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_%s/"
16 | TRAIN_CSV = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_%s/interpolated.csv"
17 | VAL_IMG_PREFIX = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/"
18 | VAL_CSV = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/interpolated.csv"
19 | 
20 | NUM_TRAIN_IMAGES = 33808
21 | NUM_VAL_IMAGES = 5279
22 | 
23 | 
24 | def read_csv(csv_file_name, img_prefix):
25 |     x_out = []
26 |     data_csv = pd.read_csv(csv_file_name)
27 |     data = data_csv[[x.startswith("center") for x in data_csv["filename"]]]
28 |     for file_name in data["filename"]:
29 |         x_out.append(img_prefix + file_name)
30 |     return x_out, data["angle"]
31 | 
32 | 
33 | def read_data(shuffle=True):
34 |     global train_xs
35 |     global train_ys
36 |     global val_xs
37 |     global val_ys
38 | 
39 |     # Read train set
40 |     for idx in range(1, 7):
41 |         if idx == 3:
42 |             continue
43 |         x_out, y_out = read_csv(TRAIN_CSV % idx, TRAIN_IMG_PREFIX % idx)
44 |         train_xs.extend(x_out)
45 |         train_ys.extend(y_out)
46 |     # Read val set
47 |     val_xs, val_ys = read_csv(VAL_CSV, VAL_IMG_PREFIX)
48 | 
49 |     # shuffle train set
50 |     c = list(zip(train_xs, train_ys))
51 |     if shuffle:
52 |         random.shuffle(c)
53 |     # with open("train.txt", 'a') as out:
54 |     #     for item in c:
55 |     #         out.write("%s %.10f\n" % (item[0], item[1]))
56 |     train_xs, train_ys = zip(*c)
57 |     # shuffle val set
58 |     c = list(zip(val_xs, val_ys))
59 |     # with open("val.txt", 'a') as out:
60 |     #     for item in c:
61 |     #         out.write("%s %.10f\n" % (item[0], item[1]))
62 |     if shuffle:
63 |         random.shuffle(c)
64 |     val_xs, val_ys = zip(*c)
65 | 
66 | 
67 | def load_train_batch(batch_size):
68 |     global train_batch_pointer
69 |     global train_xs
70 |     global train_ys
71 | 
72 |     x_out = []
73 |     y_out = []
74 |     for i in range(0, batch_size):
75 |         image = scipy.misc.imread(train_xs[(train_batch_pointer + i) % NUM_TRAIN_IMAGES], mode="RGB")
76 |         x_out.append(scipy.misc.imresize(image[-300:], [66, 200]) / 255.0)
77 |         y_out.append([train_ys[(train_batch_pointer + i) % NUM_TRAIN_IMAGES]])
78 |     train_batch_pointer += batch_size
79 |     return x_out, y_out
80 | 
81 | 
82 | def load_val_batch(batch_size):
83 |     global val_batch_pointer
84 |     global val_xs
85 |     global val_ys
86 | 
87 |     x_out = []
88 |     y_out = []
89 |     for i in range(0, batch_size):
90 |         image = scipy.misc.imread(val_xs[(val_batch_pointer + i) % NUM_VAL_IMAGES], mode="RGB")
91 |         x_out.append(scipy.misc.imresize(image[-300:], [66, 200]) / 255.0)
92 |         y_out.append([val_ys[(val_batch_pointer + i) % NUM_VAL_IMAGES]])
93 |     val_batch_pointer += batch_size
94 |     return x_out, y_out
95 | 
96 | 
97 | def main(_):
98 |     read_data()
99 | 
100 | if __name__ == '__main__':
101 |     tf.app.run(main=main)
--------------------------------------------------------------------------------
/utils/udacity_data_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | from tensorflow.python.platform import test
6 | from scipy import misc
7 | import udacity_data
8 | 
9 | IMG_TRAIN = "/usr/local/google/home/limeng/Downloads/udacity/ch2_002/output/HMB_1/center/1479424215880976321.png"
10 | IMG_VAL = "/usr/local/google/home/limeng/Downloads/udacity/test/HMB_3/center/1479425441182877835.png"
11 | 
12 | class UdacityDataTest(test.TestCase):
13 | 
14 |     def testReadData(self):
15 |         udacity_data.read_data()
16 |         self.assertAllEqual(len(udacity_data.train_xs), 33808)
17 |         self.assertAllEqual(len(udacity_data.train_ys), 33808)
18 |         self.assertAllEqual(len(udacity_data.val_xs), 5279)
19 |         self.assertAllEqual(len(udacity_data.val_ys), 5279)
20 |         self.assertTrue(IMG_TRAIN in udacity_data.train_xs)
21 |         self.assertAllClose(udacity_data.train_ys[udacity_data.train_xs.index(IMG_TRAIN)], 0.0010389391)
22 |         self.assertTrue(IMG_VAL in udacity_data.val_xs)
23 |         self.assertAllClose(udacity_data.val_ys[udacity_data.val_xs.index(IMG_VAL)], -0.0169280299)
24 | 
25 |     def testLoadValBatch(self):  # renamed: a second testReadData would have shadowed the first
26 |         udacity_data.read_data()
27 |         x_out, y_out = udacity_data.load_val_batch(64)
28 |         misc.imsave('test.png', x_out[0])
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     test.main()
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def put_kernels_on_grid(kernel, (grid_Y, grid_X), pad=1):
4 |     '''Visualize conv. features as an image (mostly for the 1st layer).
5 |     Place kernel into a grid, with some paddings between adjacent filters.
6 |     Args:
7 |         kernel: tensor of shape [Y, X, NumChannels, NumKernels]
8 |         (grid_Y, grid_X): shape of the grid. Require: NumKernels == grid_Y * grid_X
9 |             User is responsible for how to break it into the two factors.
10 |         pad: number of black pixels around each filter (between them)
11 | 
12 |     Return:
13 |         Tensor of shape [(Y+pad)*grid_Y, (X+pad)*grid_X, NumChannels, 1].
14 |     '''
15 |     # pad X and Y
16 |     x1 = tf.pad(kernel, tf.constant([[pad, 0], [pad, 0], [0, 0], [0, 0]]))
17 | 
18 |     # X and Y dimensions, w.r.t. padding
19 |     Y = kernel.get_shape()[0] + pad
20 |     X = kernel.get_shape()[1] + pad
21 |     ch = kernel.get_shape()[2]
22 | 
23 |     # put NumKernels to the 1st dimension
24 |     x2 = tf.transpose(x1, (3, 0, 1, 2))
25 |     # organize grid on Y axis
26 |     x3 = tf.reshape(x2, tf.stack([grid_X, Y * grid_Y, X, ch]))  # tf.pack was renamed tf.stack in TF 1.0
27 | 
28 |     # switch X and Y axes
29 |     x4 = tf.transpose(x3, (0, 2, 1, 3))
30 |     # organize grid on X axis
31 |     x5 = tf.reshape(x4, tf.stack([1, X * grid_X, Y * grid_Y, ch]))
32 | 
33 |     # back to normal order (not combining with the next step for clarity)
34 |     x6 = tf.transpose(x5, (2, 1, 3, 0))
35 | 
36 |     # to tf.summary.image order [batch_size, height, width, channels],
37 |     # where in this case batch_size == 1
38 |     x7 = tf.transpose(x6, (3, 0, 1, 2))
39 | 
40 |     # scale to [0, 1]
41 |     x_min = tf.reduce_min(x7)
42 |     x_max = tf.reduce_max(x7)
43 |     x8 = (x7 - x_min) / (x_max - x_min)
44 | 
45 |     return x8
46 | 
--------------------------------------------------------------------------------
/vae/README.md:
--------------------------------------------------------------------------------
1 | [Understanding Variational Inference in Depth](https://limengweb.wordpress.com/2017/11/13/%E6%B7%B1%E5%85%A5%E7%90%86%E8%A7%A3%E5%8F%98%E5%88%86%E6%8E%A8%E6%96%AD/)
2 | 
3 |

4 |
5 | 
--------------------------------------------------------------------------------
/vae/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/vae/__init__.py
--------------------------------------------------------------------------------
/vae/vae_mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mengli/MachineLearning/107a5be76aabbfd57a6395a6b7e1b9c55e06bbad/vae/vae_mnist.png
--------------------------------------------------------------------------------
/vae/vae_mnist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """A Variational Autoencoder for MNIST.
5 | """
6 | 
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | 
11 | from keras.layers import Input, Dense, Lambda, Conv2D, Conv2DTranspose, \
12 |     Flatten, Reshape
13 | from keras.models import Model
14 | from keras import backend as K
15 | from keras.datasets import mnist
16 | from keras import metrics
17 | import tensorflow as tf
18 | import numpy as np
19 | import matplotlib.pyplot as plt
20 | from scipy.stats import norm
21 | 
22 | EPOCH = 5
23 | INPUT_DIM = 784
24 | BATCH_SIZE = 64
25 | HIDDEN_VAR_DIM = 7 * 7 * 32
26 | LATENT_VAR_DIM = 2
27 | 
28 | # input image dimensions
29 | 
30 | (img_rows, img_cols, img_chns) = (28, 28, 1)
31 | 
32 | if K.image_data_format() == 'channels_first':
33 |     original_img_size = (img_chns, img_rows, img_cols)
34 |     output_shape = (BATCH_SIZE, 32, 7, 7)
35 | else:
36 |     original_img_size = (img_rows, img_cols, img_chns)
37 |     output_shape = (BATCH_SIZE, 7, 7, 32)
38 | 
39 | 
40 | def sampling(args):
41 |     (z_mean, z_var) = args
42 |     epsilon = K.random_normal(shape=(K.shape(z_mean)[0],
43 |                               LATENT_VAR_DIM), mean=0., stddev=1.)
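    # Reparameterization trick: sampling z ~ N(z_mean, z_var^2) directly would
    # not be differentiable w.r.t. the encoder outputs, so we sample
    # eps ~ N(0, 1) and shift/scale it instead. Note that z_var acts as a
    # standard deviation here, consistent with the KL term in vae_loss below.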
44 |     return z_mean + z_var * epsilon
45 | 
46 | 
47 | def encode(x):
48 |     input_reshape = Reshape(original_img_size)(x)
49 |     conv1 = Conv2D(16, 5, strides=(2, 2), padding='same',
50 |                    activation='relu')(input_reshape)
51 |     conv2 = Conv2D(32, 5, strides=(2, 2), padding='same',
52 |                    activation='relu')(conv1)
53 |     hidden = Flatten()(conv2)
54 |     z_mean = Dense(LATENT_VAR_DIM, activation='relu')(hidden)
55 |     z_var = Dense(LATENT_VAR_DIM, activation='relu')(hidden)
56 |     return (z_mean, z_var)
57 | 
58 | 
59 | def decode(z):
60 |     hidden = Dense(HIDDEN_VAR_DIM, activation='relu')(z)
61 |     hidden_reshape = Reshape(output_shape[1:])(hidden)
62 |     deconv1 = Conv2DTranspose(16, 5, strides=(2, 2), padding='same',
63 |                               activation='relu')(hidden_reshape)
64 |     deconv2 = Conv2DTranspose(1, 5, strides=(2, 2), padding='same',
65 |                               activation='sigmoid')(deconv1)
66 |     return Flatten()(deconv2)
67 | 
68 | 
69 | def main(_):
70 |     x = Input(shape=(INPUT_DIM, ))
71 |     (z_mean, z_var) = encode(x)
72 |     z = Lambda(sampling)([z_mean, z_var])
73 |     x_decoded = decode(z)
74 |     model = Model(inputs=x, outputs=x_decoded)
75 | 
76 |     def vae_loss(y_true, y_pred):
77 |         generation_loss = img_rows * img_cols \
78 |             * metrics.binary_crossentropy(x, x_decoded)
79 |         kl_loss = 0.5 * tf.reduce_sum(K.square(z_mean)
80 |             + K.square(z_var) - K.log(K.square(z_var + 1e-8)) - 1,
81 |             axis=1)
82 |         return tf.reduce_mean(generation_loss + kl_loss)
83 | 
84 |     model.compile(optimizer='rmsprop', loss=vae_loss)
85 | 
86 |     # train the VAE on MNIST digits
87 | 
88 |     ((x_train, y_train), (x_test, y_test)) = mnist.load_data()
89 | 
90 |     x_train = x_train.astype('float32') / 255.
91 |     x_test = x_test.astype('float32') / 255.
92 |     x_train = x_train.reshape((len(x_train),
93 |                                np.prod(x_train.shape[1:])))
94 |     x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
95 | 
96 |     print(model.summary())
97 | 
98 |     model.fit(
99 |         x_train,
100 |         x_train,  # reconstruction target: vae_loss compares x to x_decoded, so the inputs double as targets
101 |         shuffle=True,
102 |         epochs=EPOCH,
103 |         batch_size=BATCH_SIZE,
104 |         validation_data=(x_test, x_test),
105 |     )
106 | 
107 |     generator = K.function([model.layers[8].input],
108 |                            [model.layers[12].output])
109 | 
110 |     # display a 2D manifold of the digits
111 | 
112 |     n = 15  # figure with 15x15 digits
113 |     digit_size = 28
114 |     figure = np.zeros((digit_size * n, digit_size * n))
115 | 
116 |     # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
117 |     # to produce values of the latent variables z, since the prior of the latent space is Gaussian
118 | 
119 |     grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
120 |     grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
121 | 
122 |     for (i, yi) in enumerate(grid_x):
123 |         for (j, xi) in enumerate(grid_y):
124 |             z_sample = np.array([[xi, yi]])
125 |             z_sample = np.tile(z_sample,
126 |                                BATCH_SIZE).reshape(BATCH_SIZE, 2)
127 |             x_decoded = generator([z_sample])[0]
128 |             digit = x_decoded[0].reshape(digit_size, digit_size)
129 | 
130 |             figure[i * digit_size:(i + 1) * digit_size, j * digit_size:
131 |                    (j + 1) * digit_size] = digit
132 | 
133 |     plt.figure(figsize=(10, 10))
134 |     plt.imshow(figure, cmap='Greys_r')
135 |     plt.show()
136 | 
137 | 
138 | if __name__ == '__main__':
139 |     tf.app.run(main=main)
140 | 
--------------------------------------------------------------------------------
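For reference, the KL term in `vae_loss` above is the standard closed form for a diagonal Gaussian posterior against a standard normal prior (a textbook identity, stated here for clarity; in the code `z_mean` plays the role of mu and `z_var` the role of sigma):

```latex
D_{\mathrm{KL}}\!\left(\mathcal{N}(\mu,\,\sigma^2 I)\;\|\;\mathcal{N}(0,\,I)\right)
  = \frac{1}{2} \sum_{d=1}^{D} \left( \mu_d^2 + \sigma_d^2 - \log \sigma_d^2 - 1 \right)
```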