├── README.md
├── hsr-eval.png
├── hsr.py
└── train.py
/README.md:
--------------------------------------------------------------------------------
# HSR

This repository contains code that I use to build a machine learning model for
a hand-signal recognition system.

![Eval example](hsr-eval.png)

The training data is not included. You can create your own training data using
a webcam via Chrome. I use the following
[HTML & JS script](https://gist.github.com/pyk/48b92225d1e3c5a732d1fda7c7b79ce5)
to collect the training data.

## Running

    python train.py training-data/

It expects all images inside the `training-data` directory to be named using
the format `label_id-*`, where `label_id` is a natural number and
`0 < label_id`. (The loader reads only the first character of the file name,
so `label_id` must be a single digit.)
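For example, with the default five classes the directory might look like this
(the file names below are hypothetical; only the leading `label_id-` prefix
matters):

    training-data/
    ├── 1-0001.png
    ├── 1-0002.png
    ├── 2-0001.png
    ├── ...
    └── 5-0042.png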
--------------------------------------------------------------------------------
/hsr-eval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyk/hsr/9cfa4a0f2d39bab24ac81a3ef4adb68d152cd8be/hsr-eval.png
--------------------------------------------------------------------------------
/hsr.py:
--------------------------------------------------------------------------------
# The architecture is inspired by LeNet-5 (LeCun, 1998)
import os

import tensorflow as tf

# Parameters
IMAGE_HEIGHT = 240
IMAGE_WIDTH = 320
BATCH_SIZE = 5
NUM_EPOCHS = 2
NUM_CLASS = 5
NUM_CHANNELS = 3
CONV1_FILTER_SIZE = 32
CONV1_FILTER_COUNT = 4
CONV2_FILTER_SIZE = 16
CONV2_FILTER_COUNT = 6
HIDDEN_LAYER_SIZE = 400

def read_images(data_dir):
    pattern = os.path.join(data_dir, '*.png')
    filenames = tf.train.match_filenames_once(pattern, name='list_files')

    queue = tf.train.string_input_producer(
        filenames,
        num_epochs=NUM_EPOCHS,
        shuffle=True,
        name='queue')

    reader = tf.WholeFileReader()
    filename, content = reader.read(queue, name='read_image')
    filename = tf.Print(
        filename,
        data=[filename],
        message='loading: ')
    # The label id is the first character of the file name, e.g.
    # 'training-data/3-0001.png' -> 3. This assumes `data_dir` is a single
    # path component, so the file name is the second '/'-separated field.
    filename_split = tf.string_split([filename], delimiter='/')
    label_id = tf.string_to_number(tf.substr(filename_split.values[1],
        0, 1), out_type=tf.int32)
    label = tf.one_hot(
        label_id-1,
        NUM_CLASS,
        on_value=1.0,
        off_value=0.0,
        dtype=tf.float32)

    img_tensor = tf.image.decode_png(
        content,
        dtype=tf.uint8,
        channels=NUM_CHANNELS,
        name='img_decode')

    # Preprocess the image: perform random transformations
    # Random flip
    img_tensor_flip = tf.image.random_flip_left_right(img_tensor)

    # Random brightness (+/- 20%; the image is converted to [0, 1] floats
    # internally)
    img_tensor_bri = tf.image.random_brightness(img_tensor_flip,
        max_delta=0.2)

    # Per-image scaling to zero mean and unit variance
    img_tensor_std = tf.image.per_image_standardization(img_tensor_bri)

    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3 * BATCH_SIZE
    example_batch, label_batch = tf.train.shuffle_batch(
        [img_tensor_std, label],
        batch_size=BATCH_SIZE,
        shapes=[(IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS), (NUM_CLASS,)],
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        name='train_shuffle')

    return example_batch, label_batch

# `images` is a 4-D tensor with the shape:
# [n_batch, img_height, img_width, n_channel]
def inference(images):
    # Convolutional layer 1
    with tf.name_scope('conv1'):
        W = tf.Variable(
            tf.truncated_normal(
                shape=(
                    CONV1_FILTER_SIZE,
                    CONV1_FILTER_SIZE,
                    NUM_CHANNELS,
                    CONV1_FILTER_COUNT),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(CONV1_FILTER_COUNT,),
                dtype=tf.float32),
            name='biases')
        conv = tf.nn.conv2d(
            input=images,
            filter=W,
            strides=(1, 1, 1, 1),
            padding='SAME',
            name='convolutional')
        conv_bias = tf.nn.bias_add(conv, b)
        conv_act = tf.nn.relu(
            features=conv_bias,
            name='activation')
        pool1 = tf.nn.max_pool(
            value=conv_act,
            ksize=(1, 2, 2, 1),
            strides=(1, 2, 2, 1),
            padding='SAME',
            name='subsampling')

    # Convolutional layer 2
    with tf.name_scope('conv2'):
        W = tf.Variable(
            tf.truncated_normal(
                shape=(
                    CONV2_FILTER_SIZE,
                    CONV2_FILTER_SIZE,
                    CONV1_FILTER_COUNT,
                    CONV2_FILTER_COUNT),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(CONV2_FILTER_COUNT,),
                dtype=tf.float32),
            name='biases')
        conv = tf.nn.conv2d(
            input=pool1,
            filter=W,
            strides=(1, 1, 1, 1),
            padding='SAME',
            name='convolutional')
        conv_bias = tf.nn.bias_add(conv, b)
        conv_act = tf.nn.relu(
            features=conv_bias,
            name='activation')
        pool2 = tf.nn.max_pool(
            value=conv_act,
            ksize=(1, 2, 2, 1),
            strides=(1, 2, 2, 1),
            padding='SAME',
            name='subsampling')

    # Hidden layer
    with tf.name_scope('hidden'):
        # After two 2x2 max-pools the feature map is
        # (IMAGE_HEIGHT/4) x (IMAGE_WIDTH/4) x CONV2_FILTER_COUNT
        # = 60 x 80 x 6 = 28800 values per image
        conv_output_size = 28800
        W = tf.Variable(
            tf.truncated_normal(
                shape=(conv_output_size, HIDDEN_LAYER_SIZE),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(HIDDEN_LAYER_SIZE,),
                dtype=tf.float32),
            name='biases')
        reshape = tf.reshape(
            tensor=pool2,
            shape=[BATCH_SIZE, -1])
        h1 = tf.nn.relu(
            features=tf.add(tf.matmul(reshape, W), b),
            name='activation')

    # Softmax layer
    with tf.name_scope('softmax'):
        W = tf.Variable(
            tf.truncated_normal(
                shape=(HIDDEN_LAYER_SIZE, NUM_CLASS),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(NUM_CLASS,),
                dtype=tf.float32),
            name='biases')
        logits = tf.add(tf.matmul(h1, W), b, name='logits')

    return logits

def loss(logits, labels):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
        labels=labels)
    loss = tf.reduce_mean(cross_entropy)
    return loss

def training(loss, learning_rate=5e-3):
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op

def evaluation(logits, labels):
    predictions = tf.argmax(logits, 1, name='predictions')
    correct_predictions = tf.equal(predictions,
        tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions,
        tf.float32), name='accuracy')
    return accuracy
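# A minimal inference sketch, not part of the original training pipeline.
# It assumes a fresh process (so the variable names created by `inference`
# match the checkpoint), a checkpoint written by train.py (the default
# 'checkpoints/model' mirrors train.py), and an `image_batch` numpy array of
# shape (BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS) that has
# already been standardized like the training images.
def classify(image_batch, checkpoint_path='checkpoints/model'):
    images = tf.placeholder(
        tf.float32,
        shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS))
    logits = inference(images)
    # Map the 0-based argmax back to the 1-based label ids used in the
    # training file names
    predictions = tf.argmax(logits, 1) + 1
    saver = tf.train.Saver(tf.global_variables())
    with tf.Session() as session:
        saver.restore(session, checkpoint_path)
        # Returns one label id per image, e.g. classify(batch) -> [3, 1, ...]
        return session.run(predictions, feed_dict={images: image_batch})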
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import hsr
import sys
import os
import time

if __name__ == '__main__':
    # % python train.py folder_name
    if len(sys.argv) < 2:
        print('Usage: python {} training-data/'.format(sys.argv[0]))
        sys.exit(1)

    data_dir = sys.argv[1]

    # Count the training images so we know when one epoch is done. Note
    # that hsr.read_images only globs the top level of `data_dir`.
    image_total = 0
    for subdir, dirs, files in os.walk(data_dir):
        for file_name in files:
            if file_name.endswith('.png'):
                image_total += 1

    checkpoint_dir = os.path.abspath('checkpoints')
    checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Create the graph
    images, labels = hsr.read_images(data_dir)
    logits = hsr.inference(images)
    loss = hsr.loss(logits, labels)
    train = hsr.training(loss, learning_rate=5e-2)
    accuracy = hsr.evaluation(logits, labels)

    # Run the graph
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coord)
    try:
        batch_i = 1
        total_batch = 0
        epoch = 1
        start_time = time.time()
        while not coord.should_stop():
            loss_value, acc_value, _ = session.run([
                loss,
                accuracy,
                train])
            elapsed_time = time.time() - start_time
            print('epoch: {} batch: {} loss: {} accuracy: {} '
                  'duration: {:.3f}s'.format(epoch, batch_i, loss_value,
                                             acc_value, elapsed_time))
            batch_i += 1
            total_batch += hsr.BATCH_SIZE
            # One pass over the training set is done: save a checkpoint
            # and start the next epoch
            if total_batch >= image_total:
                epoch += 1
                total_batch = 0
                batch_i = 1

                saver.save(session, checkpoint_prefix)
                start_time = time.time()

    except tf.errors.OutOfRangeError:
        # The input queue is exhausted after NUM_EPOCHS epochs
        print('')
        print('Done.')
    except Exception as e:
        coord.request_stop(e)
    finally:
        coord.request_stop()

    coord.join(threads)
--------------------------------------------------------------------------------