├── README.md
├── hsr-eval.png
├── hsr.py
└── train.py
/README.md:
--------------------------------------------------------------------------------
# HSR

This repository contains code that I use to build a machine learning model for
a hand-signal recognition system.

![Eval example](hsr-eval.png)

The training data is not included. You can create your own training data using
a webcam via Chrome. I use the following
[HTML & JS script](https://gist.github.com/pyk/48b92225d1e3c5a732d1fda7c7b79ce5)
to collect the training data.

## Running

    python train.py training-data/

It expects all images inside the `training-data` directory to be named using
the format `label_id-*`, where `label_id` is a natural number and
`0 < label_id`. (The loader reads only the first character of the file name,
so `label_id` must be a single digit.)
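For example, with the default five classes the directory might look like this
(the file names below are hypothetical; only the leading `label_id-` prefix
matters):

    training-data/
    ├── 1-0001.png
    ├── 1-0002.png
    ├── 2-0001.png
    ├── ...
    └── 5-0042.png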
--------------------------------------------------------------------------------
/hsr-eval.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pyk/hsr/9cfa4a0f2d39bab24ac81a3ef4adb68d152cd8be/hsr-eval.png
--------------------------------------------------------------------------------
/hsr.py:
--------------------------------------------------------------------------------
# The architecture is inspired by LeNet-5 (LeCun, 1998)
import os

import tensorflow as tf

# Parameters
IMAGE_HEIGHT = 240
IMAGE_WIDTH = 320
BATCH_SIZE = 5
NUM_EPOCHS = 2
NUM_CLASS = 5
NUM_CHANNELS = 3
CONV1_FILTER_SIZE = 32
CONV1_FILTER_COUNT = 4
CONV2_FILTER_SIZE = 16
CONV2_FILTER_COUNT = 6
HIDDEN_LAYER_SIZE = 400

def read_images(data_dir):
    pattern = os.path.join(data_dir, '*.png')
    filenames = tf.train.match_filenames_once(pattern, name='list_files')

    queue = tf.train.string_input_producer(
        filenames,
        num_epochs=NUM_EPOCHS,
        shuffle=True,
        name='queue')

    reader = tf.WholeFileReader()
    filename, content = reader.read(queue, name='read_image')
    filename = tf.Print(
        filename,
        data=[filename],
        message='loading: ')
    # The label id is the first character of the file name, e.g.
    # 'training-data/3-0001.png' -> 3. This assumes `data_dir` is a single
    # path component, so the file name is the second '/'-separated field.
    filename_split = tf.string_split([filename], delimiter='/')
    label_id = tf.string_to_number(tf.substr(filename_split.values[1],
        0, 1), out_type=tf.int32)
    label = tf.one_hot(
        label_id-1,
        NUM_CLASS,
        on_value=1.0,
        off_value=0.0,
        dtype=tf.float32)

    img_tensor = tf.image.decode_png(
        content,
        dtype=tf.uint8,
        channels=NUM_CHANNELS,
        name='img_decode')

    # Preprocess the image: perform random transformations
    # Random flip
    img_tensor_flip = tf.image.random_flip_left_right(img_tensor)

    # Random brightness (+/- 20%; the image is converted to [0, 1] floats
    # internally)
    img_tensor_bri = tf.image.random_brightness(img_tensor_flip,
        max_delta=0.2)

    # Per-image scaling to zero mean and unit variance
    img_tensor_std = tf.image.per_image_standardization(img_tensor_bri)

    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3 * BATCH_SIZE
    example_batch, label_batch = tf.train.shuffle_batch(
        [img_tensor_std, label],
        batch_size=BATCH_SIZE,
        shapes=[(IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS), (NUM_CLASS,)],
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        name='train_shuffle')

    return example_batch, label_batch

# `images` is a 4-D tensor with the shape:
# [n_batch, img_height, img_width, n_channel]
def inference(images):
    # Convolutional layer 1
    with tf.name_scope('conv1'):
        W = tf.Variable(
            tf.truncated_normal(
                shape=(
                    CONV1_FILTER_SIZE,
                    CONV1_FILTER_SIZE,
                    NUM_CHANNELS,
                    CONV1_FILTER_COUNT),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(CONV1_FILTER_COUNT,),
                dtype=tf.float32),
            name='biases')
        conv = tf.nn.conv2d(
            input=images,
            filter=W,
            strides=(1, 1, 1, 1),
            padding='SAME',
            name='convolutional')
        conv_bias = tf.nn.bias_add(conv, b)
        conv_act = tf.nn.relu(
            features=conv_bias,
            name='activation')
        pool1 = tf.nn.max_pool(
            value=conv_act,
            ksize=(1, 2, 2, 1),
            strides=(1, 2, 2, 1),
            padding='SAME',
            name='subsampling')

    # Convolutional layer 2
    with tf.name_scope('conv2'):
        W = tf.Variable(
            tf.truncated_normal(
                shape=(
                    CONV2_FILTER_SIZE,
                    CONV2_FILTER_SIZE,
                    CONV1_FILTER_COUNT,
                    CONV2_FILTER_COUNT),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(CONV2_FILTER_COUNT,),
                dtype=tf.float32),
            name='biases')
        conv = tf.nn.conv2d(
            input=pool1,
            filter=W,
            strides=(1, 1, 1, 1),
            padding='SAME',
            name='convolutional')
        conv_bias = tf.nn.bias_add(conv, b)
        conv_act = tf.nn.relu(
            features=conv_bias,
            name='activation')
        pool2 = tf.nn.max_pool(
            value=conv_act,
            ksize=(1, 2, 2, 1),
            strides=(1, 2, 2, 1),
            padding='SAME',
            name='subsampling')

    # Hidden layer
    with tf.name_scope('hidden'):
        # After two 2x2 max-pools the feature map is
        # (IMAGE_HEIGHT/4) x (IMAGE_WIDTH/4) x CONV2_FILTER_COUNT
        # = 60 x 80 x 6 = 28800 values per image
        conv_output_size = 28800
        W = tf.Variable(
            tf.truncated_normal(
                shape=(conv_output_size, HIDDEN_LAYER_SIZE),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(HIDDEN_LAYER_SIZE,),
                dtype=tf.float32),
            name='biases')
        reshape = tf.reshape(
            tensor=pool2,
            shape=[BATCH_SIZE, -1])
        h1 = tf.nn.relu(
            features=tf.add(tf.matmul(reshape, W), b),
            name='activation')

    # Softmax layer
    with tf.name_scope('softmax'):
        W = tf.Variable(
            tf.truncated_normal(
                shape=(HIDDEN_LAYER_SIZE, NUM_CLASS),
                dtype=tf.float32,
                stddev=5e-2),
            name='weights')
        b = tf.Variable(
            tf.zeros(
                shape=(NUM_CLASS,),
                dtype=tf.float32),
            name='biases')
        logits = tf.add(tf.matmul(h1, W), b, name='logits')

    return logits

def loss(logits, labels):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
        labels=labels)
    loss = tf.reduce_mean(cross_entropy)
    return loss

def training(loss, learning_rate=5e-3):
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op

def evaluation(logits, labels):
    predictions = tf.argmax(logits, 1, name='predictions')
    correct_predictions = tf.equal(predictions,
        tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions,
        tf.float32), name='accuracy')
    return accuracy
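# A minimal inference sketch, not part of the original training pipeline.
# It assumes a fresh process (so the variable names created by `inference`
# match the checkpoint), a checkpoint written by train.py (the default
# 'checkpoints/model' mirrors train.py), and an `image_batch` numpy array of
# shape (BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS) that has
# already been standardized like the training images.
def classify(image_batch, checkpoint_path='checkpoints/model'):
    images = tf.placeholder(
        tf.float32,
        shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS))
    logits = inference(images)
    # Map the 0-based argmax back to the 1-based label ids used in the
    # training file names
    predictions = tf.argmax(logits, 1) + 1
    saver = tf.train.Saver(tf.global_variables())
    with tf.Session() as session:
        saver.restore(session, checkpoint_path)
        # Returns one label id per image, e.g. classify(batch) -> [3, 1, ...]
        return session.run(predictions, feed_dict={images: image_batch})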
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import hsr
import sys
import os
import time

if __name__ == '__main__':
    # % python train.py folder_name
    if len(sys.argv) < 2:
        print('Usage: python {} training-data/'.format(sys.argv[0]))
        sys.exit(1)

    data_dir = sys.argv[1]

    # Count the training images so we know when one epoch is done. Note
    # that hsr.read_images only globs the top level of `data_dir`.
    image_total = 0
    for subdir, dirs, files in os.walk(data_dir):
        for file_name in files:
            if file_name.endswith('.png'):
                image_total += 1

    checkpoint_dir = os.path.abspath('checkpoints')
    checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Create the graph
    images, labels = hsr.read_images(data_dir)
    logits = hsr.inference(images)
    loss = hsr.loss(logits, labels)
    train = hsr.training(loss, learning_rate=5e-2)
    accuracy = hsr.evaluation(logits, labels)

    # Run the graph
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coord)
    try:
        batch_i = 1
        total_batch = 0
        epoch = 1
        start_time = time.time()
        while not coord.should_stop():
            loss_value, acc_value, _ = session.run([
                loss,
                accuracy,
                train])
            elapsed_time = time.time() - start_time
            print('epoch: {} batch: {} loss: {} accuracy: {} '
                  'duration: {:.3f}s'.format(epoch, batch_i, loss_value,
                                             acc_value, elapsed_time))
            batch_i += 1
            total_batch += hsr.BATCH_SIZE
            # One pass over the training set is done: save a checkpoint
            # and start the next epoch
            if total_batch >= image_total:
                epoch += 1
                total_batch = 0
                batch_i = 1

                saver.save(session, checkpoint_prefix)
                start_time = time.time()

    except tf.errors.OutOfRangeError:
        # The input queue is exhausted after NUM_EPOCHS epochs
        print('')
        print('Done.')
    except Exception as e:
        coord.request_stop(e)
    finally:
        coord.request_stop()

    coord.join(threads)
--------------------------------------------------------------------------------