├── .gitignore ├── README.md ├── TensorFlow 1.2 seq2seq example.ipynb ├── Tensorflow 1.2 CTC example.ipynb ├── ctc.py ├── ctc_example.py ├── seq2seq.py └── seq2seq_example.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.swp 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Examples 2 | 3 | This repository stores examples for several neural network types in TensorFlow. 4 | 5 | * [seq2seq](https://github.com/pplantinga/tensorflow-examples/blob/master/TensorFlow%201.2%20seq2seq%20example.ipynb) 6 | * [ctc](https://github.com/pplantinga/tensorflow-examples/blob/master/Tensorflow%201.2%20CTC%20example.ipynb) 7 | -------------------------------------------------------------------------------- /TensorFlow 1.2 seq2seq example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow 1.2 seq2seq example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Since there seems to be a dearth of up-to-date tensorflow examples on how to use the seq2seq module in contrib, I've decided to post this code online. It is based primarily on this tutorial: [Udacity's sequence to sequence implementation](https://github.com/udacity/deep-learning/blob/master/seq2seq/sequence_to_sequence_implementation.ipynb)\n", 15 | "\n", 16 | "This example takes a list of numbers and sorts it. There are multiple updates from the Udacity example, such as scheduled sampling, beam search, attention, and error rate calculation. You will best understand what is going on in this example code if you already have a good background in TensorFlow and seq2seq networks.\n", 17 | "\n", 18 | "Unfortunately, Jupyter doesn't work well with classes, so I will have to put most of the code in a single cell. The comments should describe what is going on." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import tensorflow as tf\n", 30 | "import tensorflow.contrib.rnn as rnn\n", 31 | "import tensorflow.contrib.seq2seq as seq2seq\n", 32 | "from tensorflow.python.layers.core import Dense\n", 33 | "import numpy as np\n", 34 | "\n", 35 | "class seq2seq_example:\n", 36 | "\n", 37 | " # Constants\n", 38 | " tokens = {\"PAD\": 0, \"EOS\": 1, \"GO\": 2, \"UNK\": 3}\n", 39 | " minLength = 5\n", 40 | " maxLength = 10\n", 41 | " samples = 10000\n", 42 | " vocab_size = 50\n", 43 | " embedding_size = 15\n", 44 | " dropout = 0.3\n", 45 | " layers = 2\n", 46 | " layer_size = 100\n", 47 | " batch_size = 50\n", 48 | " beam_width = 4\n", 49 | "\n", 50 | " def __init__(self):\n", 51 | " \n", 52 | " # Random integers up to the vocab_size (not including reserved integers)\n", 53 | " self.data = np.random.randint(\n", 54 | " low = len(self.tokens),\n", 55 | " high = self.vocab_size,\n", 56 | " size = (self.samples, self.maxLength))\n", 57 | " \n", 58 | " # Assign a random length to each sequence from minLength to maxLength\n", 59 | " self.dataLens = np.random.randint(\n", 60 | " low = self.minLength,\n", 61 | " high = self.maxLength,\n", 62 | " size = self.samples)\n", 63 | " \n", 64 | " # Create labels by sorting the original data\n", 65 | " self.dataLabels = np.ones_like(self.data) * self.tokens['PAD']\n", 66 | " for i in range(len(self.data)):\n", 67 | " self.data[i, self.dataLens[i]:] = self.tokens['PAD']\n", 68 | " self.dataLabels[i, :self.dataLens[i]] = np.sort(self.data[i, :self.dataLens[i]])\n", 69 | " \n", 70 | " # Make placeholders and stuff\n", 71 | " self.make_inputs()\n", 72 | "\n", 73 | " # Build the compute graph\n", 74 | " self.build_graph()\n", 75 | "\n", 76 | " # Create the inputs to the graph (placeholders and stuff)\n", 77 | " def make_inputs(self):\n", 78 | " self.input = tf.placeholder(tf.int32, (self.batch_size, self.maxLength))\n", 79 | " self.lengths = tf.placeholder(tf.int32, (self.batch_size,))\n", 80 | " self.labels = tf.placeholder(tf.int32, (self.batch_size, self.maxLength))\n", 81 | " self.keep_prob = tf.placeholder(tf.float32)\n", 82 | "\n", 83 | " # Embed encoder input\n", 84 | " self.enc_input = tf.contrib.layers.embed_sequence(\n", 85 | " ids = self.input,\n", 86 | " vocab_size = self.vocab_size,\n", 87 | " embed_dim = self.embedding_size)\n", 88 | "\n", 89 | " # Decoder input (GO + label + EOS)\n", 90 | " eos = tf.one_hot(\n", 91 | " indices = self.lengths,\n", 92 | " depth = self.maxLength,\n", 93 | " on_value = self.tokens['EOS'])\n", 94 | " \n", 95 | " self.add_eos = self.labels + eos\n", 96 | " go_tokens = tf.constant(self.tokens['GO'], shape=[self.batch_size, 1])\n", 97 | " pre_embed_dec_input = tf.concat((go_tokens, self.add_eos), 1)\n", 98 | " \n", 99 | " # Embed decoder input\n", 100 | " self.dec_embed = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size]))\n", 101 | " self.dec_input = tf.nn.embedding_lookup(self.dec_embed, pre_embed_dec_input)\n", 102 | "\n", 103 | " def one_layer_cell(self):\n", 104 | " return rnn.DropoutWrapper(rnn.LSTMCell(self.layer_size), self.keep_prob)\n", 105 | " \n", 106 | " def cell(self):\n", 107 | " return rnn.MultiRNNCell([self.one_layer_cell() for _ in range(self.layers)])\n", 108 | " \n", 109 | " def decoder_cell(self, inputs, lengths):\n", 110 | " attention_mechanism = seq2seq.LuongAttention(\n", 111 | " num_units = self.layer_size,\n", 112 | " memory = 
inputs,\n", 113 | " memory_sequence_length = lengths,\n", 114 | " scale = True)\n", 115 | "\n", 116 | " return seq2seq.AttentionWrapper(\n", 117 | " cell = self.cell(),\n", 118 | " attention_mechanism = attention_mechanism,\n", 119 | " attention_layer_size = self.layer_size)\n", 120 | " \n", 121 | " # Build the compute graph. First encoder, then decoder, then train/test ops\n", 122 | " def build_graph(self):\n", 123 | " \n", 124 | " # Build the encoder\n", 125 | " enc_outputs, enc_state = tf.nn.dynamic_rnn(\n", 126 | " cell = self.cell(),\n", 127 | " inputs = self.enc_input,\n", 128 | " sequence_length = self.lengths,\n", 129 | " dtype = tf.float32)\n", 130 | "\n", 131 | " # Replicate the top-most encoder state for starting state of all layers in the decoder\n", 132 | " dec_start_state = tuple(enc_state[-1] for _ in range(self.layers))\n", 133 | " \n", 134 | " # Output layer converts from layer size to vocab size\n", 135 | " output = Dense(self.vocab_size,\n", 136 | " kernel_initializer = tf.truncated_normal_initializer(stddev=0.1))\n", 137 | " \n", 138 | " # Training decoder: scheduled sampling et al.\n", 139 | " with tf.variable_scope(\"decode\"):\n", 140 | " \n", 141 | " cell = self.decoder_cell(enc_outputs, self.lengths)\n", 142 | " init_state = cell.zero_state(self.batch_size, tf.float32)\n", 143 | " init_state = init_state.clone(cell_state=dec_start_state)\n", 144 | " \n", 145 | " train_helper = seq2seq.ScheduledEmbeddingTrainingHelper(\n", 146 | " inputs = self.dec_input,\n", 147 | " sequence_length = self.lengths,\n", 148 | " embedding = self.dec_embed,\n", 149 | " sampling_probability = 0.1)\n", 150 | "\n", 151 | " train_decoder = seq2seq.BasicDecoder(\n", 152 | " cell = cell,\n", 153 | " helper = train_helper,\n", 154 | " initial_state = init_state,\n", 155 | " output_layer = output)\n", 156 | " \n", 157 | " train_output, _, train_lengths = seq2seq.dynamic_decode(\n", 158 | " decoder = train_decoder,\n", 159 | " maximum_iterations = self.maxLength)\n", 160 | " \n", 161 | " # Tile inputs for beam search decoder\n", 162 | " dec_start_state = seq2seq.tile_batch(dec_start_state, self.beam_width)\n", 163 | " enc_outputs = seq2seq.tile_batch(enc_outputs, self.beam_width)\n", 164 | " lengths = seq2seq.tile_batch(self.lengths, self.beam_width)\n", 165 | " \n", 166 | " # Share weights with training decoder\n", 167 | " with tf.variable_scope(\"decode\", reuse=True):\n", 168 | " \n", 169 | " cell = self.decoder_cell(enc_outputs, lengths)\n", 170 | " init_state = cell.zero_state(self.batch_size * self.beam_width, tf.float32)\n", 171 | " init_state = init_state.clone(cell_state=dec_start_state)\n", 172 | " \n", 173 | " test_decoder = seq2seq.BeamSearchDecoder(\n", 174 | " cell = cell,\n", 175 | " embedding = self.dec_embed,\n", 176 | " start_tokens = tf.ones_like(self.lengths) * self.tokens['GO'],\n", 177 | " end_token = self.tokens['EOS'],\n", 178 | " initial_state = init_state,\n", 179 | " beam_width = self.beam_width,\n", 180 | " output_layer = output)\n", 181 | " \n", 182 | " test_output, _, test_lengths = seq2seq.dynamic_decode(\n", 183 | " decoder = test_decoder,\n", 184 | " maximum_iterations = self.maxLength)\n", 185 | " \n", 186 | " # Create train op. 
Add one to train lengths, to include EOS\n", 187 | " mask = tf.sequence_mask(train_lengths + 1, self.maxLength - 1, dtype=tf.float32)\n", 188 | " self.cost = seq2seq.sequence_loss(train_output.rnn_output, self.add_eos[:, :-1], mask)\n", 189 | " self.train_op = tf.train.AdamOptimizer(0.001).minimize(self.cost)\n", 190 | "\n", 191 | " # Create test error rate op. Remove one from lengths to exclude EOS\n", 192 | " predicts = self.to_sparse(test_output.predicted_ids[:,:,0], test_lengths[:, 0] - 1)\n", 193 | " labels = self.to_sparse(self.labels, self.lengths)\n", 194 | " self.error_rate = tf.reduce_mean(tf.edit_distance(predicts, labels))\n", 195 | "\n", 196 | " # Convert a dense matrix into a sparse matrix (for e.g. edit_distance)\n", 197 | " def to_sparse(self, tensor, lengths):\n", 198 | " mask = tf.sequence_mask(lengths, self.maxLength)\n", 199 | " indices = tf.to_int64(tf.where(tf.equal(mask, True)))\n", 200 | " values = tf.to_int32(tf.boolean_mask(tensor, mask))\n", 201 | " shape = tf.to_int64(tf.shape(tensor))\n", 202 | " return tf.SparseTensor(indices, values, shape)\n", 203 | "\n", 204 | " # Divide training samples into batches\n", 205 | " def batchify(self):\n", 206 | "\n", 207 | " for i in range(self.samples // self.batch_size):\n", 208 | " yield self.next_batch(i)\n", 209 | "\n", 210 | " # Create a single batch at i * batch_size\n", 211 | " def next_batch(self, i):\n", 212 | "\n", 213 | " start = i * self.batch_size\n", 214 | " stop = (i+1) * self.batch_size\n", 215 | "\n", 216 | " batch = {\n", 217 | " self.input: self.data[start:stop],\n", 218 | " self.lengths: self.dataLens[start:stop],\n", 219 | " self.labels: self.dataLabels[start:stop],\n", 220 | " self.keep_prob: 1. - self.dropout\n", 221 | " }\n", 222 | "\n", 223 | " return batch\n", 224 | "\n", 225 | " # Create a random test batch\n", 226 | " def test_batch(self):\n", 227 | "\n", 228 | " data = np.random.randint(\n", 229 | " low = len(self.tokens),\n", 230 | " high = self.vocab_size,\n", 231 | " size = (self.batch_size, self.maxLength))\n", 232 | " \n", 233 | " dataLens = np.random.randint(\n", 234 | " low = self.minLength,\n", 235 | " high = self.maxLength,\n", 236 | " size = self.batch_size)\n", 237 | " \n", 238 | " dataLabels = np.zeros_like(data)\n", 239 | " for i in range(len(data)):\n", 240 | " data[i, dataLens[i]:] = self.tokens['PAD']\n", 241 | " dataLabels[i, :dataLens[i]] = np.sort(data[i, :dataLens[i]])\n", 242 | "\n", 243 | " return {\n", 244 | " self.input: data,\n", 245 | " self.lengths: dataLens,\n", 246 | " self.labels: dataLabels,\n", 247 | " self.keep_prob: 1.\n", 248 | " }" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Let's create a main method that uses this class! We'll train for 50 epochs and see how good our network gets at sorting integers." 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 2, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stderr", 265 | "output_type": "stream", 266 | "text": [ 267 | "/usr/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py:93: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", 268 | " \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
\"\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "s2s = seq2seq_example()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 3, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "Epoch 1 train loss: 2.91986195683 test error: 0.834714\n", 286 | "Epoch 2 train loss: 2.27854833126 test error: 0.742071\n", 287 | "Epoch 3 train loss: 2.05610381961 test error: 0.609936\n", 288 | "Epoch 4 train loss: 1.92091192007 test error: 0.544151\n", 289 | "Epoch 5 train loss: 1.79896817088 test error: 0.484119\n", 290 | "Epoch 6 train loss: 1.65584457636 test error: 0.349063\n", 291 | "Epoch 7 train loss: 1.4184307152 test error: 0.197675\n", 292 | "Epoch 8 train loss: 1.11107378602 test error: 0.0938571\n", 293 | "Epoch 9 train loss: 0.871339265406 test error: 0.0533254\n", 294 | "Epoch 10 train loss: 0.713713488728 test error: 0.0281667\n", 295 | "Epoch 11 train loss: 0.604298673123 test error: 0.0258333\n", 296 | "Epoch 12 train loss: 0.544282832742 test error: 0.0402698\n", 297 | "Epoch 13 train loss: 0.486932658702 test error: 0.0130159\n", 298 | "Epoch 14 train loss: 0.449633491188 test error: 0.00777778\n", 299 | "Epoch 15 train loss: 0.409089321047 test error: 0.0303889\n", 300 | "Epoch 16 train loss: 0.394229536355 test error: 0.0158333\n", 301 | "Epoch 17 train loss: 0.370213930979 test error: 0.0108333\n", 302 | "Epoch 18 train loss: 0.342018755376 test error: 0.00952381\n", 303 | "Epoch 19 train loss: 0.325859643742 test error: 0.0\n", 304 | "Epoch 20 train loss: 0.308327895328 test error: 0.00444444\n", 305 | "Epoch 21 train loss: 0.290842123702 test error: 0.0084127\n", 306 | "Epoch 22 train loss: 0.284117041528 test error: 0.00869048\n", 307 | "Epoch 23 train loss: 0.275989980996 test error: 0.00777778\n", 308 | "Epoch 24 train loss: 0.269462534711 test error: 0.00666667\n", 309 | "Epoch 25 train loss: 0.252057261914 test error: 0.00507937\n", 310 | "Epoch 26 train loss: 0.247674267814 test error: 0.00285714\n", 311 | "Epoch 27 train loss: 0.232467229217 test error: 0.00805556\n", 312 | "Epoch 28 train loss: 0.228952821717 test error: 0.00888889\n", 313 | "Epoch 29 train loss: 0.224217796773 test error: 0.00222222\n", 314 | "Epoch 30 train loss: 0.210341431685 test error: 0.0\n", 315 | "Epoch 31 train loss: 0.201171869896 test error: 0.0025\n", 316 | "Epoch 32 train loss: 0.195612193421 test error: 0.00844445\n", 317 | "Epoch 33 train loss: 0.18933903683 test error: 0.00333333\n", 318 | "Epoch 34 train loss: 0.184589334577 test error: 0.0\n", 319 | "Epoch 35 train loss: 0.176255308613 test error: 0.00333333\n", 320 | "Epoch 36 train loss: 0.180171127692 test error: 0.0\n", 321 | "Epoch 37 train loss: 0.163527621329 test error: 0.0\n", 322 | "Epoch 38 train loss: 0.167020770088 test error: 0.0\n", 323 | "Epoch 39 train loss: 0.160414721444 test error: 0.00444444\n", 324 | "Epoch 40 train loss: 0.153287142739 test error: 0.0025\n", 325 | "Epoch 41 train loss: 0.15106973609 test error: 0.0\n", 326 | "Epoch 42 train loss: 0.149061797969 test error: 0.0\n", 327 | "Epoch 43 train loss: 0.149537268914 test error: 0.0\n", 328 | "Epoch 44 train loss: 0.139682257585 test error: 0.00730159\n", 329 | "Epoch 45 train loss: 0.137363640536 test error: 0.00222222\n", 330 | "Epoch 46 train loss: 0.135108639039 test error: 0.00472222\n", 331 | "Epoch 47 train loss: 0.128768154997 test error: 0.0025\n", 332 | "Epoch 48 train loss: 0.122774963211 test error: 0.0\n", 333 | "Epoch 49 train loss: 
0.125188367758 test error: 0.0\n", 334 | "Epoch 50 train loss: 0.116121740155 test error: 0.00285714\n" 335 | ] 336 | } 337 | ], 338 | "source": [ 339 | "with tf.Session() as sess:\n", 340 | " sess.run(tf.global_variables_initializer())\n", 341 | " for epoch in range(50):\n", 342 | " \n", 343 | " # Keep track of average train cost for this epoch\n", 344 | " train_cost = 0\n", 345 | " for batch in s2s.batchify():\n", 346 | " train_cost += sess.run([s2s.train_op, s2s.cost], batch)[1]\n", 347 | " train_cost /= s2s.samples / s2s.batch_size\n", 348 | " \n", 349 | " # Test time\n", 350 | " er = sess.run(s2s.error_rate, s2s.test_batch())\n", 351 | " \n", 352 | " print(\"Epoch\", (epoch + 1), \"train loss:\", train_cost, \"test error:\", er)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": { 358 | "collapsed": true 359 | }, 360 | "source": [ 361 | "An error rate of 0 is pretty good, I'd say! That's all there is to it." 362 | ] 363 | } 364 | ], 365 | "metadata": { 366 | "kernelspec": { 367 | "display_name": "Python 3", 368 | "language": "python", 369 | "name": "python3" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.6.2" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 2 386 | } 387 | -------------------------------------------------------------------------------- /Tensorflow 1.2 CTC example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Tensorflow 1.2 CTC example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "There do exist other ctc examples, but this aims to be a pretty minimal example, for easy understanding.\n", 15 | "\n", 16 | "The first step is to make some random data." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import tensorflow as tf\n", 28 | "import numpy as np\n", 29 | "\n", 30 | "# Convert dense tensor to sparse tensor, required for ctc\n", 31 | "def to_sparse(tensor, lengths):\n", 32 | " mask = tf.sequence_mask(lengths, tf.reduce_max(lengths))\n", 33 | " indices = tf.to_int64(tf.where(tf.equal(mask, True)))\n", 34 | " values = tf.to_int32(tf.boolean_mask(tensor, mask))\n", 35 | " shape = tf.to_int64(tf.shape(tensor))\n", 36 | " return tf.SparseTensor(indices, values, shape)\n", 37 | "\n", 38 | "vocab_size = 4\n", 39 | "lstm_size = 10\n", 40 | "embed_size = 10\n", 41 | "samples = 100\n", 42 | "\n", 43 | "# The max length of the label should be shorter than the min length of input\n", 44 | "min_length = 4\n", 45 | "max_length = 5\n", 46 | "min_label_len = 2\n", 47 | "max_label_len = 2\n", 48 | "\n", 49 | "# Random inputs\n", 50 | "inputs = tf.constant(np.random.randint(1, vocab_size, size=[samples, max_length]))\n", 51 | "lengths = tf.constant(\n", 52 | " np.random.randint(min_length, max_length+1, size=samples),\n", 53 | " dtype=tf.int32)\n", 54 | "\n", 55 | "# Random labels\n", 56 | "labels = tf.constant(np.random.randint(1, vocab_size, size=[samples, max_label_len]))\n", 57 | "label_lengths = tf.constant(\n", 58 | " np.random.randint(min_label_len, max_label_len+1, size=samples),\n", 59 | " dtype=tf.int32)\n", 60 | "\n", 61 | "# Convert labels to sparse tensor\n", 62 | "sparse_labels = to_sparse(labels, label_lengths)\n", 63 | "\n", 64 | "# Transpose inputs to time-major\n", 65 | "inputs = tf.transpose(inputs)\n", 66 | "\n", 67 | "# Embed inputs\n", 68 | "embed = tf.contrib.layers.embed_sequence(inputs, max_length, embed_size)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "With the data out of the way, we can build our model with surprisingly few lines of code." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "outputs, _ = tf.nn.dynamic_rnn(\n", 85 | " tf.nn.rnn_cell.LSTMCell(lstm_size),\n", 86 | " embed,\n", 87 | " lengths,\n", 88 | " time_major=True,\n", 89 | " dtype=tf.float32)\n", 90 | "\n", 91 | "# Output layer converts lstm_size to vocab_size (plus one for blank label)\n", 92 | "logits = tf.layers.dense(outputs, vocab_size + 1)\n", 93 | "\n", 94 | "# Create train op from ctc loss\n", 95 | "loss = tf.reduce_mean(tf.nn.ctc_loss(sparse_labels, logits, lengths))\n", 96 | "train_op = tf.train.AdamOptimizer(0.001).minimize(loss)\n", 97 | "\n", 98 | "# Create test op from beam search decoder\n", 99 | "decoded, _ = tf.nn.ctc_beam_search_decoder(logits, lengths, beam_width=2)\n", 100 | "error_rate = tf.reduce_mean(tf.edit_distance(sparse_labels, tf.cast(decoded[0], tf.int32)))" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Now we can go ahead and train the model." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "0.881667\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "with tf.Session() as sess:\n", 125 | " sess.run(tf.global_variables_initializer())\n", 126 | "\n", 127 | " for i in range(1000):\n", 128 | " sess.run(train_op)\n", 129 | "\n", 130 | " print(sess.run(error_rate))" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.6.1" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /ctc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple example of sorting using ctc 3 | 4 | Author: Peter Plantinga 5 | Date: Summer 2017 6 | """ 7 | 8 | import tensorflow as tf 9 | from ctc_example import ctc_example 10 | 11 | epochs = 50 12 | 13 | ctc = ctc_example() 14 | 15 | with tf.Session() as sess: 16 | sess.run(tf.global_variables_initializer()) 17 | 18 | for i in range(epochs): 19 | 20 | train_cost = 0 21 | for batch in ctc.batchify(): 22 | train_cost += sess.run([ctc.train_op, ctc.cost], batch)[1] 23 | 24 | train_cost *= ctc.batch_size / ctc.samples 25 | 26 | error = sess.run(ctc.error_rate, ctc.test_batch()) 27 | 28 | print("Epoch ", (i+1), " train loss: ", train_cost, "test error: ", error) 29 | -------------------------------------------------------------------------------- /ctc_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of ctc in TensorFlow 1.2 3 | 4 | Sorts a random list of integers 5 | 6 | Author: Peter Plantinga 7 | Date: Summer 2017 8 | """ 9 | 10 | import tensorflow as tf 11 | import tensorflow.contrib.rnn as rnn 12 | import numpy as np 13 | from random import shuffle 14 | 15 | class ctc_example: 16 | 17 | tokens = {"PAD": 0, "EOS": 1, "GO": 2, "UNK": 3} 18 | 19 | minLength = 5 20 | maxLength = 10 21 | samples = 10000 22 | vocab_size = 50 23 | embedding_size = 15 24 | dropout = 0.3 25 | layers = 2 26 | layer_size = 100 27 | batch_size = 50 28 | 29 | def __init__(self): 30 | 31 | # Random integers up to vocab size (not including reserved values) 32 | self.data = np.random.randint( 33 | low = len(self.tokens), 34 | high = self.vocab_size, 35 | size = (self.samples, self.maxLength + self.minLength)) 36 | 37 | # Random length for each sequence from minLength to maxLength 38 | self.dataLens = np.random.randint( 39 | low = self.minLength, 40 | high = self.maxLength, 41 | size = self.samples) 42 | 43 | # Create labels by sorting data 44 | self.dataLabels = np.zeros([self.samples, self.maxLength]) 45 | for i in range(len(self.data)): 46 | self.data[i, self.dataLens[i]:] = self.tokens['PAD'] 47 | self.dataLabels[i, :self.dataLens[i]] = np.sort(self.data[i, :self.dataLens[i]]) 48 | 49 | # Make placeholders and stuff 50 | self.make_inputs() 51 | 52 | # Build computation graph 53 | self.build_graph() 54 | 55 | def make_inputs(self): 56 | self.input = tf.placeholder(tf.int32, 
(self.batch_size, self.maxLength + self.minLength)) 57 | self.lengths = tf.placeholder(tf.int32, (self.batch_size,)) 58 | self.labels = tf.placeholder(tf.int32, (self.batch_size, self.maxLength)) 59 | self.keep_prob = tf.placeholder(tf.float32) 60 | 61 | # Embed input 62 | self.embedded_input = tf.contrib.layers.embed_sequence( 63 | ids = self.input, 64 | vocab_size = self.vocab_size, 65 | embed_dim = self.embedding_size) 66 | 67 | # Time-major 68 | #self.embedded_input = tf.transpose(self.embedded_input) 69 | 70 | def single_layer_cell(self): 71 | return rnn.DropoutWrapper(rnn.LSTMCell(self.layer_size), self.keep_prob) 72 | 73 | def cell(self): 74 | return rnn.MultiRNNCell([self.single_layer_cell() for _ in range(self.layers)]) 75 | 76 | def build_graph(self): 77 | outputs, _ = tf.nn.bidirectional_dynamic_rnn( 78 | cell_fw = self.cell(), 79 | cell_bw = self.cell(), 80 | inputs = self.embedded_input, 81 | sequence_length = self.lengths + self.minLength, 82 | dtype = tf.float32) 83 | #time_major = True) 84 | 85 | # Concatenate fw and bw outputs, then reshape 86 | outputs = tf.concat(outputs, 2) 87 | outputs = tf.reshape(outputs, [-1, self.layer_size * 2]) 88 | 89 | # Output layer 90 | W = tf.Variable(tf.truncated_normal([self.layer_size * 2, self.vocab_size + 1], stddev=0.1)) 91 | b = tf.Variable(tf.zeros(self.vocab_size + 1)) 92 | logits = tf.matmul(outputs, W) + b 93 | logits = tf.reshape(logits, [self.batch_size, self.maxLength + self.minLength, self.vocab_size + 1]) 94 | logits = tf.transpose(logits, [1, 0, 2]) 95 | 96 | # CTC layer 97 | sparse_labels = self.to_sparse(self.labels, self.lengths) 98 | self.cost = tf.reduce_mean(tf.nn.ctc_loss( 99 | labels = sparse_labels, 100 | inputs = logits, 101 | sequence_length = self.lengths + self.minLength, 102 | time_major = True)) 103 | self.train_op = tf.train.AdamOptimizer(0.001).minimize(self.cost) 104 | 105 | 106 | # Decoder 107 | decoded, _ = tf.nn.ctc_beam_search_decoder( 108 | inputs = logits, 109 | sequence_length = self.lengths, 110 | beam_width = 4) 111 | self.error_rate = tf.reduce_mean(tf.edit_distance(sparse_labels, tf.cast(decoded[0], tf.int32))) 112 | 113 | 114 | def to_sparse(self, tensor, lengths): 115 | mask = tf.sequence_mask(lengths, self.maxLength) 116 | indices = tf.to_int64(tf.where(tf.equal(mask, True))) 117 | values = tf.to_int32(tf.boolean_mask(tensor, mask)) 118 | shape = tf.to_int64(tf.shape(tensor)) 119 | return tf.SparseTensor(indices, values, shape) 120 | 121 | def next_batch(self, i): 122 | 123 | start = i * self.batch_size 124 | stop = (i+1) * self.batch_size 125 | 126 | batch = { 127 | self.input: self.data[start:stop], 128 | self.lengths: self.dataLens[start:stop], 129 | self.labels: self.dataLabels[start:stop], 130 | self.keep_prob: 1. 
- self.dropout 131 | } 132 | 133 | return batch 134 | 135 | def batchify(self): 136 | 137 | # Shuffle data 138 | a = list(zip(self.data, self.dataLens, self.dataLabels)) 139 | shuffle(a) 140 | self.data, self.dataLens, self.dataLabels = zip(*a) 141 | 142 | for i in range(self.samples // self.batch_size): 143 | yield self.next_batch(i) 144 | 145 | def test_batch(self): 146 | 147 | data = np.random.randint( 148 | low = len(self.tokens), 149 | high = self.vocab_size, 150 | size = (self.batch_size, self.maxLength + self.minLength)) 151 | 152 | dataLens = np.random.randint( 153 | low = self.minLength, 154 | high = self.maxLength, 155 | size = self.batch_size) 156 | 157 | dataLabels = np.zeros([self.batch_size, self.maxLength]) 158 | for i in range(len(data)): 159 | data[i, dataLens[i]:] = self.tokens['PAD'] 160 | dataLabels[i, :dataLens[i]] = np.sort(data[i, :dataLens[i]]) 161 | 162 | return { 163 | self.input: data, 164 | self.lengths: dataLens, 165 | self.labels: dataLabels, 166 | self.keep_prob: 1. 167 | } 168 | -------------------------------------------------------------------------------- /seq2seq.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple example of sorting 3 | 4 | Author: Peter Plantinga 5 | Date: Summer 2017 6 | """ 7 | 8 | import tensorflow as tf 9 | from seq2seq_example import seq2seq_example 10 | 11 | epochs = 50 12 | 13 | s2s = seq2seq_example() 14 | 15 | with tf.Session() as sess: 16 | sess.run(tf.global_variables_initializer()) 17 | 18 | for i in range(epochs): 19 | 20 | train_cost = 0 21 | for batch in s2s.batchify(): 22 | train_cost += sess.run([s2s.train_op, s2s.cost], batch)[1] 23 | 24 | train_cost *= s2s.batch_size / s2s.samples 25 | 26 | error = sess.run(s2s.error_rate, s2s.test_batch()) 27 | 28 | print("Epoch ", (i+1), " train loss: ", train_cost, "test error: ", error) 29 | -------------------------------------------------------------------------------- /seq2seq_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of seq2seq in TensorFlow 1.2 3 | 4 | Sorts a random list of integers 5 | 6 | Author: Peter Plantinga 7 | Date: Summer 2017 8 | """ 9 | 10 | import tensorflow as tf 11 | import tensorflow.contrib.rnn as rnn 12 | import tensorflow.contrib.seq2seq as seq2seq 13 | from tensorflow.python.layers.core import Dense 14 | import numpy as np 15 | from random import shuffle 16 | 17 | class seq2seq_example: 18 | 19 | tokens = {"PAD": 0, "EOS": 1, "GO": 2, "UNK": 3} 20 | 21 | minLength = 5 22 | maxLength = 10 23 | samples = 10000 24 | vocab_size = 50 25 | embedding_size = 15 26 | dropout = 0.3 27 | layers = 2 28 | layer_size = 100 29 | batch_size = 50 30 | beam_width = 4 31 | 32 | def __init__(self): 33 | 34 | # Random integers up to vocab size (not including reserved values) 35 | self.data = np.random.randint( 36 | low = len(self.tokens), 37 | high = self.vocab_size, 38 | size = (self.samples, self.maxLength)) 39 | 40 | # Random length for each sequence from minLength to maxLength 41 | self.dataLens = np.random.randint( 42 | low = self.minLength, 43 | high = self.maxLength, 44 | size = self.samples) 45 | 46 | # Create labels by sorting data 47 | self.dataLabels = np.zeros_like(self.data) 48 | for i in range(len(self.data)): 49 | self.data[i, self.dataLens[i]:] = self.tokens['PAD'] 50 | self.dataLabels[i, :self.dataLens[i]] = np.sort(self.data[i, :self.dataLens[i]]) 51 | 52 | # Make placeholders and stuff 53 | self.make_inputs() 54 | 55 | # Build 
computation graph 56 | self.build_graph() 57 | 58 | def make_inputs(self): 59 | self.input = tf.placeholder(tf.int32, (self.batch_size, self.maxLength)) 60 | self.lengths = tf.placeholder(tf.int32, (self.batch_size,)) 61 | self.labels = tf.placeholder(tf.int32, (self.batch_size, self.maxLength)) 62 | self.keep_prob = tf.placeholder(tf.float32) 63 | 64 | # Embed encoder input 65 | self.enc_input = tf.contrib.layers.embed_sequence( 66 | ids = self.input, 67 | vocab_size = self.vocab_size, 68 | embed_dim = self.embedding_size) 69 | 70 | # Create decoder input (GO + label + EOS) 71 | eos = tf.one_hot( 72 | indices = self.lengths, 73 | depth = self.maxLength, 74 | on_value = self.tokens['EOS']) 75 | 76 | self.add_eos = self.labels + eos 77 | go_tokens = tf.constant(self.tokens['GO'], shape=[self.batch_size, 1]) 78 | pre_embed_dec_input = tf.concat((go_tokens, self.add_eos), 1) 79 | 80 | # Embed decoder input 81 | self.dec_embed = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size])) 82 | self.dec_input = tf.nn.embedding_lookup(self.dec_embed, pre_embed_dec_input) 83 | 84 | def single_layer_cell(self): 85 | return rnn.DropoutWrapper(rnn.LSTMCell(self.layer_size), self.keep_prob) 86 | 87 | def cell(self): 88 | return rnn.MultiRNNCell([self.single_layer_cell() for _ in range(self.layers)]) 89 | 90 | def decoder_cell(self, inputs, lengths): 91 | attention_mechanism = seq2seq.LuongAttention( 92 | num_units = self.layer_size, 93 | memory = inputs, 94 | memory_sequence_length = lengths, 95 | scale = True) 96 | 97 | return seq2seq.AttentionWrapper( 98 | cell = self.cell(), 99 | attention_mechanism = attention_mechanism, 100 | attention_layer_size = self.layer_size) 101 | 102 | def build_graph(self): 103 | enc_outputs, enc_state = tf.nn.dynamic_rnn( 104 | cell = self.cell(), 105 | inputs = self.enc_input, 106 | sequence_length = self.lengths, 107 | dtype = tf.float32) 108 | 109 | # Replicate the top-most encoder state for starting state of all layers in the decoder 110 | dec_start_state = tuple(enc_state[-1] for _ in range(self.layers)) 111 | 112 | output = Dense(self.vocab_size, 113 | kernel_initializer = tf.truncated_normal_initializer(stddev=0.1)) 114 | 115 | # Training decoder: scheduled sampling et al. 
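# Note: ScheduledEmbeddingTrainingHelper implements scheduled sampling. At each
# decoding step it normally feeds the next ground-truth embedding from dec_input,
# but with probability `sampling_probability` (0.1 here) it instead embeds a token
# sampled from the previous step's output logits, reducing the train/test mismatch.
# The "decode" variable scope opened below is reused later (reuse=True), so the
# beam-search test decoder shares this decoder's attention, cell, and output weights.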
116 | with tf.variable_scope("decode"): 117 | 118 | cell = self.decoder_cell(enc_outputs, self.lengths) 119 | init_state = cell.zero_state(self.batch_size, tf.float32) 120 | init_state = init_state.clone(cell_state=dec_start_state) 121 | 122 | train_helper = seq2seq.ScheduledEmbeddingTrainingHelper( 123 | inputs = self.dec_input, 124 | sequence_length = self.lengths, 125 | embedding = self.dec_embed, 126 | sampling_probability = 0.1) 127 | 128 | train_decoder = seq2seq.BasicDecoder( 129 | cell = cell, 130 | helper = train_helper, 131 | initial_state = init_state, 132 | output_layer = output) 133 | 134 | train_output, _, train_lengths = seq2seq.dynamic_decode( 135 | decoder = train_decoder, 136 | maximum_iterations = self.maxLength) 137 | 138 | dec_start_state = seq2seq.tile_batch(dec_start_state, self.beam_width) 139 | enc_outputs = seq2seq.tile_batch(enc_outputs, self.beam_width) 140 | lengths = seq2seq.tile_batch(self.lengths, self.beam_width) 141 | 142 | with tf.variable_scope("decode", reuse=True): 143 | cell = self.decoder_cell(enc_outputs, lengths) 144 | init_state = cell.zero_state(self.batch_size * self.beam_width, tf.float32) 145 | init_state = init_state.clone(cell_state=dec_start_state) 146 | 147 | test_decoder = seq2seq.BeamSearchDecoder( 148 | cell = cell, 149 | embedding = self.dec_embed, 150 | start_tokens = tf.ones(self.batch_size, dtype=tf.int32) * self.tokens['GO'], 151 | end_token = self.tokens['EOS'], 152 | initial_state = init_state, 153 | beam_width = self.beam_width, 154 | output_layer = output) 155 | test_output, _, test_lengths = seq2seq.dynamic_decode( 156 | decoder = test_decoder, 157 | maximum_iterations = self.maxLength) 158 | 159 | # Create train op 160 | mask = tf.sequence_mask(train_lengths + 1, self.maxLength - 1, dtype=tf.float32) 161 | self.cost = seq2seq.sequence_loss(train_output.rnn_output, self.add_eos[:, :-1], mask) 162 | self.train_op = tf.train.AdamOptimizer(0.001).minimize(self.cost) 163 | 164 | # Create test error rate op 165 | predicts = self.to_sparse(test_output.predicted_ids[:,:,0], test_lengths[:, 0] - 1) 166 | labels = self.to_sparse(self.add_eos, self.lengths) 167 | self.error_rate = tf.reduce_mean(tf.edit_distance(predicts, labels)) 168 | 169 | def to_sparse(self, tensor, lengths): 170 | mask = tf.sequence_mask(lengths, self.maxLength) 171 | indices = tf.to_int64(tf.where(tf.equal(mask, True))) 172 | values = tf.to_int32(tf.boolean_mask(tensor, mask)) 173 | shape = tf.to_int64(tf.shape(tensor)) 174 | return tf.SparseTensor(indices, values, shape) 175 | 176 | def next_batch(self, i): 177 | 178 | start = i * self.batch_size 179 | stop = (i+1) * self.batch_size 180 | 181 | batch = { 182 | self.input: self.data[start:stop], 183 | self.lengths: self.dataLens[start:stop], 184 | self.labels: self.dataLabels[start:stop], 185 | self.keep_prob: 1. 
- self.dropout 186 | } 187 | 188 | return batch 189 | 190 | def batchify(self): 191 | 192 | # Shuffle data 193 | a = list(zip(self.data, self.dataLens, self.dataLabels)) 194 | shuffle(a) 195 | self.data, self.dataLens, self.dataLabels = zip(*a) 196 | 197 | for i in range(self.samples // self.batch_size): 198 | yield self.next_batch(i) 199 | 200 | def test_batch(self): 201 | 202 | data = np.random.randint( 203 | low = len(self.tokens), 204 | high = self.vocab_size, 205 | size = (self.batch_size, self.maxLength)) 206 | 207 | dataLens = np.random.randint( 208 | low = self.minLength, 209 | high = self.maxLength, 210 | size = self.batch_size) 211 | 212 | dataLabels = np.zeros_like(data) 213 | for i in range(len(data)): 214 | data[i, dataLens[i]:] = self.tokens['PAD'] 215 | dataLabels[i, :dataLens[i]] = np.sort(data[i, :dataLens[i]]) 216 | 217 | return { 218 | self.input: data, 219 | self.lengths: dataLens, 220 | self.labels: dataLabels, 221 | self.keep_prob: 1. 222 | } 223 | --------------------------------------------------------------------------------