├── .gitignore ├── README.md ├── TensorFlow 1.2 seq2seq example.ipynb ├── Tensorflow 1.2 CTC example.ipynb ├── ctc.py ├── ctc_example.py ├── seq2seq.py └── seq2seq_example.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | *.swp 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Examples 2 | 3 | This repository stores examples for several neural network types in TensorFlow. 4 | 5 | * [seq2seq](https://github.com/pplantinga/tensorflow-examples/blob/master/TensorFlow%201.2%20seq2seq%20example.ipynb) 6 | * [ctc](https://github.com/pplantinga/tensorflow-examples/blob/master/Tensorflow%201.2%20CTC%20example.ipynb) 7 | -------------------------------------------------------------------------------- /TensorFlow 1.2 seq2seq example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow 1.2 seq2seq example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Since there seems to be a dearth of up-to-date tensorflow examples on how to use the seq2seq module in contrib, I've decided to post this code online. It is based primarily on this tutorial: [Udacity's sequence to sequence implementation](https://github.com/udacity/deep-learning/blob/master/seq2seq/sequence_to_sequence_implementation.ipynb)\n", 15 | "\n", 16 | "This example takes a list of numbers and sorts it. There are multiple updates from the Udacity example, such as scheduled sampling, beam search, attention, and error rate calculation. You will best understand what is going on in this example code if you already have a good background in TensorFlow and seq2seq networks.\n", 17 | "\n", 18 | "Unfortunately, Jupyter doesn't work well with classes, so I will have to put most of the code in a single cell. The comments should describe what is going on." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import tensorflow as tf\n", 30 | "import tensorflow.contrib.rnn as rnn\n", 31 | "import tensorflow.contrib.seq2seq as seq2seq\n", 32 | "from tensorflow.python.layers.core import Dense\n", 33 | "import numpy as np\n", 34 | "\n", 35 | "class seq2seq_example:\n", 36 | "\n", 37 | " # Constants\n", 38 | " tokens = {\"PAD\": 0, \"EOS\": 1, \"GO\": 2, \"UNK\": 3}\n", 39 | " minLength = 5\n", 40 | " maxLength = 10\n", 41 | " samples = 10000\n", 42 | " vocab_size = 50\n", 43 | " embedding_size = 15\n", 44 | " dropout = 0.3\n", 45 | " layers = 2\n", 46 | " layer_size = 100\n", 47 | " batch_size = 50\n", 48 | " beam_width = 4\n", 49 | "\n", 50 | " def __init__(self):\n", 51 | " \n", 52 | " # Random integers up to the vocab_size (not including reserved integers)\n", 53 | " self.data = np.random.randint(\n", 54 | " low = len(self.tokens),\n", 55 | " high = self.vocab_size,\n", 56 | " size = (self.samples, self.maxLength))\n", 57 | " \n", 58 | " # Assign a random length to each sequence from minLength to maxLength\n", 59 | " self.dataLens = np.random.randint(\n", 60 | " low = self.minLength,\n", 61 | " high = self.maxLength,\n", 62 | " size = self.samples)\n", 63 | " \n", 64 | " # Create labels by sorting the original data\n", 65 | " self.dataLabels = np.ones_like(self.data) * self.tokens['PAD']\n", 66 | " for i in range(len(self.data)):\n", 67 | " self.data[i, self.dataLens[i]:] = self.tokens['PAD']\n", 68 | " self.dataLabels[i, :self.dataLens[i]] = np.sort(self.data[i, :self.dataLens[i]])\n", 69 | " \n", 70 | " # Make placeholders and stuff\n", 71 | " self.make_inputs()\n", 72 | "\n", 73 | " # Build the compute graph\n", 74 | " self.build_graph()\n", 75 | "\n", 76 | " # Create the inputs to the graph (placeholders and stuff)\n", 77 | " def make_inputs(self):\n", 78 | " self.input = tf.placeholder(tf.int32, (self.batch_size, self.maxLength))\n", 79 | " self.lengths = tf.placeholder(tf.int32, (self.batch_size,))\n", 80 | " self.labels = tf.placeholder(tf.int32, (self.batch_size, self.maxLength))\n", 81 | " self.keep_prob = tf.placeholder(tf.float32)\n", 82 | "\n", 83 | " # Embed encoder input\n", 84 | " self.enc_input = tf.contrib.layers.embed_sequence(\n", 85 | " ids = self.input,\n", 86 | " vocab_size = self.vocab_size,\n", 87 | " embed_dim = self.embedding_size)\n", 88 | "\n", 89 | " # Decoder input (GO + label + EOS)\n", 90 | " eos = tf.one_hot(\n", 91 | " indices = self.lengths,\n", 92 | " depth = self.maxLength,\n", 93 | " on_value = self.tokens['EOS'])\n", 94 | " \n", 95 | " self.add_eos = self.labels + eos\n", 96 | " go_tokens = tf.constant(self.tokens['GO'], shape=[self.batch_size, 1])\n", 97 | " pre_embed_dec_input = tf.concat((go_tokens, self.add_eos), 1)\n", 98 | " \n", 99 | " # Embed decoder input\n", 100 | " self.dec_embed = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size]))\n", 101 | " self.dec_input = tf.nn.embedding_lookup(self.dec_embed, pre_embed_dec_input)\n", 102 | "\n", 103 | " def one_layer_cell(self):\n", 104 | " return rnn.DropoutWrapper(rnn.LSTMCell(self.layer_size), self.keep_prob)\n", 105 | " \n", 106 | " def cell(self):\n", 107 | " return rnn.MultiRNNCell([self.one_layer_cell() for _ in range(self.layers)])\n", 108 | " \n", 109 | " def decoder_cell(self, inputs, lengths):\n", 110 | " attention_mechanism = seq2seq.LuongAttention(\n", 111 | " num_units = self.layer_size,\n", 112 | " memory = 
inputs,\n", 113 | " memory_sequence_length = lengths,\n", 114 | " scale = True)\n", 115 | "\n", 116 | " return seq2seq.AttentionWrapper(\n", 117 | " cell = self.cell(),\n", 118 | " attention_mechanism = attention_mechanism,\n", 119 | " attention_layer_size = self.layer_size)\n", 120 | " \n", 121 | " # Build the compute graph. First encoder, then decoder, then train/test ops\n", 122 | " def build_graph(self):\n", 123 | " \n", 124 | " # Build the encoder\n", 125 | " enc_outputs, enc_state = tf.nn.dynamic_rnn(\n", 126 | " cell = self.cell(),\n", 127 | " inputs = self.enc_input,\n", 128 | " sequence_length = self.lengths,\n", 129 | " dtype = tf.float32)\n", 130 | "\n", 131 | " # Replicate the top-most encoder state for starting state of all layers in the decoder\n", 132 | " dec_start_state = tuple(enc_state[-1] for _ in range(self.layers))\n", 133 | " \n", 134 | " # Output layer converts from layer size to vocab size\n", 135 | " output = Dense(self.vocab_size,\n", 136 | " kernel_initializer = tf.truncated_normal_initializer(stddev=0.1))\n", 137 | " \n", 138 | " # Training decoder: scheduled sampling et al.\n", 139 | " with tf.variable_scope(\"decode\"):\n", 140 | " \n", 141 | " cell = self.decoder_cell(enc_outputs, self.lengths)\n", 142 | " init_state = cell.zero_state(self.batch_size, tf.float32)\n", 143 | " init_state = init_state.clone(cell_state=dec_start_state)\n", 144 | " \n", 145 | " train_helper = seq2seq.ScheduledEmbeddingTrainingHelper(\n", 146 | " inputs = self.dec_input,\n", 147 | " sequence_length = self.lengths,\n", 148 | " embedding = self.dec_embed,\n", 149 | " sampling_probability = 0.1)\n", 150 | "\n", 151 | " train_decoder = seq2seq.BasicDecoder(\n", 152 | " cell = cell,\n", 153 | " helper = train_helper,\n", 154 | " initial_state = init_state,\n", 155 | " output_layer = output)\n", 156 | " \n", 157 | " train_output, _, train_lengths = seq2seq.dynamic_decode(\n", 158 | " decoder = train_decoder,\n", 159 | " maximum_iterations = self.maxLength)\n", 160 | " \n", 161 | " # Tile inputs for beam search decoder\n", 162 | " dec_start_state = seq2seq.tile_batch(dec_start_state, self.beam_width)\n", 163 | " enc_outputs = seq2seq.tile_batch(enc_outputs, self.beam_width)\n", 164 | " lengths = seq2seq.tile_batch(self.lengths, self.beam_width)\n", 165 | " \n", 166 | " # Share weights with training decoder\n", 167 | " with tf.variable_scope(\"decode\", reuse=True):\n", 168 | " \n", 169 | " cell = self.decoder_cell(enc_outputs, lengths)\n", 170 | " init_state = cell.zero_state(self.batch_size * self.beam_width, tf.float32)\n", 171 | " init_state = init_state.clone(cell_state=dec_start_state)\n", 172 | " \n", 173 | " test_decoder = seq2seq.BeamSearchDecoder(\n", 174 | " cell = cell,\n", 175 | " embedding = self.dec_embed,\n", 176 | " start_tokens = tf.ones_like(self.lengths) * self.tokens['GO'],\n", 177 | " end_token = self.tokens['EOS'],\n", 178 | " initial_state = init_state,\n", 179 | " beam_width = self.beam_width,\n", 180 | " output_layer = output)\n", 181 | " \n", 182 | " test_output, _, test_lengths = seq2seq.dynamic_decode(\n", 183 | " decoder = test_decoder,\n", 184 | " maximum_iterations = self.maxLength)\n", 185 | " \n", 186 | " # Create train op. 
Add one to train lengths, to include EOS\n", 187 | " mask = tf.sequence_mask(train_lengths + 1, self.maxLength - 1, dtype=tf.float32)\n", 188 | " self.cost = seq2seq.sequence_loss(train_output.rnn_output, self.add_eos[:, :-1], mask)\n", 189 | " self.train_op = tf.train.AdamOptimizer(0.001).minimize(self.cost)\n", 190 | "\n", 191 | " # Create test error rate op. Remove one from lengths to exclude EOS\n", 192 | " predicts = self.to_sparse(test_output.predicted_ids[:,:,0], test_lengths[:, 0] - 1)\n", 193 | " labels = self.to_sparse(self.labels, self.lengths)\n", 194 | " self.error_rate = tf.reduce_mean(tf.edit_distance(predicts, labels))\n", 195 | "\n", 196 | " # Convert a dense matrix into a sparse matrix (for e.g. edit_distance)\n", 197 | " def to_sparse(self, tensor, lengths):\n", 198 | " mask = tf.sequence_mask(lengths, self.maxLength)\n", 199 | " indices = tf.to_int64(tf.where(tf.equal(mask, True)))\n", 200 | " values = tf.to_int32(tf.boolean_mask(tensor, mask))\n", 201 | " shape = tf.to_int64(tf.shape(tensor))\n", 202 | " return tf.SparseTensor(indices, values, shape)\n", 203 | "\n", 204 | " # Divide training samples into batches\n", 205 | " def batchify(self):\n", 206 | "\n", 207 | " for i in range(self.samples // self.batch_size):\n", 208 | " yield self.next_batch(i)\n", 209 | "\n", 210 | " # Create a single batch at i * batch_size\n", 211 | " def next_batch(self, i):\n", 212 | "\n", 213 | " start = i * self.batch_size\n", 214 | " stop = (i+1) * self.batch_size\n", 215 | "\n", 216 | " batch = {\n", 217 | " self.input: self.data[start:stop],\n", 218 | " self.lengths: self.dataLens[start:stop],\n", 219 | " self.labels: self.dataLabels[start:stop],\n", 220 | " self.keep_prob: 1. - self.dropout\n", 221 | " }\n", 222 | "\n", 223 | " return batch\n", 224 | "\n", 225 | " # Create a random test batch\n", 226 | " def test_batch(self):\n", 227 | "\n", 228 | " data = np.random.randint(\n", 229 | " low = len(self.tokens),\n", 230 | " high = self.vocab_size,\n", 231 | " size = (self.batch_size, self.maxLength))\n", 232 | " \n", 233 | " dataLens = np.random.randint(\n", 234 | " low = self.minLength,\n", 235 | " high = self.maxLength,\n", 236 | " size = self.batch_size)\n", 237 | " \n", 238 | " dataLabels = np.zeros_like(data)\n", 239 | " for i in range(len(data)):\n", 240 | " data[i, dataLens[i]:] = self.tokens['PAD']\n", 241 | " dataLabels[i, :dataLens[i]] = np.sort(data[i, :dataLens[i]])\n", 242 | "\n", 243 | " return {\n", 244 | " self.input: data,\n", 245 | " self.lengths: dataLens,\n", 246 | " self.labels: dataLabels,\n", 247 | " self.keep_prob: 1.\n", 248 | " }" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Let's create a main method that uses this class! We'll train for 50 epochs and see how good our network gets at sorting integers." 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 2, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stderr", 265 | "output_type": "stream", 266 | "text": [ 267 | "/usr/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py:93: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", 268 | " \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
\"\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "s2s = seq2seq_example()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 3, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "Epoch 1 train loss: 2.91986195683 test error: 0.834714\n", 286 | "Epoch 2 train loss: 2.27854833126 test error: 0.742071\n", 287 | "Epoch 3 train loss: 2.05610381961 test error: 0.609936\n", 288 | "Epoch 4 train loss: 1.92091192007 test error: 0.544151\n", 289 | "Epoch 5 train loss: 1.79896817088 test error: 0.484119\n", 290 | "Epoch 6 train loss: 1.65584457636 test error: 0.349063\n", 291 | "Epoch 7 train loss: 1.4184307152 test error: 0.197675\n", 292 | "Epoch 8 train loss: 1.11107378602 test error: 0.0938571\n", 293 | "Epoch 9 train loss: 0.871339265406 test error: 0.0533254\n", 294 | "Epoch 10 train loss: 0.713713488728 test error: 0.0281667\n", 295 | "Epoch 11 train loss: 0.604298673123 test error: 0.0258333\n", 296 | "Epoch 12 train loss: 0.544282832742 test error: 0.0402698\n", 297 | "Epoch 13 train loss: 0.486932658702 test error: 0.0130159\n", 298 | "Epoch 14 train loss: 0.449633491188 test error: 0.00777778\n", 299 | "Epoch 15 train loss: 0.409089321047 test error: 0.0303889\n", 300 | "Epoch 16 train loss: 0.394229536355 test error: 0.0158333\n", 301 | "Epoch 17 train loss: 0.370213930979 test error: 0.0108333\n", 302 | "Epoch 18 train loss: 0.342018755376 test error: 0.00952381\n", 303 | "Epoch 19 train loss: 0.325859643742 test error: 0.0\n", 304 | "Epoch 20 train loss: 0.308327895328 test error: 0.00444444\n", 305 | "Epoch 21 train loss: 0.290842123702 test error: 0.0084127\n", 306 | "Epoch 22 train loss: 0.284117041528 test error: 0.00869048\n", 307 | "Epoch 23 train loss: 0.275989980996 test error: 0.00777778\n", 308 | "Epoch 24 train loss: 0.269462534711 test error: 0.00666667\n", 309 | "Epoch 25 train loss: 0.252057261914 test error: 0.00507937\n", 310 | "Epoch 26 train loss: 0.247674267814 test error: 0.00285714\n", 311 | "Epoch 27 train loss: 0.232467229217 test error: 0.00805556\n", 312 | "Epoch 28 train loss: 0.228952821717 test error: 0.00888889\n", 313 | "Epoch 29 train loss: 0.224217796773 test error: 0.00222222\n", 314 | "Epoch 30 train loss: 0.210341431685 test error: 0.0\n", 315 | "Epoch 31 train loss: 0.201171869896 test error: 0.0025\n", 316 | "Epoch 32 train loss: 0.195612193421 test error: 0.00844445\n", 317 | "Epoch 33 train loss: 0.18933903683 test error: 0.00333333\n", 318 | "Epoch 34 train loss: 0.184589334577 test error: 0.0\n", 319 | "Epoch 35 train loss: 0.176255308613 test error: 0.00333333\n", 320 | "Epoch 36 train loss: 0.180171127692 test error: 0.0\n", 321 | "Epoch 37 train loss: 0.163527621329 test error: 0.0\n", 322 | "Epoch 38 train loss: 0.167020770088 test error: 0.0\n", 323 | "Epoch 39 train loss: 0.160414721444 test error: 0.00444444\n", 324 | "Epoch 40 train loss: 0.153287142739 test error: 0.0025\n", 325 | "Epoch 41 train loss: 0.15106973609 test error: 0.0\n", 326 | "Epoch 42 train loss: 0.149061797969 test error: 0.0\n", 327 | "Epoch 43 train loss: 0.149537268914 test error: 0.0\n", 328 | "Epoch 44 train loss: 0.139682257585 test error: 0.00730159\n", 329 | "Epoch 45 train loss: 0.137363640536 test error: 0.00222222\n", 330 | "Epoch 46 train loss: 0.135108639039 test error: 0.00472222\n", 331 | "Epoch 47 train loss: 0.128768154997 test error: 0.0025\n", 332 | "Epoch 48 train loss: 0.122774963211 test error: 0.0\n", 333 | "Epoch 49 train loss: 
0.125188367758 test error: 0.0\n", 334 | "Epoch 50 train loss: 0.116121740155 test error: 0.00285714\n" 335 | ] 336 | } 337 | ], 338 | "source": [ 339 | "with tf.Session() as sess:\n", 340 | " sess.run(tf.global_variables_initializer())\n", 341 | " for epoch in range(50):\n", 342 | " \n", 343 | " # Keep track of average train cost for this epoch\n", 344 | " train_cost = 0\n", 345 | " for batch in s2s.batchify():\n", 346 | " train_cost += sess.run([s2s.train_op, s2s.cost], batch)[1]\n", 347 | " train_cost /= s2s.samples / s2s.batch_size\n", 348 | " \n", 349 | " # Test time\n", 350 | " er = sess.run(s2s.error_rate, s2s.test_batch())\n", 351 | " \n", 352 | " print(\"Epoch\", (epoch + 1), \"train loss:\", train_cost, \"test error:\", er)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": { 358 | "collapsed": true 359 | }, 360 | "source": [ 361 | "An error rate of 0 is pretty good, I'd say! That's all there is to it." 362 | ] 363 | } 364 | ], 365 | "metadata": { 366 | "kernelspec": { 367 | "display_name": "Python 3", 368 | "language": "python", 369 | "name": "python3" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.6.2" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 2 386 | } 387 | -------------------------------------------------------------------------------- /Tensorflow 1.2 CTC example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Tensorflow 1.2 CTC example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "There do exist other ctc examples, but this aims to be a pretty minimal example, for easy understanding.\n", 15 | "\n", 16 | "The first step is to make some random data." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import tensorflow as tf\n", 28 | "import numpy as np\n", 29 | "\n", 30 | "# Convert dense tensor to sparse tensor, required for ctc\n", 31 | "def to_sparse(tensor, lengths):\n", 32 | " mask = tf.sequence_mask(lengths, tf.reduce_max(lengths))\n", 33 | " indices = tf.to_int64(tf.where(tf.equal(mask, True)))\n", 34 | " values = tf.to_int32(tf.boolean_mask(tensor, mask))\n", 35 | " shape = tf.to_int64(tf.shape(tensor))\n", 36 | " return tf.SparseTensor(indices, values, shape)\n", 37 | "\n", 38 | "vocab_size = 4\n", 39 | "lstm_size = 10\n", 40 | "embed_size = 10\n", 41 | "samples = 100\n", 42 | "\n", 43 | "# The max length of the label should be shorter than the min length of input\n", 44 | "min_length = 4\n", 45 | "max_length = 5\n", 46 | "min_label_len = 2\n", 47 | "max_label_len = 2\n", 48 | "\n", 49 | "# Random inputs\n", 50 | "inputs = tf.constant(np.random.randint(1, vocab_size, size=[samples, max_length]))\n", 51 | "lengths = tf.constant(\n", 52 | " np.random.randint(min_length, max_length+1, size=samples),\n", 53 | " dtype=tf.int32)\n", 54 | "\n", 55 | "# Random labels\n", 56 | "labels = tf.constant(np.random.randint(1, vocab_size, size=[samples, max_label_len]))\n", 57 | "label_lengths = tf.constant(\n", 58 | " np.random.randint(min_label_len, max_label_len+1, size=samples),\n", 59 | " dtype=tf.int32)\n", 60 | "\n", 61 | "# Convert labels to sparse tensor\n", 62 | "sparse_labels = to_sparse(labels, label_lengths)\n", 63 | "\n", 64 | "# Transpose inputs to time-major\n", 65 | "inputs = tf.transpose(inputs)\n", 66 | "\n", 67 | "# Embed inputs\n", 68 | "embed = tf.contrib.layers.embed_sequence(inputs, max_length, embed_size)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "With the data out of the way, we can build our model with surprisingly few lines of code." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "outputs, _ = tf.nn.dynamic_rnn(\n", 85 | " tf.nn.rnn_cell.LSTMCell(lstm_size),\n", 86 | " embed,\n", 87 | " lengths,\n", 88 | " time_major=True,\n", 89 | " dtype=tf.float32)\n", 90 | "\n", 91 | "# Output layer converts lstm_size to vocab_size (plus one for blank label)\n", 92 | "logits = tf.layers.dense(outputs, vocab_size + 1)\n", 93 | "\n", 94 | "# Create train op from ctc loss\n", 95 | "loss = tf.reduce_mean(tf.nn.ctc_loss(sparse_labels, logits, lengths))\n", 96 | "train_op = tf.train.AdamOptimizer(0.001).minimize(loss)\n", 97 | "\n", 98 | "# Create test op from beam search decoder\n", 99 | "decoded, _ = tf.nn.ctc_beam_search_decoder(logits, lengths, beam_width=2)\n", 100 | "error_rate = tf.reduce_mean(tf.edit_distance(sparse_labels, tf.cast(decoded[0], tf.int32)))" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Now we can go ahead and train the model." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 3, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "0.881667\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "with tf.Session() as sess:\n", 125 | " sess.run(tf.global_variables_initializer())\n", 126 | "\n", 127 | " for i in range(1000):\n", 128 | " sess.run(train_op)\n", 129 | "\n", 130 | " print(sess.run(error_rate))" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.6.1" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /ctc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple example of sorting using ctc 3 | 4 | Author: Peter Plantinga 5 | Date: Summer 2017 6 | """ 7 | 8 | import tensorflow as tf 9 | from ctc_example import ctc_example 10 | 11 | epochs = 50 12 | 13 | ctc = ctc_example() 14 | 15 | with tf.Session() as sess: 16 | sess.run(tf.global_variables_initializer()) 17 | 18 | for i in range(epochs): 19 | 20 | train_cost = 0 21 | for batch in ctc.batchify(): 22 | train_cost += sess.run([ctc.train_op, ctc.cost], batch)[1] 23 | 24 | train_cost *= ctc.batch_size / ctc.samples 25 | 26 | error = sess.run(ctc.error_rate, ctc.test_batch()) 27 | 28 | print("Epoch ", (i+1), " train loss: ", train_cost, "test error: ", error) 29 | -------------------------------------------------------------------------------- /ctc_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of ctc in TensorFlow 1.2 3 | 4 | Sorts a random list of integers 5 | 6 | Author: Peter Plantinga 7 | Date: Summer 2017 8 | """ 9 | 10 | import tensorflow as tf 11 | import tensorflow.contrib.rnn as rnn 12 | import numpy as np 13 | from random import shuffle 14 | 15 | class ctc_example: 16 | 17 | tokens = {"PAD": 0, "EOS": 1, "GO": 2, "UNK": 3} 18 | 19 | minLength = 5 20 | maxLength = 10 21 | samples = 10000 22 | vocab_size = 50 23 | embedding_size = 15 24 | dropout = 0.3 25 | layers = 2 26 | layer_size = 100 27 | batch_size = 50 28 | 29 | def __init__(self): 30 | 31 | # Random integers up to vocab size (not including reserved values) 32 | self.data = np.random.randint( 33 | low = len(self.tokens), 34 | high = self.vocab_size, 35 | size = (self.samples, self.maxLength + self.minLength)) 36 | 37 | # Random length for each sequence from minLength to maxLength 38 | self.dataLens = np.random.randint( 39 | low = self.minLength, 40 | high = self.maxLength, 41 | size = self.samples) 42 | 43 | # Create labels by sorting data 44 | self.dataLabels = np.zeros([self.samples, self.maxLength]) 45 | for i in range(len(self.data)): 46 | self.data[i, self.dataLens[i]:] = self.tokens['PAD'] 47 | self.dataLabels[i, :self.dataLens[i]] = np.sort(self.data[i, :self.dataLens[i]]) 48 | 49 | # Make placeholders and stuff 50 | self.make_inputs() 51 | 52 | # Build computation graph 53 | self.build_graph() 54 | 55 | def make_inputs(self): 56 | self.input = tf.placeholder(tf.int32, 
(self.batch_size, self.maxLength + self.minLength)) 57 | self.lengths = tf.placeholder(tf.int32, (self.batch_size,)) 58 | self.labels = tf.placeholder(tf.int32, (self.batch_size, self.maxLength)) 59 | self.keep_prob = tf.placeholder(tf.float32) 60 | 61 | # Embed input 62 | self.embedded_input = tf.contrib.layers.embed_sequence( 63 | ids = self.input, 64 | vocab_size = self.vocab_size, 65 | embed_dim = self.embedding_size) 66 | 67 | # Time-major 68 | #self.embedded_input = tf.transpose(self.embedded_input) 69 | 70 | def single_layer_cell(self): 71 | return rnn.DropoutWrapper(rnn.LSTMCell(self.layer_size), self.keep_prob) 72 | 73 | def cell(self): 74 | return rnn.MultiRNNCell([self.single_layer_cell() for _ in range(self.layers)]) 75 | 76 | def build_graph(self): 77 | outputs, _ = tf.nn.bidirectional_dynamic_rnn( 78 | cell_fw = self.cell(), 79 | cell_bw = self.cell(), 80 | inputs = self.embedded_input, 81 | sequence_length = self.lengths + self.minLength, 82 | dtype = tf.float32) 83 | #time_major = True) 84 | 85 | # Concatenate fw and bw outputs, then reshape 86 | outputs = tf.concat(outputs, 2) 87 | outputs = tf.reshape(outputs, [-1, self.layer_size * 2]) 88 | 89 | # Output layer 90 | W = tf.Variable(tf.truncated_normal([self.layer_size * 2, self.vocab_size + 1], stddev=0.1)) 91 | b = tf.Variable(tf.zeros(self.vocab_size + 1)) 92 | logits = tf.matmul(outputs, W) + b 93 | logits = tf.reshape(logits, [self.batch_size, self.maxLength + self.minLength, self.vocab_size + 1]) 94 | logits = tf.transpose(logits, [1, 0, 2]) 95 | 96 | # CTC layer 97 | sparse_labels = self.to_sparse(self.labels, self.lengths) 98 | self.cost = tf.reduce_mean(tf.nn.ctc_loss( 99 | labels = sparse_labels, 100 | inputs = logits, 101 | sequence_length = self.lengths + self.minLength, 102 | time_major = True)) 103 | self.train_op = tf.train.AdamOptimizer(0.001).minimize(self.cost) 104 | 105 | 106 | # Decoder 107 | decoded, _ = tf.nn.ctc_beam_search_decoder( 108 | inputs = logits, 109 | sequence_length = self.lengths, 110 | beam_width = 4) 111 | self.error_rate = tf.reduce_mean(tf.edit_distance(sparse_labels, tf.cast(decoded[0], tf.int32))) 112 | 113 | 114 | def to_sparse(self, tensor, lengths): 115 | mask = tf.sequence_mask(lengths, self.maxLength) 116 | indices = tf.to_int64(tf.where(tf.equal(mask, True))) 117 | values = tf.to_int32(tf.boolean_mask(tensor, mask)) 118 | shape = tf.to_int64(tf.shape(tensor)) 119 | return tf.SparseTensor(indices, values, shape) 120 | 121 | def next_batch(self, i): 122 | 123 | start = i * self.batch_size 124 | stop = (i+1) * self.batch_size 125 | 126 | batch = { 127 | self.input: self.data[start:stop], 128 | self.lengths: self.dataLens[start:stop], 129 | self.labels: self.dataLabels[start:stop], 130 | self.keep_prob: 1. 
- self.dropout 131 | } 132 | 133 | return batch 134 | 135 | def batchify(self): 136 | 137 | # Shuffle data 138 | a = list(zip(self.data, self.dataLens, self.dataLabels)) 139 | shuffle(a) 140 | self.data, self.dataLens, self.dataLabels = zip(*a) 141 | 142 | for i in range(self.samples // self.batch_size): 143 | yield self.next_batch(i) 144 | 145 | def test_batch(self): 146 | 147 | data = np.random.randint( 148 | low = len(self.tokens), 149 | high = self.vocab_size, 150 | size = (self.batch_size, self.maxLength + self.minLength)) 151 | 152 | dataLens = np.random.randint( 153 | low = self.minLength, 154 | high = self.maxLength, 155 | size = self.batch_size) 156 | 157 | dataLabels = np.zeros([self.batch_size, self.maxLength]) 158 | for i in range(len(data)): 159 | data[i, dataLens[i]:] = self.tokens['PAD'] 160 | dataLabels[i, :dataLens[i]] = np.sort(data[i, :dataLens[i]]) 161 | 162 | return { 163 | self.input: data, 164 | self.lengths: dataLens, 165 | self.labels: dataLabels, 166 | self.keep_prob: 1. 167 | } 168 | -------------------------------------------------------------------------------- /seq2seq.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple example of sorting 3 | 4 | Author: Peter Plantinga 5 | Date: Summer 2017 6 | """ 7 | 8 | import tensorflow as tf 9 | from seq2seq_example import seq2seq_example 10 | 11 | epochs = 50 12 | 13 | s2s = seq2seq_example() 14 | 15 | with tf.Session() as sess: 16 | sess.run(tf.global_variables_initializer()) 17 | 18 | for i in range(epochs): 19 | 20 | train_cost = 0 21 | for batch in s2s.batchify(): 22 | train_cost += sess.run([s2s.train_op, s2s.cost], batch)[1] 23 | 24 | train_cost *= s2s.batch_size / s2s.samples 25 | 26 | error = sess.run(s2s.error_rate, s2s.test_batch()) 27 | 28 | print("Epoch ", (i+1), " train loss: ", train_cost, "test error: ", error) 29 | -------------------------------------------------------------------------------- /seq2seq_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example of seq2seq in TensorFlow 1.2 3 | 4 | Sorts a random list of integers 5 | 6 | Author: Peter Plantinga 7 | Date: Summer 2017 8 | """ 9 | 10 | import tensorflow as tf 11 | import tensorflow.contrib.rnn as rnn 12 | import tensorflow.contrib.seq2seq as seq2seq 13 | from tensorflow.python.layers.core import Dense 14 | import numpy as np 15 | from random import shuffle 16 | 17 | class seq2seq_example: 18 | 19 | tokens = {"PAD": 0, "EOS": 1, "GO": 2, "UNK": 3} 20 | 21 | minLength = 5 22 | maxLength = 10 23 | samples = 10000 24 | vocab_size = 50 25 | embedding_size = 15 26 | dropout = 0.3 27 | layers = 2 28 | layer_size = 100 29 | batch_size = 50 30 | beam_width = 4 31 | 32 | def __init__(self): 33 | 34 | # Random integers up to vocab size (not including reserved values) 35 | self.data = np.random.randint( 36 | low = len(self.tokens), 37 | high = self.vocab_size, 38 | size = (self.samples, self.maxLength)) 39 | 40 | # Random length for each sequence from minLength to maxLength 41 | self.dataLens = np.random.randint( 42 | low = self.minLength, 43 | high = self.maxLength, 44 | size = self.samples) 45 | 46 | # Create labels by sorting data 47 | self.dataLabels = np.zeros_like(self.data) 48 | for i in range(len(self.data)): 49 | self.data[i, self.dataLens[i]:] = self.tokens['PAD'] 50 | self.dataLabels[i, :self.dataLens[i]] = np.sort(self.data[i, :self.dataLens[i]]) 51 | 52 | # Make placeholders and stuff 53 | self.make_inputs() 54 | 55 | # Build 
computation graph 56 | self.build_graph() 57 | 58 | def make_inputs(self): 59 | self.input = tf.placeholder(tf.int32, (self.batch_size, self.maxLength)) 60 | self.lengths = tf.placeholder(tf.int32, (self.batch_size,)) 61 | self.labels = tf.placeholder(tf.int32, (self.batch_size, self.maxLength)) 62 | self.keep_prob = tf.placeholder(tf.float32) 63 | 64 | # Embed encoder input 65 | self.enc_input = tf.contrib.layers.embed_sequence( 66 | ids = self.input, 67 | vocab_size = self.vocab_size, 68 | embed_dim = self.embedding_size) 69 | 70 | # Create decoder input (GO + label + EOS) 71 | eos = tf.one_hot( 72 | indices = self.lengths, 73 | depth = self.maxLength, 74 | on_value = self.tokens['EOS']) 75 | 76 | self.add_eos = self.labels + eos 77 | go_tokens = tf.constant(self.tokens['GO'], shape=[self.batch_size, 1]) 78 | pre_embed_dec_input = tf.concat((go_tokens, self.add_eos), 1) 79 | 80 | # Embed decoder input 81 | self.dec_embed = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_size])) 82 | self.dec_input = tf.nn.embedding_lookup(self.dec_embed, pre_embed_dec_input) 83 | 84 | def single_layer_cell(self): 85 | return rnn.DropoutWrapper(rnn.LSTMCell(self.layer_size), self.keep_prob) 86 | 87 | def cell(self): 88 | return rnn.MultiRNNCell([self.single_layer_cell() for _ in range(self.layers)]) 89 | 90 | def decoder_cell(self, inputs, lengths): 91 | attention_mechanism = seq2seq.LuongAttention( 92 | num_units = self.layer_size, 93 | memory = inputs, 94 | memory_sequence_length = lengths, 95 | scale = True) 96 | 97 | return seq2seq.AttentionWrapper( 98 | cell = self.cell(), 99 | attention_mechanism = attention_mechanism, 100 | attention_layer_size = self.layer_size) 101 | 102 | def build_graph(self): 103 | enc_outputs, enc_state = tf.nn.dynamic_rnn( 104 | cell = self.cell(), 105 | inputs = self.enc_input, 106 | sequence_length = self.lengths, 107 | dtype = tf.float32) 108 | 109 | # Replicate the top-most encoder state for starting state of all layers in the decoder 110 | dec_start_state = tuple(enc_state[-1] for _ in range(self.layers)) 111 | 112 | output = Dense(self.vocab_size, 113 | kernel_initializer = tf.truncated_normal_initializer(stddev=0.1)) 114 | 115 | # Training decoder: scheduled sampling et al. 
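# Note: ScheduledEmbeddingTrainingHelper implements scheduled sampling. At each
# decoding step it normally feeds the next ground-truth embedding from dec_input,
# but with probability `sampling_probability` (0.1 here) it instead embeds a token
# sampled from the previous step's output logits, reducing the train/test mismatch.
# The "decode" variable scope opened below is reused later (reuse=True), so the
# beam-search test decoder shares this decoder's attention, cell, and output weights.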
116 | with tf.variable_scope("decode"): 117 | 118 | cell = self.decoder_cell(enc_outputs, self.lengths) 119 | init_state = cell.zero_state(self.batch_size, tf.float32) 120 | init_state = init_state.clone(cell_state=dec_start_state) 121 | 122 | train_helper = seq2seq.ScheduledEmbeddingTrainingHelper( 123 | inputs = self.dec_input, 124 | sequence_length = self.lengths, 125 | embedding = self.dec_embed, 126 | sampling_probability = 0.1) 127 | 128 | train_decoder = seq2seq.BasicDecoder( 129 | cell = cell, 130 | helper = train_helper, 131 | initial_state = init_state, 132 | output_layer = output) 133 | 134 | train_output, _, train_lengths = seq2seq.dynamic_decode( 135 | decoder = train_decoder, 136 | maximum_iterations = self.maxLength) 137 | 138 | dec_start_state = seq2seq.tile_batch(dec_start_state, self.beam_width) 139 | enc_outputs = seq2seq.tile_batch(enc_outputs, self.beam_width) 140 | lengths = seq2seq.tile_batch(self.lengths, self.beam_width) 141 | 142 | with tf.variable_scope("decode", reuse=True): 143 | cell = self.decoder_cell(enc_outputs, lengths) 144 | init_state = cell.zero_state(self.batch_size * self.beam_width, tf.float32) 145 | init_state = init_state.clone(cell_state=dec_start_state) 146 | 147 | test_decoder = seq2seq.BeamSearchDecoder( 148 | cell = cell, 149 | embedding = self.dec_embed, 150 | start_tokens = tf.ones(self.batch_size, dtype=tf.int32) * self.tokens['GO'], 151 | end_token = self.tokens['EOS'], 152 | initial_state = init_state, 153 | beam_width = self.beam_width, 154 | output_layer = output) 155 | test_output, _, test_lengths = seq2seq.dynamic_decode( 156 | decoder = test_decoder, 157 | maximum_iterations = self.maxLength) 158 | 159 | # Create train op 160 | mask = tf.sequence_mask(train_lengths + 1, self.maxLength - 1, dtype=tf.float32) 161 | self.cost = seq2seq.sequence_loss(train_output.rnn_output, self.add_eos[:, :-1], mask) 162 | self.train_op = tf.train.AdamOptimizer(0.001).minimize(self.cost) 163 | 164 | # Create test error rate op 165 | predicts = self.to_sparse(test_output.predicted_ids[:,:,0], test_lengths[:, 0] - 1) 166 | labels = self.to_sparse(self.add_eos, self.lengths) 167 | self.error_rate = tf.reduce_mean(tf.edit_distance(predicts, labels)) 168 | 169 | def to_sparse(self, tensor, lengths): 170 | mask = tf.sequence_mask(lengths, self.maxLength) 171 | indices = tf.to_int64(tf.where(tf.equal(mask, True))) 172 | values = tf.to_int32(tf.boolean_mask(tensor, mask)) 173 | shape = tf.to_int64(tf.shape(tensor)) 174 | return tf.SparseTensor(indices, values, shape) 175 | 176 | def next_batch(self, i): 177 | 178 | start = i * self.batch_size 179 | stop = (i+1) * self.batch_size 180 | 181 | batch = { 182 | self.input: self.data[start:stop], 183 | self.lengths: self.dataLens[start:stop], 184 | self.labels: self.dataLabels[start:stop], 185 | self.keep_prob: 1. 
- self.dropout 186 | } 187 | 188 | return batch 189 | 190 | def batchify(self): 191 | 192 | # Shuffle data 193 | a = list(zip(self.data, self.dataLens, self.dataLabels)) 194 | shuffle(a) 195 | self.data, self.dataLens, self.dataLabels = zip(*a) 196 | 197 | for i in range(self.samples // self.batch_size): 198 | yield self.next_batch(i) 199 | 200 | def test_batch(self): 201 | 202 | data = np.random.randint( 203 | low = len(self.tokens), 204 | high = self.vocab_size, 205 | size = (self.batch_size, self.maxLength)) 206 | 207 | dataLens = np.random.randint( 208 | low = self.minLength, 209 | high = self.maxLength, 210 | size = self.batch_size) 211 | 212 | dataLabels = np.zeros_like(data) 213 | for i in range(len(data)): 214 | data[i, dataLens[i]:] = self.tokens['PAD'] 215 | dataLabels[i, :dataLens[i]] = np.sort(data[i, :dataLens[i]]) 216 | 217 | return { 218 | self.input: data, 219 | self.lengths: dataLens, 220 | self.labels: dataLabels, 221 | self.keep_prob: 1. 222 | } 223 | --------------------------------------------------------------------------------