├── .gitignore
├── README.md
├── day_1
│   ├── 0_Using_Pretrained_Embeddings.ipynb
│   ├── 1_PyTorch_Basics.ipynb
│   ├── 2_classify_names_with_MLP.ipynb
│   ├── 3_Chinese_document_classification.ipynb
│   ├── figures
│   │   └── intro_to_pytorch
│   │       ├── computational_graph_backward.svg
│   │       ├── computational_graph_forward.svg
│   │       ├── pytorch_variable.svg
│   │       └── tensors.svg
│   └── vocabulary.py
├── day_2
│   ├── 00-Dataset-Loading-And-Vectorization.ipynb
│   ├── 01-Char-RNN-Classify-Surnames.ipynb
│   ├── 02-Char-RNN-Predict-Surnames.ipynb
│   ├── 03-Char-RNN-Conditionally-Predict-Surnames.ipynb
│   ├── CYOA-Amazon-Reviews.ipynb
│   ├── CYOA-CFPB-Classification.ipynb
│   ├── CYOA-Surname-Classification.ipynb
│   ├── CYOA-Twitter-Language-ID.ipynb
│   └── vocabulary.py
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── conf.py
│   ├── day1
│   │   ├── index.rst
│   │   └── solutions.rst
│   ├── day2
│   │   ├── cyoa.rst
│   │   ├── failfastprototypemode.rst
│   │   ├── tensorfu1.rst
│   │   ├── tensorfu2.rst
│   │   └── warmup.rst
│   ├── download_data.rst
│   ├── environment_setup.rst
│   ├── errata.rst
│   ├── extras
│   │   ├── attention.rst
│   │   ├── compute_conv_size.rst
│   │   ├── conv_notes.rst
│   │   ├── index.rst
│   │   ├── load_pretrained_vectors.rst
│   │   └── setting_seed.rst
│   ├── faq.rst
│   ├── index.rst
│   └── migration.rst
├── modelzoo
│   └── README.md
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | logs/
2 | *.pth
3 | *.vectorizer
4 | *.vocab
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | env/
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | .hypothesis/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # dotenv
87 | .env
88 |
89 | # virtualenv
90 | .venv
91 | venv/
92 | ENV/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 | .DS_Store
107 | amazon_train_small.csv
108 | glove.6B.zip
109 | names_test.csv
110 | names_test_delip_version.csv
111 | names_train.csv
112 | snli_1.0.zip
113 | trump.csv
114 | data/
115 | .ipython/
116 | .jupyter/
117 | .local/
118 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # O'Reilly Artificial Intelligence Conference
2 |
3 | ## Natural Language Processing with Deep Learning training
4 |
5 | #### Delip Rao, Brian McMahan
6 |
7 | Please visit [dl4nlp.info](http://dl4nlp.info/en/latest/) for more information about the current training session.
8 |
9 |
10 | ## Docker Instructions
11 |
12 | From the root directory (the one containing the Dockerfile),
13 | run the following commands, replacing LOCALPORT with the local port you'd like to use:
14 | 
15 | (The data ships with the repo, so there is no need to download it separately.)
16 |
17 | ```
18 | docker build -t dl4nlp .
19 | docker run -p LOCALPORT:8888 -d dl4nlp
20 | ```
21 |
22 | If you are running outside of Docker, you can run the following from the root directory (where you can see day_1, day_2, etc.):
23 |
24 | ```
25 | jupyter notebook --notebook-dir=$(pwd)
26 | ```
27 |
28 | This assumes all necessary packages are installed. The required Python packages are listed in `requirements.txt`, except for PyTorch; follow the installation instructions on the PyTorch website. Also visit [dl4nlp.info](http://dl4nlp.info/en/latest/) for more information about the current training session and [nlproc.info](http://nlproc.info/) for more resources from us!
29 |
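30 | For example, a typical local setup might look like the following (illustrative commands; choose the PyTorch build that matches your platform from the PyTorch website):
31 | 
32 | ```
33 | pip install -r requirements.txt   # everything except PyTorch
34 | pip install torch                 # or the platform-specific command from pytorch.org
35 | jupyter notebook --notebook-dir=$(pwd)
36 | ```
37 | 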
--------------------------------------------------------------------------------
/day_1/0_Using_Pretrained_Embeddings.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from argparse import Namespace\n",
10 | "import os\n",
11 | "os.environ['OMP_NUM_THREADS'] = '4' \n",
12 | "\n",
13 | "from annoy import AnnoyIndex\n",
14 | "import numpy as np\n",
15 | "import torch\n",
16 | "from tqdm import tqdm_notebook\n"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "These pre-trained word embeddings come from the [Glove project](https://nlp.stanford.edu/projects/glove/). For more details, about how the embeddings were generated see [this paper](https://nlp.stanford.edu/pubs/glove.pdf)."
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "args = Namespace(\n",
33 | " glove_filename='../data/glove.6B.100d.txt'\n",
34 | ")"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 3,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "def load_word_vectors(filename):\n",
44 | " \"\"\"\n",
45 | " A helper function to load word vectors from a file.\n",
46 | " \"\"\"\n",
47 | " word_to_index = {}\n",
48 | " word_vectors = []\n",
49 | " \n",
50 | " with open(filename) as fp:\n",
51 | " for line in tqdm_notebook(fp.readlines(), leave=False):\n",
52 | " line = line.split(\" \")\n",
53 | " \n",
54 | " word = line[0]\n",
55 | " word_to_index[word] = len(word_to_index)\n",
56 | " \n",
57 | " vec = np.array([float(x) for x in line[1:]])\n",
58 | " word_vectors.append(vec)\n",
59 | " \n",
60 | " return word_to_index, word_vectors"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 4,
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "class PreTrainedEmbeddings(object):\n",
70 | " \"\"\"\n",
71 | " A helper class to use standalone pre-trained embeddings\n",
72 | " \"\"\"\n",
73 | " def __init__(self, glove_filename):\n",
74 | " self.word_to_index, self.word_vectors = load_word_vectors(glove_filename)\n",
75 | " self.word_vector_size = len(self.word_vectors[0])\n",
76 | " \n",
77 | " self.index_to_word = {v: k for k, v in self.word_to_index.items()}\n",
78 | " self.index = AnnoyIndex(self.word_vector_size, metric='euclidean')\n",
79 | " print('Building Index')\n",
80 | " for _, i in tqdm_notebook(self.word_to_index.items(), leave=False):\n",
81 | " self.index.add_item(i, self.word_vectors[i])\n",
82 | " self.index.build(50)\n",
83 | " print('Finished!')\n",
84 | " \n",
85 | " def get_embedding(self, word):\n",
86 | " return self.word_vectors[self.word_to_index[word]]\n",
87 | " \n",
88 | " def closest(self, word, n=1):\n",
89 | " \"\"\"\n",
90 | " Finall the top-n closest words (in the embedding space) to a given word.\n",
91 | " \"\"\"\n",
92 | " vector = self.get_embedding(word)\n",
93 | " nn_indices = self.index.get_nns_by_vector(vector, n)\n",
94 | " return [self.index_to_word[neighbor] for neighbor in nn_indices]\n",
95 | " \n",
96 | " def closest_v(self, vector, n=1):\n",
97 | " nn_indices = self.index.get_nns_by_vector(vector, n)\n",
98 | " return [self.index_to_word[neighbor] for neighbor in nn_indices]\n",
99 | " \n",
100 | " def sim(self, w1, w2):\n",
101 | " \"\"\"\n",
102 | " find similarity between two words. returns a non-negative score.\n",
103 | " Higher the score, more the similarity in some dimension.\n",
104 | " \"\"\"\n",
105 | " return np.dot(self.get_embedding(w1), self.get_embedding(w2))"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 5,
111 | "metadata": {},
112 | "outputs": [
113 | {
114 | "data": {
115 | "application/vnd.jupyter.widget-view+json": {
116 | "model_id": "",
117 | "version_major": 2,
118 | "version_minor": 0
119 | },
120 | "text/plain": [
121 | "HBox(children=(IntProgress(value=0, max=400000), HTML(value='')))"
122 | ]
123 | },
124 | "metadata": {},
125 | "output_type": "display_data"
126 | },
127 | {
128 | "name": "stdout",
129 | "output_type": "stream",
130 | "text": [
131 | "Building Index\n"
132 | ]
133 | },
134 | {
135 | "data": {
136 | "application/vnd.jupyter.widget-view+json": {
137 | "model_id": "",
138 | "version_major": 2,
139 | "version_minor": 0
140 | },
141 | "text/plain": [
142 | "HBox(children=(IntProgress(value=0, max=400000), HTML(value='')))"
143 | ]
144 | },
145 | "metadata": {},
146 | "output_type": "display_data"
147 | },
148 | {
149 | "name": "stdout",
150 | "output_type": "stream",
151 | "text": [
152 | "Finished!\n"
153 | ]
154 | }
155 | ],
156 | "source": [
157 | "glove = PreTrainedEmbeddings(args.glove_filename)"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 6,
163 | "metadata": {
164 | "scrolled": true
165 | },
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "['apple', 'microsoft', 'dell', 'pc', 'compaq']"
171 | ]
172 | },
173 | "execution_count": 6,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | }
177 | ],
178 | "source": [
179 | "glove.closest('apple', n=5)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 7,
185 | "metadata": {
186 | "scrolled": true
187 | },
188 | "outputs": [
189 | {
190 | "data": {
191 | "text/plain": [
192 | "['plane', 'airplane', 'jet', 'flight', 'crashed']"
193 | ]
194 | },
195 | "execution_count": 7,
196 | "metadata": {},
197 | "output_type": "execute_result"
198 | }
199 | ],
200 | "source": [
201 | "glove.closest('plane', n=5)"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 8,
207 | "metadata": {},
208 | "outputs": [
209 | {
210 | "data": {
211 | "text/plain": [
212 | "(26.873448266652, 16.501491855324)"
213 | ]
214 | },
215 | "execution_count": 8,
216 | "metadata": {},
217 | "output_type": "execute_result"
218 | }
219 | ],
220 | "source": [
221 | "glove.sim('beer', 'wine'), glove.sim('beer', 'gasoline')"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "**A study of lexical relationships uncovered by word embeddings**\n",
229 | "\n",
230 | "Traditionally many of these relationships were hand-coded. See, for example, [the WordNet project](https://wordnet.princeton.edu/)."
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 9,
236 | "metadata": {},
237 | "outputs": [],
238 | "source": [
239 | "def SAT_analogy(w1, w2, w3):\n",
240 | " '''\n",
241 | " Solves problems of the type:\n",
242 | " w1 : w2 :: w3 : __\n",
243 | " '''\n",
244 | " closest_words = []\n",
245 | " try:\n",
246 | " w1v = glove.get_embedding(w1)\n",
247 | " w2v = glove.get_embedding(w2)\n",
248 | " w3v = glove.get_embedding(w3)\n",
249 | " w4v = w3v + (w2v - w1v)\n",
250 | " closest_words = glove.closest_v(w4v, n=5)\n",
251 | " closest_words = [w for w in closest_words if w not in [w1, w2, w3]]\n",
252 | " except:\n",
253 | " pass\n",
254 | " if len(closest_words) == 0:\n",
255 | " print(':-(')\n",
256 | " else:\n",
257 | " the_closest_word = closest_words[0]\n",
258 | " print('{} : {} :: {} : {}'.format(w1, w2, w3, the_closest_word))"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "**Pronouns**"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 10,
271 | "metadata": {},
272 | "outputs": [
273 | {
274 | "name": "stdout",
275 | "output_type": "stream",
276 | "text": [
277 | "man : he :: woman : she\n"
278 | ]
279 | }
280 | ],
281 | "source": [
282 | "SAT_analogy('man', 'he', 'woman')"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "** Verb-Noun relationships **"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 11,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "name": "stdout",
299 | "output_type": "stream",
300 | "text": [
301 | "fly : plane :: sail : ship\n"
302 | ]
303 | }
304 | ],
305 | "source": [
306 | "SAT_analogy('fly', 'plane', 'sail')"
307 | ]
308 | },
309 | {
310 | "cell_type": "markdown",
311 | "metadata": {},
312 | "source": [
313 | "**Noun-Noun relationships**"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 12,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "name": "stdout",
323 | "output_type": "stream",
324 | "text": [
325 | "cat : kitten :: dog : pug\n"
326 | ]
327 | }
328 | ],
329 | "source": [
330 | "SAT_analogy('cat', 'kitten', 'dog')"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 13,
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "name": "stdout",
340 | "output_type": "stream",
341 | "text": [
342 | "human : baby :: dog : puppy\n"
343 | ]
344 | }
345 | ],
346 | "source": [
347 | "SAT_analogy('human', 'baby', 'dog')"
348 | ]
349 | },
350 | {
351 | "cell_type": "code",
352 | "execution_count": 14,
353 | "metadata": {},
354 | "outputs": [
355 | {
356 | "name": "stdout",
357 | "output_type": "stream",
358 | "text": [
359 | "human : babies :: dog : puppies\n"
360 | ]
361 | }
362 | ],
363 | "source": [
364 | "SAT_analogy('human', 'babies', 'dog')"
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {},
370 | "source": [
371 | "**Hypernymy**"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 15,
377 | "metadata": {},
378 | "outputs": [
379 | {
380 | "name": "stdout",
381 | "output_type": "stream",
382 | "text": [
383 | "blue : color :: dog : animal\n"
384 | ]
385 | }
386 | ],
387 | "source": [
388 | "SAT_analogy('blue', 'color', 'dog')"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {},
394 | "source": [
395 | "**Meronymy**"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 16,
401 | "metadata": {},
402 | "outputs": [
403 | {
404 | "name": "stdout",
405 | "output_type": "stream",
406 | "text": [
407 | "leg : legs :: hand : hands\n"
408 | ]
409 | }
410 | ],
411 | "source": [
412 | "SAT_analogy('leg', 'legs', 'hand')"
413 | ]
414 | },
415 | {
416 | "cell_type": "markdown",
417 | "metadata": {},
418 | "source": [
419 | "**Troponymy**"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": 17,
425 | "metadata": {},
426 | "outputs": [
427 | {
428 | "name": "stdout",
429 | "output_type": "stream",
430 | "text": [
431 | "talk : communicate :: read : correctly\n"
432 | ]
433 | }
434 | ],
435 | "source": [
436 | "SAT_analogy('talk', 'communicate', 'read')"
437 | ]
438 | },
439 | {
440 | "cell_type": "markdown",
441 | "metadata": {},
442 | "source": [
443 | "**Metonymy**"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 18,
449 | "metadata": {},
450 | "outputs": [
451 | {
452 | "name": "stdout",
453 | "output_type": "stream",
454 | "text": [
455 | "blue : democrat :: red : republican\n"
456 | ]
457 | }
458 | ],
459 | "source": [
460 | "SAT_analogy('blue', 'democrat', 'red')"
461 | ]
462 | },
463 | {
464 | "cell_type": "markdown",
465 | "metadata": {},
466 | "source": [
467 | "**Misc**"
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 19,
473 | "metadata": {},
474 | "outputs": [
475 | {
476 | "name": "stdout",
477 | "output_type": "stream",
478 | "text": [
479 | "man : doctor :: woman : nurse\n"
480 | ]
481 | }
482 | ],
483 | "source": [
484 | "SAT_analogy('man', 'doctor', 'woman')"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": 20,
490 | "metadata": {},
491 | "outputs": [
492 | {
493 | "name": "stdout",
494 | "output_type": "stream",
495 | "text": [
496 | "man : leader :: woman : opposition\n"
497 | ]
498 | }
499 | ],
500 | "source": [
501 | "SAT_analogy('man', 'leader', 'woman')"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": null,
507 | "metadata": {},
508 | "outputs": [],
509 | "source": []
510 | },
511 | {
512 | "cell_type": "code",
513 | "execution_count": null,
514 | "metadata": {},
515 | "outputs": [],
516 | "source": []
517 | }
518 | ],
519 | "metadata": {
520 | "kernelspec": {
521 | "display_name": "Python 3",
522 | "language": "python",
523 | "name": "python3"
524 | },
525 | "language_info": {
526 | "codemirror_mode": {
527 | "name": "ipython",
528 | "version": 3
529 | },
530 | "file_extension": ".py",
531 | "mimetype": "text/x-python",
532 | "name": "python",
533 | "nbconvert_exporter": "python",
534 | "pygments_lexer": "ipython3",
535 | "version": "3.6.8"
536 | }
537 | },
538 | "nbformat": 4,
539 | "nbformat_minor": 2
540 | }
541 |
--------------------------------------------------------------------------------
/day_1/figures/intro_to_pytorch/computational_graph_backward.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/day_1/figures/intro_to_pytorch/computational_graph_forward.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/day_1/figures/intro_to_pytorch/pytorch_variable.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/day_1/vocabulary.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 |
3 | import numpy as np
4 | from torch.utils.data import Dataset
5 | import six
6 |
7 | import json
8 | 
9 | class VocabularyException(Exception): pass
10 | class Vocabulary(object):
11 | """
12 |     A bidirectional mapping between the tokens in a dataset and the integer
13 |     indices used by the machine learning algorithm.
14 | """
15 |     def __init__(self, use_unks=False, unk_token="<UNK>",
16 |                  use_mask=False, mask_token="<MASK>", use_start_end=False,
17 |                  start_token="<START>", end_token="<END>"):
18 | """
19 | Args:
20 | use_unks (bool): The vocabulary will output UNK tokens for out of
21 | vocabulary items.
22 | [default=False]
23 | unk_token (str): The token used for unknown tokens.
24 | If `use_unks` is True, this will be added to the vocabulary.
25 |                 [default='<UNK>']
26 | use_mask (bool): The vocabulary will reserve the 0th index for a mask token.
27 | This is used to handle variable lengths in sequence models.
28 | [default=False]
29 | mask_token (str): The token used for the mask.
30 | Note: mostly a placeholder; it's unlikely the token will be seen.
31 |                 [default='<MASK>']
32 | use_start_end (bool): The vocabulary will reserve indices for two tokens
33 | that represent the start and end of a sequence.
34 | [default=False]
35 |             start_token (str): The token used to indicate the start of a sequence.
36 |                 If `use_start_end` is True, this will be added to the vocabulary.
37 |                 [default='<START>']
38 |             end_token (str): The token used to indicate the end of a sequence.
39 |                 If `use_start_end` is True, this will be added to the vocabulary.
40 |                 [default='<END>']
41 | """
42 |
43 | self._mapping = {} # str -> int
44 | self._flip = {} # int -> str;
45 | self._i = 0
46 | self._frozen = False
47 |
48 | # mask token for use in masked recurrent networks
49 | # usually need to be the 0th index
50 | self.use_mask = use_mask
51 | self.mask_token = mask_token
52 | if self.use_mask:
53 | self.add(self.mask_token)
54 |
55 | # unk token for out of vocabulary tokens
56 | self.use_unks = use_unks
57 | self.unk_token = unk_token
58 | if self.use_unks:
59 | self.add(self.unk_token)
60 |
61 | # start token for sequence models
62 | self.use_start_end = use_start_end
63 | self.start_token = start_token
64 | self.end_token = end_token
65 | if self.use_start_end:
66 | self.add(self.start_token)
67 | self.add(self.end_token)
68 |
69 | def iterkeys(self):
70 | for k in self._mapping.keys():
71 | if k == self.unk_token or k == self.mask_token:
72 | continue
73 | else:
74 | yield k
75 |
76 | def keys(self):
77 | return list(self.iterkeys())
78 |
79 | def iteritems(self):
80 | for key, value in self._mapping.items():
81 | if key == self.unk_token or key == self.mask_token:
82 | continue
83 | yield key, value
84 |
85 | def items(self):
86 | return list(self.iteritems())
87 |
88 | def values(self):
89 | return [value for _, value in self.iteritems()]
90 |
91 | def __getitem__(self, k):
92 | if self._frozen:
93 | if k in self._mapping:
94 | out_index = self._mapping[k]
95 | elif self.use_unks:
96 | out_index = self.unk_index
97 | else: # case: frozen, don't want unks, raise exception
98 | raise VocabularyException("Vocabulary is frozen. " +
99 | "Key '{}' not found.".format(k))
100 | elif k in self._mapping: # case: normal
101 | out_index = self._mapping[k]
102 | else:
103 | out_index = self._mapping[k] = self._i
104 | self._i += 1
105 | self._flip[out_index] = k
106 |
107 | return out_index
108 |
109 | def add(self, k):
110 | return self.__getitem__(k)
111 |
112 | def add_many(self, x):
113 | return [self.add(k) for k in x]
114 |
115 | def lookup(self, i):
116 | try:
117 | return self._flip[i]
118 | except KeyError:
119 | raise VocabularyException("Key {} not in Vocabulary".format(i))
120 |
121 | def lookup_many(self, x):
122 | for k in x:
123 | yield self.lookup(k)
124 |
125 | def map(self, sequence, include_start_end=False):
126 | if include_start_end:
127 | yield self.start_index
128 |
129 | for item in sequence:
130 | yield self[item]
131 |
132 | if include_start_end:
133 | yield self.end_index
134 |
135 | def freeze(self, use_unks=False):
136 | self.use_unks = use_unks
137 |
138 | if use_unks and self.unk_token not in self:
139 | self.add(self.unk_token)
140 |
141 | self._frozen = True
142 |
143 | def unfreeze(self):
144 | self._frozen = False
145 |
146 | @property
147 | def unk_index(self):
148 | if self.unk_token not in self:
149 | return None
150 | return self._mapping[self.unk_token]
151 |
152 | @property
153 | def mask_index(self):
154 | if self.mask_token not in self:
155 | return None
156 | return self._mapping[self.mask_token]
157 |
158 | @property
159 | def start_index(self):
160 | if self.start_token not in self:
161 | return None
162 | return self._mapping[self.start_token]
163 |
164 | @property
165 | def end_index(self):
166 | if self.end_token not in self:
167 | return None
168 | return self._mapping[self.end_token]
169 |
170 | def __contains__(self, k):
171 | return k in self._mapping
172 |
173 | def __len__(self):
174 | return len(self._mapping)
175 |
176 | def __repr__(self):
177 |         return "<Vocabulary(size={}, frozen={})>".format(len(self), self._frozen)
178 |
179 | def get_serializable_contents(self):
180 | """
181 |         Creates a dict containing the necessary information to recreate this instance.
182 | """
183 | config = {"_mapping": self._mapping,
184 | "_flip": self._flip,
185 | "_frozen": self._frozen,
186 | "_i": self._i,
187 |                   "_counts": list(getattr(self, "_counts", Counter()).items()),  # not tracked by this class; default empty
188 |                   "_frequency_threshold": getattr(self, "_frequency_threshold", 0),
189 | "use_unks": self.use_unks,
190 | "unk_token": self.unk_token,
191 | "use_mask": self.use_mask,
192 | "mask_token": self.mask_token,
193 | "use_start_end": self.use_start_end,
194 | "start_token": self.start_token,
195 | "end_token": self.end_token}
196 | return config
197 |
198 | @classmethod
199 | def deserialize_from_contents(cls, content):
200 | """
201 | Recreate a Vocabulary instance; expect same dict as output in `serialize`
202 | """
203 | try:
204 | _mapping = content.pop("_mapping")
205 | _flip = content.pop("_flip")
206 | _i = content.pop("_i")
207 | _frozen = content.pop("_frozen")
208 | _counts = content.pop("_counts")
209 | _frequency_threshold = content.pop("_frequency_threshold")
210 | except KeyError:
211 | raise Exception("unable to deserialize vocabulary")
212 | if isinstance(list(_flip.keys())[0], six.string_types):
213 | _flip = {int(k): v for k, v in _flip.items()}
214 | out = cls(**content)
215 | out._mapping = _mapping
216 | out._flip = _flip
217 | out._i = _i
218 | out._counts = Counter(dict(_counts))
219 | out._frequency_threshold = _frequency_threshold
220 |
221 | if _frozen:
222 | out.freeze(out.use_unks)
223 |
224 | return out
225 |
226 |
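227 | if __name__ == "__main__":
228 |     # Minimal usage sketch (illustrative only; not used by the notebooks):
229 |     # build a small character vocabulary, then freeze it with UNKs enabled so
230 |     # unseen tokens map to the UNK index instead of raising an exception.
231 |     v = Vocabulary(use_mask=True)
232 |     v.add_many(list("abc"))            # tokens get indices after the mask token
233 |     print(len(v), v["a"], v["b"])      # 4 1 2
234 |     v.freeze(use_unks=True)            # adds the UNK token and locks the mapping
235 |     print(v["z"] == v.unk_index)       # True: unseen token falls back to UNK
236 |     print(list(v.lookup_many([v["a"], v["b"]])))   # ['a', 'b']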
--------------------------------------------------------------------------------
/day_2/00-Dataset-Loading-And-Vectorization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from argparse import Namespace\n",
10 | "from collections import Counter\n",
11 | "import json\n",
12 | "import os\n",
13 | "os.environ['OMP_NUM_THREADS'] = '4' \n",
14 | "\n",
15 | "import numpy as np\n",
16 | "import pandas as pd\n",
17 | "import torch\n",
18 | "import torch.nn as nn\n",
19 | "import torch.nn.functional as F\n",
20 | "import torch.optim as optim\n",
21 | "from torch.utils.data import Dataset, DataLoader\n",
22 | "from tqdm import tqdm_notebook\n",
23 | "\n",
24 | "from vocabulary import Vocabulary"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "# Header\n",
32 | "\n",
33 | "In this notebook, we outline how we approach loading and vectorizing datasets. \n",
34 | "\n",
35 | "Loading and vectorizing a dataset consists of 3 components:\n",
36 | "\n",
37 | "1. A data structure named `Vocabulary` which manages the token to integer mapping\n",
38 | "2. A `Vectorizer` which manages the vocabulary (or vocabularies) for mapping data points to a vector of integers\n",
39 | "3. The Dataset itself which takes as input a dataframe and a vectorizer. For classification tasks, it's expected that there are 2 columns, one for the observation, and one for the label.\n",
40 | "\n",
41 | "To prepare for those 3 components, we do the following tasks:\n",
42 | "\n",
43 | "1. Annotate the dataset with split information. \n",
44 | "2. Preprocess and split the x data (the observations) into lists of tokens\n",
45 | "3. Count the tokens and use the counts to restrict the vectorized tokens to those that are frequent enough to learn from\n",
46 | "\n",
47 | "From there, creating the vectorizer is as simple as iterating through the counted tokens. Then, the vectorizer is used to transform each subset of the dataset (corresponding to the splits) into matrices of token integers and vectors of label indices. \n",
48 | "\n",
49 | "### Dataset Information\n",
50 | "\n",
51 | "- Name: Surnames \n",
52 | "- Fields: `surname`, `nationality`\n",
53 | "- Size: 10980"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "# Args\n",
61 | "\n",
62 | "We utilize the `Namespace` object from python's standard library to contain hyper parameters and runtime settings. Primarily, this is done because it plays well with static analyzers and can be serialized for distributed settings. It's also convenient because it gives attribute-access rather than key-based access of dictionaries. "
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 2,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "args = Namespace(\n",
72 | " surname_csv=\"../data/surnames.csv\"\n",
73 | ")\n",
74 | "\n",
75 | "START_TOKEN = \"^\"\n",
76 | "END_TOKEN = \"_\""
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "# Dataset"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "### Dataset Loading Utilities"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 3,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "def count_tokens(x_data_list):\n",
100 | " \"\"\"Count the tokens in the data list\n",
101 | " \n",
102 | " Args:\n",
103 | " x_data_list (list(list(str))): a list of lists, each sublist is a list of string tokens. \n",
104 | " In other words, a list of the data points where the data points have been tokenized.\n",
105 | " Returns:\n",
106 | " dict: a mapping from tokens to their counts \n",
107 | " \n",
108 | " \"\"\"\n",
109 | " # alternatively\n",
110 | " # return Counter([token for x_data in x_data_list for token in x_data])\n",
111 | " counter = Counter()\n",
112 | " for x_data in x_data_list:\n",
113 | " for token in x_data:\n",
114 | " counter[token] += 1\n",
115 | " return counter\n",
116 | "\n",
117 | "def add_splits(df, target_y_column, split_proportions=(0.7, 0.15, 0.15), seed=0):\n",
118 | " \"\"\"Add 'train', 'val', and 'test' splits to the dataset\n",
119 | " \n",
120 | " Args:\n",
121 | " df (pd.DataFrame): the data frame to assign splits to\n",
122 | " target_y_column (str): the name of the label column; in order to\n",
123 | " preserve the class distribution between splits, the label column\n",
124 | " is used to group the datapoints and splits are assigned within these groups.\n",
125 | " split_proportions (tuple(float, float, float)): three floats which represent the\n",
126 | " proportion in 'train', 'val, 'and 'test'. Must sum to 1. \n",
127 | " seed (int): the random seed for making the shuffling deterministic. If the dataset and seed\n",
128 | " are kept the same, the split assignment is deterministic. \n",
129 | " Returns:\n",
130 | " pd.DataFrame: the input dataframe with a new column for split assignments; note: row order\n",
131 | " will have changed.\n",
132 | " \n",
133 | " \"\"\"\n",
134 | " df_by_label = {label: [] for label in df[target_y_column].unique()}\n",
135 | " for _, row in df.iterrows():\n",
136 | " df_by_label[row[target_y_column]].append(row.to_dict())\n",
137 | " \n",
138 | " np.random.seed(seed)\n",
139 | " \n",
140 | " assert sum(split_proportions) == 1, \"`split_proportions` should sum to 1\"\n",
141 | " train_p, val_p, test_p = split_proportions\n",
142 | " \n",
143 | " out_df = []\n",
144 | " # to ensure consistent behavior, lexicographically sort the dictionary\n",
145 | " for _, data_points in sorted(df_by_label.items()):\n",
146 | " np.random.shuffle(data_points)\n",
147 | " n_total = len(data_points)\n",
148 | " n_train = int(train_p * n_total)\n",
149 | " n_val = int(val_p * n_total)\n",
150 | " \n",
151 | " for data_point in data_points[:n_train]:\n",
152 | " data_point['split'] = 'train'\n",
153 | " \n",
154 | " for data_point in data_points[n_train:n_train+n_val]:\n",
155 | " data_point['split'] = 'val'\n",
156 | " \n",
157 | " for data_point in data_points[n_train+n_val:]:\n",
158 | " data_point['split'] = 'test'\n",
159 | " \n",
160 | " out_df.extend(data_points)\n",
161 | " \n",
162 | " return pd.DataFrame(out_df)\n"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "### Supervised Text Vectorizer"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 4,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "class SupervisedTextVectorizer:\n",
179 | " \"\"\"A composite data structure that uses Vocabularies to map text and its labels to integers\n",
180 | " \n",
181 | " Attributes:\n",
182 | " token_vocab (Vocabulary): the vocabulary managing the mapping between text tokens and \n",
183 | " the unique indices that represent them\n",
184 | " label_voab (Vocabulary): the vocabulary managing the mapping between labels and the\n",
185 | " unique indices that represent them.\n",
186 | " max_seq_length (int): the length of the longest sequence (including start or end tokens\n",
187 | " that will be prepended or appended).\n",
188 | " \"\"\"\n",
189 | " def __init__(self, token_vocab, label_vocab, max_seq_length):\n",
190 | " \"\"\"Initialize the SupervisedTextVectorizer\n",
191 | " \n",
192 | " Args:\n",
193 | " token_vocab (Vocabulary): the vocabulary managing the mapping between text tokens and \n",
194 | " the unique indices that represent them\n",
195 | " label_voab (Vocabulary): the vocabulary managing the mapping between labels and the\n",
196 | " unique indices that represent them.\n",
197 | " max_seq_length (int): the length of the longest sequence (including start or end tokens\n",
198 | " that will be prepended or appended).\n",
199 | " \"\"\"\n",
200 | " self.token_vocab = token_vocab\n",
201 | " self.label_vocab = label_vocab\n",
202 | " self.max_seq_length = max_seq_length\n",
203 | " \n",
204 | " def _wrap_with_start_end(self, x_data):\n",
205 | " \"\"\"Prepend the start token and append the end token.\n",
206 | " \n",
207 | " Args:\n",
208 | " x_data (list(str)): the list of string tokens in the data point\n",
209 | " Returns:\n",
210 | " list(str): the list of string tokens with start token prepended and end token appended\n",
211 | " \"\"\"\n",
212 | " return [self.token_vocab.start_token] + x_data + [self.token_vocab.end_token]\n",
213 | " \n",
214 | " def vectorize(self, x_data, y_label):\n",
215 | " \"\"\"Convert the data point and its label into their integer form\n",
216 | " \n",
217 | " Args:\n",
218 | " x_data (list(str)): the list of string tokens in the data point\n",
219 | " y_label (str,int): the label associated with the data point\n",
220 | " Returns:\n",
221 | " numpy.ndarray, int: x_data in vector form, padded to the max_seq_length; and \n",
222 | " the label mapped to the integer that represents it\n",
223 | " \"\"\"\n",
224 | " x_data = self._wrap_with_start_end(x_data)\n",
225 | " x_vector = np.zeros(self.max_seq_length).astype(np.int64)\n",
226 | " x_data_indices = [self.token_vocab[token] for token in x_data]\n",
227 | " x_vector[:len(x_data_indices)] = x_data_indices\n",
228 | " y_index = self.label_vocab[y_label]\n",
229 | " return x_vector, y_index\n",
230 | " \n",
231 | " def transform(self, x_data_list, y_label_list):\n",
232 | " \"\"\"Transform a dataset by vectorizing each datapoint\n",
233 | " \n",
234 | " Args: \n",
235 | " x_data_list (list(list(str))): a list of lists, each sublist contains string tokens\n",
236 | " y_label_list (list(str,int)): a list of either strings or integers. the y label can come\n",
237 | " as strings or integers, but they are remapped with the label_vocab to a unique integer\n",
238 | " Returns:\n",
239 | " np.ndarray(matrix), np.ndarray(vector): the vectorized x (matrix) and vectorized y (vector) \n",
240 | " \"\"\"\n",
241 | " x_matrix = []\n",
242 | " y_vector = []\n",
243 | " for x_data, y_label in zip(x_data_list, y_label_list):\n",
244 | " x_vector, y_index = self.vectorize(x_data, y_label)\n",
245 | " x_matrix.append(x_vector)\n",
246 | " y_vector.append(y_index)\n",
247 | " \n",
248 | " return np.stack(x_matrix), np.stack(y_vector)\n",
249 | " \n",
250 | " @classmethod\n",
251 | " def from_df(cls, df, target_x_column, target_y_column, token_count_cutoff=0):\n",
252 | " \"\"\"Instantiate the SupervisedTextVectorizer from a standardized dataframe\n",
253 | " \n",
254 | " Standardized DataFrame has a special meaning:\n",
255 | " there is a column that has been tokenized into a list of strings\n",
256 | " \n",
257 | " Args:\n",
258 | " df (pd.DataFrame): the dataset with a tokenized text column and a label column\n",
259 | " target_x_column (str): the name of the tokenized text column\n",
260 | " target_y_column (str): the name of the label column\n",
261 | " token_count_cutoff (int): [default=0] the minimum token frequency to add to the\n",
262 | " token_vocab. Any tokens that are less frequent will not be added.\n",
263 | " Returns:\n",
264 | " SupervisedTextVectorizer: the instantiated vectorizer\n",
265 | " \"\"\"\n",
266 | " # get the x data (the observations)\n",
267 | " target_x_list = df[target_x_column].tolist()\n",
268 | " # compute max sequence length, add 2 for the start, end tokens\n",
269 | " max_seq_length = max(map(len, target_x_list)) + 2 \n",
270 | " \n",
271 | " # populate token vocab \n",
272 | " token_vocab = Vocabulary(use_unks=False,\n",
273 | " use_mask=True,\n",
274 | " use_start_end=True,\n",
275 | " start_token=START_TOKEN,\n",
276 | " end_token=END_TOKEN)\n",
277 | " counts = count_tokens(target_x_list)\n",
278 | " # sort counts in reverse order\n",
279 | " for token, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):\n",
280 | " if count < token_count_cutoff:\n",
281 | " break\n",
282 | " token_vocab.add(token)\n",
283 | "\n",
284 | " # populate label vocab\n",
285 | " label_vocab = Vocabulary(use_unks=False, use_start_end=False, use_mask=False)\n",
286 | " # add the sorted unique labels \n",
287 | " label_vocab.add_many(sorted(df[target_y_column].unique()))\n",
288 | " \n",
289 | " return cls(token_vocab, label_vocab, max_seq_length)\n",
290 | " \n",
291 | " def save(self, filename):\n",
292 | " \"\"\"Save the vectorizer using json to the file specified\n",
293 | " \n",
294 | " Args:\n",
295 | " filename (str): the output file\n",
296 | " \"\"\"\n",
297 | " vec_dict = {\"token_vocab\": self.token_vocab.get_serializable_contents(),\n",
298 | " \"label_vocab\": self.label_vocab.get_serializable_contents(),\n",
299 | " 'max_seq_length': self.max_seq_length}\n",
300 | "\n",
301 | " with open(filename, \"wb\") as fp:\n",
302 | " json.dump(vec_dict, fp)\n",
303 | " \n",
304 | " @classmethod\n",
305 | " def load(cls, filename):\n",
306 | " \"\"\"Load the vectorizer from the json file it was saved to\n",
307 | " \n",
308 | " Args:\n",
309 | " filename (str): the file into which the vectorizer was saved.\n",
310 | " Returns:\n",
311 | " SupervisedTextVectorizer: the instantiated vectorizer\n",
312 | " \"\"\"\n",
313 | " with open(filename, \"rb\") as fp:\n",
314 | " contents = json.load(fp)\n",
315 | "\n",
316 | " contents[\"token_vocab\"] = Vocabulary.deserialize_from_contents(contents[\"token_vocab\"])\n",
317 | " contents[\"label_vocab\"] = Vocabulary.deserialize_from_contents(contents[\"label_vocab\"])\n",
318 | " return cls(**contents)"
319 | ]
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {},
324 | "source": [
325 | "### Supervised Text Dataset"
326 | ]
327 | },
328 | {
329 | "cell_type": "code",
330 | "execution_count": 5,
331 | "metadata": {},
332 | "outputs": [],
333 | "source": [
334 | "class SupervisedTextDataset(Dataset):\n",
335 | " \"\"\"\n",
336 | " Attributes:\n",
337 | " vectorizer (SupervisedTextVectorizer): an instantiated vectorizer\n",
338 | " active_split (str): the string name of the active split\n",
339 | " \n",
340 | " # internal use\n",
341 | " _split_df (dict): a mapping from split name to partitioned DataFrame\n",
342 | " _vectorized (dict): a mapping from split to an x data matrix and y vector\n",
343 | " _active_df (pd.DataFrame): the DataFrame corresponding to the split\n",
344 | " _active_x (np.ndarray): a matrix of the vectorized text data\n",
345 | " _active_y (np.ndarray): a vector of the vectorized labels\n",
346 | " \"\"\"\n",
347 | " def __init__(self, df, vectorizer, target_x_column, target_y_column):\n",
348 | " \"\"\"Initialize the SupervisedTextDataset\n",
349 | " \n",
350 | " Args:\n",
351 | " df (pd.DataFrame): the dataset with a text and label column\n",
352 | " vectorizer (SupervisedTextVectorizer): an instantiated vectorizer\n",
353 | " target_x_column (str): the column containing the tokenized text\n",
354 | " target_y_column (str): the column containing the label\n",
355 | " \"\"\"\n",
356 | " self._split_df = {\n",
357 | " 'train': df[df.split=='train'],\n",
358 | " 'val': df[df.split=='val'],\n",
359 | " 'test': df[df.split=='test']\n",
360 | " }\n",
361 | " \n",
362 | " self._vectorized = {}\n",
363 | " for split_name, split_df in self._split_df.items():\n",
364 | " self._vectorized[split_name] = \\\n",
365 | " vectorizer.transform(x_data_list=split_df[target_x_column].tolist(), \n",
366 | " y_label_list=split_df[target_y_column].tolist())\n",
367 | " self.vectorizer = vectorizer\n",
368 | " self.active_split = None\n",
369 | " self._active_df = None\n",
370 | " self._active_x = None\n",
371 | " self._active_y = None\n",
372 | " \n",
373 | " self.set_split(\"train\")\n",
374 | " \n",
375 | " def set_split(self, split_name):\n",
376 | " \"\"\"Set the active split\n",
377 | " \n",
378 | " Args:\n",
379 | " split_name (str): the name of the split to make active; should\n",
380 | " be one of 'train', 'val', or 'test'\n",
381 | " \"\"\"\n",
382 | " self.active_split = split_name\n",
383 | " self._active_x, self._active_y = self._vectorized[split_name]\n",
384 | " self._active_df = self._split_df[split_name]\n",
385 | " \n",
386 | " def __getitem__(self, index):\n",
387 | " \"\"\"Return the data point corresponding to the index\n",
388 | " \n",
389 | " Args:\n",
390 | " index (int): an int between 0 and len(self._active_x)\n",
391 | " Returns:\n",
392 | " dict: the data for this data point. Has the following form:\n",
393 | " {\"x_data\": the vectorized text data point, \n",
394 | " \"y_target\": the index of the label for this data point, \n",
395 | " \"x_lengths\": method: the number of nonzeros in the vector,\n",
396 | " \"data_index\": the provided index for bookkeeping}\n",
397 | " \"\"\"\n",
398 | " return {\n",
399 | " \"x_data\": self._active_x[index],\n",
400 | " \"y_target\": self._active_y[index],\n",
401 | " \"x_lengths\": len(self._active_x[index].nonzero()[0]),\n",
402 | " \"data_index\": index\n",
403 | " }\n",
404 | " \n",
405 | " def __len__(self):\n",
406 | " \"\"\"The length of the active dataset\n",
407 | " \n",
408 | " Returns:\n",
409 | " int: len(self._active_x)\n",
410 | " \"\"\"\n",
411 | " return self._active_x.shape[0]"
412 | ]
413 | },
414 | {
415 | "cell_type": "markdown",
416 | "metadata": {},
417 | "source": [
418 | "### Dataset Loading Function"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 6,
424 | "metadata": {},
425 | "outputs": [],
426 | "source": [
427 | "def character_tokenizer(input_string):\n",
428 | " \"\"\"Tokenized a string a list of its characters\n",
429 | " \n",
430 | " Args:\n",
431 | " input_string (str): the character string to tokenize\n",
432 | " Returns:\n",
433 | " list: a list of characters\n",
434 | " \"\"\"\n",
435 | " return list(input_string.lower())\n",
436 | "\n",
437 | "def load_surname_dataset(dataset_csv, tokenizer_func, saved_vectorizer_file=None):\n",
438 | " \"\"\"Load the surname dataset \n",
439 | " \n",
440 | " Args:\n",
441 | " dataset_csv (str): the location of the dataset\n",
442 | " tokenizer_func (function): the tokenizing function to turn each datapoint into \n",
443 | " its tokenized form\n",
444 | " saved_vectorizer_file (str or None): [default=None] if not None, load the vectorizer\n",
445 | " from the file\n",
446 | " \"\"\"\n",
447 | " df = add_splits(pd.read_csv(dataset_csv), 'nationality')\n",
448 | " df['tokenized'] = df.surname.apply(tokenizer_func)\n",
449 | " if saved_vectorizer_file is not None:\n",
450 | " vectorizer = SupervisedTextVectorizer.load(saved_vectorizer_file)\n",
451 | " else:\n",
452 | " vectorizer = SupervisedTextVectorizer.from_df(df, \n",
453 | " target_x_column='tokenized', \n",
454 | " target_y_column='nationality')\n",
455 | " dataset = SupervisedTextDataset(df=df, \n",
456 | " vectorizer=vectorizer, \n",
457 | " target_x_column='tokenized', \n",
458 | " target_y_column='nationality')\n",
459 | " \n",
460 | " return dataset"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": 7,
466 | "metadata": {},
467 | "outputs": [],
468 | "source": [
469 | "dataset = load_surname_dataset(args.surname_csv, tokenizer_func=character_tokenizer)"
470 | ]
471 | },
472 | {
473 | "cell_type": "markdown",
474 | "metadata": {},
475 | "source": [
476 | "# Training"
477 | ]
478 | },
479 | {
480 | "cell_type": "markdown",
481 | "metadata": {},
482 | "source": [
483 | "### Generating batches\n",
484 | "\n",
485 | "Finally, the make_data_generator interacts with PyTorch's `DataLoader` and returns a generator. It basically just iterates over the `DataLoader` generator and does some processing. Currently, it returns a function rather than just making the generator itself so some control can be had over num_batches & volatile mode, and other run time things. It's mostly a cheap and easy function that can be written in many ways. "
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": 8,
491 | "metadata": {},
492 | "outputs": [],
493 | "source": [
494 | "def generate_batches(dataset, batch_size, shuffle=True,\n",
495 | " drop_last=True, device=\"cpu\", dataloader_kwargs=None): \n",
496 | " \"\"\"Generate batches from a dataset\n",
497 | " \n",
498 | " Args:\n",
499 | " dataset (torch.utils.data.Dataset): the instantiated dataset\n",
500 | " batch_size (int): the size of the batches\n",
501 | " shuffle (bool): [default=True] batches are formed from shuffled indices\n",
502 | " drop_last (bool): [default=True] don't return the final batch if it's smaller\n",
503 | " than the specified batch size\n",
504 | " device (str): [default=\"cpu\"] the device to move the tensors to\n",
505 | " dataloader_kwargs (dict or None): [default=None] Any additional arguments to the\n",
506 | " DataLoader can be specified\n",
507 | " Yields:\n",
508 | " dict: a dictionary mapping from tensor name to tensor object where the first\n",
509 | " dimension of tensor object is the batch dimension\n",
510 | " Note: \n",
511 | " This function is mostly an iterator for the DataLoader, but has the added\n",
512 | " feature that it moves the tensors to a target device. \n",
513 | " \"\"\"\n",
514 | " dataloader_kwargs = dataloader_kwargs or {}\n",
515 | " \n",
516 | " dataloader = DataLoader(dataset=dataset, batch_size=batch_size,\n",
517 | " shuffle=shuffle, drop_last=drop_last, **dataloader_kwargs)\n",
518 | "\n",
519 | " for data_dict in dataloader:\n",
520 | " out_data_dict = {}\n",
521 | " for name, tensor in data_dict.items():\n",
522 | " out_data_dict[name] = data_dict[name].to(device)\n",
523 | " yield out_data_dict\n",
524 | " \n",
525 | " \n"
526 | ]
527 | },
528 | {
529 | "cell_type": "markdown",
530 | "metadata": {},
531 | "source": [
532 | "### TrainState"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 9,
538 | "metadata": {},
539 | "outputs": [],
540 | "source": [
541 | "class TrainState:\n",
542 | " \"\"\"A data structure for managing training state operations.\n",
543 | " \n",
544 | " The TrainState will monitor validation loss and everytime a new best loss\n",
545 | " (lower is better) is observed, a couple things happen:\n",
546 | " \n",
547 | " 1. The model is checkpointed\n",
548 | " 2. Patience is reset\n",
549 | " \n",
550 | " Attributes:\n",
551 | " model (torch.nn.Module): the model being trained and will be\n",
552 | " checkpointed during training.\n",
553 | " dataset (SupervisedTextDataset, TextSequenceDataset): the dataset \n",
554 | " which is being iterate during training; must have the `active_split`\n",
555 | " attribute. \n",
556 | " log_dir (str): the directory to output the checkpointed model \n",
557 | " patience (int): the number of epochs since a new best loss was observed\n",
558 | " \n",
559 | " # Internal Use\n",
560 | " _full_model_path (str): `log_dir/model_state_file`\n",
561 | " _split (str): the active split\n",
562 | " _best_loss (float): the best observed loss\n",
563 | " \"\"\"\n",
564 | " def __init__(self, model, dataset, log_dir, model_state_file=\"model.pth\"):\n",
565 | " \"\"\"Initialize the TrainState\n",
566 | " \n",
567 | " Args:\n",
568 | " model (torch.nn.Module): the model to be checkpointed during training\n",
569 | " dataset (SupervisedTextDataset, TextSequenceDataset): the dataset \n",
570 | " which is being iterate during training; must have the `active_split`\n",
571 | " attribute. \n",
572 | " log_dir (str): the directory to output the checkpointed model \n",
573 | " model_state_file (str): the name of the checkpoint model\n",
574 | " \"\"\"\n",
575 | " self.model = model\n",
576 | " self.dataset = dataset\n",
577 | " self._full_model_path = os.path.join(log_dir, model_state_file)\n",
578 | " if not os.path.exists(log_dir):\n",
579 | " os.makedirs(log_dir)\n",
580 | " self.log_dir = log_dir\n",
581 | " \n",
582 | " self._metrics_by_split = {\n",
583 | " 'train': {}, \n",
584 | " 'val': {}, \n",
585 | " 'test': {}\n",
586 | " }\n",
587 | " \n",
588 | " self._split = 'train'\n",
589 | " self._best_loss = 10**10\n",
590 | " self.patience = 0\n",
591 | " \n",
592 | " def _init_metric(self, split, metric_name):\n",
593 | " \"\"\"Initialize a metric to the specified split\n",
594 | " \n",
595 | " A dictionary is created in `self._metrics_by_split` with\n",
596 | " the keys 'running', 'count', and 'history'. \n",
597 | " \n",
598 | " Args:\n",
599 | " split (str): the target split to record the metric\n",
600 | " metric_name (str): the name of the metric\n",
601 | " \"\"\"\n",
602 | " self._metrics_by_split[split][metric_name] = {\n",
603 | " 'running': 0.,\n",
604 | " 'count': 0,\n",
605 | " 'history': []\n",
606 | " }\n",
607 | " \n",
608 | " def _update_metric(self, metric_name, metric_value):\n",
609 | " \"\"\"Update a metric with an observed value\n",
610 | " \n",
611 | " Specifically, the running average is updated.\n",
612 | " \n",
613 | " Args:\n",
614 | " metric_name (str): the name of the metric\n",
615 | " metric_value (float): the observed value of the metric\n",
616 | " \"\"\"\n",
617 | " if metric_name not in self._metrics_by_split[self._split]:\n",
618 | " self._init_metric(self._split, metric_name)\n",
619 | " metric = self._metrics_by_split[self._split][metric_name]\n",
620 | " metric['count'] += 1\n",
621 | " metric['running'] += (metric_value - metric['running']) / metric['count']\n",
622 | " \n",
623 | " def set_split(self, split):\n",
624 | " \"\"\"Set the dataset split\n",
625 | " \n",
626 | " Args:\n",
627 | " split (str): the target split to set\n",
628 | " \"\"\"\n",
629 | " self._split = split\n",
630 | " \n",
631 | " def get_history(self, split, metric_name):\n",
632 | " \"\"\"Get the history of values for any metric in any split\n",
633 | " \n",
634 | " Args:\n",
635 | " split (str): the target split\n",
636 | " metric_name (str): the target metric\n",
637 | " \n",
638 | " Returns:\n",
639 | " list(float): the running average of each epoch for `metric_name` in `split` \n",
640 | " \"\"\"\n",
641 | " return self._metrics_by_split[split][metric_name]['history']\n",
642 | " \n",
643 | " def get_value_of(self, split, metric_name):\n",
644 | " \"\"\"Retrieve the running average of any metric in any split\n",
645 | " \n",
646 | " Args:\n",
647 | " split (str): the target split\n",
648 | " metric_name (str): the target metric\n",
649 | " \n",
650 | " Returns:\n",
651 | " float: the running average for `metric_name` in `split`\n",
652 | " \"\"\"\n",
653 | " return self._metrics_by_split[split][metric_name]['running']\n",
654 | " \n",
655 | " def log_metrics(self, **metrics):\n",
656 | " \"\"\"Log some values for some metrics\n",
657 | " \n",
658 | " Args:\n",
659 | " metrics (kwargs): pass keyword args with the form `metric_name=metric_value`\n",
660 | " to log the metric values into the attribute `_metrics_by_split`.\n",
661 | " \"\"\"\n",
662 | " self._split = self.dataset.active_split\n",
663 | " for metric_name, metric_value in metrics.items():\n",
664 | " self._update_metric(metric_name, metric_value)\n",
665 | " \n",
666 | " def log_epoch_end(self):\n",
667 | " \"\"\"Log the end of the epoch. \n",
668 | " \n",
669 | " Some key functions happen at the end of the epoch:\n",
670 | " - for each metric in each split running averages, counts, \n",
671 | " and history are updated\n",
672 | " - the model is checkpointed if a new best value is observed\n",
673 | " - patience is incremented if a new best value is not observed\n",
674 | " \"\"\"\n",
675 | " for split_dict in self._metrics_by_split.values():\n",
676 | " for metric_dict in split_dict.values():\n",
677 | " metric_dict['history'].append(metric_dict['running'])\n",
678 | " metric_dict['running'] = 0.0\n",
679 | " metric_dict['count'] = 0\n",
680 | " \n",
681 | " if 'loss' in self._metrics_by_split['val']:\n",
682 | " val_loss = self._metrics_by_split['val']['loss']['history'][-1]\n",
683 | " if val_loss < self._best_loss:\n",
684 | " self._best_loss = val_loss\n",
685 | " self.save_model()\n",
686 | " self.patience = 0\n",
687 | " else:\n",
688 | " self.patience += 1\n",
689 | " \n",
690 | " def save_model(self):\n",
691 | " \"\"\" Save `model` to `log_dir/model_state_file` \"\"\"\n",
692 | " torch.save(self.model.state_dict(), self._full_model_path)\n",
693 | " \n",
694 | " def reload_best(self):\n",
695 | " \"\"\" reload `log_dir/model_state_file` to `model` \"\"\"\n",
696 | " if os.path.exists(self._full_model_path):\n",
697 | " self.model.load_state_dict(torch.load(self._full_model_path))"
698 | ]
699 | },
700 | {
701 | "cell_type": "code",
702 | "execution_count": 10,
703 | "metadata": {},
704 | "outputs": [
705 | {
706 | "name": "stdout",
707 | "output_type": "stream",
708 | "text": [
709 | "x_data has shape torch.Size([4, 22])\n",
710 | "y_target has shape torch.Size([4])\n",
711 | "x_lengths has shape torch.Size([4])\n",
712 | "data_index has shape torch.Size([4])\n"
713 | ]
714 | }
715 | ],
716 | "source": [
717 | "batch_generator = generate_batches(dataset, batch_size=4)\n",
718 | "batch = next(batch_generator)\n",
719 | "for key, value in batch.items():\n",
720 | " print(f\"{key} has shape {value.shape}\")"
721 | ]
722 | },
723 | {
724 | "cell_type": "code",
725 | "execution_count": 12,
726 | "metadata": {},
727 | "outputs": [
728 | {
729 | "name": "stdout",
730 | "output_type": "stream",
731 | "text": [
732 | "2.0\n",
733 | "2.1666666666666665\n",
734 | "1.1666666666666667\n",
735 | "[1.1666666666666667]\n",
736 | "[2.1666666666666665]\n"
737 | ]
738 | }
739 | ],
740 | "source": [
741 | "import os\n",
742 | "import torch\n",
743 | "model = torch.nn.Linear(10,10)\n",
744 | "train_state = TrainState(model, dataset, 'model.pth', './logs')\n",
745 | "dataset.set_split(\"train\")\n",
746 | "train_state.log_metrics(loss=1.0)\n",
747 | "train_state.log_metrics(loss=2.0)\n",
748 | "train_state.log_metrics(loss=0.5)\n",
749 | "\n",
750 | "dataset.set_split(\"val\")\n",
751 | "train_state.log_metrics(loss=1.5)\n",
752 | "train_state.log_metrics(loss=2.5)\n",
753 | "train_state.log_metrics(loss=2.5)\n",
754 | "\n",
755 | "print(train_state.get_value_of(\"val\", \"loss\"))\n",
756 | "print(train_state.get_value_of(\"train\", \"loss\"))\n",
757 | "train_state.log_epoch_end()\n",
758 | "\n",
759 | "print(train_state.get_history(split='train', metric_name='loss'))\n",
760 | "print(train_state.get_history(split='val', metric_name='loss'))"
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "execution_count": null,
766 | "metadata": {},
767 | "outputs": [],
768 | "source": []
769 | }
770 | ],
771 | "metadata": {
772 | "kernelspec": {
773 | "display_name": "magis",
774 | "language": "python",
775 | "name": "magis"
776 | },
777 | "language_info": {
778 | "codemirror_mode": {
779 | "name": "ipython",
780 | "version": 3
781 | },
782 | "file_extension": ".py",
783 | "mimetype": "text/x-python",
784 | "name": "python",
785 | "nbconvert_exporter": "python",
786 | "pygments_lexer": "ipython3",
787 | "version": "3.7.2"
788 | }
789 | },
790 | "nbformat": 4,
791 | "nbformat_minor": 2
792 | }
793 |
--------------------------------------------------------------------------------
/day_2/CYOA-Surname-Classification.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Surname Classification with RNNs\n",
8 | "\n",
9 | "## Dataset Info\n",
10 | "\n",
11 | "The surnames dataset has been collected from a couple different sources. \n",
12 | "\n",
13 | "#### Value Counts for the Nationality:\n",
14 | "\n",
15 | "```\n",
16 | "russian 9408\n",
17 | "english 3668\n",
18 | "arabic 2000\n",
19 | "japanese 991\n",
20 | "german 724\n",
21 | "italian 709\n",
22 | "czech 519\n",
23 | "spanish 298\n",
24 | "dutch 297\n",
25 | "french 277\n",
26 | "chinese 268\n",
27 | "irish 232\n",
28 | "greek 203\n",
29 | "polish 139\n",
30 | "scottish 100\n",
31 | "korean 94\n",
32 | "portuguese 74\n",
33 | "vietnamese 73\n",
34 | "Name: nationality, dtype: int64\n",
35 | "```\n",
36 | "\n",
37 | "## Model Info\n",
38 | "\n",
39 | "Choose Your Own Adventure!"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 1,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "from argparse import Namespace\n",
49 | "from collections import Counter\n",
50 | "import json\n",
51 | "import os\n",
52 | "os.environ['OMP_NUM_THREADS'] = '4' \n",
53 | "\n",
54 | "import matplotlib.pyplot as plt\n",
55 | "import numpy as np\n",
56 | "import pandas as pd\n",
57 | "import seaborn as sns\n",
58 | "import torch\n",
59 | "import torch.nn as nn\n",
60 | "import torch.nn.functional as F\n",
61 | "import torch.optim as optim\n",
62 | "from torch.utils.data import Dataset, DataLoader\n",
63 | "from tqdm import tqdm_notebook\n",
64 | "\n",
65 | "from vocabulary import Vocabulary\n",
66 | "\n",
67 | "%matplotlib inline\n",
68 | "\n",
69 | "plt.style.use('fivethirtyeight')\n",
70 | "plt.rcParams['figure.figsize'] = (14, 6)\n",
71 | "\n",
72 | "START_TOKEN = \"^\"\n",
73 | "END_TOKEN = \"_\""
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "# Dataset"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "### Dataset Utilities"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 2,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "def count_tokens(x_data_list):\n",
97 | " \"\"\"Count the tokens in the data list\n",
98 | " \n",
99 | " Args:\n",
100 | " x_data_list (list(list(str))): a list of lists, each sublist is a list of string tokens. \n",
101 | " In other words, a list of the data points where the data points have been tokenized.\n",
102 | " Returns:\n",
103 | " dict: a mapping from tokens to their counts \n",
104 | " \n",
105 | " \"\"\"\n",
106 | " # alternatively\n",
107 | " # return Counter([token for x_data in x_data_list for token in x_data])\n",
108 | " counter = Counter()\n",
109 | " for x_data in x_data_list:\n",
110 | " for token in x_data:\n",
111 | " counter[token] += 1\n",
112 | " return counter\n",
113 | "\n",
114 | "def add_splits(df, target_y_column, split_proportions=(0.7, 0.15, 0.15), seed=0):\n",
115 | " \"\"\"Add 'train', 'val', and 'test' splits to the dataset\n",
116 | " \n",
117 | " Args:\n",
118 | " df (pd.DataFrame): the data frame to assign splits to\n",
119 | " target_y_column (str): the name of the label column; in order to\n",
120 | " preserve the class distribution between splits, the label column\n",
121 | " is used to group the datapoints and splits are assigned within these groups.\n",
122 | " split_proportions (tuple(float, float, float)): three floats which represent the\n",
123 | "            proportion in 'train', 'val', and 'test'. Must sum to 1. \n",
124 | " seed (int): the random seed for making the shuffling deterministic. If the dataset and seed\n",
125 | " are kept the same, the split assignment is deterministic. \n",
126 | " Returns:\n",
127 | " pd.DataFrame: the input dataframe with a new column for split assignments; note: row order\n",
128 | " will have changed.\n",
129 | " \n",
130 | " \"\"\"\n",
131 | " df_by_label = {label: [] for label in df[target_y_column].unique()}\n",
132 | " for _, row in df.iterrows():\n",
133 | " df_by_label[row[target_y_column]].append(row.to_dict())\n",
134 | " \n",
135 | " np.random.seed(seed)\n",
136 | " \n",
137 | " assert sum(split_proportions) == 1, \"`split_proportions` should sum to 1\"\n",
138 | " train_p, val_p, test_p = split_proportions\n",
139 | " \n",
140 | " out_df = []\n",
141 | " # to ensure consistent behavior, lexicographically sort the dictionary\n",
142 | " for _, data_points in sorted(df_by_label.items()):\n",
143 | " np.random.shuffle(data_points)\n",
144 | " n_total = len(data_points)\n",
145 | " n_train = int(train_p * n_total)\n",
146 | " n_val = int(val_p * n_total)\n",
147 | " \n",
148 | " for data_point in data_points[:n_train]:\n",
149 | " data_point['split'] = 'train'\n",
150 | " \n",
151 | " for data_point in data_points[n_train:n_train+n_val]:\n",
152 | " data_point['split'] = 'val'\n",
153 | " \n",
154 | " for data_point in data_points[n_train+n_val:]:\n",
155 | " data_point['split'] = 'test'\n",
156 | " \n",
157 | " out_df.extend(data_points)\n",
158 | " \n",
159 | " return pd.DataFrame(out_df)\n"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 | "### Supervised Text Vectorizer"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 3,
172 | "metadata": {},
173 | "outputs": [],
174 | "source": [
175 | "class SupervisedTextVectorizer:\n",
176 | " \"\"\"A composite data structure that uses Vocabularies to map text and its labels to integers\n",
177 | " \n",
178 | " Attributes:\n",
179 | " token_vocab (Vocabulary): the vocabulary managing the mapping between text tokens and \n",
180 | " the unique indices that represent them\n",
181 | "        label_vocab (Vocabulary): the vocabulary managing the mapping between labels and the\n",
182 | " unique indices that represent them.\n",
183 | " max_seq_length (int): the length of the longest sequence (including start or end tokens\n",
184 | " that will be prepended or appended).\n",
185 | " \"\"\"\n",
186 | " def __init__(self, token_vocab, label_vocab, max_seq_length):\n",
187 | " \"\"\"Initialize the SupervisedTextVectorizer\n",
188 | " \n",
189 | " Args:\n",
190 | " token_vocab (Vocabulary): the vocabulary managing the mapping between text tokens and \n",
191 | " the unique indices that represent them\n",
192 | "            label_vocab (Vocabulary): the vocabulary managing the mapping between labels and the\n",
193 | " unique indices that represent them.\n",
194 | " max_seq_length (int): the length of the longest sequence (including start or end tokens\n",
195 | " that will be prepended or appended).\n",
196 | " \"\"\"\n",
197 | " self.token_vocab = token_vocab\n",
198 | " self.label_vocab = label_vocab\n",
199 | " self.max_seq_length = max_seq_length\n",
200 | " \n",
201 | " def _wrap_with_start_end(self, x_data):\n",
202 | " \"\"\"Prepend the start token and append the end token.\n",
203 | " \n",
204 | " Args:\n",
205 | " x_data (list(str)): the list of string tokens in the data point\n",
206 | " Returns:\n",
207 | " list(str): the list of string tokens with start token prepended and end token appended\n",
208 | " \"\"\"\n",
209 | " return [self.token_vocab.start_token] + x_data + [self.token_vocab.end_token]\n",
210 | " \n",
211 | " def vectorize(self, x_data, y_label):\n",
212 | " \"\"\"Convert the data point and its label into their integer form\n",
213 | " \n",
214 | " Args:\n",
215 | " x_data (list(str)): the list of string tokens in the data point\n",
216 | " y_label (str,int): the label associated with the data point\n",
217 | " Returns:\n",
218 | " numpy.ndarray, int: x_data in vector form, padded to the max_seq_length; and \n",
219 | " the label mapped to the integer that represents it\n",
220 | " \"\"\"\n",
221 | " x_data = self._wrap_with_start_end(x_data)\n",
222 | " x_vector = np.zeros(self.max_seq_length).astype(np.int64)\n",
223 | " x_data_indices = [self.token_vocab[token] for token in x_data]\n",
224 | " x_vector[:len(x_data_indices)] = x_data_indices\n",
225 | " y_index = self.label_vocab[y_label]\n",
226 | " return x_vector, y_index\n",
227 | " \n",
228 | " def transform(self, x_data_list, y_label_list):\n",
229 | " \"\"\"Transform a dataset by vectorizing each datapoint\n",
230 | " \n",
231 | " Args: \n",
232 | " x_data_list (list(list(str))): a list of lists, each sublist contains string tokens\n",
233 | "            y_label_list (list(str,int)): a list of either strings or integers. The y labels can come\n",
234 | " as strings or integers, but they are remapped with the label_vocab to a unique integer\n",
235 | " Returns:\n",
236 | " np.ndarray(matrix), np.ndarray(vector): the vectorized x (matrix) and vectorized y (vector) \n",
237 | " \"\"\"\n",
238 | " x_matrix = []\n",
239 | " y_vector = []\n",
240 | " for x_data, y_label in zip(x_data_list, y_label_list):\n",
241 | " x_vector, y_index = self.vectorize(x_data, y_label)\n",
242 | " x_matrix.append(x_vector)\n",
243 | " y_vector.append(y_index)\n",
244 | " \n",
245 | " return np.stack(x_matrix), np.stack(y_vector)\n",
246 | " \n",
247 | " @classmethod\n",
248 | " def from_df(cls, df, target_x_column, target_y_column, token_count_cutoff=0):\n",
249 | " \"\"\"Instantiate the SupervisedTextVectorizer from a standardized dataframe\n",
250 | " \n",
251 | " Standardized DataFrame has a special meaning:\n",
252 | " there is a column that has been tokenized into a list of strings\n",
253 | " \n",
254 | " Args:\n",
255 | " df (pd.DataFrame): the dataset with a tokenized text column and a label column\n",
256 | " target_x_column (str): the name of the tokenized text column\n",
257 | " target_y_column (str): the name of the label column\n",
258 | " token_count_cutoff (int): [default=0] the minimum token frequency to add to the\n",
259 | " token_vocab. Any tokens that are less frequent will not be added.\n",
260 | " Returns:\n",
261 | " SupervisedTextVectorizer: the instantiated vectorizer\n",
262 | " \"\"\"\n",
263 | " # get the x data (the observations)\n",
264 | " target_x_list = df[target_x_column].tolist()\n",
265 | " # compute max sequence length, add 2 for the start, end tokens\n",
266 | " max_seq_length = max(map(len, target_x_list)) + 2 \n",
267 | " \n",
268 | " # populate token vocab \n",
269 | " token_vocab = Vocabulary(use_unks=False,\n",
270 | " use_mask=True,\n",
271 | " use_start_end=True,\n",
272 | " start_token=START_TOKEN,\n",
273 | " end_token=END_TOKEN)\n",
274 | " counts = count_tokens(target_x_list)\n",
275 | " # sort counts in reverse order\n",
276 | " for token, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):\n",
277 | " if count < token_count_cutoff:\n",
278 | " break\n",
279 | " token_vocab.add(token)\n",
280 | "\n",
281 | " # populate label vocab\n",
282 | " label_vocab = Vocabulary(use_unks=False, use_start_end=False, use_mask=False)\n",
283 | " # add the sorted unique labels \n",
284 | " label_vocab.add_many(sorted(df[target_y_column].unique()))\n",
285 | " \n",
286 | " return cls(token_vocab, label_vocab, max_seq_length)\n",
287 | " \n",
288 | " def save(self, filename):\n",
289 | " \"\"\"Save the vectorizer using json to the file specified\n",
290 | " \n",
291 | " Args:\n",
292 | " filename (str): the output file\n",
293 | " \"\"\"\n",
294 | " vec_dict = {\"token_vocab\": self.token_vocab.get_serializable_contents(),\n",
295 | " \"label_vocab\": self.label_vocab.get_serializable_contents(),\n",
296 | " 'max_seq_length': self.max_seq_length}\n",
297 | "\n",
298 | "        with open(filename, \"w\") as fp:\n",
299 | " json.dump(vec_dict, fp)\n",
300 | " \n",
301 | " @classmethod\n",
302 | " def load(cls, filename):\n",
303 | " \"\"\"Load the vectorizer from the json file it was saved to\n",
304 | " \n",
305 | " Args:\n",
306 | " filename (str): the file into which the vectorizer was saved.\n",
307 | " Returns:\n",
308 | " SupervisedTextVectorizer: the instantiated vectorizer\n",
309 | " \"\"\"\n",
310 | "        with open(filename, \"r\") as fp:\n",
311 | " contents = json.load(fp)\n",
312 | "\n",
313 | " contents[\"token_vocab\"] = Vocabulary.deserialize_from_contents(contents[\"token_vocab\"])\n",
314 | " contents[\"label_vocab\"] = Vocabulary.deserialize_from_contents(contents[\"label_vocab\"])\n",
315 | " return cls(**contents)"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "### Supervised Text Dataset"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 4,
328 | "metadata": {},
329 | "outputs": [],
330 | "source": [
331 | "class SupervisedTextDataset(Dataset):\n",
332 | " \"\"\"\n",
333 | " Attributes:\n",
334 | " vectorizer (SupervisedTextVectorizer): an instantiated vectorizer\n",
335 | " active_split (str): the string name of the active split\n",
336 | " \n",
337 | " # internal use\n",
338 | " _split_df (dict): a mapping from split name to partitioned DataFrame\n",
339 | " _vectorized (dict): a mapping from split to an x data matrix and y vector\n",
340 | " _active_df (pd.DataFrame): the DataFrame corresponding to the split\n",
341 | " _active_x (np.ndarray): a matrix of the vectorized text data\n",
342 | " _active_y (np.ndarray): a vector of the vectorized labels\n",
343 | " \"\"\"\n",
344 | " def __init__(self, df, vectorizer, target_x_column, target_y_column):\n",
345 | " \"\"\"Initialize the SupervisedTextDataset\n",
346 | " \n",
347 | " Args:\n",
348 | " df (pd.DataFrame): the dataset with a text and label column\n",
349 | " vectorizer (SupervisedTextVectorizer): an instantiated vectorizer\n",
350 | " target_x_column (str): the column containing the tokenized text\n",
351 | " target_y_column (str): the column containing the label\n",
352 | " \"\"\"\n",
353 | " self._split_df = {\n",
354 | " 'train': df[df.split=='train'],\n",
355 | " 'val': df[df.split=='val'],\n",
356 | " 'test': df[df.split=='test']\n",
357 | " }\n",
358 | " \n",
359 | " self._vectorized = {}\n",
360 | " for split_name, split_df in self._split_df.items():\n",
361 | " self._vectorized[split_name] = \\\n",
362 | " vectorizer.transform(x_data_list=split_df[target_x_column].tolist(), \n",
363 | " y_label_list=split_df[target_y_column].tolist())\n",
364 | " self.vectorizer = vectorizer\n",
365 | " self.active_split = None\n",
366 | " self._active_df = None\n",
367 | " self._active_x = None\n",
368 | " self._active_y = None\n",
369 | " \n",
370 | " self.set_split(\"train\")\n",
371 | " \n",
372 | " def set_split(self, split_name):\n",
373 | " \"\"\"Set the active split\n",
374 | " \n",
375 | " Args:\n",
376 | " split_name (str): the name of the split to make active; should\n",
377 | " be one of 'train', 'val', or 'test'\n",
378 | " \"\"\"\n",
379 | " self.active_split = split_name\n",
380 | " self._active_x, self._active_y = self._vectorized[split_name]\n",
381 | " self._active_df = self._split_df[split_name]\n",
382 | " \n",
383 | " def __getitem__(self, index):\n",
384 | " \"\"\"Return the data point corresponding to the index\n",
385 | " \n",
386 | " Args:\n",
387 | " index (int): an int between 0 and len(self._active_x)\n",
388 | " Returns:\n",
389 | " dict: the data for this data point. Has the following form:\n",
390 | " {\"x_data\": the vectorized text data point, \n",
391 | " \"y_target\": the index of the label for this data point, \n",
392 | "                  \"x_lengths\": the number of nonzeros in the vector,\n",
393 | " \"data_index\": the provided index for bookkeeping}\n",
394 | " \"\"\"\n",
395 | " return {\n",
396 | " \"x_data\": self._active_x[index],\n",
397 | " \"y_target\": self._active_y[index],\n",
398 | " \"x_lengths\": len(self._active_x[index].nonzero()[0]),\n",
399 | " \"data_index\": index\n",
400 | " }\n",
401 | " \n",
402 | " def __len__(self):\n",
403 | " \"\"\"The length of the active dataset\n",
404 | " \n",
405 | " Returns:\n",
406 | " int: len(self._active_x)\n",
407 | " \"\"\"\n",
408 | " return self._active_x.shape[0]"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "### Dataset Loading Function"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": 5,
421 | "metadata": {},
422 | "outputs": [],
423 | "source": [
424 | "def character_tokenizer(input_string):\n",
425 | "    \"\"\"Tokenize a string into a list of its characters\n",
426 | " \n",
427 | " Args:\n",
428 | " input_string (str): the character string to tokenize\n",
429 | " Returns:\n",
430 | " list: a list of characters\n",
431 | " \"\"\"\n",
432 | " return list(input_string.lower())\n",
433 | "\n",
434 | "def load_surname_dataset(dataset_csv, tokenizer_func, saved_vectorizer_file=None):\n",
435 | " \"\"\"Load the surname dataset \n",
436 | " \n",
437 | " Args:\n",
438 | " dataset_csv (str): the location of the dataset\n",
439 | " tokenizer_func (function): the tokenizing function to turn each datapoint into \n",
440 | " its tokenized form\n",
441 | " saved_vectorizer_file (str or None): [default=None] if not None, load the vectorizer\n",
442 | " from the file\n",
443 | " \"\"\"\n",
444 | " df = add_splits(pd.read_csv(dataset_csv), 'nationality')\n",
445 | " df['tokenized'] = df.surname.apply(tokenizer_func)\n",
446 | " if saved_vectorizer_file is not None:\n",
447 | " vectorizer = SupervisedTextVectorizer.load(saved_vectorizer_file)\n",
448 | " else:\n",
449 | " vectorizer = SupervisedTextVectorizer.from_df(df, \n",
450 | " target_x_column='tokenized', \n",
451 | " target_y_column='nationality')\n",
452 | " dataset = SupervisedTextDataset(df=df, \n",
453 | " vectorizer=vectorizer, \n",
454 | " target_x_column='tokenized', \n",
455 | " target_y_column='nationality')\n",
456 | " \n",
457 | " return dataset"
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "### Verify it loads"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 13,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "{'x_data': array([ 1, 9, 12, 13, 19, 7, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
476 | " 0, 0, 0, 0, 0]), 'y_target': 0, 'x_lengths': 8, 'data_index': 0}"
477 | ]
478 | },
479 | "execution_count": 13,
480 | "metadata": {},
481 | "output_type": "execute_result"
482 | }
483 | ],
484 | "source": [
485 | "dataset = load_surname_dataset(\"../data/surnames.csv\", \n",
486 | " character_tokenizer)\n",
487 | "dataset[0]"
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {},
493 | "source": [
494 | "# Model\n",
495 | "\n",
496 | "Fill this part out!"
497 | ]
498 | },
499 | {
500 | "cell_type": "markdown",
501 | "metadata": {},
502 | "source": [
503 | "### Model Utilities"
504 | ]
505 | },
506 | {
507 | "cell_type": "markdown",
508 | "metadata": {},
509 | "source": [
510 | "### Model Definitions"
511 | ]
512 | },
513 | {
514 | "cell_type": "markdown",
515 | "metadata": {},
516 | "source": [
517 | "### Prototyping"
518 | ]
519 | },
520 | {
521 | "cell_type": "markdown",
522 | "metadata": {},
523 | "source": [
524 | "# Training"
525 | ]
526 | },
527 | {
528 | "cell_type": "markdown",
529 | "metadata": {},
530 | "source": [
531 | "### Training Utilities"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": 29,
537 | "metadata": {},
538 | "outputs": [],
539 | "source": [
540 | "def compute_accuracy(y_pred, y_true):\n",
541 | " \"\"\"Compute the accuracy between a matrix of predictions and a vector of label indices\n",
542 | " \n",
543 | " Args:\n",
544 | " y_pred (torch.FloatTensor): [shape=(batch_size, num_classes)]\n",
545 | " The matrix of predictions\n",
546 | "        y_true (torch.LongTensor): [shape=(batch_size,)]\n",
547 | " The vector of label indices\n",
548 | " \"\"\"\n",
549 | " y_pred_indices = y_pred.argmax(dim=1)\n",
550 | " n_correct = torch.eq(y_pred_indices, y_true).sum().item()\n",
551 | " return n_correct / len(y_pred_indices) * 100\n",
552 | "\n",
553 | "\n",
554 | "def generate_batches(dataset, batch_size, shuffle=True,\n",
555 | " drop_last=True, device=\"cpu\", dataloader_kwargs=None): \n",
556 | " \"\"\"Generate batches from a dataset\n",
557 | " \n",
558 | " Args:\n",
559 | " dataset (torch.utils.data.Dataset): the instantiated dataset\n",
560 | " batch_size (int): the size of the batches\n",
561 | " shuffle (bool): [default=True] batches are formed from shuffled indices\n",
562 | " drop_last (bool): [default=True] don't return the final batch if it's smaller\n",
563 | " than the specified batch size\n",
564 | " device (str): [default=\"cpu\"] the device to move the tensors to\n",
565 | " dataloader_kwargs (dict or None): [default=None] Any additional arguments to the\n",
566 | " DataLoader can be specified\n",
567 | " Yields:\n",
568 | " dict: a dictionary mapping from tensor name to tensor object where the first\n",
569 | " dimension of tensor object is the batch dimension\n",
570 | " Note: \n",
571 | " This function is mostly an iterator for the DataLoader, but has the added\n",
572 | " feature that it moves the tensors to a target device. \n",
573 | " \"\"\"\n",
574 | " dataloader_kwargs = dataloader_kwargs or {}\n",
575 | " \n",
576 | " dataloader = DataLoader(dataset=dataset, batch_size=batch_size,\n",
577 | " shuffle=shuffle, drop_last=drop_last, **dataloader_kwargs)\n",
578 | "\n",
579 | " for data_dict in dataloader:\n",
580 | " out_data_dict = {}\n",
581 | " for name, tensor in data_dict.items():\n",
582 | " out_data_dict[name] = data_dict[name].to(device)\n",
583 | " yield out_data_dict\n",
584 | "\n",
585 | " \n",
586 | "class TrainState:\n",
587 | " \"\"\"A data structure for managing training state operations.\n",
588 | " \n",
589 | "    The TrainState will monitor validation loss and every time a new best loss\n",
590 | " (lower is better) is observed, a couple things happen:\n",
591 | " \n",
592 | " 1. The model is checkpointed\n",
593 | " 2. Patience is reset\n",
594 | " \n",
595 | " Attributes:\n",
596 | " model (torch.nn.Module): the model being trained and will be\n",
597 | " checkpointed during training.\n",
598 | " dataset (SupervisedTextDataset, TextSequenceDataset): the dataset \n",
599 | "            which is being iterated over during training; must have the `active_split`\n",
600 | " attribute. \n",
601 | " log_dir (str): the directory to output the checkpointed model \n",
602 | " patience (int): the number of epochs since a new best loss was observed\n",
603 | " \n",
604 | " # Internal Use\n",
605 | " _full_model_path (str): `log_dir/model_state_file`\n",
606 | " _split (str): the active split\n",
607 | " _best_loss (float): the best observed loss\n",
608 | " \"\"\"\n",
609 | " def __init__(self, model, dataset, log_dir, model_state_file=\"model.pth\"):\n",
610 | " \"\"\"Initialize the TrainState\n",
611 | " \n",
612 | " Args:\n",
613 | " model (torch.nn.Module): the model to be checkpointed during training\n",
614 | " dataset (SupervisedTextDataset, TextSequenceDataset): the dataset \n",
615 | "            which is being iterated over during training; must have the `active_split`\n",
616 | " attribute. \n",
617 | " log_dir (str): the directory to output the checkpointed model \n",
618 | " model_state_file (str): the name of the checkpoint model\n",
619 | " \"\"\"\n",
620 | " self.model = model\n",
621 | " self.dataset = dataset\n",
622 | " self._full_model_path = os.path.join(log_dir, model_state_file)\n",
623 | " if not os.path.exists(log_dir):\n",
624 | " os.makedirs(log_dir)\n",
625 | " self.log_dir = log_dir\n",
626 | " \n",
627 | " self._metrics_by_split = {\n",
628 | " 'train': {}, \n",
629 | " 'val': {}, \n",
630 | " 'test': {}\n",
631 | " }\n",
632 | " \n",
633 | " self._split = 'train'\n",
634 | " self._best_loss = 10**10\n",
635 | " self.patience = 0\n",
636 | " \n",
637 | " def _init_metric(self, split, metric_name):\n",
638 | " \"\"\"Initialize a metric to the specified split\n",
639 | " \n",
640 | " A dictionary is created in `self._metrics_by_split` with\n",
641 | " the keys 'running', 'count', and 'history'. \n",
642 | " \n",
643 | " Args:\n",
644 | " split (str): the target split to record the metric\n",
645 | " metric_name (str): the name of the metric\n",
646 | " \"\"\"\n",
647 | " self._metrics_by_split[split][metric_name] = {\n",
648 | " 'running': 0.,\n",
649 | " 'count': 0,\n",
650 | " 'history': []\n",
651 | " }\n",
652 | " \n",
653 | " def _update_metric(self, metric_name, metric_value):\n",
654 | " \"\"\"Update a metric with an observed value\n",
655 | " \n",
656 | " Specifically, the running average is updated.\n",
657 | " \n",
658 | " Args:\n",
659 | " metric_name (str): the name of the metric\n",
660 | " metric_value (float): the observed value of the metric\n",
661 | " \"\"\"\n",
662 | " if metric_name not in self._metrics_by_split[self._split]:\n",
663 | " self._init_metric(self._split, metric_name)\n",
664 | " metric = self._metrics_by_split[self._split][metric_name]\n",
665 | " metric['count'] += 1\n",
666 | " metric['running'] += (metric_value - metric['running']) / metric['count']\n",
667 | " \n",
668 | " def set_split(self, split):\n",
669 | " \"\"\"Set the dataset split\n",
670 | " \n",
671 | " Args:\n",
672 | " split (str): the target split to set\n",
673 | " \"\"\"\n",
674 | " self._split = split\n",
675 | " \n",
676 | " def get_history(self, split, metric_name):\n",
677 | " \"\"\"Get the history of values for any metric in any split\n",
678 | " \n",
679 | " Args:\n",
680 | " split (str): the target split\n",
681 | " metric_name (str): the target metric\n",
682 | " \n",
683 | " Returns:\n",
684 | " list(float): the running average of each epoch for `metric_name` in `split` \n",
685 | " \"\"\"\n",
686 | " return self._metrics_by_split[split][metric_name]['history']\n",
687 | " \n",
688 | " def get_value_of(self, split, metric_name):\n",
689 | " \"\"\"Retrieve the running average of any metric in any split\n",
690 | " \n",
691 | " Args:\n",
692 | " split (str): the target split\n",
693 | " metric_name (str): the target metric\n",
694 | " \n",
695 | " Returns:\n",
696 | " float: the running average for `metric_name` in `split`\n",
697 | " \"\"\"\n",
698 | " return self._metrics_by_split[split][metric_name]['running']\n",
699 | " \n",
700 | " def log_metrics(self, **metrics):\n",
701 | " \"\"\"Log some values for some metrics\n",
702 | " \n",
703 | " Args:\n",
704 | " metrics (kwargs): pass keyword args with the form `metric_name=metric_value`\n",
705 | " to log the metric values into the attribute `_metrics_by_split`.\n",
706 | " \"\"\"\n",
707 | " self._split = self.dataset.active_split\n",
708 | " for metric_name, metric_value in metrics.items():\n",
709 | " self._update_metric(metric_name, metric_value)\n",
710 | " \n",
711 | " def log_epoch_end(self):\n",
712 | " \"\"\"Log the end of the epoch. \n",
713 | " \n",
714 | " Some key functions happen at the end of the epoch:\n",
715 | " - for each metric in each split running averages, counts, \n",
716 | " and history are updated\n",
717 | " - the model is checkpointed if a new best value is observed\n",
718 | " - patience is incremented if a new best value is not observed\n",
719 | " \"\"\"\n",
720 | " for split_dict in self._metrics_by_split.values():\n",
721 | " for metric_dict in split_dict.values():\n",
722 | " metric_dict['history'].append(metric_dict['running'])\n",
723 | " metric_dict['running'] = 0.0\n",
724 | " metric_dict['count'] = 0\n",
725 | " \n",
726 | " if 'loss' in self._metrics_by_split['val']:\n",
727 | " val_loss = self._metrics_by_split['val']['loss']['history'][-1]\n",
728 | " if val_loss < self._best_loss:\n",
729 | " self._best_loss = val_loss\n",
730 | " self.save_model()\n",
731 | " self.patience = 0\n",
732 | " else:\n",
733 | " self.patience += 1\n",
734 | " \n",
735 | " def save_model(self):\n",
736 | " \"\"\" Save `model` to `log_dir/model_state_file` \"\"\"\n",
737 | " torch.save(self.model.state_dict(), self._full_model_path)\n",
738 | " \n",
739 | " def reload_best(self):\n",
740 | " \"\"\" reload `log_dir/model_state_file` to `model` \"\"\"\n",
741 | " if os.path.exists(self._full_model_path):\n",
742 | " self.model.load_state_dict(torch.load(self._full_model_path))"
743 | ]
744 | },
745 | {
746 | "cell_type": "markdown",
747 | "metadata": {},
748 | "source": [
749 | "### Args"
750 | ]
751 | },
752 | {
753 | "cell_type": "code",
754 | "execution_count": 48,
755 | "metadata": {},
756 | "outputs": [
757 | {
758 | "name": "stdout",
759 | "output_type": "stream",
760 | "text": [
761 | "Using CUDA: False\n"
762 | ]
763 | },
764 | {
765 | "data": {
766 | "text/plain": [
767 | "device(type='cpu')"
768 | ]
769 | },
770 | "execution_count": 48,
771 | "metadata": {},
772 | "output_type": "execute_result"
773 | }
774 | ],
775 | "source": [
776 | "args = Namespace(\n",
777 | " # dataset\n",
778 | " surname_csv=\"../data/surnames.csv\",\n",
779 | " # model hyper parameters\n",
780 | " num_embeddings=-1,\n",
781 | " num_classes=-1,\n",
782 | " # training options\n",
783 | " batch_size = 128,\n",
784 | " cuda=False,\n",
785 | " learning_rate=0.001,\n",
786 | " num_epochs=100,\n",
787 | " patience_threshold=3,\n",
788 | ")\n",
789 | "\n",
790 | "\n",
791 | "# Check CUDA\n",
792 | "if not torch.cuda.is_available():\n",
793 | " args.cuda = False\n",
794 | "\n",
795 | "print(\"Using CUDA: {}\".format(args.cuda))\n",
796 | "\n",
797 | "args.device = torch.device(\"cuda\" if args.cuda else \"cpu\")\n",
798 | "args.device"
799 | ]
800 | },
801 | {
802 | "cell_type": "markdown",
803 | "metadata": {},
804 | "source": [
805 | "### Instantiation"
806 | ]
807 | },
808 | {
809 | "cell_type": "code",
810 | "execution_count": null,
811 | "metadata": {},
812 | "outputs": [],
813 | "source": [
814 | "dataset = load_surname_dataset(args.surname_csv, tokenizer_func=character_tokenizer)\n",
815 | "\n",
816 | "args.num_embeddings = len(dataset.vectorizer.token_vocab)\n",
817 | "args.num_classes = len(dataset.vectorizer.label_vocab)\n",
818 | "\n",
819 | "# model = ??"
820 | ]
821 | },
822 | {
823 | "cell_type": "markdown",
824 | "metadata": {},
825 | "source": [
826 | "### Training Routine"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": null,
832 | "metadata": {},
833 | "outputs": [],
834 | "source": [
835 | "model = model.to(args.device)\n",
836 | "\n",
837 | "train_state = TrainState(model=model, dataset=dataset, log_dir='./logs/classify_surnames/v1', \n",
838 | " model_state_file='model.pth')\n",
839 | "\n",
840 | "optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)\n",
841 | "\n",
842 | "# loss function; per-class weights could be passed via the `weight` argument\n",
843 | "loss_func = nn.CrossEntropyLoss()\n",
844 | "\n",
845 | "\n",
846 | "# progress bars\n",
847 | "epoch_bar = tqdm_notebook(desc='epochs', total=args.num_epochs, position=1)\n",
848 | "\n",
849 | "dataset.set_split(\"train\")\n",
850 | "train_bar = tqdm_notebook(desc='training', total=len(dataset)//args.batch_size)\n",
851 | "\n",
852 | "dataset.set_split(\"val\")\n",
853 | "val_bar = tqdm_notebook(desc='validation', total=len(dataset)//args.batch_size)\n",
854 | " \n",
855 | "\n",
856 | "try:\n",
857 | " for _ in range(args.num_epochs):\n",
858 | " model.train()\n",
859 | " dataset.set_split(\"train\")\n",
860 | " \n",
861 | " for batch in generate_batches(dataset, batch_size=args.batch_size, device=args.device):\n",
862 | " # Step 1: clear the gradients \n",
863 | " optimizer.zero_grad()\n",
864 | " \n",
865 | " # Step 2: compute the outputs\n",
866 | " y_prediction = model(batch['x_data'], batch['x_lengths'])\n",
867 | "\n",
868 | " # Step 3: compute the loss\n",
869 | " loss = loss_func(y_prediction, batch['y_target'])\n",
870 | " \n",
871 | " # Step 4: propagate the gradients\n",
872 | " loss.backward() \n",
873 | " \n",
874 | " # Step 5: update the model weights\n",
875 | " optimizer.step()\n",
876 | " \n",
877 | "            # Auxiliary: logging\n",
878 | " train_state.log_metrics(loss=loss.item(), \n",
879 | " accuracy=compute_accuracy(y_prediction, batch['y_target']))\n",
880 | " \n",
881 | " train_bar.set_postfix(loss=train_state.get_value_of(split=\"train\", metric_name=\"loss\"),\n",
882 | " acc=train_state.get_value_of(split=\"train\", metric_name=\"accuracy\"))\n",
883 | " train_bar.update()\n",
884 | " \n",
885 | "        # loop over the validation dataset\n",
886 | " \n",
887 | " model.eval()\n",
888 | " dataset.set_split(\"val\")\n",
889 | " \n",
890 | " for batch in generate_batches(dataset, batch_size=args.batch_size, device=args.device):\n",
891 | " # Step 1: compute the outputs\n",
892 | " y_prediction = model(batch['x_data'], batch['x_lengths'])\n",
893 | "\n",
894 | " # Step 2: compute the loss\n",
895 | " loss = loss_func(y_prediction, batch['y_target'])\n",
896 | " \n",
897 | "            # Auxiliary: logging\n",
898 | " train_state.log_metrics(loss=loss.item(), \n",
899 | " accuracy=compute_accuracy(y_prediction, batch['y_target']))\n",
900 | " \n",
901 | " val_bar.set_postfix(loss=train_state.get_value_of(split=\"val\", metric_name=\"loss\"),\n",
902 | " acc=train_state.get_value_of(split=\"val\", metric_name=\"accuracy\"))\n",
903 | " val_bar.update()\n",
904 | "\n",
905 | " \n",
906 | " epoch_bar.set_postfix(train_loss=train_state.get_value_of(split=\"train\", \n",
907 | " metric_name=\"loss\"), \n",
908 | " train_accuracy=train_state.get_value_of(split=\"train\", \n",
909 | " metric_name=\"accuracy\"),\n",
910 | " val_loss=train_state.get_value_of(split=\"val\", \n",
911 | " metric_name=\"loss\"), \n",
912 | " val_accuracy=train_state.get_value_of(split=\"val\", \n",
913 | " metric_name=\"accuracy\"),\n",
914 | " patience=train_state.patience)\n",
915 | " epoch_bar.update()\n",
916 | " train_state.log_epoch_end()\n",
917 | " train_bar.n = 0\n",
918 | " val_bar.n = 0\n",
919 | " \n",
920 | " if train_state.patience > args.patience_threshold:\n",
921 | " break\n",
922 | " \n",
923 | " train_state.reload_best()\n",
924 | " model.eval()\n",
925 | " dataset.set_split(\"test\")\n",
926 | " test_bar = tqdm_notebook(desc='test', total=len(dataset)//args.batch_size)\n",
927 | "\n",
928 | " for batch in generate_batches(dataset, batch_size=args.batch_size, device=args.device):\n",
929 | " # Step 1: compute the outputs\n",
930 | " y_prediction = model(batch['x_data'], batch['x_lengths'])\n",
931 | "\n",
932 | " # Step 2: compute the loss\n",
933 | " loss = loss_func(y_prediction, batch['y_target'])\n",
934 | "\n",
935 | "        # Auxiliary: logging\n",
936 | " train_state.log_metrics(loss=loss.item(), \n",
937 | " accuracy=compute_accuracy(y_prediction, batch['y_target']))\n",
938 | "\n",
939 | " test_bar.set_postfix(loss=train_state.get_value_of(split=\"test\", metric_name=\"loss\"),\n",
940 | " acc=train_state.get_value_of(split=\"test\", metric_name=\"accuracy\"))\n",
941 | " test_bar.update()\n",
942 | " \n",
943 | "\n",
944 | "except KeyboardInterrupt:\n",
945 | " print(\"...\")"
946 | ]
947 | }
948 | ],
949 | "metadata": {
950 | "kernelspec": {
951 | "display_name": "magis",
952 | "language": "python",
953 | "name": "magis"
954 | },
955 | "language_info": {
956 | "codemirror_mode": {
957 | "name": "ipython",
958 | "version": 3
959 | },
960 | "file_extension": ".py",
961 | "mimetype": "text/x-python",
962 | "name": "python",
963 | "nbconvert_exporter": "python",
964 | "pygments_lexer": "ipython3",
965 | "version": "3.7.2"
966 | }
967 | },
968 | "nbformat": 4,
969 | "nbformat_minor": 2
970 | }
971 |
--------------------------------------------------------------------------------
/day_2/vocabulary.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 |
3 | import numpy as np
4 | from torch.utils.data import Dataset
5 | import six
6 |
7 | import json
8 | class VocabularyException(Exception):
9 |     """Raised for invalid lookups or changes to a frozen Vocabulary."""
10 | class Vocabulary(object):
11 | """
12 | An implementation that manages the interface between a token dataset and the
13 | machine learning algorithm.
14 | """
15 | def __init__(self, use_unks=False, unk_token="<UNK>",
16 | use_mask=False, mask_token="<MASK>", use_start_end=False,
17 | start_token="<START>", end_token="<END>"):
18 | """
19 | Args:
20 | use_unks (bool): The vocabulary will output UNK tokens for out of
21 | vocabulary items.
22 | [default=False]
23 | unk_token (str): The token used for unknown tokens.
24 | If `use_unks` is True, this will be added to the vocabulary.
25 | [default='<UNK>']
26 | use_mask (bool): The vocabulary will reserve the 0th index for a mask token.
27 | This is used to handle variable lengths in sequence models.
28 | [default=False]
29 | mask_token (str): The token used for the mask.
30 | Note: mostly a placeholder; it's unlikely the token will be seen.
31 | [default='<MASK>']
32 | use_start_end (bool): The vocabulary will reserve indices for two tokens
33 | that represent the start and end of a sequence.
34 | [default=False]
35 | start_token: The token used to indicate the start of a sequence.
36 | If `use_start_end` is True, this will be added to the vocabulary.
37 | [default='<START>']
38 | end_token: The token used to indicate the end of a sequence
39 | If `use_start_end` is True, this will be added to the vocabulary.
40 | [default='<END>']
41 | """
42 |
43 | self._mapping = {} # str -> int
44 | self._flip = {} # int -> str;
45 | self._i = 0
46 | self._frozen = False
47 |
48 | # mask token for use in masked recurrent networks
49 | # usually need to be the 0th index
50 | self.use_mask = use_mask
51 | self.mask_token = mask_token
52 | if self.use_mask:
53 | self.add(self.mask_token)
54 |
55 | # unk token for out of vocabulary tokens
56 | self.use_unks = use_unks
57 | self.unk_token = unk_token
58 | if self.use_unks:
59 | self.add(self.unk_token)
60 |
61 | # start token for sequence models
62 | self.use_start_end = use_start_end
63 | self.start_token = start_token
64 | self.end_token = end_token
65 | if self.use_start_end:
66 | self.add(self.start_token)
67 | self.add(self.end_token)
68 |
69 | def iterkeys(self):
70 | for k in self._mapping.keys():
71 | if k == self.unk_token or k == self.mask_token:
72 | continue
73 | else:
74 | yield k
75 |
76 | def keys(self):
77 | return list(self.iterkeys())
78 |
79 | def iteritems(self):
80 | for key, value in self._mapping.items():
81 | if key == self.unk_token or key == self.mask_token:
82 | continue
83 | yield key, value
84 |
85 | def items(self):
86 | return list(self.iteritems())
87 |
88 | def values(self):
89 | return [value for _, value in self.iteritems()]
90 |
91 | def __getitem__(self, k):
92 | if self._frozen:
93 | if k in self._mapping:
94 | out_index = self._mapping[k]
95 | elif self.use_unks:
96 | out_index = self.unk_index
97 | else: # case: frozen, don't want unks, raise exception
98 | raise VocabularyException("Vocabulary is frozen. " +
99 | "Key '{}' not found.".format(k))
100 | elif k in self._mapping: # case: normal
101 | out_index = self._mapping[k]
102 | else:
103 | out_index = self._mapping[k] = self._i
104 | self._i += 1
105 | self._flip[out_index] = k
106 |
107 | return out_index
108 |
109 | def add(self, k):
110 | return self.__getitem__(k)
111 |
112 | def add_many(self, x):
113 | return [self.add(k) for k in x]
114 |
115 | def lookup(self, i):
116 | try:
117 | return self._flip[i]
118 | except KeyError:
119 | raise VocabularyException("Key {} not in Vocabulary".format(i))
120 |
121 | def lookup_many(self, x):
122 | for k in x:
123 | yield self.lookup(k)
124 |
125 | def map(self, sequence, include_start_end=False):
126 | if include_start_end:
127 | yield self.start_index
128 |
129 | for item in sequence:
130 | yield self[item]
131 |
132 | if include_start_end:
133 | yield self.end_index
134 |
135 | def freeze(self, use_unks=False):
136 | self.use_unks = use_unks
137 |
138 | if use_unks and self.unk_token not in self:
139 | self.add(self.unk_token)
140 |
141 | self._frozen = True
142 |
143 | def unfreeze(self):
144 | self._frozen = False
145 |
146 | @property
147 | def unk_index(self):
148 | if self.unk_token not in self:
149 | return None
150 | return self._mapping[self.unk_token]
151 |
152 | @property
153 | def mask_index(self):
154 | if self.mask_token not in self:
155 | return None
156 | return self._mapping[self.mask_token]
157 |
158 | @property
159 | def start_index(self):
160 | if self.start_token not in self:
161 | return None
162 | return self._mapping[self.start_token]
163 |
164 | @property
165 | def end_index(self):
166 | if self.end_token not in self:
167 | return None
168 | return self._mapping[self.end_token]
169 |
170 | def __contains__(self, k):
171 | return k in self._mapping
172 |
173 | def __len__(self):
174 | return len(self._mapping)
175 |
176 | def __repr__(self):
177 | return "<Vocabulary(size={}, frozen={})>".format(len(self), self._frozen)
178 |
179 | def get_serializable_contents(self):
180 | """
181 | Creates a dict containing the necessary information to recreate this instance
182 | """
183 | config = {"_mapping": self._mapping,
184 | "_flip": self._flip,
185 | "_frozen": self._frozen,
186 | "_i": self._i,
187 | "_counts": list(getattr(self, "_counts", Counter()).items()),
188 | "_frequency_threshold": getattr(self, "_frequency_threshold", 0),
189 | "use_unks": self.use_unks,
190 | "unk_token": self.unk_token,
191 | "use_mask": self.use_mask,
192 | "mask_token": self.mask_token,
193 | "use_start_end": self.use_start_end,
194 | "start_token": self.start_token,
195 | "end_token": self.end_token}
196 | return config
197 |
198 | @classmethod
199 | def deserialize_from_contents(cls, content):
200 | """
201 | Recreate a Vocabulary instance; expect same dict as output in `serialize`
202 | """
203 | try:
204 | _mapping = content.pop("_mapping")
205 | _flip = content.pop("_flip")
206 | _i = content.pop("_i")
207 | _frozen = content.pop("_frozen")
208 | _counts = content.pop("_counts")
209 | _frequency_threshold = content.pop("_frequency_threshold")
210 | except KeyError:
211 | raise Exception("unable to deserialize vocabulary")
212 | if isinstance(list(_flip.keys())[0], six.string_types):
213 | _flip = {int(k): v for k, v in _flip.items()}
214 | out = cls(**content)
215 | out._mapping = _mapping
216 | out._flip = _flip
217 | out._i = _i
218 | out._counts = Counter(dict(_counts))
219 | out._frequency_threshold = _frequency_threshold
220 |
221 | if _frozen:
222 | out.freeze(out.use_unks)
223 |
224 | return out
225 |
226 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 | _static/
3 | _templates/
4 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = pytorch-nlp-tutorial-sf2017
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # pytorch-nlp-tutorial documentation build configuration file, created by
5 | # sphinx-quickstart on Sun Sep 3 13:31:44 2017.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | # import os
21 | # import sys
22 | # sys.path.insert(0, os.path.abspath('.'))
23 |
24 |
25 | # -- General configuration ------------------------------------------------
26 |
27 | # If your documentation needs a minimal Sphinx version, state it here.
28 | #
29 | # needs_sphinx = '1.0'
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = ['sphinx.ext.mathjax']
35 |
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # The suffix(es) of source filenames.
40 | # You can specify multiple suffix as a list of string:
41 | #
42 | # source_suffix = ['.rst', '.md']
43 | source_suffix = '.rst'
44 |
45 | # The master toctree document.
46 | master_doc = 'index'
47 |
48 | # General information about the project.
49 | project = 'NLP with PyTorch'
50 | copyright = '2019, Brian McMahan and Delip Rao'
51 | author = 'Brian McMahan and Delip Rao'
52 |
53 | # The version info for the project you're documenting, acts as replacement for
54 | # |version| and |release|, also used in various other places throughout the
55 | # built documents.
56 | #
57 | # The short X.Y version.
58 | version = ''
59 | # The full version, including alpha/beta/rc tags.
60 | release = ''
61 |
62 | # The language for content autogenerated by Sphinx. Refer to documentation
63 | # for a list of supported languages.
64 | #
65 | # This is also used if you do content translation via gettext catalogs.
66 | # Usually you set "language" from the command line for these cases.
67 | language = None
68 |
69 | # List of patterns, relative to source directory, that match files and
70 | # directories to ignore when looking for source files.
71 | # This patterns also effect to html_static_path and html_extra_path
72 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
73 |
74 | # The name of the Pygments (syntax highlighting) style to use.
75 | pygments_style = 'sphinx'
76 |
77 | # If true, `todo` and `todoList` produce output, else they produce nothing.
78 | todo_include_todos = False
79 |
80 |
81 | # -- Options for HTML output ----------------------------------------------
82 |
83 | # The theme to use for HTML and HTML Help pages. See the documentation for
84 | # a list of builtin themes.
85 | #
86 | html_theme = 'sphinx_rtd_theme'
87 |
88 | # Theme options are theme-specific and customize the look and feel of a theme
89 | # further. For a list of options available for each theme, see the
90 | # documentation.
91 | #
92 | # html_theme_options = {}
93 |
94 | # Add any paths that contain custom static files (such as style sheets) here,
95 | # relative to this directory. They are copied after the builtin static files,
96 | # so a file named "default.css" will overwrite the builtin "default.css".
97 | html_static_path = ['_static']
98 |
99 |
100 | # -- Options for HTMLHelp output ------------------------------------------
101 |
102 | # Output file base name for HTML help builder.
103 | htmlhelp_basename = 'pytorch-nlp-tutorial'
104 |
105 |
106 | # -- Options for LaTeX output ---------------------------------------------
107 |
108 | latex_elements = {
109 | # The paper size ('letterpaper' or 'a4paper').
110 | #
111 | # 'papersize': 'letterpaper',
112 |
113 | # The font size ('10pt', '11pt' or '12pt').
114 | #
115 | # 'pointsize': '10pt',
116 |
117 | # Additional stuff for the LaTeX preamble.
118 | #
119 | # 'preamble': '',
120 |
121 | # Latex figure (float) alignment
122 | #
123 | # 'figure_align': 'htbp',
124 | }
125 |
126 | # Grouping the document tree into LaTeX files. List of tuples
127 | # (source start file, target name, title,
128 | # author, documentclass [howto, manual, or own class]).
129 | latex_documents = [
130 | (master_doc, 'pytorch-nlp-tutorial.tex', 'pytorch-nlp-tutorial Documentation',
131 | 'Brian McMahan and Delip Rao', 'manual'),
132 | ]
133 |
134 |
135 | # -- Options for manual page output ---------------------------------------
136 |
137 | # One entry per manual page. List of tuples
138 | # (source start file, name, description, authors, manual section).
139 | man_pages = [
140 | (master_doc, 'pytorch-nlp-tutorial', 'pytorch-nlp-tutorial Documentation',
141 | [author], 1)
142 | ]
143 |
144 | # -- Options for Texinfo output -------------------------------------------
145 |
146 | # Grouping the document tree into Texinfo files. List of tuples
147 | # (source start file, target name, title, author,
148 | # dir menu entry, description, category)
149 | texinfo_documents = [
150 | (master_doc, 'pytorch-nlp-tutorial', 'pytorch-nlp-tutorial Documentation',
151 | author, 'pytorch-nlp-tutorial', 'One line description of project.',
152 | 'Miscellaneous'),
153 | ]
154 |
155 |
156 |
157 |
--------------------------------------------------------------------------------
/docs/day1/index.rst:
--------------------------------------------------------------------------------
1 | Day 1
2 | =====
3 |
4 | Here you will find things from Day 1!
5 |
6 | .. toctree::
7 | :maxdepth: 2
8 |
9 | solutions
10 |
--------------------------------------------------------------------------------
/docs/day1/solutions.rst:
--------------------------------------------------------------------------------
1 | Solutions
2 | =========
3 |
4 | Problem 1
5 | ---------
6 |
7 | For when x is a scalar or a vector of length 1:
8 |
9 | .. code-block:: python
10 |
11 | def f(x):
12 | if x > 0:
13 | return torch.sin(x)
14 | else:
15 | return torch.cos(x)
16 |
17 | x = torch.tensor([1.0], requires_grad=True)
18 |
19 | y = f(x)
20 | print(y)
21 | y.backward()
22 | print(x.grad)
23 |
24 |
25 | For when x is a vector, the conditional becomes ambiguous. To handle this, we can use the python `all` function. Computing the backward pass requires that the error signal be a scalar. Since there are now multiple outputs of `f`, we can turn `y` into a scalar just by summing the outputs.
26 |
27 | .. code-block:: python
28 |
29 | def f(x):
30 | if all(x > 0):
31 | return torch.sin(x)
32 | else:
33 | return torch.cos(x)
34 |
35 | x = torch.tensor([1.0, 0.5], requires_grad=True)
36 |
37 | y = f(x)
38 | print(y)
39 | y.sum().backward()
40 | print(x.grad)
41 |
42 |
43 | There is one last catch to this: we are forcing the fate of the entire vector onto a strong "and" condition (either all items are above 0, or they are all treated as if they were below 0). To handle things at a more granular level, there are two different methods.
44 |
45 | Method 1: use a for loop
46 |
47 | .. code-block:: python
48 |
49 |
50 | def f2(x):
51 | output = []
52 | for x_i in x:
53 | if x_i > 0:
54 | output.append(torch.sin(x_i))
55 | else:
56 | output.append(torch.cos(x_i))
57 | return torch.stack(output)
58 |
59 | x = torch.tensor([1.0, -1.0], requires_grad=True)
60 | y = f2(x)
61 | print(y)
62 | y.sum().backward()
63 | print(x.grad)
64 |
65 | Method 2: use a mask
66 |
67 | .. code-block:: python
68 |
69 | def f3(x):
70 | mask = (x > 0).float()
71 | # alternatively, mask = torch.gt(x, 0).float()
72 | return mask * torch.sin(x) + (1 - mask) * torch.cos(x)
73 |
74 | x = torch.tensor([1.0, -1.0], requires_grad=True)
75 | y = f3(x)
76 | print(y)
77 | y.sum().backward()
78 | print(x.grad)
79 |
80 |
81 | Problem 2
82 | ---------
83 |
84 | .. code-block:: python
85 |
86 | def cbow(phrase):
87 | words = phrase.split(" ")
88 | embeddings = []
89 | for word in words:
90 | if word in glove.word_to_index:
91 | embeddings.append(glove.get_embedding(word))
92 | embeddings = np.stack(embeddings)
93 | return np.mean(embeddings, axis=0)
94 |
95 | cbow("the dog flew over the moon").shape
96 |
97 | # >> (100,)
98 |
99 | def cbow_sim(phrase1, phrase2):
100 | vec1 = cbow(phrase1)
101 | vec2 = cbow(phrase2)
102 | return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
103 |
104 | cbow_sim("green apple", "green apple")
105 | # >> 1.0
106 |
107 | cbow_sim("green apple", "apple green")
108 | # >> 1.0
109 |
110 | cbow_sim("green apple", "red potato")
111 | # >> 0.749
112 |
113 | cbow_sim("green apple", "green alien")
114 | # >> 0.683
115 |
116 | cbow_sim("green apple", "blue alien")
117 | # >> 0.5799815958114477
118 |
119 | cbow_sim("eat an apple", "ingest an apple")
120 | # >> 0.9304712574359718
--------------------------------------------------------------------------------
/docs/day2/cyoa.rst:
--------------------------------------------------------------------------------
1 | Choose Your Own Adventure
2 | =========================
3 |
4 |
5 | The Choose Your Own Adventures have been structured to allow for pure model exploration without worrying as much about the dataset or the training routine. In each notebook, you will find an implemented dataset loading routine as well as an implemented training routine. These two parts should seem familiar to you given the last 2 days of content. What is not implemented is the model definition or its instantiation.
6 |
7 | It is up to you what you want to use! Do you want to build the Continuous Bag of Words (CBOW)? Use an RNN or a CNN? Do you want to combine the CNN and RNN? Try out whatever you like and see if you can get the highest accuracy in class!
8 |
9 | Strategies for Model Exploration
10 | --------------------------------
11 |
12 | Identifying the I/O
13 | ^^^^^^^^^^^^^^^^^^^
14 |
15 | A good place to start when doing model exploration is by defining the input-output problem that the model is intended to solve.
16 |
17 | If you look at the previous models, you will notice the following pattern:
18 |
19 | - Each model starts with embedding the inputs
20 | - Each model ends with applying a Linear layer to create the correct output size
21 |
22 | These are the input and output of the models.
23 |
24 | Fail Fast Prototyping
25 | ^^^^^^^^^^^^^^^^^^^^^
26 |
27 | Use the dataset to get a single batch and the input data from that batch. You can use that sample input data to prototype an approach to solving the I/O problem.
28 |
29 | .. code-block:: python
30 |
31 | batch = next(iter(DataLoader(dataset, batch_size=4)))
32 | print(module_to_test(batch['x_data']).shape)
33 |
34 | Three simple models to try
35 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
36 |
37 | 1. Continuous Bag of Words (CBOW)
38 | - CBOW is a model which has the following structure: embed the tokens, pool their embeddings in some way, classify the resulting vector. One way of pooling is to just average them. Others could include taking the max or summing. A Linear layer is then used to compute the classification vector. A minimal sketch is given at the bottom of this page.
39 |
40 | 2. Text Convolutional Neural Network (CNN)
41 | - A CNN learns spatially invariant patterns because it applies its weights as a sliding window over the input. You can keep applying more and more CNNs (as in the Chinese Document example), or you could apply one or two and then pool in the same way as the CBOW. Once you have a single vector for each data point, a final Linear layer is used to compute the classification vector.
42 |
43 | 3. Recurrent Neural Network (RNN)
44 | - Whether it operates over characters or words, an RNN learns a sequence model of its inputs. For classification, the final vector of the sequence is used to represent the entire sequence. This vector is optionally passed through a couple of Linear layers (which together can be described as a Multilayer Perceptron). Finally, whether or not the final RNN vector goes through a Multilayer Perceptron, a final Linear layer is used to compute the classification output.
45 |
46 |
47 | More complicated models to try
48 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
49 |
50 | 1. Better RNNs (Gated Recurrent Unit (GRU), Long Short Term Memory (LSTM))
51 | - Instead of using the simple RNN provided, use an RNN variant that has gating like GRU or LSTM
52 |
53 | 2. CNN + RNN
54 | - One thing you could try is to apply a CNN one or more times to create sequences of vectors that are informed by their neighboring vectors. Then, apply the RNN to learn a sequence model over these vectors. You can use the same method to pull out the final vector for each sequence, but with one caveat. If you apply the CNN in a way that shrinks the sequence dimension, then the indices of the final positions won't quite be right. One way to get around this is to have the CNN keep the sequence dimension the same size. This is done by setting the `padding` argument to be `kernel_size//2`. For example, if `kernel_size=3`, then it should be that `padding=1`. Similarly with `kernel_size=5`, then `padding=2`. The padding is added onto both sides of the sequence dimension.
55 |
56 | 3. Deep Averaging Network
57 | - The Deep Averaging Network is very similar to CBOW, but has one major difference: it applies an MLP to the pooled vectors.
58 |
59 | 4. Using Attention
60 | - If you're feeling ambitious, try implementing attention!
61 | - One way to do attention is to use a Linear layer which maps feature vectors to scalars
62 | + We begin with a sequence tensor, x_data, that is embedded: x_embedded_sequence = emb(x_data)
63 | + Applying the Linear layer at each position gives a tensor shaped like the embedded sequence but with a feature size of 1: attention_weights has shape (batch, sequence, 1)
64 | + You can use the apply_across_sequence_loop or apply_across_sequence_reshape helpers, or apply the Linear layer directly to the 3-dim tensor
65 | - A softmax over the sequence dimension is then used to turn these scalars into a probability vector
66 | + attention_weights = F.softmax(attention_weights, dim=1)
67 | - The probability vector is broadcast (multiplied) across the sequence, so that it weights each sequence vector
68 | + weighted_sequence = attention_weights * x_embedded_sequence
69 | - The weighted sequence is then summed over the sequence dimension
70 | + weighted_sequence.sum(dim=1)
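71 | 
72 | Putting the pieces together
73 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^
74 | 
75 | To make the recipes above concrete, here is one possible sketch of two drop-in models: a CBOW-style classifier (embed, average-pool, classify with a Linear layer) and a variant that pools with the attention steps just described. This is not the required solution; the class names here are made up, and the constructor arguments are assumed to come from your vectorizer (e.g. ``args.num_embeddings`` and ``args.num_classes`` in the notebooks).
76 | 
77 | .. code-block:: python
78 | 
79 |     import torch.nn as nn
80 |     import torch.nn.functional as F
81 | 
82 |     class CBOWClassifier(nn.Module):
83 |         """Embed the tokens, average the embeddings, classify the pooled vector."""
84 |         def __init__(self, num_embeddings, embedding_dim, num_classes):
85 |             super().__init__()
86 |             # index 0 is the mask/padding index in the notebooks' Vocabulary
87 |             self.emb = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
88 |             self.fc = nn.Linear(embedding_dim, num_classes)
89 | 
90 |         def forward(self, x_data, x_lengths=None):
91 |             x_embedded = self.emb(x_data)    # (batch, sequence, embedding_dim)
92 |             # note: this average also counts padded positions; x_lengths could be used for an exact mean
93 |             pooled = x_embedded.mean(dim=1)  # (batch, embedding_dim)
94 |             return self.fc(pooled)           # (batch, num_classes)
95 | 
96 |     class AttentionPoolClassifier(nn.Module):
97 |         """Same input/output as above, but the pooling weights are learned."""
98 |         def __init__(self, num_embeddings, embedding_dim, num_classes):
99 |             super().__init__()
100 |             self.emb = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
101 |             self.attn = nn.Linear(embedding_dim, 1)  # feature vector -> scalar score
102 |             self.fc = nn.Linear(embedding_dim, num_classes)
103 | 
104 |         def forward(self, x_data, x_lengths=None):
105 |             x_embedded_sequence = self.emb(x_data)                    # (batch, sequence, embedding_dim)
106 |             attention_weights = self.attn(x_embedded_sequence)       # (batch, sequence, 1)
107 |             attention_weights = F.softmax(attention_weights, dim=1)  # softmax over the sequence dimension
108 |             weighted_sequence = attention_weights * x_embedded_sequence
109 |             pooled = weighted_sequence.sum(dim=1)                     # (batch, embedding_dim)
110 |             return self.fc(pooled)
111 | 
112 | Either module matches the call the training routine makes, ``model(batch['x_data'], batch['x_lengths'])``, and returns a ``(batch, num_classes)`` tensor; ``x_lengths`` is simply ignored here.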
--------------------------------------------------------------------------------
/docs/day2/failfastprototypemode.rst:
--------------------------------------------------------------------------------
1 | Fail Fast Prototype Mode
2 | ========================
3 |
4 | When building neural networks, you want things to either work or fail fast. Long iteration loops are the truest enemy of the machine learning practitioner.
5 |
6 |
7 | To that end, the following techniques will help you out.
8 |
9 | .. code-block:: python
10 |
11 | import torch
12 | import torch.nn as nn
13 |
14 | # 2dim tensor.. aka a matrix
15 | x = torch.randn(4, 5)
16 |
17 | # this is the same as:
18 | batch_size = 4
19 | feature_size = 5
20 | x = torch.randn(batch_size, feature_size)
21 |
22 | # now let's try out some NN layer
23 | output_size = 10
24 | fc = nn.Linear(feature_size, output_size)
25 | print(fc(x).shape)  # expected: torch.Size([4, 10])
26 |
27 |
28 | You can construct whatever prototype variables you want this way.
29 |
30 | Prototyping an embedding
31 | ^^^^^^^^^^^^^^^^^^^^^^^^
32 |
33 |
34 | .. code-block:: python
35 |
36 | import torch
37 | import torch.nn as nn
38 |
39 | batch_size = 4
40 | sequence_size = 5
41 | integer_range = 100
42 | embedding_size = 25
43 | # notice rand vs randn. rand is uniform on (0, 1); randn is standard normal (mean 0, std 1)
44 | random_numbers = (torch.rand(batch_size, sequence_size) * integer_range).long()
45 |
46 | embedder = nn.Embedding(num_embeddings=integer_range,
47 | embedding_dim=embedding_size)
48 |
49 | print(embedder(random_numbers).shape)  # expected: torch.Size([4, 5, 25])
50 |
51 |
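Prototyping a convolution
^^^^^^^^^^^^^^^^^^^^^^^^^

The same trick works for any layer. As one more example (a sketch with made-up sizes, not code from the notebooks), here is how you might prototype a Conv1d over a fake embedded sequence:

.. code-block:: python

    import torch
    import torch.nn as nn

    batch_size = 4
    feature_size = 25
    sequence_size = 10

    # Conv1d expects (batch, channels/features, sequence)
    x_fake_embedded = torch.randn(batch_size, feature_size, sequence_size)

    conv = nn.Conv1d(in_channels=feature_size, out_channels=32, kernel_size=3)
    print(conv(x_fake_embedded).shape)  # expected: torch.Size([4, 32, 8])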
52 |
--------------------------------------------------------------------------------
/docs/day2/tensorfu1.rst:
--------------------------------------------------------------------------------
1 | Tensor-Fu-1
2 | ===========
3 |
4 |
5 | Exercise 1
6 | ----------
7 |
8 | Task: create a tensor for prototyping using :code:`torch.randn`.
9 |
10 | .. code-block:: python
11 |
12 | import torch
13 | import torch.nn as nn
14 |
15 |
16 |
17 | Exercise 2
18 | ----------
19 |
20 |
21 | Task: Create a linear layer which works with x2dim
22 |
23 | .. code-block:: python
24 |
25 | import torch
26 | import torch.nn as nn
27 |
28 | x2dim = torch.randn(9, 10)
29 |
30 | # required and default parameters:
31 | # fc = nn.Linear(in_features, out_features)
32 |
33 |
34 | Exercise 3
35 | ----------
36 |
37 | Task: Create a convolution which works on x3dim
38 |
39 | .. code-block:: python
40 |
41 | import torch
42 | import torch.nn as nn
43 |
44 | x3dim = torch.randn(9, 10, 11)
45 |
46 | # required and default parameters:
47 | # conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0)
48 |
--------------------------------------------------------------------------------
/docs/day2/tensorfu2.rst:
--------------------------------------------------------------------------------
1 | Tensor-Fu-2
2 | ===========
3 |
4 | Exercise 1
5 | ----------
6 |
7 |
8 | Task: The code below is not quite right for prototyping purposes.
9 | Fix it so that indices looks more like an actual batched data point
10 | with batch_size=30 and seq_length=10.
11 |
12 | .. code-block:: python
13 |
14 | indices = torch.from_numpy(np.random.randint(0, 100, size=(10,)))
15 |
16 | emb = nn.Embedding(num_embeddings=100, embedding_dim=16)
17 | assert emb(indices).shape == (30, 10, 16)
18 |
19 | Exercise 2
20 | ----------
21 |
22 | Task: Create a MultiEmbedding class which can input two sets of indices, embed them, and concat the results!
23 |
24 | .. code-block:: python
25 |
26 | class MultiEmbedding(nn.Module):
27 | def __init__(self, num_embeddings1, num_embeddings2, embedding_dim1, embedding_dim2):
28 | pass
29 |
30 | def forward(self, indices1, indices2):
31 | # use something like
32 | # z = torch.cat([x, y], dim=1)
33 |
34 | pass
35 |
36 |
37 | # testing
38 |
39 | # use indices method from above
40 | # the batch dimensions should agree
41 | # indices1 =
42 | # indices2 =
43 | # multiemb = MultiEmbedding(num_emb1, num_emb2, size_emb1, size_emb2)
44 | # output = multiemb(indices1, indices2)
45 | # print(output.shape) # should be (batch, size_emb1 + size_emb2)
--------------------------------------------------------------------------------
/docs/day2/warmup.rst:
--------------------------------------------------------------------------------
1 | Warm Up Exercise
2 | ================
3 |
4 | To get you back into the PyTorch groove, let's do some easy exercises. You will have 10 minutes. See how far you can get.
5 |
6 | 1. Use :code:`torch.randn` to create two tensors of size (29, 30, 32) and (32, 100).
7 | 2. Use :code:`torch.matmul` to matrix multiply the two tensors.
8 | 3. Use :code:`torch.sum` on the resulting tensor, passing the optional argument of :code:`dim=1` to sum across the 1st dimension. Before you run this, can you predict the size?
9 | 4. Create a new long tensor of size (3, 10) from the :code:`np.random.randint` method.
10 | 5. Use this new long tensor to index into the tensor from step 3.
11 | 6. Use :code:`torch.mean` to average across the last dimension in the tensor from step 5.
12 |
--------------------------------------------------------------------------------
/docs/download_data.rst:
--------------------------------------------------------------------------------
1 | Getting the Data
2 | ================
3 |
4 | In this training, there are two options for participating.
5 |
6 | Option 1: Download and Setup things on your laptop
7 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8 |
9 | The first option is to download the data below, set up the environment, and download the notebooks when we make them available.
10 | If you choose this option but do not download the data before the first day, we will have several flash drives with the data on them.
11 |
12 | Please visit `this link `_ to download the data.
13 |
14 |
15 | Option 2: Use O'Reilly's online resource through your browser
16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17 |
18 | The second option is to use an online resource provided by O'Reilly. On the first day of this training, you will be provided with a link to a JupyterHub instance where the environment will be pre-made and ready to go! If you choose this option, you do not have to do anything until you arrive on Sunday.
19 | You are still required to bring your laptop.
20 |
21 |
--------------------------------------------------------------------------------
/docs/environment_setup.rst:
--------------------------------------------------------------------------------
1 | Environment Setup
2 | =================
3 |
4 | On this page, you will find not only the list of dependencies to install
5 | for the tutorial, but a description of how to install them. This tutorial assumes
6 | you have a laptop with OSX or Linux. If you use Windows, you might have to install
7 | a virtual machine to get a UNIX-like environment to follow the rest of these
8 | instructions. Some of the instructions are more verbose than needed to accommodate
9 | participants of different skill levels.
10 |
11 | **Please note that these steps are optional. On the first day of this training, you will be provided with a link to a JupyterHub instance where the environment will be pre-made and ready to go!**
12 |
13 | 0. Get Anaconda
14 | ---------------
15 |
16 | Anaconda is a Python (and R) distribution that aims to provide everything
17 | needed for common scientific and machine learning situations out-of-the-box.
18 | We chose Anaconda for this tutorial as it significantly simplifies Python
19 | dependency management.
20 |
21 | In practice, Anaconda can be used to manage different environments and packages.
22 | This setup document will assume that you have Anaconda installed as your default
23 | Python distribution.
24 |
25 | You can download Anaconda here: https://www.continuum.io/downloads
26 |
27 | After installing Anaconda, you can access its command-line interface
28 | with the :code:`conda` command.
29 |
30 |
31 | 1. Create a new environment
32 | ---------------------------
33 |
34 | Environments are a tool for sanitary software development. By this, we mean that
35 | you can install specific versions of packages without worrying that it breaks
36 | a dependency elsewhere.
37 |
38 | Here is how you can create an environment with Anaconda
39 |
40 | .. code-block:: bash
41 |
42 | conda create -n dl4nlp python=3.6
43 |
44 |
45 | 2. Install Dependencies
46 | -----------------------
47 |
48 | 2a. Activate the environment
49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
50 |
51 | After creating the environment, you need to **activate** the environment:
52 |
53 | .. code-block:: bash
54 |
55 | source activate dl4nlp
56 |
57 | After an environment is activated, it might prepend/append itself to your
58 | console prompt to let you know it is active.
59 |
60 | With the environment activated, any installation commands
61 | (whether it is :code:`pip install X`, :code:`python setup.py install` or using
62 | Anaconda's install command :code:`conda install X`) will only install inside
63 | the environment.
64 |
65 | 2b. Install IPython and Jupyter
66 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
67 |
68 | Two core dependencies are IPython and Jupyter. Let's install them first:
69 |
70 | .. code-block:: bash
71 |
72 | conda install ipython
73 | conda install jupyter
74 |
75 | To allow Jupyter notebooks to use this environment as their kernel, it
76 | needs to be linked:
77 |
78 | .. code-block:: bash
79 |
80 | python -m ipykernel install --user --name dl4nlp
81 |
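You can confirm the kernel was registered with:

.. code-block:: bash

    jupyter kernelspec list
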
82 | 2c. Installing CUDA (optional)
83 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
84 |
85 | NOTE: CUDA itself is not installed through the conda package manager.
86 | Please refer to PyTorch's GitHub repository for compilation instructions.
87 |
88 | If you have a CUDA compatible GPU, it is worthwhile to take advantage of it as
89 | it can significantly speedup training and make your PyTorch experimentation more
90 | enjoyable.
91 |
92 | To install CUDA:
93 |
94 | 1. Download CUDA appropriate to your OS/Arch from `here `_.
95 | 2. Follow installation steps for your architecture/OS. For Ubuntu/x86_64, see `here `_.
96 | 3. Download and install CUDNN from `here `_.
97 |
98 | Make sure you have the latest CUDA and CUDNN.
99 |
100 | 2d. Install PyTorch
101 | ^^^^^^^^^^^^^^^^^^^
102 |
103 | There are instructions on http://pytorch.org which detail how to install it.
104 | If you have been following along so far and have Anaconda installed with CUDA enabled, you can simply do:
105 |
106 |
107 | .. code-block:: bash
108 |
109 | conda install pytorch torchvision -c pytorch
110 |
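Optionally, you can sanity check the installation from a Python prompt (the exact version printed will vary):

.. code-block:: python

    import torch

    print(torch.__version__)          # installed PyTorch version
    print(torch.cuda.is_available())  # True only if CUDA was set up and detected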
111 |
112 | 2e. Clone (or Download) Repository
113 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
114 |
115 | At this point, you may have already cloned the tutorial repository. But if
116 | you have not, you will need it for the next step.
117 |
118 | .. code-block:: bash
119 |
120 | git clone https://github.com/joosthub/pytorch-nlp-tutorial-eu2018.git
121 |
122 | If you do not have git or do not want to use it, you can also
123 | `download the repository as a zip file `_
124 |
125 | 2f. Install Dependencies from Repository
126 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
127 |
128 | Assuming you have cloned (or downloaded and unzipped) the repository,
129 | please navigate to the directory in your terminal. Then, you can do the following:
130 |
131 | .. code-block:: bash
132 |
133 | pip install -r requirements.txt
134 |
--------------------------------------------------------------------------------
/docs/errata.rst:
--------------------------------------------------------------------------------
1 | Errata
2 | ======
3 |
4 | Please check back.
5 |
--------------------------------------------------------------------------------
/docs/extras/attention.rst:
--------------------------------------------------------------------------------
1 | Design Pattern: Attention
2 | =========================
3 |
4 | Attention is a useful pattern for when you want to take a collection of vectors---whether it be a sequence of vectors representing a sequence of words, or an unordered collection of vectors representing a collection of attributes---and summarize them into a single vector. This has similar analogs to the CBOW examples we saw on Day 1, but instead of just averaging or using max pooling, we learn a function which computes the weights for each of the vectors before summing them together.
5 |
6 | Importantly, the weights that the attention module learns form a valid probability distribution. This means that weighting the vectors by the values the attention module learns can additionally be seen as computing an expectation. Alternatively, it can be seen as interpolating between the vectors. In any case, attention's main use is to select 'softly' amongst a set of vectors.
7 |
8 | Attention has several different published forms. The one below is very simple: it just learns a single vector as the attention mechanism.
9 |
10 | Using the :code:`new_parameter` function we have been using for the RNN notebooks:
11 |
12 | .. code-block:: python
13 |
14 | def new_parameter(*size):
15 | out = Parameter(FloatTensor(*size))
16 | torch.nn.init.xavier_normal_(out)
17 | return out
18 |
19 | We can then do:
20 |
21 | .. code-block:: python
22 |
23 | class Attention(nn.Module):
24 | def __init__(self, attention_size):
25 | super(Attention, self).__init__()
26 | self.attention = new_parameter(attention_size, 1)
27 |
28 | def forward(self, x_in):
29 | # after this, we have (batch, dim1) with a diff weight per each cell
30 | attention_score = torch.matmul(x_in, self.attention).squeeze()
31 | attention_score = F.softmax(attention_score, dim=1).view(x_in.size(0), x_in.size(1), 1)
32 | scored_x = x_in * attention_score
33 |
34 | # now, sum across dim 1 to get the expected feature vector
35 | condensed_x = torch.sum(scored_x, dim=1)
36 |
37 | return condensed_x
38 |
39 |
40 |
41 | attn = Attention(100)
42 | x = torch.randn(16, 30, 100)
43 | attn(x).size() == (16,100)
44 |
45 |
46 |
--------------------------------------------------------------------------------
/docs/extras/compute_conv_size.rst:
--------------------------------------------------------------------------------
1 | Compute Convolution Sizes
2 | =========================
3 |
4 |
5 | .. code-block:: python
6 |
7 | import math
8 |
9 | def conv_shape_helper_1d(input_seq_len, kernel_size, stride=1, padding=0, dilation=1):
10 | kernel_width = dilation * (kernel_size - 1) + 1
11 | tensor_size = input_seq_len + 2 * padding
12 | return math.floor((tensor_size - kernel_width) / stride + 1)
13 |
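As a quick check of the helper (with made-up sizes), a kernel of size 3 shrinks a length-10 sequence to 8, while adding padding of kernel_size//2 preserves the length:

.. code-block:: python

    print(conv_shape_helper_1d(10, kernel_size=3))             # 8
    print(conv_shape_helper_1d(10, kernel_size=3, padding=1))  # 10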
--------------------------------------------------------------------------------
/docs/extras/conv_notes.rst:
--------------------------------------------------------------------------------
1 | Notes for Convolution Models
2 | ============================
3 |
4 | Implementing a convolutional model can be tricky. Here are some notes to help get you along:
5 |
6 | 1. Convolutions in PyTorch expect the channels to be on the 1st dimension
7 | - We treat the feature vectors (typically from an embedding layer) as the channel / kernel dimension
8 | - If the shape of your tensor is (batch, seq, feature), then this means a permutation is needed to move it to (batch, feature, seq)
9 | - To get an intuition as why, imagine an image. A minibatch of images is (batch, 3, width, height) because an image exists as RGB coordinates for each pixel.
10 | - This can also be thought of as 3 feature maps of the image
11 | - In the same way, the feature dimension of a sequence can be separate feature maps for the entire sequence
12 | - **The consequence of all of this is a required permute operation post-embedding but pre-convolution: x_embedded.permute(0, 2, 1)**
13 | 2. It is a modeling choice on how to go from the embedded tensor to a final vector, but the goal **is to end up with a single final vector per batch item**
14 | - This means the channel dimension will be our final vector and we want to apply operations to shrink the sequence dimension until it is size=1
15 | - You could create enough convolutions that eventually it will shrink to size 1
16 | - This becomes dependent on the max_seq_len (if this changes, the number of convolutions to shrink to size=1 also changes)
17 | - Some sort of pooling operation that is robust to length variation (such as max pooling over the sequence dimension) is recommended for the final reduction; see the sketch below.
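
Putting these notes together, here is a minimal sketch (with made-up sizes, not code from the notebooks) of going from token indices to a single vector per batch item:

.. code-block:: python

    import torch
    import torch.nn as nn

    batch_size, sequence_size = 4, 12
    num_embeddings, embedding_size, num_channels = 100, 16, 32

    x_data = (torch.rand(batch_size, sequence_size) * num_embeddings).long()

    emb = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_size)
    conv = nn.Conv1d(in_channels=embedding_size, out_channels=num_channels,
                     kernel_size=3, padding=1)

    x_embedded = emb(x_data)                  # (batch, seq, feature)
    x_embedded = x_embedded.permute(0, 2, 1)  # (batch, feature, seq) for the convolution
    x_conv = conv(x_embedded)                 # (batch, channels, seq); padding=kernel_size//2 keeps seq the same
    x_final = x_conv.max(dim=2)[0]            # max pool over the sequence -> (batch, channels)
    print(x_final.shape)                      # expected: torch.Size([4, 32])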
--------------------------------------------------------------------------------
/docs/extras/index.rst:
--------------------------------------------------------------------------------
1 | Recipes and PyTorch patterns
2 | ============================
3 |
4 |
5 | In this section, you will find a set of recipes for doing various things with PyTorch.
6 |
7 | .. toctree::
8 |
9 | load_pretrained_vectors
10 | compute_conv_size
11 | conv_notes
12 | attention
13 |
--------------------------------------------------------------------------------
/docs/extras/load_pretrained_vectors.rst:
--------------------------------------------------------------------------------
1 | Loading Pretrained Vectors
2 | ==========================
3 |
4 | It can be extremely useful to give a model an advantageous starting point by initializing its embeddings with pretrained vectors.
5 |
6 | To do this, we can set the values of the embedding matrix. (The snippet below assumes :code:`torch` and :code:`tqdm_notebook` from the :code:`tqdm` package have already been imported, and that :code:`token_vocab` maps tokens to row indices.)
7 |
8 |
9 | .. code-block:: python
10 |
11 | def get_pretrained_embeddings(filename, dim_size, token_vocab):
12 | embedding_matrix = torch.zeros(len(token_vocab), dim_size)
13 | all_words = set(token_vocab.keys())
14 |
15 | with open(filename) as fp:
16 | for line in tqdm_notebook(fp.readlines(), leave=False):
17 | line = line.split(" ")
18 | word = line[0]
19 | if word not in token_vocab:
20 | continue
21 | all_words.remove(word)
22 | row_index = token_vocab[word]
23 | embedding_matrix[row_index] = torch.FloatTensor([float(x) for x in line[1:]])
24 | for remaining_word in all_words:
25 | row_index = token_vocab[remaining_word]
26 | embedding_matrix[row_index] = torch.nn.init.kaiming_normal_(torch.zeros(1, dim_size))
27 |
28 | return embedding_matrix
29 |
30 |
31 | Then, we can load that embedding matrix:
32 |
33 | .. code-block:: python
34 |
35 | load_pretrained = True
36 | embedding_size = 32
37 | pretrained_embeddings = None
38 |
39 | if load_pretrained:
40 | pretrained_embeddings = get_pretrained_embeddings("../data/glove.6B.100d.txt",
41 | dim_size=100,
42 | token_vocab=dataset.vectorizer.token_vocab)
43 | embedding_size = pretrained_embeddings.shape[1]
44 |
45 |
46 | And we can use it in an embedding layer:
47 |
48 | .. code-block:: python
49 |
50 | emb = nn.Embedding(embedding_dim=embedding_size,
51 | num_embeddings=num_embeddings,
52 | padding_idx=0,
53 | _weight=pretrained_embeddings)
--------------------------------------------------------------------------------
/docs/extras/setting_seed.rst:
--------------------------------------------------------------------------------
1 | Small Tidbits
2 | =============
3 |
4 |
5 | Set Seed Everywhere
6 | -------------------
7 |
8 | .. code-block:: python
9 |
10 | import numpy as np
11 | import torch
12 |
13 | def set_seed_everywhere(seed, cuda):
14 | """Set the seed for numpy and pytorch
15 |
16 | Args:
17 | seed (int): the seed to set everything to
18 | cuda (bool): whether to set the cuda seed as well
19 | """
20 | np.random.seed(seed)
21 | torch.manual_seed(seed)
22 | if cuda:
23 | torch.cuda.manual_seed_all(seed)
24 |
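For example (1337 is just an arbitrary seed value):

.. code-block:: python

    set_seed_everywhere(1337, cuda=torch.cuda.is_available())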
--------------------------------------------------------------------------------
/docs/faq.rst:
--------------------------------------------------------------------------------
1 | Frequently Asked Questions
2 | ==========================
3 |
4 | On this page, you will find a list of questions that we either anticipate
5 | people will ask or that we have been asked previously. They are intended to
6 | be the first stop for any confusion or trouble that might occur.
7 |
8 |
9 | Do I need to have an NVIDIA GPU-enabled laptop?
10 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11 |
12 | Nope! While having an NVIDIA GPU-enabled laptop will make the training run
13 | faster, we provide instructions for people who do not have one.
14 |
15 | If you plan on working on Natural Language Processing/Deep Learning in the future,
16 | a GPU-enabled laptop might be a good investment.
17 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Natural Language Processing (NLP) with PyTorch
2 | ==============================================
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 | :hidden:
7 | :caption: Extra Resources
8 |
9 | download_data
10 | environment_setup
11 | faq
12 | migration
13 |
14 | .. toctree::
15 | :hidden:
16 | :caption: Day 1 Materials
17 |
18 | day1/solutions
19 |
20 | .. toctree::
21 | :hidden:
22 | :maxdepth: 3
23 | :caption: Day 2 Materials
24 |
25 | day2/warmup
26 | day2/failfastprototypemode
27 | day2/tensorfu1
28 | day2/tensorfu2
29 | day2/cyoa
30 | extras/index
31 |
32 | ..
33 | day2/cyoa
34 | day2/tensorfu2
35 | day2/patterns/tidbits
36 | day2/adventures/lookups
37 | day2/adventures/interpolation
38 | day2/sampling
39 |
40 |
41 | Hello! This is a directory of resources for a training tutorial to be
42 | given at the O'Reilly AI Conference in New York City on Monday, April 15th, and Tuesday, April 16th.
43 |
44 | Please read below for general information. You can find the github repository at `this link `_. Please note that there are two ways to engage in this training (described below).
45 |
46 | More information will be added to this site as the training progresses.
47 |
48 | General Information
49 | -------------------
50 |
51 | Prerequisites:
52 | ^^^^^^^^^^^^^^
53 |
54 | - A working knowledge of Python and the command line
55 | - Familiarity with pre-calculus math (multiplying matrices, dot products of vectors, etc.) and derivatives of simple functions.
56 | - A general understanding of machine learning (setting up experiments, evaluation, etc.) (useful but not required)
57 |
58 | Hardware and/or installation requirements:
59 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
60 |
61 | - There are two options:
62 | 1. **Using O'Reilly's online resources**. For this, you only need a laptop; on the first day, we will provide a URL to use an online computing resource (a JupyterHub instance) provided by O'Reilly. If you have a Safari account, you can use that to log on. Otherwise, you can create a free trial (the free trial button is on the URL we provide). You will be able to access Jupyter notebooks through this, and they will persist until the end of the second day of training. This option is not limited by your operating system; you only need a browser installed.
63 | 2. **Setting everything up locally**. For this, you need a laptop with the PyTorch environment set up. This is only recommended if you want to have the environment locally or have a laptop with a GPU. (If you have trouble following the provided instructions or if you find any mistakes, please file an issue `here `_.)
64 |
--------------------------------------------------------------------------------
/docs/migration.rst:
--------------------------------------------------------------------------------
1 | Migrating to PyTorch 0.4.0
2 | ==========================
3 |
4 | If you have used PyTorch before 0.4.0, some things have changed! To help you understand how to migrate, the PyTorch folks have a wonderful migration guide found `here `_.
5 |
6 |
--------------------------------------------------------------------------------
/modelzoo/README.md:
--------------------------------------------------------------------------------
1 | # Model Zoo files
2 |
3 | In this folder, you should place the saved states that we have pre-trained.
4 |
5 | You can download them from [here](https://drive.google.com/file/d/0B2hg7DTHpfLsZW44aTRVd2FrbEE/view?usp=sharing)
6 |
7 |
8 | You should have the following files:
9 |
10 | - trump_twitter.vocab
11 | - surnames_classify.vocab
12 | - charnn_emb16_hid64_surnames_classify.state
13 | - charnn_emb16_hid64_surnames_predict.state
14 | - charnn_emb16_hid64_surnames_conditionally_predict.state
15 | - wordrnn_emb100_hid64_trump_tweets_predict_fresh_train_8_min.state
16 | - wordrnn_emb100_hid64_trump_tweets_predict.state
17 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | nltk
3 | annoy
4 | seaborn
5 | numpy
6 | matplotlib
7 | scikit-learn
8 | tqdm
9 | ipywidgets
10 | graphviz
--------------------------------------------------------------------------------