├── 1_Background_of_NLP.ipynb
├── 2_Representation_Vector.ipynb
├── 3_Tagging_RNN.ipynb
├── 4_NMT.ipynb
└── README.md
/1_Background_of_NLP.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "eng_data = pd.read_csv(\"../data/IMDB Dataset.csv\")"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
20 | "text/plain": [
21 | "(50000, 2)"
22 | ]
23 | },
24 | "execution_count": 2,
25 | "metadata": {},
26 | "output_type": "execute_result"
27 | }
28 | ],
29 | "source": [
30 | "eng_data.shape"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 3,
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "
\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " \n",
59 | " review \n",
60 | " sentiment \n",
61 | " \n",
62 | " \n",
63 | " \n",
64 | " \n",
65 | " 0 \n",
66 | " One of the other reviewers has mentioned that ... \n",
67 | " positive \n",
68 | " \n",
69 | " \n",
70 | " 1 \n",
71 | " A wonderful little production. <br /><br />The... \n",
72 | " positive \n",
73 | " \n",
74 | " \n",
75 | " 2 \n",
76 | " I thought this was a wonderful way to spend ti... \n",
77 | " positive \n",
78 | " \n",
79 | " \n",
80 | " 3 \n",
81 | " Basically there's a family where a little boy ... \n",
82 | " negative \n",
83 | " \n",
84 | " \n",
85 | " 4 \n",
86 | " Petter Mattei's \"Love in the Time of Money\" is... \n",
87 | " positive \n",
88 | " \n",
89 | " \n",
90 | "
\n",
91 | "
"
92 | ],
93 | "text/plain": [
94 | " review sentiment\n",
95 | "0 One of the other reviewers has mentioned that ... positive\n",
96 | "1 A wonderful little production. The... positive\n",
97 | "2 I thought this was a wonderful way to spend ti... positive\n",
98 | "3 Basically there's a family where a little boy ... negative\n",
99 | "4 Petter Mattei's \"Love in the Time of Money\" is... positive"
100 | ]
101 | },
102 | "execution_count": 3,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "eng_data.head(5)"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 4,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "from bs4 import BeautifulSoup\n",
118 | "from nltk.tokenize.toktok import ToktokTokenizer\n",
119 | "import re\n",
120 | "import nltk\n",
121 | "\n",
122 | "def strip_html(text):\n",
123 | " soup = BeautifulSoup(text, \"html.parser\")\n",
124 | " return soup.get_text()\n",
125 | "\n",
126 | "def remove_between_square_brackets(text):\n",
127 | " return re.sub('\\[[^]]*\\]', '', text)\n",
128 | "\n",
129 | "def remove_special_characters(text, remove_digits = True):\n",
130 | " pattern=r'[^a-zA-z0-9\\s]'\n",
131 | " text=re.sub(pattern,'',text)\n",
132 | " return text\n",
133 | "\n",
134 | "def remove_stopwords(text, is_lower_case = False):\n",
135 | " tokenizer = ToktokTokenizer()\n",
136 | " stopword_list = nltk.corpus.stopwords.words('english')\n",
137 | " tokens = tokenizer.tokenize(text)\n",
138 | " tokens = [token.strip() for token in tokens]\n",
139 | " if is_lower_case:\n",
140 | " filtered_tokens = [token for token in tokens if token not in stopword_list]\n",
141 | " else:\n",
142 | " filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]\n",
143 | " filtered_text = ' '.join(filtered_tokens) \n",
144 | " return filtered_text\n",
145 | "\n",
146 | "def text_cleaning(text):\n",
147 | " text = strip_html(text)\n",
148 | " text = remove_between_square_brackets(text)\n",
149 | " text = remove_special_characters(text, remove_digits = True)\n",
150 | " text = remove_stopwords(text, is_lower_case = False)\n",
151 | " return text"
152 | ]
153 | },
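{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Note (added sketch):* `remove_stopwords` above and `word_tokenize` further below rely on NLTK corpora that are not bundled with the library itself. On a fresh environment the one-time downloads in the next cell are needed first (depending on the NLTK version, `punkt_tab` may be required instead of `punkt`). The toy string is a hypothetical example, not taken from the dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One-time NLTK resources used by this notebook:\n",
"# 'stopwords' backs remove_stopwords, 'punkt' backs word_tokenize later on.\n",
"nltk.download('stopwords')\n",
"nltk.download('punkt')\n",
"\n",
"# Quick sanity check of the full pipeline on a toy string (illustration only)\n",
"text_cleaning(\"<b>An example</b> [aside] with some stopwords and HTML!\")"
]
},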
154 | {
155 | "cell_type": "code",
156 | "execution_count": 5,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "data": {
161 | "text/plain": [
162 | "'I thought this was a wonderful way to spend time on a too hot summer weekend, sitting in the air conditioned theater and watching a light-hearted comedy. The plot is simplistic, but the dialogue is witty and the characters are likable (even the well bread suspected serial killer). While some may be disappointed when they realize this is not Match Point 2: Risk Addiction, I thought it was proof that Woody Allen is still fully in control of the style many of us have grown to love. This was the most I\\'d laughed at one of Woody\\'s comedies in years (dare I say a decade?). While I\\'ve never been impressed with Scarlet Johanson, in this she managed to tone down her \"sexy\" image and jumped right into a average, but spirited young woman. This may not be the crown jewel of his career, but it was wittier than \"Devil Wears Prada\" and more interesting than \"Superman\" a great comedy to go see with friends.'"
163 | ]
164 | },
165 | "execution_count": 5,
166 | "metadata": {},
167 | "output_type": "execute_result"
168 | }
169 | ],
170 | "source": [
171 | "eng_data[\"review\"][2]"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": 6,
177 | "metadata": {},
178 | "outputs": [
179 | {
180 | "data": {
181 | "text/plain": [
182 | "'I thought this was a wonderful way to spend time on a too hot summer weekend, sitting in the air conditioned theater and watching a light-hearted comedy. The plot is simplistic, but the dialogue is witty and the characters are likable (even the well bread suspected serial killer). While some may be disappointed when they realize this is not Match Point 2: Risk Addiction, I thought it was proof that Woody Allen is still fully in control of the style many of us have grown to love.This was the most I\\'d laughed at one of Woody\\'s comedies in years (dare I say a decade?). While I\\'ve never been impressed with Scarlet Johanson, in this she managed to tone down her \"sexy\" image and jumped right into a average, but spirited young woman.This may not be the crown jewel of his career, but it was wittier than \"Devil Wears Prada\" and more interesting than \"Superman\" a great comedy to go see with friends.'"
183 | ]
184 | },
185 | "execution_count": 6,
186 | "metadata": {},
187 | "output_type": "execute_result"
188 | }
189 | ],
190 | "source": [
191 | "strip_html(eng_data[\"review\"][2])"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 7,
197 | "metadata": {},
198 | "outputs": [],
199 | "source": [
200 | "eng_data[\"review\"] = eng_data[\"review\"].apply(text_cleaning)"
201 | ]
202 | },
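{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Note (added sketch):* the `apply` above calls `remove_stopwords` once per review, and that function rebuilds the `ToktokTokenizer` and re-reads the stopword list on every call. Over 50,000 reviews this is noticeably slow; the cell below is a minimal variant (not part of the original pipeline) that hoists both out and uses a `set` for O(1) membership tests."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: same behaviour as remove_stopwords(is_lower_case=False), built once\n",
"_tokenizer = ToktokTokenizer()\n",
"_stopwords = set(nltk.corpus.stopwords.words('english'))\n",
"\n",
"def remove_stopwords_fast(text):\n",
"    tokens = [t.strip() for t in _tokenizer.tokenize(text)]\n",
"    return ' '.join(t for t in tokens if t.lower() not in _stopwords)"
]
},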
203 | {
204 | "cell_type": "code",
205 | "execution_count": 8,
206 | "metadata": {},
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/plain": [
211 | "'wonderful little production filming technique unassuming oldtimeBBC fashion gives comforting sometimes discomforting sense realism entire piece actors extremely well chosen Michael Sheen got polari voices pat truly see seamless editing guided references Williams diary entries well worth watching terrificly written performed piece masterful production one great masters comedy life realism really comes home little things fantasy guard rather use traditional dream techniques remains solid disappears plays knowledge senses particularly scenes concerning Orton Halliwell sets particularly flat Halliwells murals decorating every surface terribly well done'"
212 | ]
213 | },
214 | "execution_count": 8,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "eng_data[\"review\"][1]"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 9,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "from nltk.tokenize import word_tokenize\n",
230 | "vocab_lst = [word_tokenize(x) for x in eng_data[\"review\"]]"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 10,
236 | "metadata": {},
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/plain": [
241 | "[('movie', 82310),\n",
242 | " ('film', 73514),\n",
243 | " ('one', 46301),\n",
244 | " ('like', 37483),\n",
245 | " ('good', 27403),\n",
246 | " ('would', 23751),\n",
247 | " ('time', 22741),\n",
248 | " ('really', 22207),\n",
249 | " ('see', 21765),\n",
250 | " ('even', 21494)]"
251 | ]
252 | },
253 | "execution_count": 10,
254 | "metadata": {},
255 | "output_type": "execute_result"
256 | }
257 | ],
258 | "source": [
259 | "from collections import Counter\n",
260 | "vocab_lst2 = [y for x in vocab_lst for y in x]\n",
261 | "Counter(vocab_lst2).most_common(10)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 11,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "vocab_lst3 = list(Counter(vocab_lst2).keys())\n",
271 | "vocab_to_index = {word: index for index, word in enumerate(vocab_lst3)}\n",
272 | "index_to_vocab = {index: word for index, word in enumerate(vocab_lst3)}"
273 | ]
274 | },
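{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Note (added sketch):* the two dicts above assign ids in order of first occurrence in the corpus (not by frequency) and are exact inverses of each other. The round-trip below is a minimal illustration of how they would be used to encode a review as integer ids and decode it back; it assumes the objects built in the preceding cells."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Encode the first cleaned review into ids, then decode it back.\n",
"sample_tokens = word_tokenize(eng_data[\"review\"][0])\n",
"encoded = [vocab_to_index[tok] for tok in sample_tokens]\n",
"decoded = [index_to_vocab[i] for i in encoded]\n",
"print(encoded[:10])\n",
"print(decoded[:10])\n",
"assert decoded == sample_tokens  # lossless: every corpus token has a unique id"
]
},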
275 | {
276 | "cell_type": "code",
277 | "execution_count": 12,
278 | "metadata": {
279 | "scrolled": true
280 | },
281 | "outputs": [
282 | {
283 | "data": {
284 | "text/plain": [
285 | "{0: 'One',\n",
286 | " 1: 'reviewers',\n",
287 | " 2: 'mentioned',\n",
288 | " 3: 'watching',\n",
289 | " 4: '1',\n",
290 | " 5: 'Oz',\n",
291 | " 6: 'episode',\n",
292 | " 7: 'youll',\n",
293 | " 8: 'hooked',\n",
294 | " 9: 'right',\n",
295 | " 10: 'exactly',\n",
296 | " 11: 'happened',\n",
297 | " 12: 'meThe',\n",
298 | " 13: 'first',\n",
299 | " 14: 'thing',\n",
300 | " 15: 'struck',\n",
301 | " 16: 'brutality',\n",
302 | " 17: 'unflinching',\n",
303 | " 18: 'scenes',\n",
304 | " 19: 'violence',\n",
305 | " 20: 'set',\n",
306 | " 21: 'word',\n",
307 | " 22: 'GO',\n",
308 | " 23: 'Trust',\n",
309 | " 24: 'show',\n",
310 | " 25: 'faint',\n",
311 | " 26: 'hearted',\n",
312 | " 27: 'timid',\n",
313 | " 28: 'pulls',\n",
314 | " 29: 'punches',\n",
315 | " 30: 'regards',\n",
316 | " 31: 'drugs',\n",
317 | " 32: 'sex',\n",
318 | " 33: 'hardcore',\n",
319 | " 34: 'classic',\n",
320 | " 35: 'use',\n",
321 | " 36: 'wordIt',\n",
322 | " 37: 'called',\n",
323 | " 38: 'OZ',\n",
324 | " 39: 'nickname',\n",
325 | " 40: 'given',\n",
326 | " 41: 'Oswald',\n",
327 | " 42: 'Maximum',\n",
328 | " 43: 'Security',\n",
329 | " 44: 'State',\n",
330 | " 45: 'Penitentary',\n",
331 | " 46: 'focuses',\n",
332 | " 47: 'mainly',\n",
333 | " 48: 'Emerald',\n",
334 | " 49: 'City',\n",
335 | " 50: 'experimental',\n",
336 | " 51: 'section',\n",
337 | " 52: 'prison',\n",
338 | " 53: 'cells',\n",
339 | " 54: 'glass',\n",
340 | " 55: 'fronts',\n",
341 | " 56: 'face',\n",
342 | " 57: 'inwards',\n",
343 | " 58: 'privacy',\n",
344 | " 59: 'high',\n",
345 | " 60: 'agenda',\n",
346 | " 61: 'Em',\n",
347 | " 62: 'home',\n",
348 | " 63: 'manyAryans',\n",
349 | " 64: 'Muslims',\n",
350 | " 65: 'gangstas',\n",
351 | " 66: 'Latinos',\n",
352 | " 67: 'Christians',\n",
353 | " 68: 'Italians',\n",
354 | " 69: 'Irish',\n",
355 | " 70: 'moreso',\n",
356 | " 71: 'scuffles',\n",
357 | " 72: 'death',\n",
358 | " 73: 'stares',\n",
359 | " 74: 'dodgy',\n",
360 | " 75: 'dealings',\n",
361 | " 76: 'shady',\n",
362 | " 77: 'agreements',\n",
363 | " 78: 'never',\n",
364 | " 79: 'far',\n",
365 | " 80: 'awayI',\n",
366 | " 81: 'would',\n",
367 | " 82: 'say',\n",
368 | " 83: 'main',\n",
369 | " 84: 'appeal',\n",
370 | " 85: 'due',\n",
371 | " 86: 'fact',\n",
372 | " 87: 'goes',\n",
373 | " 88: 'shows',\n",
374 | " 89: 'wouldnt',\n",
375 | " 90: 'dare',\n",
376 | " 91: 'Forget',\n",
377 | " 92: 'pretty',\n",
378 | " 93: 'pictures',\n",
379 | " 94: 'painted',\n",
380 | " 95: 'mainstream',\n",
381 | " 96: 'audiences',\n",
382 | " 97: 'forget',\n",
383 | " 98: 'charm',\n",
384 | " 99: 'romanceOZ',\n",
385 | " 100: 'doesnt',\n",
386 | " 101: 'mess',\n",
387 | " 102: 'around',\n",
388 | " 103: 'ever',\n",
389 | " 104: 'saw',\n",
390 | " 105: 'nasty',\n",
391 | " 106: 'surreal',\n",
392 | " 107: 'couldnt',\n",
393 | " 108: 'ready',\n",
394 | " 109: 'watched',\n",
395 | " 110: 'developed',\n",
396 | " 111: 'taste',\n",
397 | " 112: 'got',\n",
398 | " 113: 'accustomed',\n",
399 | " 114: 'levels',\n",
400 | " 115: 'graphic',\n",
401 | " 116: 'injustice',\n",
402 | " 117: 'crooked',\n",
403 | " 118: 'guards',\n",
404 | " 119: 'wholl',\n",
405 | " 120: 'sold',\n",
406 | " 121: 'nickel',\n",
407 | " 122: 'inmates',\n",
408 | " 123: 'kill',\n",
409 | " 124: 'order',\n",
410 | " 125: 'get',\n",
411 | " 126: 'away',\n",
412 | " 127: 'well',\n",
413 | " 128: 'mannered',\n",
414 | " 129: 'middle',\n",
415 | " 130: 'class',\n",
416 | " 131: 'turned',\n",
417 | " 132: 'bitches',\n",
418 | " 133: 'lack',\n",
419 | " 134: 'street',\n",
420 | " 135: 'skills',\n",
421 | " 136: 'experience',\n",
422 | " 137: 'Watching',\n",
423 | " 138: 'may',\n",
424 | " 139: 'become',\n",
425 | " 140: 'comfortable',\n",
426 | " 141: 'uncomfortable',\n",
427 | " 142: 'viewingthats',\n",
428 | " 143: 'touch',\n",
429 | " 144: 'darker',\n",
430 | " 145: 'side',\n",
431 | " 146: 'wonderful',\n",
432 | " 147: 'little',\n",
433 | " 148: 'production',\n",
434 | " 149: 'filming',\n",
435 | " 150: 'technique',\n",
436 | " 151: 'unassuming',\n",
437 | " 152: 'oldtimeBBC',\n",
438 | " 153: 'fashion',\n",
439 | " 154: 'gives',\n",
440 | " 155: 'comforting',\n",
441 | " 156: 'sometimes',\n",
442 | " 157: 'discomforting',\n",
443 | " 158: 'sense',\n",
444 | " 159: 'realism',\n",
445 | " 160: 'entire',\n",
446 | " 161: 'piece',\n",
447 | " 162: 'actors',\n",
448 | " 163: 'extremely',\n",
449 | " 164: 'chosen',\n",
450 | " 165: 'Michael',\n",
451 | " 166: 'Sheen',\n",
452 | " 167: 'polari',\n",
453 | " 168: 'voices',\n",
454 | " 169: 'pat',\n",
455 | " 170: 'truly',\n",
456 | " 171: 'see',\n",
457 | " 172: 'seamless',\n",
458 | " 173: 'editing',\n",
459 | " 174: 'guided',\n",
460 | " 175: 'references',\n",
461 | " 176: 'Williams',\n",
462 | " 177: 'diary',\n",
463 | " 178: 'entries',\n",
464 | " 179: 'worth',\n",
465 | " 180: 'terrificly',\n",
466 | " 181: 'written',\n",
467 | " 182: 'performed',\n",
468 | " 183: 'masterful',\n",
469 | " 184: 'one',\n",
470 | " 185: 'great',\n",
471 | " 186: 'masters',\n",
472 | " 187: 'comedy',\n",
473 | " 188: 'life',\n",
474 | " 189: 'really',\n",
475 | " 190: 'comes',\n",
476 | " 191: 'things',\n",
477 | " 192: 'fantasy',\n",
478 | " 193: 'guard',\n",
479 | " 194: 'rather',\n",
480 | " 195: 'traditional',\n",
481 | " 196: 'dream',\n",
482 | " 197: 'techniques',\n",
483 | " 198: 'remains',\n",
484 | " 199: 'solid',\n",
485 | " 200: 'disappears',\n",
486 | " 201: 'plays',\n",
487 | " 202: 'knowledge',\n",
488 | " 203: 'senses',\n",
489 | " 204: 'particularly',\n",
490 | " 205: 'concerning',\n",
491 | " 206: 'Orton',\n",
492 | " 207: 'Halliwell',\n",
493 | " 208: 'sets',\n",
494 | " 209: 'flat',\n",
495 | " 210: 'Halliwells',\n",
496 | " 211: 'murals',\n",
497 | " 212: 'decorating',\n",
498 | " 213: 'every',\n",
499 | " 214: 'surface',\n",
500 | " 215: 'terribly',\n",
501 | " 216: 'done',\n",
502 | " 217: 'thought',\n",
503 | " 218: 'way',\n",
504 | " 219: 'spend',\n",
505 | " 220: 'time',\n",
506 | " 221: 'hot',\n",
507 | " 222: 'summer',\n",
508 | " 223: 'weekend',\n",
509 | " 224: 'sitting',\n",
510 | " 225: 'air',\n",
511 | " 226: 'conditioned',\n",
512 | " 227: 'theater',\n",
513 | " 228: 'lighthearted',\n",
514 | " 229: 'plot',\n",
515 | " 230: 'simplistic',\n",
516 | " 231: 'dialogue',\n",
517 | " 232: 'witty',\n",
518 | " 233: 'characters',\n",
519 | " 234: 'likable',\n",
520 | " 235: 'even',\n",
521 | " 236: 'bread',\n",
522 | " 237: 'suspected',\n",
523 | " 238: 'serial',\n",
524 | " 239: 'killer',\n",
525 | " 240: 'disappointed',\n",
526 | " 241: 'realize',\n",
527 | " 242: 'Match',\n",
528 | " 243: 'Point',\n",
529 | " 244: '2',\n",
530 | " 245: 'Risk',\n",
531 | " 246: 'Addiction',\n",
532 | " 247: 'proof',\n",
533 | " 248: 'Woody',\n",
534 | " 249: 'Allen',\n",
535 | " 250: 'still',\n",
536 | " 251: 'fully',\n",
537 | " 252: 'control',\n",
538 | " 253: 'style',\n",
539 | " 254: 'many',\n",
540 | " 255: 'us',\n",
541 | " 256: 'grown',\n",
542 | " 257: 'loveThis',\n",
543 | " 258: 'Id',\n",
544 | " 259: 'laughed',\n",
545 | " 260: 'Woodys',\n",
546 | " 261: 'comedies',\n",
547 | " 262: 'years',\n",
548 | " 263: 'decade',\n",
549 | " 264: 'Ive',\n",
550 | " 265: 'impressed',\n",
551 | " 266: 'Scarlet',\n",
552 | " 267: 'Johanson',\n",
553 | " 268: 'managed',\n",
554 | " 269: 'tone',\n",
555 | " 270: 'sexy',\n",
556 | " 271: 'image',\n",
557 | " 272: 'jumped',\n",
558 | " 273: 'average',\n",
559 | " 274: 'spirited',\n",
560 | " 275: 'young',\n",
561 | " 276: 'womanThis',\n",
562 | " 277: 'crown',\n",
563 | " 278: 'jewel',\n",
564 | " 279: 'career',\n",
565 | " 280: 'wittier',\n",
566 | " 281: 'Devil',\n",
567 | " 282: 'Wears',\n",
568 | " 283: 'Prada',\n",
569 | " 284: 'interesting',\n",
570 | " 285: 'Superman',\n",
571 | " 286: 'go',\n",
572 | " 287: 'friends',\n",
573 | " 288: 'Basically',\n",
574 | " 289: 'theres',\n",
575 | " 290: 'family',\n",
576 | " 291: 'boy',\n",
577 | " 292: 'Jake',\n",
578 | " 293: 'thinks',\n",
579 | " 294: 'zombie',\n",
580 | " 295: 'closet',\n",
581 | " 296: 'parents',\n",
582 | " 297: 'fighting',\n",
583 | " 298: 'timeThis',\n",
584 | " 299: 'movie',\n",
585 | " 300: 'slower',\n",
586 | " 301: 'soap',\n",
587 | " 302: 'opera',\n",
588 | " 303: 'suddenly',\n",
589 | " 304: 'decides',\n",
590 | " 305: 'Rambo',\n",
591 | " 306: 'zombieOK',\n",
592 | " 307: 'youre',\n",
593 | " 308: 'going',\n",
594 | " 309: 'make',\n",
595 | " 310: 'film',\n",
596 | " 311: 'must',\n",
597 | " 312: 'Decide',\n",
598 | " 313: 'thriller',\n",
599 | " 314: 'drama',\n",
600 | " 315: 'watchable',\n",
601 | " 316: 'Parents',\n",
602 | " 317: 'divorcing',\n",
603 | " 318: 'arguing',\n",
604 | " 319: 'like',\n",
605 | " 320: 'real',\n",
606 | " 321: 'totally',\n",
607 | " 322: 'ruins',\n",
608 | " 323: 'expected',\n",
609 | " 324: 'BOOGEYMAN',\n",
610 | " 325: 'similar',\n",
611 | " 326: 'instead',\n",
612 | " 327: 'meaningless',\n",
613 | " 328: 'spots3',\n",
614 | " 329: '10',\n",
615 | " 330: 'playing',\n",
616 | " 331: 'descent',\n",
617 | " 332: 'dialogs',\n",
618 | " 333: 'shots',\n",
619 | " 334: 'ignore',\n",
620 | " 335: 'Petter',\n",
621 | " 336: 'Matteis',\n",
622 | " 337: 'Love',\n",
623 | " 338: 'Time',\n",
624 | " 339: 'Money',\n",
625 | " 340: 'visually',\n",
626 | " 341: 'stunning',\n",
627 | " 342: 'watch',\n",
628 | " 343: 'Mr',\n",
629 | " 344: 'Mattei',\n",
630 | " 345: 'offers',\n",
631 | " 346: 'vivid',\n",
632 | " 347: 'portrait',\n",
633 | " 348: 'human',\n",
634 | " 349: 'relations',\n",
635 | " 350: 'seems',\n",
636 | " 351: 'telling',\n",
637 | " 352: 'money',\n",
638 | " 353: 'power',\n",
639 | " 354: 'success',\n",
640 | " 355: 'people',\n",
641 | " 356: 'different',\n",
642 | " 357: 'situations',\n",
643 | " 358: 'encounter',\n",
644 | " 359: 'variation',\n",
645 | " 360: 'Arthur',\n",
646 | " 361: 'Schnitzlers',\n",
647 | " 362: 'play',\n",
648 | " 363: 'theme',\n",
649 | " 364: 'director',\n",
650 | " 365: 'transfers',\n",
651 | " 366: 'action',\n",
652 | " 367: 'present',\n",
653 | " 368: 'New',\n",
654 | " 369: 'York',\n",
655 | " 370: 'meet',\n",
656 | " 371: 'connect',\n",
657 | " 372: 'connected',\n",
658 | " 373: 'another',\n",
659 | " 374: 'next',\n",
660 | " 375: 'person',\n",
661 | " 376: 'know',\n",
662 | " 377: 'previous',\n",
663 | " 378: 'point',\n",
664 | " 379: 'contact',\n",
665 | " 380: 'Stylishly',\n",
666 | " 381: 'sophisticated',\n",
667 | " 382: 'luxurious',\n",
668 | " 383: 'look',\n",
669 | " 384: 'taken',\n",
670 | " 385: 'live',\n",
671 | " 386: 'world',\n",
672 | " 387: 'habitatThe',\n",
673 | " 388: 'gets',\n",
674 | " 389: 'souls',\n",
675 | " 390: 'picture',\n",
676 | " 391: 'stages',\n",
677 | " 392: 'loneliness',\n",
678 | " 393: 'inhabits',\n",
679 | " 394: 'big',\n",
680 | " 395: 'city',\n",
681 | " 396: 'best',\n",
682 | " 397: 'place',\n",
683 | " 398: 'find',\n",
684 | " 399: 'sincere',\n",
685 | " 400: 'fulfillment',\n",
686 | " 401: 'discerns',\n",
687 | " 402: 'case',\n",
688 | " 403: 'encounterThe',\n",
689 | " 404: 'acting',\n",
690 | " 405: 'good',\n",
691 | " 406: 'direction',\n",
692 | " 407: 'Steve',\n",
693 | " 408: 'Buscemi',\n",
694 | " 409: 'Rosario',\n",
695 | " 410: 'Dawson',\n",
696 | " 411: 'Carol',\n",
697 | " 412: 'Kane',\n",
698 | " 413: 'Imperioli',\n",
699 | " 414: 'Adrian',\n",
700 | " 415: 'Grenier',\n",
701 | " 416: 'rest',\n",
702 | " 417: 'talented',\n",
703 | " 418: 'cast',\n",
704 | " 419: 'come',\n",
705 | " 420: 'aliveWe',\n",
706 | " 421: 'wish',\n",
707 | " 422: 'luck',\n",
708 | " 423: 'await',\n",
709 | " 424: 'anxiously',\n",
710 | " 425: 'work',\n",
711 | " 426: 'Probably',\n",
712 | " 427: 'alltime',\n",
713 | " 428: 'favorite',\n",
714 | " 429: 'story',\n",
715 | " 430: 'selflessness',\n",
716 | " 431: 'sacrifice',\n",
717 | " 432: 'dedication',\n",
718 | " 433: 'noble',\n",
719 | " 434: 'cause',\n",
720 | " 435: 'preachy',\n",
721 | " 436: 'boring',\n",
722 | " 437: 'old',\n",
723 | " 438: 'despite',\n",
724 | " 439: 'seen',\n",
725 | " 440: '15',\n",
726 | " 441: 'times',\n",
727 | " 442: 'last',\n",
728 | " 443: '25',\n",
729 | " 444: 'Paul',\n",
730 | " 445: 'Lukas',\n",
731 | " 446: 'performance',\n",
732 | " 447: 'brings',\n",
733 | " 448: 'tears',\n",
734 | " 449: 'eyes',\n",
735 | " 450: 'Bette',\n",
736 | " 451: 'Davis',\n",
737 | " 452: 'sympathetic',\n",
738 | " 453: 'roles',\n",
739 | " 454: 'delight',\n",
740 | " 455: 'kids',\n",
741 | " 456: 'grandma',\n",
742 | " 457: 'says',\n",
743 | " 458: 'dressedup',\n",
744 | " 459: 'midgets',\n",
745 | " 460: 'children',\n",
746 | " 461: 'makes',\n",
747 | " 462: 'fun',\n",
748 | " 463: 'mothers',\n",
749 | " 464: 'slow',\n",
750 | " 465: 'awakening',\n",
751 | " 466: 'whats',\n",
752 | " 467: 'happening',\n",
753 | " 468: 'roof',\n",
754 | " 469: 'believable',\n",
755 | " 470: 'startling',\n",
756 | " 471: 'dozen',\n",
757 | " 472: 'thumbs',\n",
758 | " 473: 'theyd',\n",
759 | " 474: 'sure',\n",
760 | " 475: 'resurrection',\n",
761 | " 476: 'dated',\n",
762 | " 477: 'Seahunt',\n",
763 | " 478: 'series',\n",
764 | " 479: 'tech',\n",
765 | " 480: 'today',\n",
766 | " 481: 'bring',\n",
767 | " 482: 'back',\n",
768 | " 483: 'kid',\n",
769 | " 484: 'excitement',\n",
770 | " 485: 'meI',\n",
771 | " 486: 'grew',\n",
772 | " 487: 'black',\n",
773 | " 488: 'white',\n",
774 | " 489: 'TV',\n",
775 | " 490: 'Gunsmoke',\n",
776 | " 491: 'heros',\n",
777 | " 492: 'weekYou',\n",
778 | " 493: 'vote',\n",
779 | " 494: 'comeback',\n",
780 | " 495: 'new',\n",
781 | " 496: 'sea',\n",
782 | " 497: 'huntWe',\n",
783 | " 498: 'need',\n",
784 | " 499: 'change',\n",
785 | " 500: 'pace',\n",
786 | " 501: 'water',\n",
787 | " 502: 'adventureOh',\n",
788 | " 503: 'thank',\n",
789 | " 504: 'outlet',\n",
790 | " 505: 'view',\n",
791 | " 506: 'viewpoints',\n",
792 | " 507: 'moviesSo',\n",
793 | " 508: 'ole',\n",
794 | " 509: 'believe',\n",
795 | " 510: 'wan',\n",
796 | " 511: 'na',\n",
797 | " 512: 'sayWould',\n",
798 | " 513: 'nice',\n",
799 | " 514: 'read',\n",
800 | " 515: 'plus',\n",
801 | " 516: 'points',\n",
802 | " 517: 'huntIf',\n",
803 | " 518: 'rhymes',\n",
804 | " 519: 'lines',\n",
805 | " 520: 'let',\n",
806 | " 521: 'submitor',\n",
807 | " 522: 'leave',\n",
808 | " 523: 'doubt',\n",
809 | " 524: 'quitIf',\n",
810 | " 525: 'lets',\n",
811 | " 526: 'amazing',\n",
812 | " 527: 'fresh',\n",
813 | " 528: 'innovative',\n",
814 | " 529: 'idea',\n",
815 | " 530: '70s',\n",
816 | " 531: 'aired',\n",
817 | " 532: '7',\n",
818 | " 533: '8',\n",
819 | " 534: 'brilliant',\n",
820 | " 535: 'dropped',\n",
821 | " 536: '1990',\n",
822 | " 537: 'funny',\n",
823 | " 538: 'anymore',\n",
824 | " 539: 'continued',\n",
825 | " 540: 'decline',\n",
826 | " 541: 'complete',\n",
827 | " 542: 'waste',\n",
828 | " 543: 'todayIts',\n",
829 | " 544: 'disgraceful',\n",
830 | " 545: 'fallen',\n",
831 | " 546: 'writing',\n",
832 | " 547: 'painfully',\n",
833 | " 548: 'bad',\n",
834 | " 549: 'performances',\n",
835 | " 550: 'almost',\n",
836 | " 551: 'mildly',\n",
837 | " 552: 'entertaining',\n",
838 | " 553: 'respite',\n",
839 | " 554: 'guesthosts',\n",
840 | " 555: 'probably',\n",
841 | " 556: 'hard',\n",
842 | " 557: 'creator',\n",
843 | " 558: 'handselected',\n",
844 | " 559: 'original',\n",
845 | " 560: 'also',\n",
846 | " 561: 'chose',\n",
847 | " 562: 'band',\n",
848 | " 563: 'hacks',\n",
849 | " 564: 'followed',\n",
850 | " 565: 'recognize',\n",
851 | " 566: 'brilliance',\n",
852 | " 567: 'fit',\n",
853 | " 568: 'replace',\n",
854 | " 569: 'mediocrity',\n",
855 | " 570: 'felt',\n",
856 | " 571: 'give',\n",
857 | " 572: 'stars',\n",
858 | " 573: 'respect',\n",
859 | " 574: 'made',\n",
860 | " 575: 'huge',\n",
861 | " 576: 'awful',\n",
862 | " 577: 'cant',\n",
863 | " 578: 'Encouraged',\n",
864 | " 579: 'positive',\n",
865 | " 580: 'comments',\n",
866 | " 581: 'looking',\n",
867 | " 582: 'forward',\n",
868 | " 583: 'Bad',\n",
869 | " 584: 'mistake',\n",
870 | " 585: '950',\n",
871 | " 586: 'films',\n",
872 | " 587: 'worst',\n",
873 | " 588: 'pacing',\n",
874 | " 589: 'storyline',\n",
875 | " 590: 'soundtrack',\n",
876 | " 591: 'song',\n",
877 | " 592: 'lame',\n",
878 | " 593: 'country',\n",
879 | " 594: 'tune',\n",
880 | " 595: 'played',\n",
881 | " 596: 'less',\n",
882 | " 597: 'four',\n",
883 | " 598: 'looks',\n",
884 | " 599: 'cheap',\n",
885 | " 600: 'extreme',\n",
886 | " 601: 'Rarely',\n",
887 | " 602: 'happy',\n",
888 | " 603: 'end',\n",
889 | " 604: 'credits',\n",
890 | " 605: 'prevents',\n",
891 | " 606: 'giving',\n",
892 | " 607: '1score',\n",
893 | " 608: 'Harvey',\n",
894 | " 609: 'Keitel',\n",
895 | " 610: 'least',\n",
896 | " 611: 'making',\n",
897 | " 612: 'bit',\n",
898 | " 613: 'effort',\n",
899 | " 614: 'obsessives',\n",
900 | " 615: 'gut',\n",
901 | " 616: 'wrenching',\n",
902 | " 617: 'laughter',\n",
903 | " 618: 'love',\n",
904 | " 619: 'hell',\n",
905 | " 620: 'mom',\n",
906 | " 621: 'liked',\n",
907 | " 622: 'itGreat',\n",
908 | " 623: 'Camp',\n",
909 | " 624: 'Phil',\n",
910 | " 625: 'Alien',\n",
911 | " 626: 'quirky',\n",
912 | " 627: 'humour',\n",
913 | " 628: 'based',\n",
914 | " 629: 'oddness',\n",
915 | " 630: 'everything',\n",
916 | " 631: 'actual',\n",
917 | " 632: 'punchlinesAt',\n",
918 | " 633: 'odd',\n",
919 | " 634: 'progressed',\n",
920 | " 635: 'didnt',\n",
921 | " 636: 'jokes',\n",
922 | " 637: 'anymoreIts',\n",
923 | " 638: 'low',\n",
924 | " 639: 'budget',\n",
925 | " 640: 'thats',\n",
926 | " 641: 'problem',\n",
927 | " 642: 'eventually',\n",
928 | " 643: 'lost',\n",
929 | " 644: 'interestI',\n",
930 | " 645: 'imagine',\n",
931 | " 646: 'stoner',\n",
932 | " 647: 'currently',\n",
933 | " 648: 'partakingFor',\n",
934 | " 649: 'something',\n",
935 | " 650: 'better',\n",
936 | " 651: 'try',\n",
937 | " 652: 'Brother',\n",
938 | " 653: 'planet',\n",
939 | " 654: '12',\n",
940 | " 655: 'came',\n",
941 | " 656: 'recall',\n",
942 | " 657: 'scariest',\n",
943 | " 658: 'scene',\n",
944 | " 659: 'bird',\n",
945 | " 660: 'eating',\n",
946 | " 661: 'men',\n",
947 | " 662: 'dangling',\n",
948 | " 663: 'helplessly',\n",
949 | " 664: 'parachutes',\n",
950 | " 665: 'horror',\n",
951 | " 666: 'horrorAs',\n",
952 | " 667: 'cheesy',\n",
953 | " 668: 'B',\n",
954 | " 669: 'Saturday',\n",
955 | " 670: 'afternoons',\n",
956 | " 671: 'tired',\n",
957 | " 672: 'formula',\n",
958 | " 673: 'monster',\n",
959 | " 674: 'type',\n",
960 | " 675: 'movies',\n",
961 | " 676: 'usually',\n",
962 | " 677: 'included',\n",
963 | " 678: 'hero',\n",
964 | " 679: 'beautiful',\n",
965 | " 680: 'woman',\n",
966 | " 681: 'might',\n",
967 | " 682: 'daughter',\n",
968 | " 683: 'professor',\n",
969 | " 684: 'resolution',\n",
970 | " 685: 'died',\n",
971 | " 686: 'care',\n",
972 | " 687: 'much',\n",
973 | " 688: 'romantic',\n",
974 | " 689: 'angle',\n",
975 | " 690: 'year',\n",
976 | " 691: 'predictable',\n",
977 | " 692: 'plots',\n",
978 | " 693: 'unintentional',\n",
979 | " 694: 'humorBut',\n",
980 | " 695: 'later',\n",
981 | " 696: 'Psycho',\n",
982 | " 697: 'loved',\n",
983 | " 698: 'star',\n",
984 | " 699: 'Janet',\n",
985 | " 700: 'Leigh',\n",
986 | " 701: 'bumped',\n",
987 | " 702: 'early',\n",
988 | " 703: 'sat',\n",
989 | " 704: 'took',\n",
990 | " 705: 'notice',\n",
991 | " 706: 'Since',\n",
992 | " 707: 'screenwriters',\n",
993 | " 708: 'scary',\n",
994 | " 709: 'possible',\n",
995 | " 710: 'wellworn',\n",
996 | " 711: 'rules',\n",
997 | " 712: 'im',\n",
998 | " 713: 'fan',\n",
999 | " 714: 'Bolls',\n",
1000 | " 715: 'enjoyed',\n",
1001 | " 716: 'Postal',\n",
1002 | " 717: 'maybe',\n",
1003 | " 718: 'Boll',\n",
1004 | " 719: 'apparently',\n",
1005 | " 720: 'bought',\n",
1006 | " 721: 'rights',\n",
1007 | " 722: 'Far',\n",
1008 | " 723: 'Cry',\n",
1009 | " 724: 'long',\n",
1010 | " 725: 'ago',\n",
1011 | " 726: 'game',\n",
1012 | " 727: 'finsished',\n",
1013 | " 728: 'People',\n",
1014 | " 729: 'killing',\n",
1015 | " 730: 'mercs',\n",
1016 | " 731: 'infiltrating',\n",
1017 | " 732: 'secret',\n",
1018 | " 733: 'research',\n",
1019 | " 734: 'labs',\n",
1020 | " 735: 'located',\n",
1021 | " 736: 'tropical',\n",
1022 | " 737: 'island',\n",
1023 | " 738: 'warned',\n",
1024 | " 739: 'schemed',\n",
1025 | " 740: 'together',\n",
1026 | " 741: 'along',\n",
1027 | " 742: 'legion',\n",
1028 | " 743: 'schmucks',\n",
1029 | " 744: 'Feeling',\n",
1030 | " 745: 'loneley',\n",
1031 | " 746: 'invites',\n",
1032 | " 747: 'three',\n",
1033 | " 748: 'countrymen',\n",
1034 | " 749: 'players',\n",
1035 | " 750: 'names',\n",
1036 | " 751: 'Til',\n",
1037 | " 752: 'Schweiger',\n",
1038 | " 753: 'Udo',\n",
1039 | " 754: 'Kier',\n",
1040 | " 755: 'Ralf',\n",
1041 | " 756: 'MoellerThree',\n",
1042 | " 757: 'actually',\n",
1043 | " 758: 'selfs',\n",
1044 | " 759: 'biz',\n",
1045 | " 760: 'tale',\n",
1046 | " 761: 'Jack',\n",
1047 | " 762: 'Carver',\n",
1048 | " 763: 'yes',\n",
1049 | " 764: 'German',\n",
1050 | " 765: 'hail',\n",
1051 | " 766: 'bratwurst',\n",
1052 | " 767: 'dudes',\n",
1053 | " 768: 'However',\n",
1054 | " 769: 'Tils',\n",
1055 | " 770: 'badass',\n",
1056 | " 771: 'complained',\n",
1057 | " 772: 'hes',\n",
1058 | " 773: 'staying',\n",
1059 | " 774: 'true',\n",
1060 | " 775: 'whole',\n",
1061 | " 776: 'carver',\n",
1062 | " 777: 'perspective',\n",
1063 | " 778: 'dont',\n",
1064 | " 779: 'looked',\n",
1065 | " 780: 'kicking',\n",
1066 | " 781: 'beyond',\n",
1067 | " 782: 'demented',\n",
1068 | " 783: 'evil',\n",
1069 | " 784: 'mad',\n",
1070 | " 785: 'scientist',\n",
1071 | " 786: 'Dr',\n",
1072 | " 787: 'Krieger',\n",
1073 | " 788: 'GeneticallyMutatedsoldiers',\n",
1074 | " 789: 'GMS',\n",
1075 | " 790: 'Performing',\n",
1076 | " 791: 'topsecret',\n",
1077 | " 792: 'reminds',\n",
1078 | " 793: 'SPOILER',\n",
1079 | " 794: 'Vancouver',\n",
1080 | " 795: 'reason',\n",
1081 | " 796: 'Thats',\n",
1082 | " 797: 'palm',\n",
1083 | " 798: 'trees',\n",
1084 | " 799: 'Instead',\n",
1085 | " 800: 'rich',\n",
1086 | " 801: 'lumberjackwoods',\n",
1087 | " 802: 'havent',\n",
1088 | " 803: 'gone',\n",
1089 | " 804: 'FAR',\n",
1090 | " 805: 'started',\n",
1091 | " 806: 'CRY',\n",
1092 | " 807: 'mehehe',\n",
1093 | " 808: 'can',\n",
1094 | " 809: 'not',\n",
1095 | " 810: 'stay',\n",
1096 | " 811: 'shenanigans',\n",
1097 | " 812: 'delivers',\n",
1098 | " 813: 'meaning',\n",
1099 | " 814: 'suckThere',\n",
1100 | " 815: 'mentioning',\n",
1101 | " 816: 'imply',\n",
1102 | " 817: 'areas',\n",
1103 | " 818: 'boat',\n",
1104 | " 819: 'cromedalbino',\n",
1105 | " 820: 'squad',\n",
1106 | " 821: 'enters',\n",
1107 | " 822: 'laugh',\n",
1108 | " 823: 'reeks',\n",
1109 | " 824: 'scheisse',\n",
1110 | " 825: 'poop',\n",
1111 | " 826: 'simpletons',\n",
1112 | " 827: 'take',\n",
1113 | " 828: 'wiff',\n",
1114 | " 829: 'ahead',\n",
1115 | " 830: 'BTW',\n",
1116 | " 831: 'annoying',\n",
1117 | " 832: 'sidekick',\n",
1118 | " 833: 'shoot',\n",
1119 | " 834: 'minutes',\n",
1120 | " 835: 'screen',\n",
1121 | " 836: 'ShakespeareShakespeare',\n",
1122 | " 837: 'lostI',\n",
1123 | " 838: 'appreciate',\n",
1124 | " 839: 'trying',\n",
1125 | " 840: 'Shakespeare',\n",
1126 | " 841: 'masses',\n",
1127 | " 842: 'ruin',\n",
1128 | " 843: 'goodIs',\n",
1129 | " 844: 'Scottish',\n",
1130 | " 845: 'Play',\n",
1131 | " 846: 'certain',\n",
1132 | " 847: 'Rev',\n",
1133 | " 848: 'Bowdler',\n",
1134 | " 849: 'hence',\n",
1135 | " 850: 'bowdlerization',\n",
1136 | " 851: 'tried',\n",
1137 | " 852: 'Victorian',\n",
1138 | " 853: 'eraIn',\n",
1139 | " 854: 'words',\n",
1140 | " 855: 'improve',\n",
1141 | " 856: 'perfectionI',\n",
1142 | " 857: 'write',\n",
1143 | " 858: 'ten',\n",
1144 | " 859: 'text',\n",
1145 | " 860: 'English',\n",
1146 | " 861: 'composition',\n",
1147 | " 862: 'forte',\n",
1148 | " 863: 'keep',\n",
1149 | " 864: 'saying',\n",
1150 | " 865: 'cut',\n",
1151 | " 866: 'fantastic',\n",
1152 | " 867: 'prisoners',\n",
1153 | " 868: 'famous',\n",
1154 | " 869: 'george',\n",
1155 | " 870: 'clooney',\n",
1156 | " 871: 'Im',\n",
1157 | " 872: 'roll',\n",
1158 | " 873: 'Another',\n",
1159 | " 874: 'man',\n",
1160 | " 875: 'constant',\n",
1161 | " 876: 'sorrow',\n",
1162 | " 877: 'recommand',\n",
1163 | " 878: 'everybody',\n",
1164 | " 879: 'Greetings',\n",
1165 | " 880: 'Bart',\n",
1166 | " 881: 'Kind',\n",
1167 | " 882: 'drawn',\n",
1168 | " 883: 'erotic',\n",
1169 | " 884: 'amateurish',\n",
1170 | " 885: 'unbelievable',\n",
1171 | " 886: 'bits',\n",
1172 | " 887: 'Sort',\n",
1173 | " 888: 'school',\n",
1174 | " 889: 'project',\n",
1175 | " 890: 'Rosanna',\n",
1176 | " 891: 'Arquette',\n",
1177 | " 892: 'thinking',\n",
1178 | " 893: 'stock',\n",
1179 | " 894: 'bizarre',\n",
1180 | " 895: 'supposed',\n",
1181 | " 896: 'Midwest',\n",
1182 | " 897: 'town',\n",
1183 | " 898: 'Pretty',\n",
1184 | " 899: 'involved',\n",
1185 | " 900: 'lessons',\n",
1186 | " 901: 'learned',\n",
1187 | " 902: 'insights',\n",
1188 | " 903: 'stilted',\n",
1189 | " 904: 'quite',\n",
1190 | " 905: 'ridiculous',\n",
1191 | " 906: 'lots',\n",
1192 | " 907: 'skin',\n",
1193 | " 908: 'intrigues',\n",
1194 | " 909: 'videotaped',\n",
1195 | " 910: 'nonsenseWhat',\n",
1196 | " 911: 'bisexual',\n",
1197 | " 912: 'relationship',\n",
1198 | " 913: 'nowhere',\n",
1199 | " 914: 'heterosexual',\n",
1200 | " 915: 'encounters',\n",
1201 | " 916: 'absurd',\n",
1202 | " 917: 'dance',\n",
1203 | " 918: 'stereotyped',\n",
1204 | " 919: 'Give',\n",
1205 | " 920: 'pass',\n",
1206 | " 921: 'million',\n",
1207 | " 922: 'miles',\n",
1208 | " 923: 'wasted',\n",
1209 | " 924: 'could',\n",
1210 | " 925: 'spent',\n",
1211 | " 926: 'starving',\n",
1212 | " 927: 'Aids',\n",
1213 | " 928: 'Africa',\n",
1214 | " 929: 'simply',\n",
1215 | " 930: 'remade',\n",
1216 | " 931: 'fails',\n",
1217 | " 932: 'capture',\n",
1218 | " 933: 'flavor',\n",
1219 | " 934: 'terror',\n",
1220 | " 935: '1963',\n",
1221 | " 936: 'title',\n",
1222 | " 937: 'Liam',\n",
1223 | " 938: 'Neeson',\n",
1224 | " 939: 'excellent',\n",
1225 | " 940: 'always',\n",
1226 | " 941: 'holds',\n",
1227 | " 942: 'exception',\n",
1228 | " 943: 'Owen',\n",
1229 | " 944: 'Wilson',\n",
1230 | " 945: 'feel',\n",
1231 | " 946: 'character',\n",
1232 | " 947: 'Luke',\n",
1233 | " 948: 'major',\n",
1234 | " 949: 'fault',\n",
1235 | " 950: 'version',\n",
1236 | " 951: 'strayed',\n",
1237 | " 952: 'Shirley',\n",
1238 | " 953: 'Jackson',\n",
1239 | " 954: 'attempts',\n",
1240 | " 955: 'grandiose',\n",
1241 | " 956: 'thrill',\n",
1242 | " 957: 'earlier',\n",
1243 | " 958: 'trade',\n",
1244 | " 959: 'snazzier',\n",
1245 | " 960: 'special',\n",
1246 | " 961: 'effects',\n",
1247 | " 962: 'enjoy',\n",
1248 | " 963: 'friction',\n",
1249 | " 964: 'older',\n",
1250 | " 965: 'top',\n",
1251 | " 966: 'Horrible',\n",
1252 | " 967: 'wasnt',\n",
1253 | " 968: 'continuous',\n",
1254 | " 969: 'minute',\n",
1255 | " 970: 'fight',\n",
1256 | " 971: 'chance',\n",
1257 | " 972: 'development',\n",
1258 | " 973: 'busy',\n",
1259 | " 974: 'running',\n",
1260 | " 975: 'sword',\n",
1261 | " 976: 'emotional',\n",
1262 | " 977: 'attachment',\n",
1263 | " 978: 'except',\n",
1264 | " 979: 'machine',\n",
1265 | " 980: 'wanted',\n",
1266 | " 981: 'destroy',\n",
1267 | " 982: 'Scenes',\n",
1268 | " 983: 'blatantly',\n",
1269 | " 984: 'stolen',\n",
1270 | " 985: 'LOTR',\n",
1271 | " 986: 'Star',\n",
1272 | " 987: 'Wars',\n",
1273 | " 988: 'Matrix',\n",
1274 | " 989: 'ExamplesThe',\n",
1275 | " 990: 'ghost',\n",
1276 | " 991: 'final',\n",
1277 | " 992: 'Yoda',\n",
1278 | " 993: 'Obee',\n",
1279 | " 994: 'Vader',\n",
1280 | " 995: 'spider',\n",
1281 | " 996: 'beginning',\n",
1282 | " 997: 'Frodo',\n",
1283 | " 998: 'attacked',\n",
1284 | " 999: 'Return',\n",
1285 | " ...}"
1286 | ]
1287 | },
1288 | "execution_count": 12,
1289 | "metadata": {},
1290 | "output_type": "execute_result"
1291 | }
1292 | ],
1293 | "source": [
1294 | "index_to_vocab"
1295 | ]
1296 | },
1297 | {
1298 | "cell_type": "code",
1299 | "execution_count": 13,
1300 | "metadata": {
1301 | "scrolled": true
1302 | },
1303 | "outputs": [
1304 | {
1305 | "data": {
1306 | "text/plain": [
1307 | "{'One': 0,\n",
1308 | " 'reviewers': 1,\n",
1309 | " 'mentioned': 2,\n",
1310 | " 'watching': 3,\n",
1311 | " '1': 4,\n",
1312 | " 'Oz': 5,\n",
1313 | " 'episode': 6,\n",
1314 | " 'youll': 7,\n",
1315 | " 'hooked': 8,\n",
1316 | " 'right': 9,\n",
1317 | " 'exactly': 10,\n",
1318 | " 'happened': 11,\n",
1319 | " 'meThe': 12,\n",
1320 | " 'first': 13,\n",
1321 | " 'thing': 14,\n",
1322 | " 'struck': 15,\n",
1323 | " 'brutality': 16,\n",
1324 | " 'unflinching': 17,\n",
1325 | " 'scenes': 18,\n",
1326 | " 'violence': 19,\n",
1327 | " 'set': 20,\n",
1328 | " 'word': 21,\n",
1329 | " 'GO': 22,\n",
1330 | " 'Trust': 23,\n",
1331 | " 'show': 24,\n",
1332 | " 'faint': 25,\n",
1333 | " 'hearted': 26,\n",
1334 | " 'timid': 27,\n",
1335 | " 'pulls': 28,\n",
1336 | " 'punches': 29,\n",
1337 | " 'regards': 30,\n",
1338 | " 'drugs': 31,\n",
1339 | " 'sex': 32,\n",
1340 | " 'hardcore': 33,\n",
1341 | " 'classic': 34,\n",
1342 | " 'use': 35,\n",
1343 | " 'wordIt': 36,\n",
1344 | " 'called': 37,\n",
1345 | " 'OZ': 38,\n",
1346 | " 'nickname': 39,\n",
1347 | " 'given': 40,\n",
1348 | " 'Oswald': 41,\n",
1349 | " 'Maximum': 42,\n",
1350 | " 'Security': 43,\n",
1351 | " 'State': 44,\n",
1352 | " 'Penitentary': 45,\n",
1353 | " 'focuses': 46,\n",
1354 | " 'mainly': 47,\n",
1355 | " 'Emerald': 48,\n",
1356 | " 'City': 49,\n",
1357 | " 'experimental': 50,\n",
1358 | " 'section': 51,\n",
1359 | " 'prison': 52,\n",
1360 | " 'cells': 53,\n",
1361 | " 'glass': 54,\n",
1362 | " 'fronts': 55,\n",
1363 | " 'face': 56,\n",
1364 | " 'inwards': 57,\n",
1365 | " 'privacy': 58,\n",
1366 | " 'high': 59,\n",
1367 | " 'agenda': 60,\n",
1368 | " 'Em': 61,\n",
1369 | " 'home': 62,\n",
1370 | " 'manyAryans': 63,\n",
1371 | " 'Muslims': 64,\n",
1372 | " 'gangstas': 65,\n",
1373 | " 'Latinos': 66,\n",
1374 | " 'Christians': 67,\n",
1375 | " 'Italians': 68,\n",
1376 | " 'Irish': 69,\n",
1377 | " 'moreso': 70,\n",
1378 | " 'scuffles': 71,\n",
1379 | " 'death': 72,\n",
1380 | " 'stares': 73,\n",
1381 | " 'dodgy': 74,\n",
1382 | " 'dealings': 75,\n",
1383 | " 'shady': 76,\n",
1384 | " 'agreements': 77,\n",
1385 | " 'never': 78,\n",
1386 | " 'far': 79,\n",
1387 | " 'awayI': 80,\n",
1388 | " 'would': 81,\n",
1389 | " 'say': 82,\n",
1390 | " 'main': 83,\n",
1391 | " 'appeal': 84,\n",
1392 | " 'due': 85,\n",
1393 | " 'fact': 86,\n",
1394 | " 'goes': 87,\n",
1395 | " 'shows': 88,\n",
1396 | " 'wouldnt': 89,\n",
1397 | " 'dare': 90,\n",
1398 | " 'Forget': 91,\n",
1399 | " 'pretty': 92,\n",
1400 | " 'pictures': 93,\n",
1401 | " 'painted': 94,\n",
1402 | " 'mainstream': 95,\n",
1403 | " 'audiences': 96,\n",
1404 | " 'forget': 97,\n",
1405 | " 'charm': 98,\n",
1406 | " 'romanceOZ': 99,\n",
1407 | " 'doesnt': 100,\n",
1408 | " 'mess': 101,\n",
1409 | " 'around': 102,\n",
1410 | " 'ever': 103,\n",
1411 | " 'saw': 104,\n",
1412 | " 'nasty': 105,\n",
1413 | " 'surreal': 106,\n",
1414 | " 'couldnt': 107,\n",
1415 | " 'ready': 108,\n",
1416 | " 'watched': 109,\n",
1417 | " 'developed': 110,\n",
1418 | " 'taste': 111,\n",
1419 | " 'got': 112,\n",
1420 | " 'accustomed': 113,\n",
1421 | " 'levels': 114,\n",
1422 | " 'graphic': 115,\n",
1423 | " 'injustice': 116,\n",
1424 | " 'crooked': 117,\n",
1425 | " 'guards': 118,\n",
1426 | " 'wholl': 119,\n",
1427 | " 'sold': 120,\n",
1428 | " 'nickel': 121,\n",
1429 | " 'inmates': 122,\n",
1430 | " 'kill': 123,\n",
1431 | " 'order': 124,\n",
1432 | " 'get': 125,\n",
1433 | " 'away': 126,\n",
1434 | " 'well': 127,\n",
1435 | " 'mannered': 128,\n",
1436 | " 'middle': 129,\n",
1437 | " 'class': 130,\n",
1438 | " 'turned': 131,\n",
1439 | " 'bitches': 132,\n",
1440 | " 'lack': 133,\n",
1441 | " 'street': 134,\n",
1442 | " 'skills': 135,\n",
1443 | " 'experience': 136,\n",
1444 | " 'Watching': 137,\n",
1445 | " 'may': 138,\n",
1446 | " 'become': 139,\n",
1447 | " 'comfortable': 140,\n",
1448 | " 'uncomfortable': 141,\n",
1449 | " 'viewingthats': 142,\n",
1450 | " 'touch': 143,\n",
1451 | " 'darker': 144,\n",
1452 | " 'side': 145,\n",
1453 | " 'wonderful': 146,\n",
1454 | " 'little': 147,\n",
1455 | " 'production': 148,\n",
1456 | " 'filming': 149,\n",
1457 | " 'technique': 150,\n",
1458 | " 'unassuming': 151,\n",
1459 | " 'oldtimeBBC': 152,\n",
1460 | " 'fashion': 153,\n",
1461 | " 'gives': 154,\n",
1462 | " 'comforting': 155,\n",
1463 | " 'sometimes': 156,\n",
1464 | " 'discomforting': 157,\n",
1465 | " 'sense': 158,\n",
1466 | " 'realism': 159,\n",
1467 | " 'entire': 160,\n",
1468 | " 'piece': 161,\n",
1469 | " 'actors': 162,\n",
1470 | " 'extremely': 163,\n",
1471 | " 'chosen': 164,\n",
1472 | " 'Michael': 165,\n",
1473 | " 'Sheen': 166,\n",
1474 | " 'polari': 167,\n",
1475 | " 'voices': 168,\n",
1476 | " 'pat': 169,\n",
1477 | " 'truly': 170,\n",
1478 | " 'see': 171,\n",
1479 | " 'seamless': 172,\n",
1480 | " 'editing': 173,\n",
1481 | " 'guided': 174,\n",
1482 | " 'references': 175,\n",
1483 | " 'Williams': 176,\n",
1484 | " 'diary': 177,\n",
1485 | " 'entries': 178,\n",
1486 | " 'worth': 179,\n",
1487 | " 'terrificly': 180,\n",
1488 | " 'written': 181,\n",
1489 | " 'performed': 182,\n",
1490 | " 'masterful': 183,\n",
1491 | " 'one': 184,\n",
1492 | " 'great': 185,\n",
1493 | " 'masters': 186,\n",
1494 | " 'comedy': 187,\n",
1495 | " 'life': 188,\n",
1496 | " 'really': 189,\n",
1497 | " 'comes': 190,\n",
1498 | " 'things': 191,\n",
1499 | " 'fantasy': 192,\n",
1500 | " 'guard': 193,\n",
1501 | " 'rather': 194,\n",
1502 | " 'traditional': 195,\n",
1503 | " 'dream': 196,\n",
1504 | " 'techniques': 197,\n",
1505 | " 'remains': 198,\n",
1506 | " 'solid': 199,\n",
1507 | " 'disappears': 200,\n",
1508 | " 'plays': 201,\n",
1509 | " 'knowledge': 202,\n",
1510 | " 'senses': 203,\n",
1511 | " 'particularly': 204,\n",
1512 | " 'concerning': 205,\n",
1513 | " 'Orton': 206,\n",
1514 | " 'Halliwell': 207,\n",
1515 | " 'sets': 208,\n",
1516 | " 'flat': 209,\n",
1517 | " 'Halliwells': 210,\n",
1518 | " 'murals': 211,\n",
1519 | " 'decorating': 212,\n",
1520 | " 'every': 213,\n",
1521 | " 'surface': 214,\n",
1522 | " 'terribly': 215,\n",
1523 | " 'done': 216,\n",
1524 | " 'thought': 217,\n",
1525 | " 'way': 218,\n",
1526 | " 'spend': 219,\n",
1527 | " 'time': 220,\n",
1528 | " 'hot': 221,\n",
1529 | " 'summer': 222,\n",
1530 | " 'weekend': 223,\n",
1531 | " 'sitting': 224,\n",
1532 | " 'air': 225,\n",
1533 | " 'conditioned': 226,\n",
1534 | " 'theater': 227,\n",
1535 | " 'lighthearted': 228,\n",
1536 | " 'plot': 229,\n",
1537 | " 'simplistic': 230,\n",
1538 | " 'dialogue': 231,\n",
1539 | " 'witty': 232,\n",
1540 | " 'characters': 233,\n",
1541 | " 'likable': 234,\n",
1542 | " 'even': 235,\n",
1543 | " 'bread': 236,\n",
1544 | " 'suspected': 237,\n",
1545 | " 'serial': 238,\n",
1546 | " 'killer': 239,\n",
1547 | " 'disappointed': 240,\n",
1548 | " 'realize': 241,\n",
1549 | " 'Match': 242,\n",
1550 | " 'Point': 243,\n",
1551 | " '2': 244,\n",
1552 | " 'Risk': 245,\n",
1553 | " 'Addiction': 246,\n",
1554 | " 'proof': 247,\n",
1555 | " 'Woody': 248,\n",
1556 | " 'Allen': 249,\n",
1557 | " 'still': 250,\n",
1558 | " 'fully': 251,\n",
1559 | " 'control': 252,\n",
1560 | " 'style': 253,\n",
1561 | " 'many': 254,\n",
1562 | " 'us': 255,\n",
1563 | " 'grown': 256,\n",
1564 | " 'loveThis': 257,\n",
1565 | " 'Id': 258,\n",
1566 | " 'laughed': 259,\n",
1567 | " 'Woodys': 260,\n",
1568 | " 'comedies': 261,\n",
1569 | " 'years': 262,\n",
1570 | " 'decade': 263,\n",
1571 | " 'Ive': 264,\n",
1572 | " 'impressed': 265,\n",
1573 | " 'Scarlet': 266,\n",
1574 | " 'Johanson': 267,\n",
1575 | " 'managed': 268,\n",
1576 | " 'tone': 269,\n",
1577 | " 'sexy': 270,\n",
1578 | " 'image': 271,\n",
1579 | " 'jumped': 272,\n",
1580 | " 'average': 273,\n",
1581 | " 'spirited': 274,\n",
1582 | " 'young': 275,\n",
1583 | " 'womanThis': 276,\n",
1584 | " 'crown': 277,\n",
1585 | " 'jewel': 278,\n",
1586 | " 'career': 279,\n",
1587 | " 'wittier': 280,\n",
1588 | " 'Devil': 281,\n",
1589 | " 'Wears': 282,\n",
1590 | " 'Prada': 283,\n",
1591 | " 'interesting': 284,\n",
1592 | " 'Superman': 285,\n",
1593 | " 'go': 286,\n",
1594 | " 'friends': 287,\n",
1595 | " 'Basically': 288,\n",
1596 | " 'theres': 289,\n",
1597 | " 'family': 290,\n",
1598 | " 'boy': 291,\n",
1599 | " 'Jake': 292,\n",
1600 | " 'thinks': 293,\n",
1601 | " 'zombie': 294,\n",
1602 | " 'closet': 295,\n",
1603 | " 'parents': 296,\n",
1604 | " 'fighting': 297,\n",
1605 | " 'timeThis': 298,\n",
1606 | " 'movie': 299,\n",
1607 | " 'slower': 300,\n",
1608 | " 'soap': 301,\n",
1609 | " 'opera': 302,\n",
1610 | " 'suddenly': 303,\n",
1611 | " 'decides': 304,\n",
1612 | " 'Rambo': 305,\n",
1613 | " 'zombieOK': 306,\n",
1614 | " 'youre': 307,\n",
1615 | " 'going': 308,\n",
1616 | " 'make': 309,\n",
1617 | " 'film': 310,\n",
1618 | " 'must': 311,\n",
1619 | " 'Decide': 312,\n",
1620 | " 'thriller': 313,\n",
1621 | " 'drama': 314,\n",
1622 | " 'watchable': 315,\n",
1623 | " 'Parents': 316,\n",
1624 | " 'divorcing': 317,\n",
1625 | " 'arguing': 318,\n",
1626 | " 'like': 319,\n",
1627 | " 'real': 320,\n",
1628 | " 'totally': 321,\n",
1629 | " 'ruins': 322,\n",
1630 | " 'expected': 323,\n",
1631 | " 'BOOGEYMAN': 324,\n",
1632 | " 'similar': 325,\n",
1633 | " 'instead': 326,\n",
1634 | " 'meaningless': 327,\n",
1635 | " 'spots3': 328,\n",
1636 | " '10': 329,\n",
1637 | " 'playing': 330,\n",
1638 | " 'descent': 331,\n",
1639 | " 'dialogs': 332,\n",
1640 | " 'shots': 333,\n",
1641 | " 'ignore': 334,\n",
1642 | " 'Petter': 335,\n",
1643 | " 'Matteis': 336,\n",
1644 | " 'Love': 337,\n",
1645 | " 'Time': 338,\n",
1646 | " 'Money': 339,\n",
1647 | " 'visually': 340,\n",
1648 | " 'stunning': 341,\n",
1649 | " 'watch': 342,\n",
1650 | " 'Mr': 343,\n",
1651 | " 'Mattei': 344,\n",
1652 | " 'offers': 345,\n",
1653 | " 'vivid': 346,\n",
1654 | " 'portrait': 347,\n",
1655 | " 'human': 348,\n",
1656 | " 'relations': 349,\n",
1657 | " 'seems': 350,\n",
1658 | " 'telling': 351,\n",
1659 | " 'money': 352,\n",
1660 | " 'power': 353,\n",
1661 | " 'success': 354,\n",
1662 | " 'people': 355,\n",
1663 | " 'different': 356,\n",
1664 | " 'situations': 357,\n",
1665 | " 'encounter': 358,\n",
1666 | " 'variation': 359,\n",
1667 | " 'Arthur': 360,\n",
1668 | " 'Schnitzlers': 361,\n",
1669 | " 'play': 362,\n",
1670 | " 'theme': 363,\n",
1671 | " 'director': 364,\n",
1672 | " 'transfers': 365,\n",
1673 | " 'action': 366,\n",
1674 | " 'present': 367,\n",
1675 | " 'New': 368,\n",
1676 | " 'York': 369,\n",
1677 | " 'meet': 370,\n",
1678 | " 'connect': 371,\n",
1679 | " 'connected': 372,\n",
1680 | " 'another': 373,\n",
1681 | " 'next': 374,\n",
1682 | " 'person': 375,\n",
1683 | " 'know': 376,\n",
1684 | " 'previous': 377,\n",
1685 | " 'point': 378,\n",
1686 | " 'contact': 379,\n",
1687 | " 'Stylishly': 380,\n",
1688 | " 'sophisticated': 381,\n",
1689 | " 'luxurious': 382,\n",
1690 | " 'look': 383,\n",
1691 | " 'taken': 384,\n",
1692 | " 'live': 385,\n",
1693 | " 'world': 386,\n",
1694 | " 'habitatThe': 387,\n",
1695 | " 'gets': 388,\n",
1696 | " 'souls': 389,\n",
1697 | " 'picture': 390,\n",
1698 | " 'stages': 391,\n",
1699 | " 'loneliness': 392,\n",
1700 | " 'inhabits': 393,\n",
1701 | " 'big': 394,\n",
1702 | " 'city': 395,\n",
1703 | " 'best': 396,\n",
1704 | " 'place': 397,\n",
1705 | " 'find': 398,\n",
1706 | " 'sincere': 399,\n",
1707 | " 'fulfillment': 400,\n",
1708 | " 'discerns': 401,\n",
1709 | " 'case': 402,\n",
1710 | " 'encounterThe': 403,\n",
1711 | " 'acting': 404,\n",
1712 | " 'good': 405,\n",
1713 | " 'direction': 406,\n",
1714 | " 'Steve': 407,\n",
1715 | " 'Buscemi': 408,\n",
1716 | " 'Rosario': 409,\n",
1717 | " 'Dawson': 410,\n",
1718 | " 'Carol': 411,\n",
1719 | " 'Kane': 412,\n",
1720 | " 'Imperioli': 413,\n",
1721 | " 'Adrian': 414,\n",
1722 | " 'Grenier': 415,\n",
1723 | " 'rest': 416,\n",
1724 | " 'talented': 417,\n",
1725 | " 'cast': 418,\n",
1726 | " 'come': 419,\n",
1727 | " 'aliveWe': 420,\n",
1728 | " 'wish': 421,\n",
1729 | " 'luck': 422,\n",
1730 | " 'await': 423,\n",
1731 | " 'anxiously': 424,\n",
1732 | " 'work': 425,\n",
1733 | " 'Probably': 426,\n",
1734 | " 'alltime': 427,\n",
1735 | " 'favorite': 428,\n",
1736 | " 'story': 429,\n",
1737 | " 'selflessness': 430,\n",
1738 | " 'sacrifice': 431,\n",
1739 | " 'dedication': 432,\n",
1740 | " 'noble': 433,\n",
1741 | " 'cause': 434,\n",
1742 | " 'preachy': 435,\n",
1743 | " 'boring': 436,\n",
1744 | " 'old': 437,\n",
1745 | " 'despite': 438,\n",
1746 | " 'seen': 439,\n",
1747 | " '15': 440,\n",
1748 | " 'times': 441,\n",
1749 | " 'last': 442,\n",
1750 | " '25': 443,\n",
1751 | " 'Paul': 444,\n",
1752 | " 'Lukas': 445,\n",
1753 | " 'performance': 446,\n",
1754 | " 'brings': 447,\n",
1755 | " 'tears': 448,\n",
1756 | " 'eyes': 449,\n",
1757 | " 'Bette': 450,\n",
1758 | " 'Davis': 451,\n",
1759 | " 'sympathetic': 452,\n",
1760 | " 'roles': 453,\n",
1761 | " 'delight': 454,\n",
1762 | " 'kids': 455,\n",
1763 | " 'grandma': 456,\n",
1764 | " 'says': 457,\n",
1765 | " 'dressedup': 458,\n",
1766 | " 'midgets': 459,\n",
1767 | " 'children': 460,\n",
1768 | " 'makes': 461,\n",
1769 | " 'fun': 462,\n",
1770 | " 'mothers': 463,\n",
1771 | " 'slow': 464,\n",
1772 | " 'awakening': 465,\n",
1773 | " 'whats': 466,\n",
1774 | " 'happening': 467,\n",
1775 | " 'roof': 468,\n",
1776 | " 'believable': 469,\n",
1777 | " 'startling': 470,\n",
1778 | " 'dozen': 471,\n",
1779 | " 'thumbs': 472,\n",
1780 | " 'theyd': 473,\n",
1781 | " 'sure': 474,\n",
1782 | " 'resurrection': 475,\n",
1783 | " 'dated': 476,\n",
1784 | " 'Seahunt': 477,\n",
1785 | " 'series': 478,\n",
1786 | " 'tech': 479,\n",
1787 | " 'today': 480,\n",
1788 | " 'bring': 481,\n",
1789 | " 'back': 482,\n",
1790 | " 'kid': 483,\n",
1791 | " 'excitement': 484,\n",
1792 | " 'meI': 485,\n",
1793 | " 'grew': 486,\n",
1794 | " 'black': 487,\n",
1795 | " 'white': 488,\n",
1796 | " 'TV': 489,\n",
1797 | " 'Gunsmoke': 490,\n",
1798 | " 'heros': 491,\n",
1799 | " 'weekYou': 492,\n",
1800 | " 'vote': 493,\n",
1801 | " 'comeback': 494,\n",
1802 | " 'new': 495,\n",
1803 | " 'sea': 496,\n",
1804 | " 'huntWe': 497,\n",
1805 | " 'need': 498,\n",
1806 | " 'change': 499,\n",
1807 | " 'pace': 500,\n",
1808 | " 'water': 501,\n",
1809 | " 'adventureOh': 502,\n",
1810 | " 'thank': 503,\n",
1811 | " 'outlet': 504,\n",
1812 | " 'view': 505,\n",
1813 | " 'viewpoints': 506,\n",
1814 | " 'moviesSo': 507,\n",
1815 | " 'ole': 508,\n",
1816 | " 'believe': 509,\n",
1817 | " 'wan': 510,\n",
1818 | " 'na': 511,\n",
1819 | " 'sayWould': 512,\n",
1820 | " 'nice': 513,\n",
1821 | " 'read': 514,\n",
1822 | " 'plus': 515,\n",
1823 | " 'points': 516,\n",
1824 | " 'huntIf': 517,\n",
1825 | " 'rhymes': 518,\n",
1826 | " 'lines': 519,\n",
1827 | " 'let': 520,\n",
1828 | " 'submitor': 521,\n",
1829 | " 'leave': 522,\n",
1830 | " 'doubt': 523,\n",
1831 | " 'quitIf': 524,\n",
1832 | " 'lets': 525,\n",
1833 | " 'amazing': 526,\n",
1834 | " 'fresh': 527,\n",
1835 | " 'innovative': 528,\n",
1836 | " 'idea': 529,\n",
1837 | " '70s': 530,\n",
1838 | " 'aired': 531,\n",
1839 | " '7': 532,\n",
1840 | " '8': 533,\n",
1841 | " 'brilliant': 534,\n",
1842 | " 'dropped': 535,\n",
1843 | " '1990': 536,\n",
1844 | " 'funny': 537,\n",
1845 | " 'anymore': 538,\n",
1846 | " 'continued': 539,\n",
1847 | " 'decline': 540,\n",
1848 | " 'complete': 541,\n",
1849 | " 'waste': 542,\n",
1850 | " 'todayIts': 543,\n",
1851 | " 'disgraceful': 544,\n",
1852 | " 'fallen': 545,\n",
1853 | " 'writing': 546,\n",
1854 | " 'painfully': 547,\n",
1855 | " 'bad': 548,\n",
1856 | " 'performances': 549,\n",
1857 | " 'almost': 550,\n",
1858 | " 'mildly': 551,\n",
1859 | " 'entertaining': 552,\n",
1860 | " 'respite': 553,\n",
1861 | " 'guesthosts': 554,\n",
1862 | " 'probably': 555,\n",
1863 | " 'hard': 556,\n",
1864 | " 'creator': 557,\n",
1865 | " 'handselected': 558,\n",
1866 | " 'original': 559,\n",
1867 | " 'also': 560,\n",
1868 | " 'chose': 561,\n",
1869 | " 'band': 562,\n",
1870 | " 'hacks': 563,\n",
1871 | " 'followed': 564,\n",
1872 | " 'recognize': 565,\n",
1873 | " 'brilliance': 566,\n",
1874 | " 'fit': 567,\n",
1875 | " 'replace': 568,\n",
1876 | " 'mediocrity': 569,\n",
1877 | " 'felt': 570,\n",
1878 | " 'give': 571,\n",
1879 | " 'stars': 572,\n",
1880 | " 'respect': 573,\n",
1881 | " 'made': 574,\n",
1882 | " 'huge': 575,\n",
1883 | " 'awful': 576,\n",
1884 | " 'cant': 577,\n",
1885 | " 'Encouraged': 578,\n",
1886 | " 'positive': 579,\n",
1887 | " 'comments': 580,\n",
1888 | " 'looking': 581,\n",
1889 | " 'forward': 582,\n",
1890 | " 'Bad': 583,\n",
1891 | " 'mistake': 584,\n",
1892 | " '950': 585,\n",
1893 | " 'films': 586,\n",
1894 | " 'worst': 587,\n",
1895 | " 'pacing': 588,\n",
1896 | " 'storyline': 589,\n",
1897 | " 'soundtrack': 590,\n",
1898 | " 'song': 591,\n",
1899 | " 'lame': 592,\n",
1900 | " 'country': 593,\n",
1901 | " 'tune': 594,\n",
1902 | " 'played': 595,\n",
1903 | " 'less': 596,\n",
1904 | " 'four': 597,\n",
1905 | " 'looks': 598,\n",
1906 | " 'cheap': 599,\n",
1907 | " 'extreme': 600,\n",
1908 | " 'Rarely': 601,\n",
1909 | " 'happy': 602,\n",
1910 | " 'end': 603,\n",
1911 | " 'credits': 604,\n",
1912 | " 'prevents': 605,\n",
1913 | " 'giving': 606,\n",
1914 | " '1score': 607,\n",
1915 | " 'Harvey': 608,\n",
1916 | " 'Keitel': 609,\n",
1917 | " 'least': 610,\n",
1918 | " 'making': 611,\n",
1919 | " 'bit': 612,\n",
1920 | " 'effort': 613,\n",
1921 | " 'obsessives': 614,\n",
1922 | " 'gut': 615,\n",
1923 | " 'wrenching': 616,\n",
1924 | " 'laughter': 617,\n",
1925 | " 'love': 618,\n",
1926 | " 'hell': 619,\n",
1927 | " 'mom': 620,\n",
1928 | " 'liked': 621,\n",
1929 | " 'itGreat': 622,\n",
1930 | " 'Camp': 623,\n",
1931 | " 'Phil': 624,\n",
1932 | " 'Alien': 625,\n",
1933 | " 'quirky': 626,\n",
1934 | " 'humour': 627,\n",
1935 | " 'based': 628,\n",
1936 | " 'oddness': 629,\n",
1937 | " 'everything': 630,\n",
1938 | " 'actual': 631,\n",
1939 | " 'punchlinesAt': 632,\n",
1940 | " 'odd': 633,\n",
1941 | " 'progressed': 634,\n",
1942 | " 'didnt': 635,\n",
1943 | " 'jokes': 636,\n",
1944 | " 'anymoreIts': 637,\n",
1945 | " 'low': 638,\n",
1946 | " 'budget': 639,\n",
1947 | " 'thats': 640,\n",
1948 | " 'problem': 641,\n",
1949 | " 'eventually': 642,\n",
1950 | " 'lost': 643,\n",
1951 | " 'interestI': 644,\n",
1952 | " 'imagine': 645,\n",
1953 | " 'stoner': 646,\n",
1954 | " 'currently': 647,\n",
1955 | " 'partakingFor': 648,\n",
1956 | " 'something': 649,\n",
1957 | " 'better': 650,\n",
1958 | " 'try': 651,\n",
1959 | " 'Brother': 652,\n",
1960 | " 'planet': 653,\n",
1961 | " '12': 654,\n",
1962 | " 'came': 655,\n",
1963 | " 'recall': 656,\n",
1964 | " 'scariest': 657,\n",
1965 | " 'scene': 658,\n",
1966 | " 'bird': 659,\n",
1967 | " 'eating': 660,\n",
1968 | " 'men': 661,\n",
1969 | " 'dangling': 662,\n",
1970 | " 'helplessly': 663,\n",
1971 | " 'parachutes': 664,\n",
1972 | " 'horror': 665,\n",
1973 | " 'horrorAs': 666,\n",
1974 | " 'cheesy': 667,\n",
1975 | " 'B': 668,\n",
1976 | " 'Saturday': 669,\n",
1977 | " 'afternoons': 670,\n",
1978 | " 'tired': 671,\n",
1979 | " 'formula': 672,\n",
1980 | " 'monster': 673,\n",
1981 | " 'type': 674,\n",
1982 | " 'movies': 675,\n",
1983 | " 'usually': 676,\n",
1984 | " 'included': 677,\n",
1985 | " 'hero': 678,\n",
1986 | " 'beautiful': 679,\n",
1987 | " 'woman': 680,\n",
1988 | " 'might': 681,\n",
1989 | " 'daughter': 682,\n",
1990 | " 'professor': 683,\n",
1991 | " 'resolution': 684,\n",
1992 | " 'died': 685,\n",
1993 | " 'care': 686,\n",
1994 | " 'much': 687,\n",
1995 | " 'romantic': 688,\n",
1996 | " 'angle': 689,\n",
1997 | " 'year': 690,\n",
1998 | " 'predictable': 691,\n",
1999 | " 'plots': 692,\n",
2000 | " 'unintentional': 693,\n",
2001 | " 'humorBut': 694,\n",
2002 | " 'later': 695,\n",
2003 | " 'Psycho': 696,\n",
2004 | " 'loved': 697,\n",
2005 | " 'star': 698,\n",
2006 | " 'Janet': 699,\n",
2007 | " 'Leigh': 700,\n",
2008 | " 'bumped': 701,\n",
2009 | " 'early': 702,\n",
2010 | " 'sat': 703,\n",
2011 | " 'took': 704,\n",
2012 | " 'notice': 705,\n",
2013 | " 'Since': 706,\n",
2014 | " 'screenwriters': 707,\n",
2015 | " 'scary': 708,\n",
2016 | " 'possible': 709,\n",
2017 | " 'wellworn': 710,\n",
2018 | " 'rules': 711,\n",
2019 | " 'im': 712,\n",
2020 | " 'fan': 713,\n",
2021 | " 'Bolls': 714,\n",
2022 | " 'enjoyed': 715,\n",
2023 | " 'Postal': 716,\n",
2024 | " 'maybe': 717,\n",
2025 | " 'Boll': 718,\n",
2026 | " 'apparently': 719,\n",
2027 | " 'bought': 720,\n",
2028 | " 'rights': 721,\n",
2029 | " 'Far': 722,\n",
2030 | " 'Cry': 723,\n",
2031 | " 'long': 724,\n",
2032 | " 'ago': 725,\n",
2033 | " 'game': 726,\n",
2034 | " 'finsished': 727,\n",
2035 | " 'People': 728,\n",
2036 | " 'killing': 729,\n",
2037 | " 'mercs': 730,\n",
2038 | " 'infiltrating': 731,\n",
2039 | " 'secret': 732,\n",
2040 | " 'research': 733,\n",
2041 | " 'labs': 734,\n",
2042 | " 'located': 735,\n",
2043 | " 'tropical': 736,\n",
2044 | " 'island': 737,\n",
2045 | " 'warned': 738,\n",
2046 | " 'schemed': 739,\n",
2047 | " 'together': 740,\n",
2048 | " 'along': 741,\n",
2049 | " 'legion': 742,\n",
2050 | " 'schmucks': 743,\n",
2051 | " 'Feeling': 744,\n",
2052 | " 'loneley': 745,\n",
2053 | " 'invites': 746,\n",
2054 | " 'three': 747,\n",
2055 | " 'countrymen': 748,\n",
2056 | " 'players': 749,\n",
2057 | " 'names': 750,\n",
2058 | " 'Til': 751,\n",
2059 | " 'Schweiger': 752,\n",
2060 | " 'Udo': 753,\n",
2061 | " 'Kier': 754,\n",
2062 | " 'Ralf': 755,\n",
2063 | " 'MoellerThree': 756,\n",
2064 | " 'actually': 757,\n",
2065 | " 'selfs': 758,\n",
2066 | " 'biz': 759,\n",
2067 | " 'tale': 760,\n",
2068 | " 'Jack': 761,\n",
2069 | " 'Carver': 762,\n",
2070 | " 'yes': 763,\n",
2071 | " 'German': 764,\n",
2072 | " 'hail': 765,\n",
2073 | " 'bratwurst': 766,\n",
2074 | " 'dudes': 767,\n",
2075 | " 'However': 768,\n",
2076 | " 'Tils': 769,\n",
2077 | " 'badass': 770,\n",
2078 | " 'complained': 771,\n",
2079 | " 'hes': 772,\n",
2080 | " 'staying': 773,\n",
2081 | " 'true': 774,\n",
2082 | " 'whole': 775,\n",
2083 | " 'carver': 776,\n",
2084 | " 'perspective': 777,\n",
2085 | " 'dont': 778,\n",
2086 | " 'looked': 779,\n",
2087 | " 'kicking': 780,\n",
2088 | " 'beyond': 781,\n",
2089 | " 'demented': 782,\n",
2090 | " 'evil': 783,\n",
2091 | " 'mad': 784,\n",
2092 | " 'scientist': 785,\n",
2093 | " 'Dr': 786,\n",
2094 | " 'Krieger': 787,\n",
2095 | " 'GeneticallyMutatedsoldiers': 788,\n",
2096 | " 'GMS': 789,\n",
2097 | " 'Performing': 790,\n",
2098 | " 'topsecret': 791,\n",
2099 | " 'reminds': 792,\n",
2100 | " 'SPOILER': 793,\n",
2101 | " 'Vancouver': 794,\n",
2102 | " 'reason': 795,\n",
2103 | " 'Thats': 796,\n",
2104 | " 'palm': 797,\n",
2105 | " 'trees': 798,\n",
2106 | " 'Instead': 799,\n",
2107 | " 'rich': 800,\n",
2108 | " 'lumberjackwoods': 801,\n",
2109 | " 'havent': 802,\n",
2110 | " 'gone': 803,\n",
2111 | " 'FAR': 804,\n",
2112 | " 'started': 805,\n",
2113 | " 'CRY': 806,\n",
2114 | " 'mehehe': 807,\n",
2115 | " 'can': 808,\n",
2116 | " 'not': 809,\n",
2117 | " 'stay': 810,\n",
2118 | " 'shenanigans': 811,\n",
2119 | " 'delivers': 812,\n",
2120 | " 'meaning': 813,\n",
2121 | " 'suckThere': 814,\n",
2122 | " 'mentioning': 815,\n",
2123 | " 'imply': 816,\n",
2124 | " 'areas': 817,\n",
2125 | " 'boat': 818,\n",
2126 | " 'cromedalbino': 819,\n",
2127 | " 'squad': 820,\n",
2128 | " 'enters': 821,\n",
2129 | " 'laugh': 822,\n",
2130 | " 'reeks': 823,\n",
2131 | " 'scheisse': 824,\n",
2132 | " 'poop': 825,\n",
2133 | " 'simpletons': 826,\n",
2134 | " 'take': 827,\n",
2135 | " 'wiff': 828,\n",
2136 | " 'ahead': 829,\n",
2137 | " 'BTW': 830,\n",
2138 | " 'annoying': 831,\n",
2139 | " 'sidekick': 832,\n",
2140 | " 'shoot': 833,\n",
2141 | " 'minutes': 834,\n",
2142 | " 'screen': 835,\n",
2143 | " 'ShakespeareShakespeare': 836,\n",
2144 | " 'lostI': 837,\n",
2145 | " 'appreciate': 838,\n",
2146 | " 'trying': 839,\n",
2147 | " 'Shakespeare': 840,\n",
2148 | " 'masses': 841,\n",
2149 | " 'ruin': 842,\n",
2150 | " 'goodIs': 843,\n",
2151 | " 'Scottish': 844,\n",
2152 | " 'Play': 845,\n",
2153 | " 'certain': 846,\n",
2154 | " 'Rev': 847,\n",
2155 | " 'Bowdler': 848,\n",
2156 | " 'hence': 849,\n",
2157 | " 'bowdlerization': 850,\n",
2158 | " 'tried': 851,\n",
2159 | " 'Victorian': 852,\n",
2160 | " 'eraIn': 853,\n",
2161 | " 'words': 854,\n",
2162 | " 'improve': 855,\n",
2163 | " 'perfectionI': 856,\n",
2164 | " 'write': 857,\n",
2165 | " 'ten': 858,\n",
2166 | " 'text': 859,\n",
2167 | " 'English': 860,\n",
2168 | " 'composition': 861,\n",
2169 | " 'forte': 862,\n",
2170 | " 'keep': 863,\n",
2171 | " 'saying': 864,\n",
2172 | " 'cut': 865,\n",
2173 | " 'fantastic': 866,\n",
2174 | " 'prisoners': 867,\n",
2175 | " 'famous': 868,\n",
2176 | " 'george': 869,\n",
2177 | " 'clooney': 870,\n",
2178 | " 'Im': 871,\n",
2179 | " 'roll': 872,\n",
2180 | " 'Another': 873,\n",
2181 | " 'man': 874,\n",
2182 | " 'constant': 875,\n",
2183 | " 'sorrow': 876,\n",
2184 | " 'recommand': 877,\n",
2185 | " 'everybody': 878,\n",
2186 | " 'Greetings': 879,\n",
2187 | " 'Bart': 880,\n",
2188 | " 'Kind': 881,\n",
2189 | " 'drawn': 882,\n",
2190 | " 'erotic': 883,\n",
2191 | " 'amateurish': 884,\n",
2192 | " 'unbelievable': 885,\n",
2193 | " 'bits': 886,\n",
2194 | " 'Sort': 887,\n",
2195 | " 'school': 888,\n",
2196 | " 'project': 889,\n",
2197 | " 'Rosanna': 890,\n",
2198 | " 'Arquette': 891,\n",
2199 | " 'thinking': 892,\n",
2200 | " 'stock': 893,\n",
2201 | " 'bizarre': 894,\n",
2202 | " 'supposed': 895,\n",
2203 | " 'Midwest': 896,\n",
2204 | " 'town': 897,\n",
2205 | " 'Pretty': 898,\n",
2206 | " 'involved': 899,\n",
2207 | " 'lessons': 900,\n",
2208 | " 'learned': 901,\n",
2209 | " 'insights': 902,\n",
2210 | " 'stilted': 903,\n",
2211 | " 'quite': 904,\n",
2212 | " 'ridiculous': 905,\n",
2213 | " 'lots': 906,\n",
2214 | " 'skin': 907,\n",
2215 | " 'intrigues': 908,\n",
2216 | " 'videotaped': 909,\n",
2217 | " 'nonsenseWhat': 910,\n",
2218 | " 'bisexual': 911,\n",
2219 | " 'relationship': 912,\n",
2220 | " 'nowhere': 913,\n",
2221 | " 'heterosexual': 914,\n",
2222 | " 'encounters': 915,\n",
2223 | " 'absurd': 916,\n",
2224 | " 'dance': 917,\n",
2225 | " 'stereotyped': 918,\n",
2226 | " 'Give': 919,\n",
2227 | " 'pass': 920,\n",
2228 | " 'million': 921,\n",
2229 | " 'miles': 922,\n",
2230 | " 'wasted': 923,\n",
2231 | " 'could': 924,\n",
2232 | " 'spent': 925,\n",
2233 | " 'starving': 926,\n",
2234 | " 'Aids': 927,\n",
2235 | " 'Africa': 928,\n",
2236 | " 'simply': 929,\n",
2237 | " 'remade': 930,\n",
2238 | " 'fails': 931,\n",
2239 | " 'capture': 932,\n",
2240 | " 'flavor': 933,\n",
2241 | " 'terror': 934,\n",
2242 | " '1963': 935,\n",
2243 | " 'title': 936,\n",
2244 | " 'Liam': 937,\n",
2245 | " 'Neeson': 938,\n",
2246 | " 'excellent': 939,\n",
2247 | " 'always': 940,\n",
2248 | " 'holds': 941,\n",
2249 | " 'exception': 942,\n",
2250 | " 'Owen': 943,\n",
2251 | " 'Wilson': 944,\n",
2252 | " 'feel': 945,\n",
2253 | " 'character': 946,\n",
2254 | " 'Luke': 947,\n",
2255 | " 'major': 948,\n",
2256 | " 'fault': 949,\n",
2257 | " 'version': 950,\n",
2258 | " 'strayed': 951,\n",
2259 | " 'Shirley': 952,\n",
2260 | " 'Jackson': 953,\n",
2261 | " 'attempts': 954,\n",
2262 | " 'grandiose': 955,\n",
2263 | " 'thrill': 956,\n",
2264 | " 'earlier': 957,\n",
2265 | " 'trade': 958,\n",
2266 | " 'snazzier': 959,\n",
2267 | " 'special': 960,\n",
2268 | " 'effects': 961,\n",
2269 | " 'enjoy': 962,\n",
2270 | " 'friction': 963,\n",
2271 | " 'older': 964,\n",
2272 | " 'top': 965,\n",
2273 | " 'Horrible': 966,\n",
2274 | " 'wasnt': 967,\n",
2275 | " 'continuous': 968,\n",
2276 | " 'minute': 969,\n",
2277 | " 'fight': 970,\n",
2278 | " 'chance': 971,\n",
2279 | " 'development': 972,\n",
2280 | " 'busy': 973,\n",
2281 | " 'running': 974,\n",
2282 | " 'sword': 975,\n",
2283 | " 'emotional': 976,\n",
2284 | " 'attachment': 977,\n",
2285 | " 'except': 978,\n",
2286 | " 'machine': 979,\n",
2287 | " 'wanted': 980,\n",
2288 | " 'destroy': 981,\n",
2289 | " 'Scenes': 982,\n",
2290 | " 'blatantly': 983,\n",
2291 | " 'stolen': 984,\n",
2292 | " 'LOTR': 985,\n",
2293 | " 'Star': 986,\n",
2294 | " 'Wars': 987,\n",
2295 | " 'Matrix': 988,\n",
2296 | " 'ExamplesThe': 989,\n",
2297 | " 'ghost': 990,\n",
2298 | " 'final': 991,\n",
2299 | " 'Yoda': 992,\n",
2300 | " 'Obee': 993,\n",
2301 | " 'Vader': 994,\n",
2302 | " 'spider': 995,\n",
2303 | " 'beginning': 996,\n",
2304 | " 'Frodo': 997,\n",
2305 | " 'attacked': 998,\n",
2306 | " 'Return': 999,\n",
2307 | " ...}"
2308 | ]
2309 | },
2310 | "execution_count": 13,
2311 | "metadata": {},
2312 | "output_type": "execute_result"
2313 | }
2314 | ],
2315 | "source": [
2316 | "vocab_to_index"
2317 | ]
2318 | },
2319 | {
2320 | "cell_type": "code",
2321 | "execution_count": 14,
2322 | "metadata": {},
2323 | "outputs": [
2324 | {
2325 | "data": {
2326 | "text/plain": [
2327 | "256140"
2328 | ]
2329 | },
2330 | "execution_count": 14,
2331 | "metadata": {},
2332 | "output_type": "execute_result"
2333 | }
2334 | ],
2335 | "source": [
2336 | "len(vocab_to_index)"
2337 | ]
2338 | },
2339 | {
2340 | "cell_type": "code",
2341 | "execution_count": 16,
2342 | "metadata": {
2343 | "scrolled": true
2344 | },
2345 | "outputs": [],
2346 | "source": [
2347 | "result = [[vocab_to_index[word] for word in y] for y in vocab_lst]"
2348 | ]
2349 | },
2350 | {
2351 | "cell_type": "code",
2352 | "execution_count": 17,
2353 | "metadata": {
2354 | "scrolled": true
2355 | },
2356 | "outputs": [
2357 | {
2358 | "data": {
2359 | "text/plain": [
2360 | "[624,\n",
2361 | " 625,\n",
2362 | " 184,\n",
2363 | " 626,\n",
2364 | " 586,\n",
2365 | " 627,\n",
2366 | " 628,\n",
2367 | " 102,\n",
2368 | " 629,\n",
2369 | " 630,\n",
2370 | " 194,\n",
2371 | " 631,\n",
2372 | " 632,\n",
2373 | " 13,\n",
2374 | " 633,\n",
2375 | " 92,\n",
2376 | " 537,\n",
2377 | " 299,\n",
2378 | " 634,\n",
2379 | " 635,\n",
2380 | " 398,\n",
2381 | " 636,\n",
2382 | " 629,\n",
2383 | " 537,\n",
2384 | " 637,\n",
2385 | " 638,\n",
2386 | " 639,\n",
2387 | " 310,\n",
2388 | " 640,\n",
2389 | " 78,\n",
2390 | " 641,\n",
2391 | " 92,\n",
2392 | " 284,\n",
2393 | " 233,\n",
2394 | " 642,\n",
2395 | " 643,\n",
2396 | " 644,\n",
2397 | " 645,\n",
2398 | " 310,\n",
2399 | " 81,\n",
2400 | " 84,\n",
2401 | " 646,\n",
2402 | " 647,\n",
2403 | " 648,\n",
2404 | " 649,\n",
2405 | " 325,\n",
2406 | " 650,\n",
2407 | " 651,\n",
2408 | " 652,\n",
2409 | " 373,\n",
2410 | " 653]"
2411 | ]
2412 | },
2413 | "execution_count": 17,
2414 | "metadata": {},
2415 | "output_type": "execute_result"
2416 | }
2417 | ],
2418 | "source": [
2419 | "result[10]"
2420 | ]
2421 | },
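2422 | {
2423 | "cell_type": "markdown",
2424 | "metadata": {},
2425 | "source": [
2426 | "The index sequences above vary in length, so before batching they are usually padded or truncated to a fixed length. A minimal sketch, assuming index 0 can be reserved for padding (if the vocabulary already assigns 0 to a word, shift the indices first; `pad_sequence` and `max_len` are illustrative names, not part of the original notebook):"
2427 | ]
2428 | },
2429 | {
2430 | "cell_type": "code",
2431 | "execution_count": null,
2432 | "metadata": {},
2433 | "outputs": [],
2434 | "source": [
2435 | "def pad_sequence(seq, max_len=100, pad_idx=0):\n",
2436 | "    # keep the first max_len indices, then right-pad with pad_idx up to max_len\n",
2437 | "    return seq[:max_len] + [pad_idx] * (max_len - len(seq[:max_len]))\n",
2438 | "\n",
2439 | "padded = [pad_sequence(seq) for seq in result]\n",
2440 | "len(padded[10])"
2441 | ]
2442 | },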
2422 | {
2423 | "cell_type": "code",
2424 | "execution_count": 18,
2425 | "metadata": {},
2426 | "outputs": [],
2427 | "source": [
2428 | "# KOREAN : https://github.com/e9t/nsmc\n",
2429 | "# ENGLISH : https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews/kernels"
2430 | ]
2431 | }
2432 | ],
2433 | "metadata": {
2434 | "kernelspec": {
2435 | "display_name": "Python 3",
2436 | "language": "python",
2437 | "name": "python3"
2438 | },
2439 | "language_info": {
2440 | "codemirror_mode": {
2441 | "name": "ipython",
2442 | "version": 3
2443 | },
2444 | "file_extension": ".py",
2445 | "mimetype": "text/x-python",
2446 | "name": "python",
2447 | "nbconvert_exporter": "python",
2448 | "pygments_lexer": "ipython3",
2449 | "version": "3.6.8"
2450 | }
2451 | },
2452 | "nbformat": 4,
2453 | "nbformat_minor": 4
2454 | }
2455 |
--------------------------------------------------------------------------------
/2_Representation_Vector.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import torch\n",
10 | "from torch.autograd import Variable\n",
11 | "import torch.nn as nn\n",
12 | "import torch.nn.functional as F\n",
13 | "import torch.optim as optim"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "name": "stdout",
23 | "output_type": "stream",
24 | "text": [
25 | "vocab_size: 49\n"
26 | ]
27 | }
28 | ],
29 | "source": [
30 | "CONTEXT_SIZE = 2\n",
31 | "\n",
32 | "text = \"\"\"We are about to study the idea of a computational process.\n",
33 | "Computational processes are abstract beings that inhabit computers.\n",
34 | "As they evolve, processes manipulate other abstract things called data.\n",
35 | "The evolution of a process is directed by a pattern of rules\n",
36 | "called a program. People create programs to direct processes. In effect,\n",
37 | "we conjure the spirits of the computer with our spells.\"\"\".split()\n",
38 | "\n",
39 | "vocab = set(text)\n",
40 | "vocab_size = len(vocab)\n",
41 | "print('vocab_size:', vocab_size)\n",
42 | "\n",
43 | "w2i = {w: i for i, w in enumerate(vocab)}\n",
44 | "i2w = {i: w for i, w in enumerate(vocab)}"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 3,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "cbow sample (['We', 'are', 'to', 'study'], 'about')\n",
57 | "skipgram sample ('about', 'We', 1)\n"
58 | ]
59 | }
60 | ],
61 | "source": [
62 | "def create_cbow_dataset(text):\n",
63 | " data = []\n",
64 | " for i in range(2, len(text) - 2):\n",
65 | " context = [text[i - 2], text[i - 1],\n",
66 | " text[i + 1], text[i + 2]]\n",
67 | " target = text[i]\n",
68 | " data.append((context, target))\n",
69 | " return data\n",
70 | "\n",
71 | "def create_skipgram_dataset(text):\n",
72 | " import random\n",
73 | " data = []\n",
74 | " for i in range(2, len(text) - 2):\n",
75 | " data.append((text[i], text[i-2], 1))\n",
76 | " data.append((text[i], text[i-1], 1))\n",
77 | " data.append((text[i], text[i+1], 1))\n",
78 | " data.append((text[i], text[i+2], 1))\n",
79 | " for _ in range(4):\n",
80 | " if random.random() < 0.5 or i >= len(text) - 3:\n",
81 | " rand_id = random.randint(0, i-1)\n",
82 | " else:\n",
83 | " rand_id = random.randint(i+3, len(text)-1)\n",
84 | " data.append((text[i], text[rand_id], 0))\n",
85 | " return data\n",
86 | "\n",
87 | "cbow_train = create_cbow_dataset(text)\n",
88 | "skipgram_train = create_skipgram_dataset(text)\n",
89 | "print('cbow sample', cbow_train[0])\n",
90 | "print('skipgram sample', skipgram_train[0])"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 4,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "class CBOW(nn.Module):\n",
100 | " def __init__(self, vocab_size, embd_size, context_size, hidden_size):\n",
101 | " super(CBOW, self).__init__()\n",
102 | " self.embeddings = nn.Embedding(vocab_size, embd_size)\n",
103 | " self.linear1 = nn.Linear(2*context_size*embd_size, hidden_size)\n",
104 | " self.linear2 = nn.Linear(hidden_size, vocab_size)\n",
105 | " \n",
106 | " def forward(self, inputs):\n",
107 | " embedded = self.embeddings(inputs).view((1, -1))\n",
108 | " hid = F.relu(self.linear1(embedded))\n",
109 | " out = self.linear2(hid)\n",
110 | " log_probs = F.log_softmax(out)\n",
111 | " return log_probs\n",
112 | "\n",
113 | "class SkipGram(nn.Module):\n",
114 | " def __init__(self, vocab_size, embd_size):\n",
115 | " super(SkipGram, self).__init__()\n",
116 | " self.embeddings = nn.Embedding(vocab_size, embd_size)\n",
117 | " \n",
118 | " def forward(self, focus, context):\n",
119 | " embed_focus = self.embeddings(focus).view((1, -1))\n",
120 | " embed_ctx = self.embeddings(context).view((1, -1))\n",
121 | " score = torch.mm(embed_focus, torch.t(embed_ctx))\n",
122 | " log_probs = F.logsigmoid(score)\n",
123 | " \n",
124 | " return log_probs"
125 | ]
126 | },
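127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "A quick shape check of the CBOW forward pass (an added sketch, not part of the original run; `_check` and `_ctx` are illustrative names, and the literal sizes 100 and 64 match the hyperparameters used below): the four context ids are embedded, flattened to a (1, 400) row, and mapped to (1, vocab_size) log-probabilities."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "_check = CBOW(vocab_size, 100, CONTEXT_SIZE, 64)\n",
141 | "_ctx = torch.LongTensor([0, 1, 2, 3])  # any 2*CONTEXT_SIZE = 4 token ids\n",
142 | "print(_check(_ctx).shape)  # expected: torch.Size([1, 49])"
143 | ]
144 | },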
127 | {
128 | "cell_type": "code",
129 | "execution_count": 5,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "embd_size = 100\n",
134 | "learning_rate = 0.001\n",
135 | "n_epoch = 30\n",
136 | "\n",
137 | "def train_cbow():\n",
138 | " hidden_size = 64\n",
139 | " losses = []\n",
140 | " loss_fn = nn.NLLLoss()\n",
141 | " model = CBOW(vocab_size, embd_size, CONTEXT_SIZE, hidden_size)\n",
142 | " print(model)\n",
143 | " optimizer = optim.SGD(model.parameters(), lr=learning_rate)\n",
144 | "\n",
145 | " for epoch in range(n_epoch):\n",
146 | " total_loss = .0\n",
147 | " for context, target in cbow_train:\n",
148 | " ctx_idxs = [w2i[w] for w in context]\n",
149 | " ctx_var = Variable(torch.LongTensor(ctx_idxs))\n",
150 | "\n",
151 | " model.zero_grad()\n",
152 | " log_probs = model(ctx_var)\n",
153 | "\n",
154 | " loss = loss_fn(log_probs, Variable(torch.LongTensor([w2i[target]])))\n",
155 | "\n",
156 | " loss.backward()\n",
157 | " optimizer.step()\n",
158 | " total_loss += loss.data\n",
159 | " losses.append(total_loss)\n",
160 | " return model, losses\n",
161 | "\n",
162 | "def train_skipgram():\n",
163 | " losses = []\n",
164 | " loss_fn = nn.MSELoss()\n",
165 | " model = SkipGram(vocab_size, embd_size)\n",
166 | " print(model)\n",
167 | " optimizer = optim.SGD(model.parameters(), lr=learning_rate)\n",
168 | " \n",
169 | " for epoch in range(n_epoch):\n",
170 | " total_loss = .0\n",
171 | " for in_w, out_w, target in skipgram_train:\n",
172 | " in_w_var = Variable(torch.LongTensor([w2i[in_w]]))\n",
173 | " out_w_var = Variable(torch.LongTensor([w2i[out_w]]))\n",
174 | " \n",
175 | " model.zero_grad()\n",
176 | " log_probs = model(in_w_var, out_w_var)\n",
177 | " loss = loss_fn(log_probs[0], Variable(torch.Tensor([target])))\n",
178 | " \n",
179 | " loss.backward()\n",
180 | " optimizer.step()\n",
181 | "\n",
182 | " total_loss += loss.data\n",
183 | " losses.append(total_loss)\n",
184 | " return model, losses"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 6,
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "name": "stdout",
194 | "output_type": "stream",
195 | "text": [
196 | "CBOW(\n",
197 | " (embeddings): Embedding(49, 100)\n",
198 | " (linear1): Linear(in_features=400, out_features=64, bias=True)\n",
199 | " (linear2): Linear(in_features=64, out_features=49, bias=True)\n",
200 | ")\n"
201 | ]
202 | },
203 | {
204 | "name": "stderr",
205 | "output_type": "stream",
206 | "text": [
207 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:12: UserWarning: Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.\n",
208 | " if sys.path[0] == '':\n"
209 | ]
210 | },
211 | {
212 | "name": "stdout",
213 | "output_type": "stream",
214 | "text": [
215 | "SkipGram(\n",
216 | " (embeddings): Embedding(49, 100)\n",
217 | ")\n"
218 | ]
219 | }
220 | ],
221 | "source": [
222 | "cbow_model, cbow_losses = train_cbow()\n",
223 | "sg_model, sg_losses = train_skipgram()"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": 7,
229 | "metadata": {},
230 | "outputs": [],
231 | "source": [
232 | "def test_cbow(test_data, model):\n",
233 | " print('====Test CBOW===')\n",
234 | " correct_ct = 0\n",
235 | " for ctx, target in test_data:\n",
236 | " ctx_idxs = [w2i[w] for w in ctx]\n",
237 | " ctx_var = Variable(torch.LongTensor(ctx_idxs))\n",
238 | "\n",
239 | " model.zero_grad()\n",
240 | " log_probs = model(ctx_var)\n",
241 | " _, predicted = torch.max(log_probs.data, 1)\n",
242 | " predicted_word = i2w[predicted.item()]\n",
243 | " print('predicted:', predicted_word)\n",
244 | " print('label :', target)\n",
245 | " if predicted_word == target:\n",
246 | " correct_ct += 1\n",
247 | " \n",
248 | " print('Accuracy: {:.1f}% ({:d}/{:d})'.format(correct_ct/len(test_data)*100, correct_ct, len(test_data)))\n",
249 | "\n",
250 | "def test_skipgram(test_data, model):\n",
251 | " print('====Test SkipGram===')\n",
252 | " correct_ct = 0\n",
253 | " for in_w, out_w, target in test_data:\n",
254 | " in_w_var = Variable(torch.LongTensor([w2i[in_w]]))\n",
255 | " out_w_var = Variable(torch.LongTensor([w2i[out_w]]))\n",
256 | "\n",
257 | " model.zero_grad()\n",
258 | " log_probs = model(in_w_var, out_w_var)\n",
259 | " _, predicted = torch.max(log_probs.data, 1)\n",
260 | " predicted = predicted[0]\n",
261 | " if predicted == target:\n",
262 | " correct_ct += 1\n",
263 | "\n",
264 | " print('Accuracy: {:.1f}% ({:d}/{:d})'.format(correct_ct/len(test_data)*100, correct_ct, len(test_data)))"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "execution_count": 8,
270 | "metadata": {},
271 | "outputs": [
272 | {
273 | "name": "stdout",
274 | "output_type": "stream",
275 | "text": [
276 | "====Test CBOW===\n",
277 | "predicted: about\n",
278 | "label : about\n",
279 | "predicted: to\n",
280 | "label : to\n",
281 | "predicted: study\n",
282 | "label : study\n",
283 | "predicted: the\n",
284 | "label : the\n",
285 | "predicted: idea\n",
286 | "label : idea\n",
287 | "predicted: of\n",
288 | "label : of\n",
289 | "predicted: a\n",
290 | "label : a\n",
291 | "predicted: computational\n",
292 | "label : computational\n",
293 | "predicted: process.\n",
294 | "label : process.\n",
295 | "predicted: Computational\n",
296 | "label : Computational\n",
297 | "predicted: processes\n",
298 | "label : processes\n",
299 | "predicted: are\n",
300 | "label : are\n",
301 | "predicted: abstract\n",
302 | "label : abstract\n",
303 | "predicted: beings\n",
304 | "label : beings\n",
305 | "predicted: that\n",
306 | "label : that\n",
307 | "predicted: inhabit\n",
308 | "label : inhabit\n",
309 | "predicted: computers.\n",
310 | "label : computers.\n",
311 | "predicted: As\n",
312 | "label : As\n",
313 | "predicted: they\n",
314 | "label : they\n",
315 | "predicted: evolve,\n",
316 | "label : evolve,\n",
317 | "predicted: processes\n",
318 | "label : processes\n",
319 | "predicted: manipulate\n",
320 | "label : manipulate\n",
321 | "predicted: other\n",
322 | "label : other\n",
323 | "predicted: abstract\n",
324 | "label : abstract\n",
325 | "predicted: things\n",
326 | "label : things\n",
327 | "predicted: called\n",
328 | "label : called\n",
329 | "predicted: data.\n",
330 | "label : data.\n",
331 | "predicted: The\n",
332 | "label : The\n",
333 | "predicted: evolution\n",
334 | "label : evolution\n",
335 | "predicted: of\n",
336 | "label : of\n",
337 | "predicted: a\n",
338 | "label : a\n",
339 | "predicted: process\n",
340 | "label : process\n",
341 | "predicted: is\n",
342 | "label : is\n",
343 | "predicted: directed\n",
344 | "label : directed\n",
345 | "predicted: by\n",
346 | "label : by\n",
347 | "predicted: a\n",
348 | "label : a\n",
349 | "predicted: pattern\n",
350 | "label : pattern\n",
351 | "predicted: of\n",
352 | "label : of\n",
353 | "predicted: rules\n",
354 | "label : rules\n",
355 | "predicted: called\n",
356 | "label : called\n",
357 | "predicted: a\n",
358 | "label : a\n",
359 | "predicted: program.\n",
360 | "label : program.\n",
361 | "predicted: People\n",
362 | "label : People\n",
363 | "predicted: create\n",
364 | "label : create\n",
365 | "predicted: programs\n",
366 | "label : programs\n",
367 | "predicted: to\n",
368 | "label : to\n",
369 | "predicted: direct\n",
370 | "label : direct\n",
371 | "predicted: processes.\n",
372 | "label : processes.\n",
373 | "predicted: In\n",
374 | "label : In\n",
375 | "predicted: effect,\n",
376 | "label : effect,\n",
377 | "predicted: we\n",
378 | "label : we\n",
379 | "predicted: conjure\n",
380 | "label : conjure\n",
381 | "predicted: the\n",
382 | "label : the\n",
383 | "predicted: spirits\n",
384 | "label : spirits\n",
385 | "predicted: of\n",
386 | "label : of\n",
387 | "predicted: the\n",
388 | "label : the\n",
389 | "predicted: computer\n",
390 | "label : computer\n",
391 | "predicted: of\n",
392 | "label : with\n",
393 | "Accuracy: 98.3% (57/58)\n",
394 | "------\n",
395 | "====Test SkipGram===\n",
396 | "Accuracy: 50.0% (232/464)\n"
397 | ]
398 | },
399 | {
400 | "name": "stderr",
401 | "output_type": "stream",
402 | "text": [
403 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:12: UserWarning: Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.\n",
404 | " if sys.path[0] == '':\n"
405 | ]
406 | }
407 | ],
408 | "source": [
409 | "test_cbow(cbow_train, cbow_model)\n",
410 | "print('------')\n",
411 | "test_skipgram(skipgram_train, sg_model)"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": 9,
417 | "metadata": {},
418 | "outputs": [
428 | {
429 | "data": {
430 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD4CAYAAADmWv3KAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3gWVfrG8e+TApHeQguhCSJVwAgokFhAEAQUG6KCIEVFpbiu+tPVdd3irhKqgqAIuBQbAhZKZDUJ3VCklwACAYQgvbfz+yMvbhYpISRM3jf357pyMTkzb/LMNXIznjlzjjnnEBGRwBPkdQEiIpI9FPAiIgFKAS8iEqAU8CIiAUoBLyISoEK8LgCgRIkSrmLFil6XISLiVxYtWrTbORd+of05IuArVqxIUlKS12WIiPgVM9t8sf3qohERCVAKeBGRAKWAFxEJUAp4EZEApYAXEQlQCngRkQClgBcRCVB+HfB7Dp/gja9Wcvj4Ka9LERHJcfw64Gcn72b03J9p9+4c1u886HU5IiI5il8HfNsbyvLvJxqy78gJ2g6dw+Ql27wuSUQkx/DrgAdoXKUE3zzXlNoRhenzyVL+78vlHDt52uuyREQ85/cBD1CqUBjjuzekZ0xlxi/Ywv3D57Ll1yNelyUi4qmACHiAkOAgXr6rOiM7RbHl1yO0HpLIjJW/eF2WiIhnAibgz2peoxTfPNeUisXz0/PjRfz929WcPH3G67JERK66gAt4gMhi+fj8qZt5rFEFRiRspOPI+fyy/5jXZYmIXFUBGfAAeUOCefOeWgzqUJeV2w/QenAiietTvS5LROSqCdiAP6td3QimPtOE4gXy0GnUQv45fY26bEQkV7hkwJtZpJl9b2arzWylmfX2tb9tZmvMbJmZfWlmRdJ95mUzSzaztWbWIjtPICOqlCzAlF5N6HBTJMN+2MCD789j6x6NshGRwJaRO/hTwPPOuepAI6CXmdUA4oBazrk6wDrgZQDfvg5ATaAl8J6ZBWdH8ZfjmjzB/KN9HYZ2rEfyzkO0GpzItOU7vC5LRCTbXDLgnXM7nHOLfdsHgdVAhHNupnPu7CQw84Fyvu12wETn3HHn3CYgGWiQ9aVnzt11yvJt76ZUDi/AU+MW84pejBKRAHVZffBmVhGoByw4Z1dXYJpvOwLYmm5fiq/t3J/Vw8ySzCwpNfXqPvyMLJaPz3reTM/oyoxbsIV2QzWXjYgEngwHvJkVAL4A+jjnDqRrf4W0bpxxZ5vO83H3uwbnRjjnopxzUeHh4ZdXdRbIExLEy62qM6ZrA3YfOk6bobOZuHALzv2uVBERv5ShgDezUNLCfZxzblK69s7A3cAj7r/JmAJEpvt4OWB71pSb9WKuC2da76bcWKEoL01azrMTlnDg2EmvyxIRuWIZGUVjwIfAaudcbLr2lsCLQFvnXPohKVOBDmaW18wqAVWBhVlbdtYqWSiMsV0b8kKLakxb8QutByeyaPNer8sSEbkiGbmDbww8BtxuZkt9X62AoUBBIM7XNhzAObcS+BRYBUwHejnncvxTzOAgo9dtVfi0ZyOcgwffn8eAuHWc0ph5EfFTlhP6nKOiolxSUpLXZfzmwLGT/HnqSiYt3kbdyCIMfKguFUvk97osEZH/YWaLnHNRF9of8G+yZkahsFBiH6zL0I712JiaNmZeD2BFxN8o4C/i7jplmdE3mrqRRXhp0nJ6fryIPYdPeF2WiEiGKOAvoUzha/j3Ew15pVV1flibSouBCcSv06RlIpLzKeAzICjI6B5dmcm9GlM0XyidRy3kz1NX6g1YEcnRFPCXoUbZQkx9pgldGldk9NyfaTNkNiu37/e6LBGR81LAX6aw0GBeb1OTMV0bsP/oSe55dw5D/7NewylFJMdRwGdSzHXhzOgTTYuapXln5jruGz6PDamHvC5LROQ3CvgrUDR/HoZ2rM+Qh+ux+dfDtBqUyEdzNnHmjIZTioj3FPBZoM0NZZnZJ5pbri3OG1+t4pEPFpCyVwuKiIi3FPBZpGShMEY9fhNvta/NspR9tByYyKc/btXLUSLiGQV8FjIzOjQoz/Q+0dQsW4g/frGMbmOS2HXwmNeliUgupIDPBpHF8jGheyNebV2dxOTdtBiQwDfLtDygiFxdCvhsEhRkdGtamW+fa0L5YvnoNX4xvcYtZveh416XJiK5hAI+m1UpWZDPn7qFF1pUI27VTu4ckMDUn7arb15Esp0C/ioIDQ6i121V+Pq5JkQWy8dzE5bQ8+NF6psXkWylgL+KritVkC+evJmX77qeH9al0jw2gUmLU3Q3LyLZQgF/lYUEB9Ez5lqm9W5KlZIF6PfpTzwxJolf9utuXkSylgLeI9eGF+DTnjfz2t01mLthN81j4/nkRy0qIiJZRwHvoeAgo2uTSszoE02NsoV48YvldBq1UG/BikiWUMDnABWK52dC90a82a4mizbvpcWABMbM/Vlz2ojIFVHA5xBBQcZjN1dkZt9obqxYjNenruSB9+eRvOug16WJiJ9SwOcw5YrmY0yXm4h98AY2pB6i1aDZDJ61nhOnNN+8iFweBXwOZGa0r1+O7/rFcGfNUsTGraPt0Nks3brP69JExI8o4HOwEgXyMrRjfT7oFMW+Iydp/94c/vr1Ko6cOOV1aSLiBxTwfqBZjVLM7BfNww3K88HsTbQYmMCc5N1elyUiOZwC3k8UCgvlb/fWZmKPRoQEBfHIBwt44bOf2HfkhNeliUgOpYD3M40qF2da76Y8deu1TFqyjWax8Xy9TJOXicjvKeD9UFhoMC+2vJ6pzzSmTOFreGb8ErqPTWL7vqNelyYiOcglA97MIs3sezNbbWYrzay3r72YmcWZ2Xrfn0V97WZmg80s2cyWmVn97D6J3Kpm2cJ8+fQtvNKqOrOT06Y7GDtPL0iJSJqM3MGfAp53zlUHGgG9zKwG8BIwyzlXFZjl+x7gLqCq76sHMCzLq5bfhAQH0T26MjP7xFC/QlFem7KS+4fPZd1OvSAlkttdMuCdczucc4t92weB1UAE0A4Y4ztsDHCPb7sdMNalmQ8UMbMyWV65/I/yxfMxtmsDYh+8gU27D9N6cCKxces4fuq016WJiEcuqw/ezCoC9YAFQCnn3A5I+0cAKOk7LALYmu5jKb62c39WDzNLMrOk1NTUy69cfif9C1Kta5dh8Kz1tBqUSNLPe7wuTUQ8kOGAN7MCwBdAH+fcgYsdep6233UKO+dGOOeinHNR4eHhGS1DMqB4gbwM7FCP0V1u4tjJM9w/fB6vfLmc/UdPel2aiFxFGQp4MwslLdzHOecm+Zp3nu168f25y9eeAkSm+3g5YHvWlCuX49ZqJZnZN5qujSsxYeEWDakUyWUyMorGgA+B1c652HS7pgKdfdudgSnp2jv5RtM0Avaf7cqRqy9/3hBea1ODKb2aUKpQXp4Zv4Suo39k6x7NOS8S6OxSd3Nm1gRIBJYDZ6c0/D/S+uE/BcoDW4AHnHN7fP8gDAVaAkeALs65pIv9jqi
oKJeUdNFDJAucOn2GsfM203/mWk47R99m19G1SSVCg/U6hIg/MrNFzrmoC+7PCf+7roC/urbvO8prU1by3eqdVC9TiL/fW4t65Yt6XZaIXKZLBbxu3XKhskWu4YPOUbz/2I3sPXyC9sPm8tqUFRw4poewIoFEAZ+LtahZmrh+0XS+uSIfz99M89h4pi3foYewIgFCAZ/LFQwL5c9tazL56caUKJCXp8Yt5okxSXoIKxIAFPACwA2RRZjSqzGvtq7O/I2/cueABN6P38DJ01oqUMRfKeDlNyHBQXRrWpm4fjE0qVqCf0xbQ5shs1m8Za/XpYlIJijg5XciilzDyE5pD2H3Hz3JfcPm8upkvQkr4m8U8HJBaQ9hY+hySyXGL0h7E/arn/QmrIi/UMDLRRXwvQk79ZkmlC4UxrMTltD5ox/Z8qseworkdAp4yZBaEYWZ3Ksxf25Tg8Wb99J8QDzvfp/MiVN6CCuSUyngJcOCg4zHG1fiu34x3H59Sd6esZbWgxNZuEnTEYvkRAp4uWylC4cx7NEbGfV4FEdOnObB9+fxwmc/sefwCa9LE5F0FPCSabdfX4q4ftE8GXMtXy7Zxh39f+CzpK16CCuSQyjg5YrkyxPCS3ddz9fPNaFyeAFe+HwZD42Yz3qtCSviOQW8ZInrSxfis54381b72qz95SCtBify9ow1HDupNWFFvKKAlywTFGR0aFCeWc/H0OaGsrz7/QbuHJDAD2t3XfrDIpLlFPCS5UoUyEvsg3UZ370hIcHG4x/9SK9xi/ll/zGvSxPJVRTwkm1uubYE03o35fnm1xG3eifNYuMZNXsTpzSBmchVoYCXbJU3JJhn76hKXN9obqxQlL98vYq2Q+ewRBOYiWQ7BbxcFRWK52d0l5t4t2N9fj18nPbD5vLKl8vZf0QTmIlkFwW8XDVmRus6ZfjON4HZhIVbuCP2B75ckqKx8yLZQAEvV13BsNDfJjArVzQffT/5iY4jF5C865DXpYkEFAW8eKZWRGEmPXULf7u3Fiu37+euQQm8PWMNR09o7LxIVlDAi6eCgoxHGlZg1vO30qZO2tj55gPimbV6p9elifg9BbzkCOEF8xL7UF0m9mhEWGgwT4xJosfYJLbtO+p1aSJ+SwEvOUqjysX59rmmvHTX9SSu302z/vEM+2GD5p0XyQQFvOQ4eUKCeDLmWuL6RdO0agn+OX0NrQcnMn/jr16XJuJXFPCSY5Urmo8RnaL4sHMUR0+epsOI+fT7ZCmpB497XZqIX1DAS453R/VSxPWN4ZnbqvDVsu3c0f8HPp6/mdNnNHZe5GIuGfBmNsrMdpnZinRtdc1svpktNbMkM2vgazczG2xmyWa2zMzqZ2fxkntckyeYP7SoxrTe0dSKKMyfJq+g/XtzWJ6y3+vSRHKsjNzBjwZantP2L+AN51xd4DXf9wB3AVV9Xz2AYVlTpkiaKiULMK5bQwZ1qMu2fcdo9+5sXp+yggPHNOWByLkuGfDOuQTg3FWVHVDIt10Y2O7bbgeMdWnmA0XMrExWFSsCaVMetKsbwaznY3isUQXGzt/MHf3jmbJ0m6Y8EEkns33wfYC3zWwr8A7wsq89Atia7rgUX9vvmFkPX/dOUmpqaibLkNys8DWhvNGuFlN7NaFM4TB6T1zKYx8uZGOqpjwQgcwH/FNAX+dcJNAX+NDXbuc59ry3VM65Ec65KOdcVHh4eCbLEIHa5Qrz5dONebNdTX5K2UfLgYnEzlyr5QIl18tswHcGJvm2PwMa+LZTgMh0x5Xjv903ItkmOMh47OaKzHo+hla1SzP4P8laLlByvcwG/HYgxrd9O7Detz0V6OQbTdMI2O+c23GFNYpkWMmCYQzsUI/x3f67XODT4xZpuUDJlUIudYCZTQBuBUqYWQrwOtAdGGRmIcAx0kbMAHwLtAKSgSNAl2yoWeSSbqmStlzgyISNDPlPMvFrU3n+zmp0urkCIcF6/UNyB8sJow6ioqJcUlKS12VIgNry6xH+NGUF8etSqVm2EH+7tzZ1I4t4XZbIFTOzRc65qAvt162MBLzyxfMxustNvPdIfXYfOs69783hT5NXsP+oxs5LYFPAS65gZrSqnbZc4OO3VGTcAo2dl8CngJdcpWBYKK+3qcnUZ5oQUeS/Y+c37T7sdWkiWU4BL7lSrYjCTDo7dn7rPloMTGDgd+s0dl4CigJecq3fxs7/IYaWNUsz8Lv1tBqUyNzk3V6XJpIlFPCS65UsGMbgh+sxtmsDTjtHxw8W0O+Tpfx6SPPOi39TwIv4RF8Xzow+0Tx7e9q887f3j2fiwi2c0bzz4qcU8CLphIUG8/yd1ZjWuynVShXkpUnLeWjEPNbtPOh1aSKXTQEvch5VShZkYo9G/Ou+OqzfdYhWgxL51/Q1HD2hh7DiPxTwIhcQFGQ8eFMks/rF0K5uBO/9sIE7B8ZrAjPxGwp4kUsoXiAv/R+8gfHdGxIaFMTjH/3IM+MXs+ugJjCTnE0BL5JBt1xbgml9mtKnWVVmrtxJs/7xjF+gh7CScyngRS5D3pBg+jS7jml9mlKjbCH+78vlPPi+HsJKzqSAF8mEa8MLMKF7I96+vw7JqYdoPTiRd2ZoFSnJWRTwIplkZjwQlfYQtk2dsgz9PpmWAxOYozdhJYdQwItcoeIF8hL7UF3GdWsIwCN6E1ZyCAW8SBZpXKUE0/tE88xtaW/C3hEbz6dJWzUdsXhGAS+ShcJCg/lDi2p881xTqoQX4I+fL6PjyAVsTD3kdWmSCyngRbLBdaUK8mnPm/lH+9qs2L6floMSGTJrPSdOnfG6NMlFFPAi2SQoyHi4QXlm9YuheY1S9I9bR+vBiST9vMfr0iSXUMCLZLOShcJ4t2N9Rj0exZETp7l/+Dxe+XK51oSVbKeAF7lKbr++FDP7RtOtSSUmLNxCs9h4vlm2Qw9hJdso4EWuovx5Q3j17hpM6dWEkgXz0mv8YrqNSWLbvqNelyYBSAEv4oHa5QozpVdjXm1dnbkbfqV5bDwfzt7Eac1rI1lIAS/ikZDgILo1rczMvtE0qFSMN79eRfv35rB6xwGvS5MAoYAX8VhksXx89PhNDOpQl5S9R2kzZDb/mr5G89rIFVPAi+QAZka7uhF81y+Ge+qlLS5y16BE5m341evSxI9dMuDNbJSZ7TKzFee0P2tma81spZn9K137y2aW7NvXIjuKFglURfPn4Z0HbuDfTzTk9BnHwyPn8+Lny9h/REMq5fJl5A5+NNAyfYOZ3Qa0A+o452oC7/jaawAdgJq+z7xnZsFZWbBIbtCkaglm9ImmZ3RlPl+cwh0aUimZcMmAd84lAOe+evcU8JZz7rjvmLOLVLYDJjrnjjvnNgHJQIMsrFck17gmTzAvt6rOlF6NKV04bUhl97FJ7NivIZWSMZntg78OaGpmC8ws3sxu8rVHAFvTHZfiaxORTKoVUZjJTzfmlVbVmZ28m2b94xk772ctFSiXlNmADwGKAo2AF4BPzcwAO8+x5/2v0Mx6mFmSmSWlpqZmsgyR3CEkOIju0ZWZ2SeG+hWK8tqUlT
zw/jzWa6lAuYjMBnwKMMmlWQicAUr42iPTHVcO2H6+H+CcG+Gci3LORYWHh2eyDJHcpXzxfIzt2oD+D9zAhtRDtBqcyIC4dRw/pSGV8nuZDfjJwO0AZnYdkAfYDUwFOphZXjOrBFQFFmZFoSKSxsy478ZyfNcvhla1yzBo1npaD56tWSrldzIyTHICMA+oZmYpZvYEMAqo7Bs6ORHo7LubXwl8CqwCpgO9nHO6tRDJBiUK5GVQh3p81OUmjvpmqfzT5BUcPKYhlZLGcsKwq6ioKJeUlOR1GSJ+6/DxU7wzcy2j5/5MqYJhvHlPLZrXKOV1WZLNzGyRcy7qQvv1JqtIAMifN4TX29Rk0lO3UCRfKN3HJvH0uEXsOnjM69LEQwp4kQBSr3xRvnq2CS+0qMZ3q3fRrH88n2nh71xLAS8SYEKDg+h1WxWm9W5KtdIFeeHzZXQatZCte454XZpcZQp4kQB1bXgBPulxM2+2q8nizXtpMTCBj+ZozvncRAEvEsCCgozHbq7IzH4xNKhUjDe+WsUDw+fqBalcQgEvkgtEFLmGjx6/iYEP1WXT7sO0HjybIbPWc+LUGa9Lk2ykgBfJJcyMe+pFENcvhha1StM/bh1th85mWco+r0uTbKKAF8llShTIy5CH6zGyUxR7j5zgnnfn8PdvV3P0hN5JDDQKeJFcqnmNUsT1i+Ghm8ozImEjLQclaAWpAKOAF8nFCoWF8o/2tZnQvREAD4+cz8uTlnNA0x0EBAW8iHDztcWZ3jttBalPftxC89h4vlu10+uy5Aop4EUE+O8KUpN7NaZovjx0G5vEsxOWsPvQca9Lk0xSwIvI/6hTrghTn2nC882vY8aKX2geG8/kJds03YEfUsCLyO/kCQni2Tuq8s1zTahUIj99PllK19E/sn2f1oP1Jwp4EbmgqqUK8tmTt/B6mxrM37iH5rHxfDxP68H6CwW8iFxUcJDRpXElZvaNpn6Fovxpyko6jJjPxtRDXpcml6CAF5EMiSyWth7s2/fXYc0vB7hrUCLD4zdw6rSmO8ipFPAikmFmxgNRkXzXL4Zbq4Xz1rQ13PveXFZtP+B1aXIeCngRuWwlC4Ux/NEbee+R+uzYf5S2Q2fTf+Zajp/SdAc5iQJeRDLFzGhVuwxxfWNoW7csQ/6TzN2DZ7N4y16vSxMfBbyIXJGi+fMQ+2BdPupyE4ePn+K+YXP5y1erOHLilNel5XoKeBHJErdVK8nMfjE82rACo+ZsosXABOYm7/a6rFxNAS8iWaZA3hDevKcWn/RoREhQEB0/WKDJyzykgBeRLNewcnGm9W762+RlLQYk8P2aXV6Xleso4EUkW4SFpk1eNunpxhQMC6HL6B/p9+lS9h054XVpuYYCXkSyVd3IInz1bBOeu6MqU5dup1lsAtNX7PC6rFxBAS8i2S5vSDD9ml/H1GeaUKpQXp7892J6jVtM6kFNRZydFPAictXUKFuIyb0a80KLasSt2smdA+KZslRTEWeXSwa8mY0ys11mtuI8+/5gZs7MSvi+NzMbbGbJZrbMzOpnR9Ei4r9Cg4PodVsVvu2dNhVx74lL6TYmiZ0HjnldWsDJyB38aKDluY1mFgk0B7aka74LqOr76gEMu/ISRSQQVSmZNhXxq62rM2fDbprHxvNZ0lbdzWehSwa8cy4B2HOeXQOAPwLpr0Y7YKxLMx8oYmZlsqRSEQk4wUFGt6aVmd47mutLF+KFz5fRRQuLZJlM9cGbWVtgm3Pup3N2RQBb032f4ms738/oYWZJZpaUmpqamTJEJEBULJGfiT0a8UbbmizYuIcWAxKYuHCL7uav0GUHvJnlA14BXjvf7vO0nfcKOedGOOeinHNR4eHhl1uGiASYoCCj8y0VmdEnmloRhXlp0nI6jVpIyt4jXpfmtzJzB38tUAn4ycx+BsoBi82sNGl37JHpji0HbL/SIkUk9yhfPB/jujXkr/fUYvHmvbQYkMC/52/WMoGZcNkB75xb7pwr6Zyr6JyrSFqo13fO/QJMBTr5RtM0AvY75/RGg4hclqAg49FGFZjRN5p65Yvy6uQVPPLBArbu0d385cjIMMkJwDygmpmlmNkTFzn8W2AjkAyMBJ7OkipFJFcqVzQfHz/RgLfa12b5tv3cOSCB0XM26W4+gywnPMSIiopySUlJXpchIjnY9n1HeXnScuLXpdKgYjH+eX8dKpXI73VZnjKzRc65qAvt15usIuIXyha5htFdbuLt++uw+pcD3DUogQ8SN3Jad/MXpIAXEb+RftHvJlVK8NdvVvPA8Lkk7zrkdWk5kgJeRPxOqUJhjOwUxcCH6rJx92FaDU5k2A8bOHX6jNel5SgKeBHxS2bGPfUimNk3mtuqhfPP6Wu4b9hc1v5y0OvScgwFvIj4tZIFwxj+6I0M7ViPrXuPcveQRIbMWs9J3c0r4EXE/5kZd9cpS1zfaFrULE3/uHW0GzqHldv3e12apxTwIhIwihfIy9CO9Rn+6I3sOnicdkPnEDtzLcdPnfa6NE8o4EUk4LSsVZq4vtG0vaEsg/+TTJshs/lp6z6vy7rqFPAiEpCK5s9D7EN1GfV4FAeOnuLe9+bwj2mrOXYy99zNK+BFJKDdfn0pZvaL5sGoSN6P30irwYks2ny+JS4CjwJeRAJeobBQ3rqvDh8/0YDjJ89w//B5vPHVSo6cOOV1adlKAS8iuUbTquHM6BvNY40q8NGcn2k5MJF5G371uqxso4AXkVylQN4Q/tKuFp/0aIQZPDxyPq98uZyDx056XVqWU8CLSK7UsHJxpveOpnvTSkxYuIU7ByTwnzU7vS4rSyngRSTXuiZPMK+0rsGkpxtTKCyUrqOT6DNxCXsOn/C6tCyhgBeRXK9uZBG+erYJfZpV5ZvlO2gWG8/Un7b7/aLfCngRESBPSBB9ml3H1882JbJYPp6bsITuYxfxy/5jXpeWaQp4EZF0qpUuyKSnbuHV1tWZnZxK89h4Jizc4pd38wp4EZFzBAcZ3ZpWZkafaGpFFOblScvpOHIBm3897HVpl0UBLyJyARWK52d894b8o31tVmzbT4uB/rVMoAJeROQizIyHG5QnLt0yge39ZGERBbyISAaULpy2TODgh+uxdc8R7h6SyIC4dTl6KmIFvIhIBpkZbW8oy3f9YmhduwyDZq2nzZDZLNmy1+vSzksBLyJymYrlz8PADvUY9XgUB4+dov2wubz59aocN3mZAl5EJJNuv74UM/tG80jD8nw4exMtBiYwJ3m312X9RgEvInIFCoaF8td7avNJj0aEBAXxyAcLePHzZew/6v3kZQp4EZEs0LBycab1bkrPmMp8vjiFZrHxTF+xw9OaFPAiIlkkLDSYl++qzpRejQkvkJcn/72YJz9exK4D3kx3cMmAN7NRZrbLzFaka3vbzNaY2TIz+9LMiqTb97KZJZvZWjNrkV2Fi4jkVLUiCjPlmcb8sWU1/rN2F81i4/nkx6s/3UFG7uBHAy3PaYsDajnn6gDrgJcBzKwG0AGo6fvMe2YWnGXVioj4idDgIJ6+tQrTezfl+jKFePGL5
TzywdWd7uCSAe+cSwD2nNM20zl3djzQfKCcb7sdMNE5d9w5twlIBhpkYb0iIn6lcngBJnZvxN/urcWylLTpDkYkbODU6TPZ/ruzog++KzDNtx0BbE23L8XX9jtm1sPMkswsKTU1NQvKEBHJmYKCjEcaViCuXzRNqpTg79+uof2wuazafiB7f++VfNjMXgFOAePONp3nsPN2OjnnRjjnopxzUeHh4VdShoiIXyhT+BpGdopiyMP12Lb3KG2HzuaDxI3Z9vtCMvtBM+sM3A3c4f775CAFiEx3WDlge+bLExEJLGZGmxvK0qRKCd78ZhWVSuTPtt+VqYA3s5bAi0CMc+5Iul1TgfFmFguUBaoCC6+4ShGRAFM0fx5iH6ybrb/jkgFvZhOAW4ESZpYCvE7aqJm8QJyZAcx3zj3pnFtpZp8Cq0jruunlnMu5U62JiAQwywnLUEVFRbmkpCSvyxAR8Stmtgf1HYQAAAQPSURBVMg5F3Wh/XqTVUQkQCngRUQClAJeRCRAKeBFRAKUAl5EJEAp4EVEAlSOGCZpZqnA5kx+vASQc9bIyhqBdk6Bdj4QeOcUaOcDgXdO5zufCs65C871kiMC/kqYWdLFxoH6o0A7p0A7Hwi8cwq084HAO6fMnI+6aEREApQCXkQkQAVCwI/wuoBsEGjnFGjnA4F3ToF2PhB453TZ5+P3ffAiInJ+gXAHLyIi56GAFxEJUH4d8GbW0szWmlmymb3kdT1Zwcx+NrPlZrbUzPxuDmUzG2Vmu8xsRbq2YmYWZ2brfX8W9bLGy3WBc/qzmW3zXaelZtbKyxovh5lFmtn3ZrbazFaaWW9fu19ep4ucjz9fozAzW2hmP/nO6Q1feyUzW+C7Rp+YWZ6L/hx/7YM3s2BgHdCctKUCfwQeds6t8rSwK2RmPwNRzjm/fEHDzKKBQ8BY51wtX9u/gD3Oubd8/xAXdc696GWdl+MC5/Rn4JBz7h0va8sMMysDlHHOLTazgsAi4B7gcfzwOl3kfB7Ef6+RAfmdc4fMLBSYDfQG+gGTnHMTzWw48JNzbtiFfo4/38E3AJKdcxudcyeAiUA7j2vK9ZxzCcCec5rbAWN822NI+8vnNy5wTn7LObfDObfYt30QWA1E4KfX6SLn47dcmkO+b0N9Xw64Hfjc137Ja+TPAR8BbE33fQp+flF9HDDTzBaZWQ+vi8kipZxzOyDtLyNQ0uN6ssozZrbM14XjF90Z5zKzikA9YAEBcJ3OOR/w42tkZsFmthTYBcQBG4B9zrlTvkMumXn+HPB2njb/7G/6X42dc/WBu4Bevu4ByXmGAdcCdYEdQH9vy7l8ZlYA+ALo45w74HU9V+o85+PX18g5d9o5VxcoR1qPRfXzHXaxn+HPAZ8CRKb7vhyw3aNasoxzbrvvz13Al6RdWH+309dPera/dJfH9Vwx59xO31/AM8BI/Ow6+fp1vwDGOecm+Zr99jqd73z8/Rqd5ZzbB/wANAKKmFmIb9clM8+fA/5HoKrvqXIeoAMw1eOaroiZ5fc9JMLM8gN3Aisu/im/MBXo7NvuDEzxsJYscTYIfe7Fj66T7wHeh8Bq51xsul1+eZ0udD5+fo3CzayIb/saoBlpzxa+B+73HXbJa+S3o2gAfMOeBgLBwCjn3N88LumKmFll0u7aAUKA8f52TmY2AbiVtKlNdwKvA5OBT4HywBbgAeec3zy0vMA53Ura//o74Geg59n+65zOzJoAicBy4Iyv+f9I67f2u+t0kfN5GP+9RnVIe4gaTNqN+KfOub/4MmIiUAxYAjzqnDt+wZ/jzwEvIiIX5s9dNCIichEKeBGRAKWAFxEJUAp4EZEApYAXEQlQCngRkQClgBcRCVD/D3iHVaXR1UsQAAAAAElFTkSuQmCC\n",
431 | "text/plain": [
432 | ""
433 | ]
434 | },
435 | "metadata": {
436 | "needs_background": "light"
437 | },
438 | "output_type": "display_data"
439 | },
449 | {
450 | "data": {
451 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAe50lEQVR4nO3de3Rd5X3m8e9zdLElQJINgjGWg53gZriUJlgFJ2G6MpCAYZLYnUAXrMngyTDLnZSk6WRmGpj+4Q4pa5E2U1pWCFkkeGLSFOMALW7ihLqElEnKTQbCzSFWDMECg0XkC+CLbr/547ySj6VzJPkc2UfSfj5raZ19fvvd+7zbB/Ro396tiMDMzAwgV+0OmJnZ1OFQMDOzYQ4FMzMb5lAwM7NhDgUzMxtWW+0OlOukk06KhQsXVrsbZmbTyubNm9+MiNZS86dtKCxcuJCOjo5qd8PMbFqR9Kux5vvwkZmZDXMomJnZMIeCmZkNGzcUJK2RtFPScyPqn5P0oqTnJf15Qf16SZ1p3iUF9WWp1inpuoL6IkmPSdoq6W5J9ZO1cWZmdmQmsqfwLWBZYUHSvwWWA+dExFnAV1L9TOBK4Ky0zNck1UiqAW4FLgXOBK5KbQG+DNwcEYuBXcA1lW6UmZmVZ9xQiIiHgZ4R5c8AN0XEwdRmZ6ovB9ZFxMGIeAnoBM5LP50RsS0ieoF1wHJJAi4E7knLrwVWVLhNZmZWpnLPKfwG8G/SYZ9/lvTbqT4f2F7QrivVStVPBHZHRP+IelGSVknqkNTR3d1dZtfNzKyUckOhFpgDLAX+J7A+/dWvIm2jjHpREXF7RLRHRHtra8l7L8a09l9eZsPPXitrWTOzma7cUOgC7ou8x4FB4KRUX1DQrg14bYz6m0CLpNoR9aPmrsdf4R8cCmZmRZUbCn9P/lwAkn4DqCf/C34DcKWkWZIWAYuBx4EngMXpSqN68iejN0T+CT8PAZen9a4E7i93YyaiuaGOPfv7juZHmJlNWxO5JPUu4BHgvZK6JF0DrAHenS5TXQesTHsNzwPrgReAHwLXRsRAOmfwWeABYAuwPrUF+CLwBUmd5M8x3DG5m3i45oY69uxzKJiZFTPu2EcRcVWJWZ8q0f5G4MYi9Y3AxiL1beSvTjomWhrreKbLoWBmVkzm7mj24SMzs9IyGQr7+wY42D9Q7a6YmU052QuFxvwoGt5bMDMbLXuh0FAHwF6HgpnZKJkNhd2+AsnMbJTMhUJLCgUfPjIzGy1zoeA9BTOz0jIbCt5TMDMbLXOh0ORQMDMrKXOhUJMTJ8yudSiYmRWRuVAA39VsZlZKJkOhpdGhYGZWTCZDobmhjt37eqvdDTOzKSezoeA9BTOz0TIaCvXs2d8/fkMzs4zJaCjUsWd/L/kHv5mZ2ZCJPHltjaSd6SlrI+f9D0kh6aT0XpJukdQp6RlJ5xa0XSlpa/pZWVBfIunZtMwtkjRZG1dKc0MdfQPB/j4Pn21mVmgiewrfApaNLEpaAHwUeKWgfCn55zIvBlYBt6W2c4HVwPnkn7K2WtKctMxtqe3QcqM+a7K1NPoGNjOzYsYNhYh4GOgpMutm4I+BwmMwy4E70/OaHwVaJM0DLgE2RURPROwCNgHL0rymiHgk8sdy7gRWVLZJ4/P4R2ZmxZV1TkHSJ4BXI+JnI2bNB7YXvO9KtbHqXUXqR5VHSjUzK672SBeQ1Aj8CXBxsdlFalFGvdRnryJ/qIl3vetd4/a1FI9/ZGZWXDl7Cu8BFgE/k/Qy0AY8Kelfkf9Lf0FB2zbgtXHqbUXqRUXE7RHRHhHtra2tZXQ9b3ikVB8+MjM7zBGHQkQ8GxEnR8TCiFhI/hf7uRHxOrABuDpdhbQU2BMRO4AHgIslzUknmC8GHkjz3pK0NF11dDVw/yRtW0k+0WxmVtxELkm9C3gEeK+kLknXjNF8I7AN6AS+AfwBQET0AF8Cnkg/N6QawGeAb6Zlfgn8oLxNmbjjZ9VSk5NDwcxshHHPKUTEVePMX1gwHcC1JdqtAdYUqXcAZ4/Xj8kkiabZteze7/GPzMwKZfKOZoCWRg91YWY2UmZDocmD4pmZjZLZUGhuqGOPh882MztMZkOhxXsKZmajZDYU/EwFM7PRMh8Kg4MePtvMbEhmQ6GlsY7BgLd7fQWSmdmQzIZCk4e6MDMbJbOh0OxB8czMRslsKHj4bDOz0TIbCs0eFM/MbJTshoKfvmZmNkpmQ6GloR7wnoKZWaHMhsLsuhz1NTmHgplZgcyGgqQ0KJ7HPzIzG5LZUID8DWzeUzAzOyTToeDxj8zMDjeRx3GukbRT0nMFtb+Q9HNJz0j6O0ktBfOul9Qp6UVJlxTUl6Vap6TrCuqLJD0maaukuyXVT+YGjqW5oc5XH5mZFZjInsK3gGUjapuAsyPiHOAXwPUAks4ErgTOSst8TVKNpBrgVuBS4EzgqtQW4MvAzRGxGNgFjPUM6Enl4bPNzA43bihExMNAz4jaP0bE0EhyjwJtaXo5sC4iDkbES0AncF766YyIbRHRC6wDlksScCFwT1p+LbCiwm2aMD99zczscJNxTuE/Az9I0/OB7QXzulKtVP1EYHdBwAzVi5K0SlKHpI7u7u6KO97cUMdbB/oZ8PDZZmZAhaEg6U+AfuA7Q6UizaKMelERcXtEtEdEe2tr65F2d5SWNNTFXu8tmJkBUFvugpJWAh8DLoqIoV/kXcCCgmZtwGtpulj9TaBFUm3aWyhsf9QVjpQ657hjdn7bzGzKKmtPQdIy4IvAJyJiX8GsDcCVkmZJWgQsBh4HngAWpyuN6smfjN6QwuQh4PK0/Erg/vI25cgNj3/kPQUzM2Bil6TeBTwCvFdSl6RrgK8CJwCbJD0t6esAEfE8sB54AfghcG1EDKS9gM8CDwBbgPWpLeTD5QuSOsmfY7hjUrdwDC0eKdXM7DDjHj6KiKuKlEv+4o6IG4Ebi9Q3AhuL1LeRvzrpmPODdszMDpfpO5oPPZLT4x+ZmUHGQ8F7CmZmh8t0KMyqraGhrsahYGaWZDoUwOMfmZkVynwoePhsM7NDMh8KHv/IzOyQzIeCn6lgZnZI5kPBw2ebmR2S+VDwnoKZ2SEOhYY69vUO0Ns/WO2umJlVXeZDweMfmZkdkvlQGB7qYr+HujAzy3woeKgLM7NDMh8KLY35h+s4FMzMHAqHHrTjoS7MzBwKPnxkZnbIRJ68tkbSTknPFdTmStokaWt6nZPqknSLpE5Jz0g6t2CZlan91vR856H6EknPpmVukaTJ3sixNM3OP2fIoWBmNrE9hW8By0bUrgMejIjFwIPpPcCl5J/LvBhYBdwG+RABVgPnk3/K2uqhIEltVhUsN/KzjqramhwnzKr14SMzMyYQChHxMNAzorwcWJum1wIrCup3Rt6jQIukecAlwKaI6ImIXcAmYFma1xQRj0REAHcWrOuYaW6sY6/3FMzMyj6ncEpE7ABIryen+nxge0G7rlQbq95VpF6UpFWSOiR1dHd3l9n10TzUhZlZ3mS
faC52PiDKqBcVEbdHRHtEtLe2tpbZxdGaG+rY7VAwMys7FN5Ih35IrztTvQtYUNCuDXhtnHpbkfox5QftmJnllRsKG4ChK4hWAvcX1K9OVyEtBfakw0sPABdLmpNOMF8MPJDmvSVpabrq6OqCdR0zPnxkZpZXO14DSXcBHwZOktRF/iqim4D1kq4BXgGuSM03ApcBncA+4NMAEdEj6UvAE6ndDRExdPL6M+SvcGoAfpB+jqmmhjr27OsjIjjGV8SamU0p44ZCRFxVYtZFRdoGcG2J9awB1hSpdwBnj9ePo6mloZ7egUEO9A3SUF9Tza6YmVVV5u9oBt/VbGY2xKFAwfhHHj7bzDLOoUDBg3Z8V7OZZZxDAR8+MjMb4lCg8PCRQ8HMss2hQH7sI8DjH5lZ5jkUgOPra8nJh4/MzBwKQC4nmhrqPHy2mWWeQyFp8VAXZmYOhSEe/8jMzKEwrMnDZ5uZORSGtDTW++ojM8s8h0LS3FDrw0dmlnkOhWTonEJ+oFczs2xyKCQtDfUMDAZvH+yvdlfMzKrGoZB4/CMzM4fCsKah8Y98A5uZZVhFoSDpv0l6XtJzku6SNFvSIkmPSdoq6W5J9antrPS+M81fWLCe61P9RUmXVLZJ5Wnx+EdmZuWHgqT5wB8C7RFxNlADXAl8Gbg5IhYDu4Br0iLXALsi4nTg5tQOSWem5c4ClgFfk3TMn4npw0dmZpUfPqoFGiTVAo3ADuBC4J40fy2wIk0vT+9J8y+SpFRfFxEHI+IloBM4r8J+HTEPn21mVkEoRMSrwFeAV8iHwR5gM7A7IoYu4ekC5qfp+cD2tGx/an9iYb3IMoeRtEpSh6SO7u7ucrte1PDT1xwKZpZhlRw+mkP+r/xFwKnAccClRZoOXfivEvNK1UcXI26PiPaIaG9tbT3yTo+hoa6Guho5FMws0yo5fPQR4KWI6I6IPuA+4INASzqcBNAGvJamu4AFAGl+M9BTWC+yzDEjiWYPn21mGVdJKLwCLJXUmM4NXAS8ADwEXJ7arATuT9Mb0nvS/B9F/vbhDcCV6eqkRcBi4PEK+lW25oY6X31kZplWO36T4iLiMUn3AE8C/cBTwO3A94F1kv4s1e5Ii9wBfFtSJ/k9hCvTep6XtJ58oPQD10bEQLn9qoSHzzazrCs7FAAiYjWwekR5G0WuHoqIA8AVJdZzI3BjJX2ZDM0NdXS/fbDa3TAzqxrf0VygpbHeewpmlmkOhQLNDXXs8YlmM8swh0KBpoY69h7oZ2DQw2ebWTY5FAq0pLua3zrgvQUzyyaHQgGPf2RmWedQKNDs4bPNLOMcCgU8/pGZZZ1DoYAPH5lZ1jkUCnj4bDPLOodCgaFHcnr8IzPLKodCgdl1Ncyuy/nwkZlllkNhhPzw2b3V7oaZWVU4FEZoafD4R2aWXQ6FETx8tpllmUNhhCY/fc3MMsyhMEJLo5++ZmbZVVEoSGqRdI+kn0vaIukDkuZK2iRpa3qdk9pK0i2SOiU9I+ncgvWsTO23SlpZ+hOPvuaGOt+nYGaZVemewl8DP4yIfw38FrAFuA54MCIWAw+m9wCXkn/+8mJgFXAbgKS55J/edj75J7atHgqSamhpqGNf7wB9A4PV6oKZWdWUHQqSmoDfIT2DOSJ6I2I3sBxYm5qtBVak6eXAnZH3KNAiaR5wCbApInoiYhewCVhWbr8q1ezxj8wswyrZU3g30A38X0lPSfqmpOOAUyJiB0B6PTm1nw9sL1i+K9VK1UeRtEpSh6SO7u7uCrpemkdKNbMsqyQUaoFzgdsi4v3AOxw6VFSMitRijProYsTtEdEeEe2tra1H2t8J8aB4ZpZllYRCF9AVEY+l9/eQD4k30mEh0uvOgvYLCpZvA14bo14VzR7/yMwyrOxQiIjXge2S3ptKFwEvABuAoSuIVgL3p+kNwNXpKqSlwJ50eOkB4GJJc9IJ5otTrSoOjZTqoS7MLHtqK1z+c8B3JNUD24BPkw+a9ZKuAV4BrkhtNwKXAZ3AvtSWiOiR9CXgidTuhojoqbBfZWtprAdgj88pmFkGVRQKEfE00F5k1kVF2gZwbYn1rAHWVNKXydI0O/9Psmd/f5V7YmZ27PmO5hFqa3IcP6vWh4/MLJMcCkV4UDwzyyqHQhHNDR7/yMyyyaFQRLNHSjWzjHIoFNHS6MNHZpZNDoUifE7BzLLKoVCEh882s6xyKBTR3FhHb/8gB/oGqt0VM7NjyqFQhAfFM7OscigU4eGzzSyrHApFtDSk8Y+8p2BmGeNQKMKHj8wsqxwKRRw6fOTxj8wsWxwKRfg5zWaWVQ6FIk6YVYvkp6+ZWfY4FIrI5UTTbN/AZmbZU3EoSKqR9JSk76X3iyQ9JmmrpLvTU9mQNCu970zzFxas4/pUf1HSJZX2aTJ4/CMzy6LJ2FP4PLCl4P2XgZsjYjGwC7gm1a8BdkXE6cDNqR2SzgSuBM4ClgFfk1QzCf2qiMc/MrMsqigUJLUB/w74Znov4ELgntRkLbAiTS9P70nzL0rtlwPrIuJgRLxE/hnO51XSr8ng4bPNLIsq3VP4K+CPgcH0/kRgd0QMPeC4C5ifpucD2wHS/D2p/XC9yDKHkbRKUoekju7u7gq7PjY/aMfMsqjsUJD0MWBnRGwuLBdpGuPMG2uZw4sRt0dEe0S0t7a2HlF/j5QPH5lZFtVWsOyHgE9IugyYDTSR33NokVSb9gbagNdS+y5gAdAlqRZoBnoK6kMKl6maoeGzI4L8US4zs5mv7D2FiLg+ItoiYiH5E8U/ioj/ADwEXJ6arQTuT9Mb0nvS/B9FRKT6lenqpEXAYuDxcvs1WVoa6xgYDN7p9fDZZpYdlewplPJFYJ2kPwOeAu5I9TuAb0vqJL+HcCVARDwvaT3wAtAPXBsRVf9NXDjUxfGzjsY/k5nZ1DMpv+0i4sfAj9P0NopcPRQRB4ArSix/I3DjZPRlsiw88TgAnnxlN21zGqvcGzOzY8N3NJfw2wvnMr+lge92bB+/sZnZDOFQKCGXE59c0sZPOt9kx5791e6Omdkx4VAYwyfPnU8E3Pfkq9XuipnZMeFQGMNpJx7HeYvmcs/mLvIXSpmZzWwOhXFcsaSNl958h82/2lXtrpiZHXUOhXFc9pvzaKyv4Z7NXdXuipnZUedQGMdxs2q57Dfn8b1ndrCvt3/8BczMpjGHwgRcvqSNtw/288Dzr1e7K2ZmR5VDYQLOWziXd81t5LsdPoRkZjObQ2ECcjnxyXPb+Jdf/pquXfuq3R0zs6PGoTBBn1ySf8TDvZt9z4KZzVwOhQlqm9PIB99zIvc8uZ3BQd+zYGYzk0PhCFzR3sb2nv08/nJPtbtiZnZUOBSOwLKz5nH8rFrfs2BmM5ZD4Qg01NfwsXPmsfHZHbxz0PcsmNnM41A4QpcvaWNf7wAbn91R7a6YmU06h8IRWnLaHBaddBzf9SEkM5uByg4FSQskPSRpi6TnJX0+1edK2iRpa3qdk+qSdIukTknPSDq3YF0rU/utklaW+sypQBKXL2nj8Zd6+NWv36l2d8zMJlUlewr9wH+PiD
OApcC1ks4ErgMejIjFwIPpPcClwOL0swq4DfIhAqwGzif/GM/VQ0EyVf37c+cjwb3eWzCzGabsUIiIHRHxZJp+C9gCzAeWA2tTs7XAijS9HLgz8h4FWiTNAy4BNkVET0TsAjYBy8rt17Ewr7mBC04/iXuffNX3LJjZjDIp5xQkLQTeDzwGnBIROyAfHMDJqdl8oPCBx12pVqpe7HNWSeqQ1NHd3T0ZXS/bFe0LeHX3fh7Z9uuq9sPMbDJVHAqSjgfuBf4oIvaO1bRILcaojy5G3B4R7RHR3traeuSdnUQXn3kKJ8z2PQtmNrNUFAqS6sgHwnci4r5UfiMdFiK97kz1LmBBweJtwGtj1Ke02XU1fPy3TuUHz+1g74G+anfHzGxSVHL1kYA7gC0R8ZcFszYAQ1cQrQTuL6hfna5CWgrsSYeXHgAuljQnnWC+ONWmvCuWtHGgb5CNz/ieBTObGSrZU/gQ8B+BCyU9nX4uA24CPippK/DR9B5gI7AN6AS+AfwBQET0AF8Cnkg/N6TalPe+BS28p9X3LJjZzFFb7oIR8ROKnw8AuKhI+wCuLbGuNcCacvtSLZK4on0BN/3g52zrfpt3tx5f7S6ZmVXEdzRX6HffP5+c4Ov//Etfnmpm055DoUKnNM3m6g8sZH1HF6u+vdknnc1sWnMoTILVHz+T1R8/k4de3MmKW39K5863qt0lM7OyOBQmgSQ+/aFFfOe/nM+efX0s/+pP+eFzr1e7W2ZmR8yhMImWvvtE/uFzF3D6ycfzX/9mM1954EUGfJ7BzKYRh8IkO7Wlgbt//wP8XnsbX32ok2vWPsGefT7PYGbTg0PhKJhdV8OXP3kOf7bibH7a+SafuPUnvPi6zzOY2dTnUDhKJPGppaexbtVS9vUOsOLWn/K9Z6b86B1mlnEOhaNsyWlz+f7nLuDMU5v47N8+xZe+9wKv7d5f7W6ZmRWl/I3G0097e3t0dHRUuxsT1ts/yA3fe56/efQVAM6Y18RHzjiZi844hXPmN5PLlbo53Mxs8kjaHBHtJec7FI6tzp1v8+CWN3hwy046ftXDYEDrCbO48L0nc9EZJ3PB4pNorC979BEzszE5FKawXe/08uNf7OSftuzk4Re7eetgP7Nqc3zwPSdy0RmnsOS0OSyY28jxsxwSZjY5HArTRG//IE+83MM/pb2IV3r2Dc+b01jHgrmNLJjTSNvcBhbMaUzvG5g/p4FZtTVV7LmZTScOhWkoIvhl99v8/PW32N6zn+279rG9Zx9du/bz6q799A4MDreVoPX4WbQ01tE0u46mhjqaG+poml1LU8NQrZam2fl6Q30Ns+uGfnLMrs1Pz6rN+byGWQaMFwo+LjEFSeL0k0/g9JNPGDVvYDB4Y+8BtvfsY/uu/Wzv2ceOPfvZu7+fvQf6eGPvAbbufIu9+/t560AfR3JDdX1tjtm1ueHQqK0R9TU56mpy1NaIupoc9QXTdem1NpejNidyOVGbEzVDrzWiRkO1HDU5yOXytZqckESNoCYtWyORU5rOQU75NjlBTcF0vg1peaFUU/q3y6nwdWh6aH6+LQXTEohD7WHkOvPz02Kjlsu/Dn15h9c0Yh0Mf8bh8ynyOUO11KNRyw1NDy9buKBZmRwK00xNTpza0sCpLQ2cP07bwcHgnd5+9h7oZ+/+Pvbs72N/7wAH+gY40D/Agb7B/PTQa/8AB4drA/QNBn39g/QPBn0Dg/T2D7K/b4C+A4P0DeRrfQOD9A8EA4NB/2AwGEH/wODw+6FXO/bGC5H8+0OJVnTeiPVQZF0j34xsV2wdo+aPaDeydfHlC1seHogq0p9inztSsUAu9hmlVlGsXOzzii4+6vNKr+f7f3jBUTtsPGVCQdIy4K+BGuCbEXHTOIvYOHI5ccLsOk6YXcf8loaq9mWwIDQGBoOBCGIQBtL7wTg0b3CQ4feDUTCd6hH55fK1IICIQ/MiLRek19Q2Pw+CQ20ZbnN4e0auk3w9LVLQNj89dBQ20oJDfYrCaQ6tGw4ty2Hzh9ZzaJ1weJ+iYPmRbYPDZ45c5+HLjZ434uWwzz28XtA3Dt+mkaKgL8XajexX6bbFP2fkZ47Vn1J9GLkcYy5X/I+cYtWi/x5F28XYbUYURobgZJoSoSCpBriV/OM7u4AnJG2IiBeq2zObLLmcqPc5C7Mpb6rc0Xwe0BkR2yKiF1gHLK9yn8zMMmeqhMJ8YHvB+65UO4ykVZI6JHV0d3cfs86ZmWXFVAmFYscVRh9Wi7g9Itojor21tfUYdMvMLFumSih0AQsK3rcBHlLUzOwYmyqh8ASwWNIiSfXAlcCGKvfJzCxzpsTVRxHRL+mzwAPkL0ldExHPV7lbZmaZMyVCASAiNgIbq90PM7MsmyqHj8zMbAqYtgPiSeoGflXm4icBb05id6ptpm0PzLxtmmnbAzNvm2ba9kDxbTotIkpevjltQ6ESkjrGGiVwuplp2wMzb5tm2vbAzNummbY9UN42+fCRmZkNcyiYmdmwrIbC7dXuwCSbadsDM2+bZtr2wMzbppm2PVDGNmXynIKZmRWX1T0FMzMrwqFgZmbDMhUKkpZJelFSp6Trqt2fySDpZUnPSnpaUke1+1MOSWsk7ZT0XEFtrqRNkram1znV7OORKLE9fyrp1fQ9PS3psmr28UhIWiDpIUlbJD0v6fOpPp2/o1LbNC2/J0mzJT0u6Wdpe/53qi+S9Fj6ju5OY8uNva6snFNIT3f7BQVPdwOumu5Pd5P0MtAeEdP2phtJvwO8DdwZEWen2p8DPRFxUwrwORHxxWr2c6JKbM+fAm9HxFeq2bdySJoHzIuIJyWdAGwGVgD/ien7HZXapt9jGn5Pyj/A+biIeFtSHfAT4PPAF4D7ImKdpK8DP4uI28ZaV5b2FPx0tykqIh4GekaUlwNr0/Ra8v/DTgsltmfaiogdEfFkmn4L2EL+IVjT+TsqtU3TUuS9nd7WpZ8ALgTuSfUJfUdZCoUJPd1tGgrgHyVtlrSq2p2ZRKdExA7I/w8MnFzl/kyGz0p6Jh1emjaHWgpJWgi8H3iMGfIdjdgmmKbfk6QaSU8DO4FNwC+B3RHRn5pM6HdelkJhQk93m4Y+FBHnApcC16ZDFzb13Aa8B3gfsAP4P9XtzpGTdDxwL/BHEbG32v2ZDEW2adp+TxExEBHvI/+QsvOAM4o1G289WQqFGfl0t4h4Lb3uBP6O/H8MM8Eb6bjv0PHfnVXuT0Ui4o30P+0g8A2m2feUjlPfC3wnIu5L5Wn9HRXbpun+PQFExG7gx8BSoEXS0CMSJvQ7L0uhMOOe7ibpuHSSDEnHARcDz4291LSxAViZplcC91exLxUb+uWZ/C7T6HtKJzHvALZExF8WzJq231GpbZqu35OkVkktaboB+Aj58yQPAZenZhP6jjJz9RFAurzsrzj0dLcbq9ylikh6N/m9A8g/MOlvp+M2SboL+DD5YX7fAFYDfw+sB94FvAJcERHT4uRtie35MPlDEgG8DPz+0PH4qU7SBcD/A54FBlP5f5E/Bj9dv6NS23QV0/B7knQO+RPJNeT/2
F8fETek3xHrgLnAU8CnIuLgmOvKUiiYmdnYsnT4yMzMxuFQMDOzYQ4FMzMb5lAwM7NhDgUzMxvmUDAzs2EOBTMzG/b/AUv5vMyt2x8UAAAAAElFTkSuQmCC\n",
452 | "text/plain": [
453 | ""
454 | ]
455 | },
456 | "metadata": {
457 | "needs_background": "light"
458 | },
459 | "output_type": "display_data"
460 | }
461 | ],
462 | "source": [
463 | "%matplotlib inline\n",
464 | "import matplotlib.pyplot as plt\n",
465 | "import numpy as np\n",
466 | "\n",
467 | "def showPlot(points, title):\n",
468 | " plt.figure()\n",
469 | " fig, ax = plt.subplots()\n",
470 | " plt.plot(points)\n",
471 | "\n",
472 | "showPlot(cbow_losses, 'CBOW Losses')\n",
473 | "showPlot(sg_losses, 'SkipGram Losses')"
474 | ]
475 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 10,
486 | "metadata": {},
487 | "outputs": [
488 | {
489 | "name": "stdout",
490 | "output_type": "stream",
491 | "text": [
492 | "Loaded 400000 words\n"
493 | ]
494 | }
495 | ],
496 | "source": [
497 | "import torch\n",
498 | "import torchtext.vocab as vocab\n",
499 | "glove = vocab.GloVe(name = \"6B\", dim = 100)\n",
500 | "print(\"Loaded {} words\".format(len(glove.itos)))"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 11,
506 | "metadata": {},
507 | "outputs": [],
508 | "source": [
509 | "def get_word(word):\n",
510 | " return glove.vectors[glove.stoi[word]]"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 12,
516 | "metadata": {},
517 | "outputs": [],
518 | "source": [
519 | "def closest(vec, n = 10):\n",
520 | " all_dists = [(w, torch.dist(vec, get_word(w))) for w in glove.itos]\n",
521 | " return sorted(all_dists, key = lambda t: t[1])[:n]"
522 | ]
523 | },
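524 | {
525 | "cell_type": "markdown",
526 | "metadata": {},
527 | "source": [
528 | "`closest` computes `torch.dist` in a Python loop over all 400,000 vectors; the same top-n search can be done in one broadcasted call. A sketch using the same GloVe objects (`closest_fast` is an added name, not part of the original notebook):"
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": null,
534 | "metadata": {},
535 | "outputs": [],
536 | "source": [
537 | "def closest_fast(vec, n=10):\n",
538 | "    # Euclidean distance from vec to every row of the embedding matrix at once\n",
539 | "    dists = torch.norm(glove.vectors - vec, dim=1)\n",
540 | "    vals, idxs = torch.topk(dists, n, largest=False)\n",
541 | "    return [(glove.itos[i.item()], v.item()) for v, i in zip(vals, idxs)]"
542 | ]
543 | },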
524 | {
525 | "cell_type": "code",
526 | "execution_count": 13,
527 | "metadata": {},
528 | "outputs": [],
529 | "source": [
530 | "def print_tuples(tuples):\n",
531 | " for tuple in tuples:\n",
532 | " print(\"(%.4f) %s\" % (tuple[1], tuple[0]))"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 14,
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "name": "stdout",
542 | "output_type": "stream",
543 | "text": [
544 | "(0.0000) google\n",
545 | "(3.0772) yahoo\n",
546 | "(3.8836) microsoft\n",
547 | "(4.1048) web\n",
548 | "(4.1082) aol\n",
549 | "(4.1165) facebook\n",
550 | "(4.3917) ebay\n",
551 | "(4.4122) msn\n",
552 | "(4.4540) internet\n",
553 | "(4.4651) netscape\n"
554 | ]
555 | }
556 | ],
557 | "source": [
558 | "print_tuples(closest(get_word(\"google\")))"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": 15,
564 | "metadata": {},
565 | "outputs": [],
566 | "source": [
567 | "def analogy(w1, w2, w3, n=5, filter_given=True):\n",
568 | " print('\\n[%s : %s :: %s : ?]' % (w1, w2, w3))\n",
569 | " closest_words = closest(get_word(w2) - get_word(w1) + get_word(w3)) \n",
570 | " if filter_given:\n",
571 | " closest_words = [t for t in closest_words if t[0] not in [w1, w2, w3]]\n",
572 | " \n",
573 | " print_tuples(closest_words[:n])"
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 16,
579 | "metadata": {},
580 | "outputs": [
581 | {
582 | "name": "stdout",
583 | "output_type": "stream",
584 | "text": [
585 | "\n",
586 | "[king : man :: queen : ?]\n",
587 | "(4.0811) woman\n",
588 | "(4.6916) girl\n",
589 | "(5.2703) she\n",
590 | "(5.2788) teenager\n",
591 | "(5.3084) boy\n"
592 | ]
593 | }
594 | ],
595 | "source": [
596 | "analogy(\"king\", \"man\", \"queen\")"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 17,
602 | "metadata": {},
603 | "outputs": [
604 | {
605 | "name": "stdout",
606 | "output_type": "stream",
607 | "text": [
608 | "\n",
609 | "[man : actor :: woman : ?]\n",
610 | "(2.8133) actress\n",
611 | "(5.0039) comedian\n",
612 | "(5.1399) actresses\n",
613 | "(5.2773) starred\n",
614 | "(5.3085) screenwriter\n",
615 | "\n",
616 | "[cat : kitten :: dog : ?]\n",
617 | "(3.8146) puppy\n",
618 | "(4.2944) rottweiler\n",
619 | "(4.5888) puppies\n",
620 | "(4.6086) pooch\n",
621 | "(4.6520) pug\n",
622 | "\n",
623 | "[dog : puppy :: cat : ?]\n",
624 | "(3.8146) kitten\n",
625 | "(4.0255) puppies\n",
626 | "(4.1575) kittens\n",
627 | "(4.1882) pterodactyl\n",
628 | "(4.1945) scaredy\n",
629 | "\n",
630 | "[russia : moscow :: france : ?]\n",
631 | "(3.2697) paris\n",
632 | "(4.6857) french\n",
633 | "(4.7085) lyon\n",
634 | "(4.9087) strasbourg\n",
635 | "(5.0362) marseille\n",
636 | "\n",
637 | "[obama : president :: trump : ?]\n",
638 | "(6.4302) executive\n",
639 | "(6.5149) founder\n",
640 | "(6.6997) ceo\n",
641 | "(6.7524) hilton\n",
642 | "(6.7729) walt\n",
643 | "\n",
644 | "[rich : mansion :: poor : ?]\n",
645 | "(5.8262) residence\n",
646 | "(5.9444) riverside\n",
647 | "(6.0283) hillside\n",
648 | "(6.0328) abandoned\n",
649 | "(6.0681) bungalow\n",
650 | "\n",
651 | "[elvis : rock :: eminem : ?]\n",
652 | "(5.6597) rap\n",
653 | "(6.2057) rappers\n",
654 | "(6.2161) rapper\n",
655 | "(6.2444) punk\n",
656 | "(6.2690) hop\n",
657 | "\n",
658 | "[paper : newspaper :: screen : ?]\n",
659 | "(4.7810) tv\n",
660 | "(5.1049) television\n",
661 | "(5.3818) cinema\n",
662 | "(5.5524) feature\n",
663 | "(5.5646) shows\n",
664 | "\n",
665 | "[monet : paint :: michelangelo : ?]\n",
666 | "(6.0782) plaster\n",
667 | "(6.3768) mold\n",
668 | "(6.3922) tile\n",
669 | "(6.5819) marble\n",
670 | "(6.6524) image\n",
671 | "\n",
672 | "[beer : barley :: wine : ?]\n",
673 | "(5.6021) grape\n",
674 | "(5.6760) beans\n",
675 | "(5.8174) grapes\n",
676 | "(5.9035) lentils\n",
677 | "(5.9454) figs\n",
678 | "\n",
679 | "[earth : moon :: sun : ?]\n",
680 | "(6.2294) lee\n",
681 | "(6.4125) kang\n",
682 | "(6.4644) tan\n",
683 | "(6.4757) yang\n",
684 | "(6.4853) lin\n",
685 | "\n",
686 | "[house : roof :: castle : ?]\n",
687 | "(6.2919) stonework\n",
688 | "(6.3779) masonry\n",
689 | "(6.4773) canopy\n",
690 | "(6.4954) fortress\n",
691 | "(6.5259) battlements\n",
692 | "\n",
693 | "[building : architect :: software : ?]\n",
694 | "(5.8369) programmer\n",
695 | "(6.8881) entrepreneur\n",
696 | "(6.9240) inventor\n",
697 | "(6.9730) developer\n",
698 | "(6.9949) innovator\n",
699 | "\n",
700 | "[boston : bruins :: phoenix : ?]\n",
701 | "(3.8546) suns\n",
702 | "(4.1968) mavericks\n",
703 | "(4.6126) coyotes\n",
704 | "(4.6894) mavs\n",
705 | "(4.6971) knicks\n",
706 | "\n",
707 | "[good : heaven :: bad : ?]\n",
708 | "(4.3959) hell\n",
709 | "(5.2864) ghosts\n",
710 | "(5.2898) hades\n",
711 | "(5.3414) madness\n",
712 | "(5.3520) purgatory\n",
713 | "\n",
714 | "[jordan : basketball :: woods : ?]\n",
715 | "(5.8607) golf\n",
716 | "(6.4110) golfers\n",
717 | "(6.4418) tournament\n",
718 | "(6.4592) tennis\n",
719 | "(6.6560) collegiate\n"
720 | ]
721 | }
722 | ],
723 | "source": [
724 | "analogy('man', 'actor', 'woman')\n",
725 | "analogy('cat', 'kitten', 'dog')\n",
726 | "analogy('dog', 'puppy', 'cat')\n",
727 | "analogy('russia', 'moscow', 'france')\n",
728 | "analogy('obama', 'president', 'trump')\n",
729 | "analogy('rich', 'mansion', 'poor')\n",
730 | "analogy('elvis', 'rock', 'eminem')\n",
731 | "analogy('paper', 'newspaper', 'screen')\n",
732 | "analogy('monet', 'paint', 'michelangelo')\n",
733 | "analogy('beer', 'barley', 'wine')\n",
734 | "analogy('earth', 'moon', 'sun')\n",
735 | "analogy('house', 'roof', 'castle')\n",
736 | "analogy('building', 'architect', 'software')\n",
737 | "analogy('boston', 'bruins', 'phoenix')\n",
738 | "analogy('good', 'heaven', 'bad')\n",
739 | "analogy('jordan', 'basketball', 'woods')"
740 | ]
741 | }
749 | ],
750 | "metadata": {
751 | "kernelspec": {
752 | "display_name": "Python 3",
753 | "language": "python",
754 | "name": "python3"
755 | },
756 | "language_info": {
757 | "codemirror_mode": {
758 | "name": "ipython",
759 | "version": 3
760 | },
761 | "file_extension": ".py",
762 | "mimetype": "text/x-python",
763 | "name": "python",
764 | "nbconvert_exporter": "python",
765 | "pygments_lexer": "ipython3",
766 | "version": "3.6.8"
767 | }
768 | },
769 | "nbformat": 4,
770 | "nbformat_minor": 4
771 | }
772 |
--------------------------------------------------------------------------------
/3_Tagging_RNN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import sys\n",
11 | "import time\n",
12 | "import torch\n",
13 | "import numpy as np\n",
14 | "import torch.nn as nn\n",
15 | "import torch.optim as optim\n",
16 | "\n",
17 | "from torch.nn import functional as F\n",
18 | "from torch.autograd import Variable\n",
19 | "from torchtext import data\n",
20 | "from torchtext import datasets\n",
21 | "from torchtext.vocab import Vectors, GloVe\n",
22 | "\n",
23 | "def load_dataset(test_sen=None): \n",
24 | " tokenize = lambda x: x.split()\n",
25 | " TEXT = data.Field(sequential=True, tokenize=tokenize, lower=True, include_lengths=True, batch_first=True, fix_length=200)\n",
26 | " LABEL = data.LabelField()\n",
27 | " train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)\n",
28 | " TEXT.build_vocab(train_data, vectors=GloVe(name='6B', dim=300))\n",
29 | " LABEL.build_vocab(train_data)\n",
30 | "\n",
31 | " word_embeddings = TEXT.vocab.vectors\n",
32 | " print (\"Length of Text Vocabulary: \" + str(len(TEXT.vocab)))\n",
33 | " print (\"Vector size of Text Vocabulary: \", TEXT.vocab.vectors.size())\n",
34 | " print (\"Label Length: \" + str(len(LABEL.vocab)))\n",
35 | "\n",
36 | " train_data, valid_data = train_data.split()\n",
37 | " train_iter, valid_iter, test_iter = data.BucketIterator.splits((train_data, valid_data, test_data), batch_size=32, sort_key=lambda x: len(x.text), repeat=False, shuffle=True)\n",
38 | "\n",
39 | " vocab_size = len(TEXT.vocab)\n",
40 | "\n",
41 | " return TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "class RNN(nn.Module):\n",
51 | " def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):\n",
52 | " super(RNN, self).__init__()\n",
53 | " \n",
54 | " self.batch_size = batch_size\n",
55 | " self.output_size = output_size\n",
56 | " self.hidden_size = hidden_size\n",
57 | " self.vocab_size = vocab_size\n",
58 | " self.embedding_length = embedding_length\n",
59 | " \n",
60 | " self.word_embeddings = nn.Embedding(vocab_size, embedding_length)\n",
61 | " self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False)\n",
62 | " self.rnn = nn.RNN(embedding_length, hidden_size, num_layers=2, bidirectional=True)\n",
63 | " self.label = nn.Linear(4*hidden_size, output_size)\n",
64 | " \n",
65 | " def forward(self, input_sentences, batch_size=None):\n",
66 | " input = self.word_embeddings(input_sentences)\n",
67 | " input = input.permute(1, 0, 2)\n",
68 | " \n",
69 | " if batch_size is None:\n",
70 | " h_0 = Variable(torch.zeros(4, self.batch_size, self.hidden_size).cuda())\n",
71 | " else:\n",
72 | " h_0 = Variable(torch.zeros(4, batch_size, self.hidden_size).cuda())\n",
73 | " \n",
74 | " output, h_n = self.rnn(input, h_0)\n",
75 | " h_n = h_n.permute(1, 0, 2)\n",
76 | " h_n = h_n.contiguous().view(h_n.size()[0], h_n.size()[1]*h_n.size()[2])\n",
77 | " logits = self.label(h_n)\n",
78 | " \n",
79 | " return logits"
80 | ]
81 | },
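{
"cell_type": "markdown",
"metadata": {},
"source": [
"A standalone shape check for the reshape in `RNN.forward` above: with `num_layers=2` and `bidirectional=True`, `h_n` has shape `(num_layers * num_directions, batch, hidden) = (4, batch, hidden)`, so flattening it per example yields `4 * hidden_size` features, which is exactly what `self.label = nn.Linear(4*hidden_size, output_size)` expects."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"\n",
"rnn = nn.RNN(300, 256, num_layers=2, bidirectional=True)\n",
"x = torch.randn(200, 32, 300)   # (seq_len, batch, embedding_length)\n",
"h0 = torch.zeros(4, 32, 256)    # (num_layers * num_directions, batch, hidden_size)\n",
"out, h_n = rnn(x, h0)\n",
"print(h_n.shape)                # torch.Size([4, 32, 256])\n",
"h_n = h_n.permute(1, 0, 2).contiguous().view(32, -1)\n",
"print(h_n.shape)                # torch.Size([32, 1024]) == (batch, 4 * hidden_size)"
]
},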
82 | {
83 | "cell_type": "code",
84 | "execution_count": 3,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "Length of Text Vocabulary: 251639\n",
92 | "Vector size of Text Vocabulary: torch.Size([251639, 300])\n",
93 | "Label Length: 2\n",
94 | "Epoch: 1, Idx: 100, Training Loss: 0.6538, Training Accuracy: 59.38%\n",
95 | "Epoch: 1, Idx: 200, Training Loss: 0.7323, Training Accuracy: 28.12%\n",
96 | "Epoch: 1, Idx: 300, Training Loss: 0.6863, Training Accuracy: 62.50%\n",
97 | "Epoch: 1, Idx: 400, Training Loss: 0.6000, Training Accuracy: 68.75%\n",
98 | "Epoch: 1, Idx: 500, Training Loss: 0.6716, Training Accuracy: 65.62%\n",
99 | "Epoch: 01, Train Loss: 0.701, Train Acc: 56.00%, Val. Loss: 0.684682, Val. Acc: 53.44%\n",
100 | "Epoch: 2, Idx: 100, Training Loss: 0.6071, Training Accuracy: 62.50%\n",
101 | "Epoch: 2, Idx: 200, Training Loss: 0.6640, Training Accuracy: 53.12%\n",
102 | "Epoch: 2, Idx: 300, Training Loss: 0.7665, Training Accuracy: 43.75%\n",
103 | "Epoch: 2, Idx: 400, Training Loss: 0.7446, Training Accuracy: 65.62%\n",
104 | "Epoch: 2, Idx: 500, Training Loss: 0.7483, Training Accuracy: 40.62%\n",
105 | "Epoch: 02, Train Loss: 0.702, Train Acc: 55.19%, Val. Loss: 0.726367, Val. Acc: 53.12%\n",
106 | "Epoch: 3, Idx: 100, Training Loss: 0.6248, Training Accuracy: 68.75%\n",
107 | "Epoch: 3, Idx: 200, Training Loss: 0.7327, Training Accuracy: 50.00%\n",
108 | "Epoch: 3, Idx: 300, Training Loss: 0.6317, Training Accuracy: 65.62%\n",
109 | "Epoch: 3, Idx: 400, Training Loss: 0.5729, Training Accuracy: 71.88%\n",
110 | "Epoch: 3, Idx: 500, Training Loss: 0.6937, Training Accuracy: 56.25%\n",
111 | "Epoch: 03, Train Loss: 0.685, Train Acc: 60.74%, Val. Loss: 0.728285, Val. Acc: 54.92%\n",
112 | "Epoch: 4, Idx: 100, Training Loss: 0.7239, Training Accuracy: 59.38%\n",
113 | "Epoch: 4, Idx: 200, Training Loss: 0.5938, Training Accuracy: 75.00%\n",
114 | "Epoch: 4, Idx: 300, Training Loss: 0.9016, Training Accuracy: 53.12%\n",
115 | "Epoch: 4, Idx: 400, Training Loss: 0.5571, Training Accuracy: 78.12%\n",
116 | "Epoch: 4, Idx: 500, Training Loss: 0.5879, Training Accuracy: 65.62%\n",
117 | "Epoch: 04, Train Loss: 0.667, Train Acc: 61.56%, Val. Loss: 0.678201, Val. Acc: 57.76%\n",
118 | "Epoch: 5, Idx: 100, Training Loss: 0.6851, Training Accuracy: 56.25%\n",
119 | "Epoch: 5, Idx: 200, Training Loss: 0.6559, Training Accuracy: 62.50%\n",
120 | "Epoch: 5, Idx: 300, Training Loss: 0.6128, Training Accuracy: 62.50%\n",
121 | "Epoch: 5, Idx: 400, Training Loss: 0.6151, Training Accuracy: 59.38%\n",
122 | "Epoch: 5, Idx: 500, Training Loss: 0.6839, Training Accuracy: 68.75%\n",
123 | "Epoch: 05, Train Loss: 0.669, Train Acc: 61.76%, Val. Loss: 0.631271, Val. Acc: 66.40%\n",
124 | "Epoch: 6, Idx: 100, Training Loss: 0.4968, Training Accuracy: 78.12%\n",
125 | "Epoch: 6, Idx: 200, Training Loss: 0.7118, Training Accuracy: 62.50%\n",
126 | "Epoch: 6, Idx: 300, Training Loss: 0.5181, Training Accuracy: 81.25%\n",
127 | "Epoch: 6, Idx: 400, Training Loss: 0.5818, Training Accuracy: 75.00%\n",
128 | "Epoch: 6, Idx: 500, Training Loss: 0.5787, Training Accuracy: 68.75%\n",
129 | "Epoch: 06, Train Loss: 0.664, Train Acc: 62.54%, Val. Loss: 0.714283, Val. Acc: 53.20%\n",
130 | "Epoch: 7, Idx: 100, Training Loss: 0.7741, Training Accuracy: 65.62%\n",
131 | "Epoch: 7, Idx: 200, Training Loss: 0.6719, Training Accuracy: 62.50%\n",
132 | "Epoch: 7, Idx: 300, Training Loss: 0.5993, Training Accuracy: 68.75%\n",
133 | "Epoch: 7, Idx: 400, Training Loss: 0.7759, Training Accuracy: 46.88%\n",
134 | "Epoch: 7, Idx: 500, Training Loss: 0.6450, Training Accuracy: 59.38%\n",
135 | "Epoch: 07, Train Loss: 0.659, Train Acc: 62.85%, Val. Loss: 0.643714, Val. Acc: 61.96%\n",
136 | "Epoch: 8, Idx: 100, Training Loss: 0.6427, Training Accuracy: 75.00%\n",
137 | "Epoch: 8, Idx: 200, Training Loss: 0.7509, Training Accuracy: 46.88%\n",
138 | "Epoch: 8, Idx: 300, Training Loss: 0.7016, Training Accuracy: 53.12%\n",
139 | "Epoch: 8, Idx: 400, Training Loss: 0.6085, Training Accuracy: 71.88%\n",
140 | "Epoch: 8, Idx: 500, Training Loss: 0.5723, Training Accuracy: 71.88%\n",
141 | "Epoch: 08, Train Loss: 0.661, Train Acc: 63.19%, Val. Loss: 0.631669, Val. Acc: 66.58%\n",
142 | "Epoch: 9, Idx: 100, Training Loss: 0.6699, Training Accuracy: 56.25%\n",
143 | "Epoch: 9, Idx: 200, Training Loss: 0.7980, Training Accuracy: 56.25%\n",
144 | "Epoch: 9, Idx: 300, Training Loss: 0.7833, Training Accuracy: 56.25%\n",
145 | "Epoch: 9, Idx: 400, Training Loss: 0.6437, Training Accuracy: 56.25%\n",
146 | "Epoch: 9, Idx: 500, Training Loss: 0.6734, Training Accuracy: 65.62%\n",
147 | "Epoch: 09, Train Loss: 0.658, Train Acc: 63.68%, Val. Loss: 0.675087, Val. Acc: 61.79%\n",
148 | "Epoch: 10, Idx: 100, Training Loss: 0.5863, Training Accuracy: 75.00%\n",
149 | "Epoch: 10, Idx: 200, Training Loss: 0.6492, Training Accuracy: 71.88%\n",
150 | "Epoch: 10, Idx: 300, Training Loss: 0.5064, Training Accuracy: 84.38%\n",
151 | "Epoch: 10, Idx: 400, Training Loss: 0.8142, Training Accuracy: 53.12%\n",
152 | "Epoch: 10, Idx: 500, Training Loss: 0.7308, Training Accuracy: 46.88%\n",
153 | "Epoch: 10, Train Loss: 0.651, Train Acc: 64.40%, Val. Loss: 0.645745, Val. Acc: 65.99%\n",
154 | "Test Loss: 0.647, Test Acc: 65.61%\n"
155 | ]
156 | }
157 | ],
158 | "source": [
159 | "TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_dataset()\n",
160 | "\n",
161 | "def clip_gradient(model, clip_value):\n",
162 | " params = list(filter(lambda p: p.grad is not None, model.parameters()))\n",
163 | " for p in params:\n",
164 | " p.grad.data.clamp_(-clip_value, clip_value)\n",
165 | " \n",
166 | "def train_model(model, train_iter, epoch):\n",
167 | " total_epoch_loss = 0\n",
168 | " total_epoch_acc = 0\n",
169 | " model.cuda()\n",
170 | " optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))\n",
171 | " steps = 0\n",
172 | " model.train()\n",
173 | " for idx, batch in enumerate(train_iter):\n",
174 | " text = batch.text[0]\n",
175 | " target = batch.label\n",
176 | " target = torch.autograd.Variable(target).long()\n",
177 | " if torch.cuda.is_available():\n",
178 | " text = text.cuda()\n",
179 | " target = target.cuda()\n",
180 | " if (text.size()[0] is not 32):# One of the batch returned by BucketIterator has length different than 32.\n",
181 | " continue\n",
182 | " optim.zero_grad()\n",
183 | " prediction = model(text)\n",
184 | " loss = loss_fn(prediction, target)\n",
185 | " num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()\n",
186 | " acc = 100.0 * num_corrects/len(batch)\n",
187 | " loss.backward()\n",
188 | " clip_gradient(model, 1e-1)\n",
189 | " optim.step()\n",
190 | " steps += 1\n",
191 | " \n",
192 | " if steps % 100 == 0:\n",
193 | " print (f'Epoch: {epoch+1}, Idx: {idx+1}, Training Loss: {loss.item():.4f}, Training Accuracy: {acc.item(): .2f}%')\n",
194 | " \n",
195 | " total_epoch_loss += loss.item()\n",
196 | " total_epoch_acc += acc.item()\n",
197 | " \n",
198 | " return total_epoch_loss/len(train_iter), total_epoch_acc/len(train_iter)\n",
199 | "\n",
200 | "def eval_model(model, val_iter):\n",
201 | " total_epoch_loss = 0\n",
202 | " total_epoch_acc = 0\n",
203 | " model.eval()\n",
204 | " with torch.no_grad():\n",
205 | " for idx, batch in enumerate(val_iter):\n",
206 | " text = batch.text[0]\n",
207 | " if (text.size()[0] is not 32):\n",
208 | " continue\n",
209 | " target = batch.label\n",
210 | " target = torch.autograd.Variable(target).long()\n",
211 | " if torch.cuda.is_available():\n",
212 | " text = text.cuda()\n",
213 | " target = target.cuda()\n",
214 | " prediction = model(text)\n",
215 | " loss = loss_fn(prediction, target)\n",
216 | " num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).sum()\n",
217 | " acc = 100.0 * num_corrects/len(batch)\n",
218 | " total_epoch_loss += loss.item()\n",
219 | " total_epoch_acc += acc.item()\n",
220 | "\n",
221 | " return total_epoch_loss/len(val_iter), total_epoch_acc/len(val_iter)\n",
222 | "\n",
223 | "learning_rate = 1e-5\n",
224 | "batch_size = 32\n",
225 | "output_size = 2\n",
226 | "hidden_size = 256\n",
227 | "embedding_length = 300\n",
228 | "\n",
229 | "model = RNN(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)\n",
230 | "loss_fn = F.cross_entropy\n",
231 | "\n",
232 | "for epoch in range(10):\n",
233 | " train_loss, train_acc = train_model(model, train_iter, epoch)\n",
234 | " val_loss, val_acc = eval_model(model, valid_iter)\n",
235 | " \n",
236 | " print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:3f}, Val. Acc: {val_acc:.2f}%')\n",
237 | " \n",
238 | "test_loss, test_acc = eval_model(model, test_iter)\n",
239 | "print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')"
240 | ]
241 | },
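{
"cell_type": "markdown",
"metadata": {},
"source": [
"`clip_gradient` above clamps each gradient element into `[-clip_value, clip_value]`. PyTorch ships the same element-wise operation as a built-in, so inside `train_model` the hand-rolled loop could be swapped for the one-liner below (a drop-in alternative, not what the run above actually used)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Equivalent to clip_gradient(model, 1e-1):\n",
"# torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=1e-1)\n",
"#\n",
"# This clips element-wise values; clipping the total gradient *norm*\n",
"# instead would be torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)."
]
},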
242 | {
243 | "cell_type": "code",
244 | "execution_count": 4,
245 | "metadata": {},
246 | "outputs": [
247 | {
248 | "name": "stdout",
249 | "output_type": "stream",
250 | "text": [
251 | "tensor([[0.1081, 0.8919]], device='cuda:0', grad_fn=)\n",
252 | "Sentiment: Positive\n"
253 | ]
254 | },
255 | {
256 | "name": "stderr",
257 | "output_type": "stream",
258 | "text": [
259 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:8: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.\n",
260 | " \n"
261 | ]
262 | }
263 | ],
264 | "source": [
265 | "test_sen1 = \"This is one of the best creation of Nolan. I can say, it's his magnum opus. Loved the soundtrack and especially those creative dialogues.\"\n",
266 | "\n",
267 | "test_sen1 = TEXT.preprocess(test_sen1)\n",
268 | "test_sen1 = [[TEXT.vocab.stoi[x] for x in test_sen1]]\n",
269 | "\n",
270 | "test_sen = np.asarray(test_sen1)\n",
271 | "test_sen = torch.LongTensor(test_sen)\n",
272 | "test_tensor = Variable(test_sen, volatile=True)\n",
273 | "test_tensor = test_tensor.cuda()\n",
274 | "model.eval()\n",
275 | "output = model(test_tensor, 1)\n",
276 | "out = F.softmax(output, 1)\n",
277 | "print(out)\n",
278 | "if (torch.argmax(out[0]) == 1):\n",
279 | " print (\"Sentiment: Positive\")\n",
280 | "else:\n",
281 | " print (\"Sentiment: Negative\")"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 5,
287 | "metadata": {},
288 | "outputs": [
289 | {
290 | "name": "stdout",
291 | "output_type": "stream",
292 | "text": [
293 | "tensor([[0.6741, 0.3259]], device='cuda:0', grad_fn=)\n",
294 | "Sentiment: Negative\n"
295 | ]
296 | },
297 | {
298 | "name": "stderr",
299 | "output_type": "stream",
300 | "text": [
301 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:7: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.\n",
302 | " import sys\n"
303 | ]
304 | }
305 | ],
306 | "source": [
307 | "test_sen2 = \"Ohh, such a ridiculous movie. Not gonna recommend it to anyone. Complete waste of time and money.\"\n",
308 | "test_sen2 = TEXT.preprocess(test_sen2)\n",
309 | "test_sen2 = [[TEXT.vocab.stoi[x] for x in test_sen2]]\n",
310 | "\n",
311 | "test_sen = np.asarray(test_sen2)\n",
312 | "test_sen = torch.LongTensor(test_sen)\n",
313 | "test_tensor = Variable(test_sen, volatile=True)\n",
314 | "test_tensor = test_tensor.cuda()\n",
315 | "model.eval()\n",
316 | "output = model(test_tensor, 1)\n",
317 | "out = F.softmax(output, 1)\n",
318 | "print(out)\n",
319 | "if (torch.argmax(out[0]) == 1):\n",
320 | " print (\"Sentiment: Positive\")\n",
321 | "else:\n",
322 | " print (\"Sentiment: Negative\")"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 6,
328 | "metadata": {},
329 | "outputs": [
330 | {
331 | "name": "stdout",
332 | "output_type": "stream",
333 | "text": [
334 | "Epoch: 1, Idx: 100, Training Loss: 0.6934, Training Accuracy: 50.00%\n",
335 | "Epoch: 1, Idx: 200, Training Loss: 0.7078, Training Accuracy: 46.88%\n",
336 | "Epoch: 1, Idx: 300, Training Loss: 0.6930, Training Accuracy: 56.25%\n",
337 | "Epoch: 1, Idx: 400, Training Loss: 0.6874, Training Accuracy: 56.25%\n",
338 | "Epoch: 1, Idx: 500, Training Loss: 0.6718, Training Accuracy: 78.12%\n",
339 | "Epoch: 01, Train Loss: 0.690, Train Acc: 51.97%, Val. Loss: 0.688056, Val. Acc: 51.27%\n",
340 | "Epoch: 2, Idx: 100, Training Loss: 0.6545, Training Accuracy: 62.50%\n",
341 | "Epoch: 2, Idx: 200, Training Loss: 0.6695, Training Accuracy: 62.50%\n",
342 | "Epoch: 2, Idx: 300, Training Loss: 0.6926, Training Accuracy: 53.12%\n",
343 | "Epoch: 2, Idx: 400, Training Loss: 0.6827, Training Accuracy: 56.25%\n",
344 | "Epoch: 2, Idx: 500, Training Loss: 0.6129, Training Accuracy: 68.75%\n",
345 | "Epoch: 02, Train Loss: 0.667, Train Acc: 58.72%, Val. Loss: 0.682012, Val. Acc: 54.80%\n",
346 | "Epoch: 3, Idx: 100, Training Loss: 0.6308, Training Accuracy: 68.75%\n",
347 | "Epoch: 3, Idx: 200, Training Loss: 0.4342, Training Accuracy: 84.38%\n",
348 | "Epoch: 3, Idx: 300, Training Loss: 0.6503, Training Accuracy: 62.50%\n",
349 | "Epoch: 3, Idx: 400, Training Loss: 0.6636, Training Accuracy: 68.75%\n",
350 | "Epoch: 3, Idx: 500, Training Loss: 0.5156, Training Accuracy: 75.00%\n",
351 | "Epoch: 03, Train Loss: 0.590, Train Acc: 69.50%, Val. Loss: 0.494978, Val. Acc: 77.09%\n",
352 | "Epoch: 4, Idx: 100, Training Loss: 0.5605, Training Accuracy: 71.88%\n",
353 | "Epoch: 4, Idx: 200, Training Loss: 0.8281, Training Accuracy: 65.62%\n",
354 | "Epoch: 4, Idx: 300, Training Loss: 0.6036, Training Accuracy: 65.62%\n",
355 | "Epoch: 4, Idx: 400, Training Loss: 0.4735, Training Accuracy: 71.88%\n",
356 | "Epoch: 4, Idx: 500, Training Loss: 0.4546, Training Accuracy: 78.12%\n",
357 | "Epoch: 04, Train Loss: 0.428, Train Acc: 80.67%, Val. Loss: 0.386465, Val. Acc: 82.08%\n",
358 | "Epoch: 5, Idx: 100, Training Loss: 0.3328, Training Accuracy: 87.50%\n",
359 | "Epoch: 5, Idx: 200, Training Loss: 0.3596, Training Accuracy: 78.12%\n",
360 | "Epoch: 5, Idx: 300, Training Loss: 0.3249, Training Accuracy: 87.50%\n",
361 | "Epoch: 5, Idx: 400, Training Loss: 0.6565, Training Accuracy: 68.75%\n",
362 | "Epoch: 5, Idx: 500, Training Loss: 0.4050, Training Accuracy: 78.12%\n",
363 | "Epoch: 05, Train Loss: 0.367, Train Acc: 83.72%, Val. Loss: 0.369900, Val. Acc: 82.79%\n",
364 | "Epoch: 6, Idx: 100, Training Loss: 0.4549, Training Accuracy: 84.38%\n",
365 | "Epoch: 6, Idx: 200, Training Loss: 0.3892, Training Accuracy: 81.25%\n",
366 | "Epoch: 6, Idx: 300, Training Loss: 0.1442, Training Accuracy: 96.88%\n",
367 | "Epoch: 6, Idx: 400, Training Loss: 0.3001, Training Accuracy: 87.50%\n",
368 | "Epoch: 6, Idx: 500, Training Loss: 0.4553, Training Accuracy: 75.00%\n",
369 | "Epoch: 06, Train Loss: 0.324, Train Acc: 85.52%, Val. Loss: 0.367029, Val. Acc: 83.53%\n",
370 | "Epoch: 7, Idx: 100, Training Loss: 0.2308, Training Accuracy: 90.62%\n",
371 | "Epoch: 7, Idx: 200, Training Loss: 0.3394, Training Accuracy: 81.25%\n",
372 | "Epoch: 7, Idx: 300, Training Loss: 0.4261, Training Accuracy: 87.50%\n",
373 | "Epoch: 7, Idx: 400, Training Loss: 0.3106, Training Accuracy: 90.62%\n",
374 | "Epoch: 7, Idx: 500, Training Loss: 0.1421, Training Accuracy: 96.88%\n",
375 | "Epoch: 07, Train Loss: 0.282, Train Acc: 87.91%, Val. Loss: 0.378974, Val. Acc: 83.80%\n",
376 | "Epoch: 8, Idx: 100, Training Loss: 0.1280, Training Accuracy: 96.88%\n",
377 | "Epoch: 8, Idx: 200, Training Loss: 0.4244, Training Accuracy: 84.38%\n",
378 | "Epoch: 8, Idx: 300, Training Loss: 0.3225, Training Accuracy: 90.62%\n",
379 | "Epoch: 8, Idx: 400, Training Loss: 0.3618, Training Accuracy: 84.38%\n",
380 | "Epoch: 8, Idx: 500, Training Loss: 0.2334, Training Accuracy: 87.50%\n",
381 | "Epoch: 08, Train Loss: 0.232, Train Acc: 90.26%, Val. Loss: 0.395538, Val. Acc: 83.34%\n",
382 | "Epoch: 9, Idx: 100, Training Loss: 0.1379, Training Accuracy: 96.88%\n",
383 | "Epoch: 9, Idx: 200, Training Loss: 0.2220, Training Accuracy: 87.50%\n",
384 | "Epoch: 9, Idx: 300, Training Loss: 0.2743, Training Accuracy: 87.50%\n",
385 | "Epoch: 9, Idx: 400, Training Loss: 0.3071, Training Accuracy: 84.38%\n",
386 | "Epoch: 9, Idx: 500, Training Loss: 0.1465, Training Accuracy: 93.75%\n",
387 | "Epoch: 09, Train Loss: 0.181, Train Acc: 92.97%, Val. Loss: 0.433728, Val. Acc: 83.30%\n",
388 | "Epoch: 10, Idx: 100, Training Loss: 0.0535, Training Accuracy: 96.88%\n",
389 | "Epoch: 10, Idx: 200, Training Loss: 0.1756, Training Accuracy: 90.62%\n",
390 | "Epoch: 10, Idx: 300, Training Loss: 0.0970, Training Accuracy: 100.00%\n",
391 | "Epoch: 10, Idx: 400, Training Loss: 0.1835, Training Accuracy: 93.75%\n",
392 | "Epoch: 10, Idx: 500, Training Loss: 0.0648, Training Accuracy: 96.88%\n",
393 | "Epoch: 10, Train Loss: 0.133, Train Acc: 94.96%, Val. Loss: 0.458091, Val. Acc: 82.26%\n",
394 | "Test Loss: 0.456, Test Acc: 82.61%\n"
395 | ]
396 | }
397 | ],
398 | "source": [
399 | "class LSTMClassifier(nn.Module):\n",
400 | " def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):\n",
401 | " super(LSTMClassifier, self).__init__()\n",
402 | " self.batch_size = batch_size\n",
403 | " self.output_size = output_size\n",
404 | " self.hidden_size = hidden_size\n",
405 | " self.vocab_size = vocab_size\n",
406 | " self.embedding_length = embedding_length\n",
407 | "\n",
408 | " self.word_embeddings = nn.Embedding(vocab_size, embedding_length)# Initializing the look-up table.\n",
409 | " self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False) # Assigning the look-up table to the pre-trained GloVe word embedding.\n",
410 | " self.lstm = nn.LSTM(embedding_length, hidden_size)\n",
411 | " self.label = nn.Linear(hidden_size, output_size)\n",
412 | " \n",
413 | " def forward(self, input_sentence, batch_size=None):\n",
414 | " input = self.word_embeddings(input_sentence) # embedded input of shape = (batch_size, num_sequences, embedding_length)\n",
415 | " input = input.permute(1, 0, 2) # input.size() = (num_sequences, batch_size, embedding_length)\n",
416 | " if batch_size is None:\n",
417 | " h_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) # Initial hidden state of the LSTM\n",
418 | " c_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) # Initial cell state of the LSTM\n",
419 | " else:\n",
420 | " h_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())\n",
421 | " c_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())\n",
422 | " output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))\n",
423 | " final_output = self.label(final_hidden_state[-1]) # final_hidden_state.size() = (1, batch_size, hidden_size) & final_output.size() = (batch_size, output_size)\n",
424 | " return final_output\n",
425 | " \n",
426 | "model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)\n",
427 | "loss_fn = F.cross_entropy\n",
428 | "\n",
429 | "for epoch in range(10):\n",
430 | " train_loss, train_acc = train_model(model, train_iter, epoch)\n",
431 | " val_loss, val_acc = eval_model(model, valid_iter)\n",
432 | " \n",
433 | " print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:3f}, Val. Acc: {val_acc:.2f}%')\n",
434 | " \n",
435 | "test_loss, test_acc = eval_model(model, test_iter)\n",
436 | "print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')"
437 | ]
438 | },
439 | {
440 | "cell_type": "code",
441 | "execution_count": 7,
442 | "metadata": {},
443 | "outputs": [
444 | {
445 | "name": "stdout",
446 | "output_type": "stream",
447 | "text": [
448 | "tensor([[5.6929e-06, 9.9999e-01]], device='cuda:0', grad_fn=)\n",
449 | "Sentiment: Positive\n"
450 | ]
451 | },
452 | {
453 | "name": "stderr",
454 | "output_type": "stream",
455 | "text": [
456 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:8: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.\n",
457 | " \n"
458 | ]
459 | }
460 | ],
461 | "source": [
462 | "test_sen1 = \"This is one of the best creation of Nolan. I can say, it's his magnum opus. Loved the soundtrack and especially those creative dialogues.\"\n",
463 | "\n",
464 | "test_sen1 = TEXT.preprocess(test_sen1)\n",
465 | "test_sen1 = [[TEXT.vocab.stoi[x] for x in test_sen1]]\n",
466 | "\n",
467 | "test_sen = np.asarray(test_sen1)\n",
468 | "test_sen = torch.LongTensor(test_sen)\n",
469 | "test_tensor = Variable(test_sen, volatile=True)\n",
470 | "test_tensor = test_tensor.cuda()\n",
471 | "model.eval()\n",
472 | "output = model(test_tensor, 1)\n",
473 | "out = F.softmax(output, 1)\n",
474 | "print(out)\n",
475 | "if (torch.argmax(out[0]) == 1):\n",
476 | " print (\"Sentiment: Positive\")\n",
477 | "else:\n",
478 | " print (\"Sentiment: Negative\")"
479 | ]
480 | },
481 | {
482 | "cell_type": "code",
483 | "execution_count": 8,
484 | "metadata": {},
485 | "outputs": [
486 | {
487 | "name": "stdout",
488 | "output_type": "stream",
489 | "text": [
490 | "tensor([[0.9989, 0.0011]], device='cuda:0', grad_fn=)\n",
491 | "Sentiment: Negative\n"
492 | ]
493 | },
494 | {
495 | "name": "stderr",
496 | "output_type": "stream",
497 | "text": [
498 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:8: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.\n",
499 | " \n"
500 | ]
501 | }
502 | ],
503 | "source": [
504 | "test_sen2 = \"Ohh, such a ridiculous movie. Not gonna recommend it to anyone. Complete waste of time and money.\"\n",
505 | "test_sen2 = TEXT.preprocess(test_sen2)\n",
506 | "test_sen2 = [[TEXT.vocab.stoi[x] for x in test_sen2]]\n",
507 | "\n",
508 | "\n",
509 | "test_sen = np.asarray(test_sen2)\n",
510 | "test_sen = torch.LongTensor(test_sen)\n",
511 | "test_tensor = Variable(test_sen, volatile=True)\n",
512 | "test_tensor = test_tensor.cuda()\n",
513 | "model.eval()\n",
514 | "output = model(test_tensor, 1)\n",
515 | "out = F.softmax(output, 1)\n",
516 | "print(out)\n",
517 | "if (torch.argmax(out[0]) == 1):\n",
518 | " print (\"Sentiment: Positive\")\n",
519 | "else:\n",
520 | " print (\"Sentiment: Negative\")"
521 | ]
522 | },
523 | {
524 | "cell_type": "code",
525 | "execution_count": 9,
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "class AttentionModel(torch.nn.Module):\n",
530 | " def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):\n",
531 | " super(AttentionModel, self).__init__()\n",
532 | " self.batch_size = batch_size\n",
533 | " self.output_size = output_size\n",
534 | " self.hidden_size = hidden_size\n",
535 | "\n",
536 | " self.vocab_size = vocab_size\n",
537 | " self.embedding_length = embedding_length\n",
538 | " \n",
539 | " self.word_embeddings = nn.Embedding(vocab_size, embedding_length)\n",
540 | " self.word_embeddings.weights = nn.Parameter(weights, requires_grad=False)\n",
541 | " self.lstm = nn.LSTM(embedding_length, hidden_size)\n",
542 | " self.label = nn.Linear(hidden_size, output_size)\n",
543 | " \n",
544 | " def attention_net(self, lstm_output, final_state):\n",
545 | " hidden = final_state.squeeze(0)\n",
546 | "\n",
547 | " attn_weights = torch.bmm(lstm_output, hidden.unsqueeze(2)).squeeze(2)\n",
548 | " soft_attn_weights = F.softmax(attn_weights, 1)\n",
549 | " new_hidden_state = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)\n",
550 | " \n",
551 | " return new_hidden_state\n",
552 | " \n",
553 | " def forward(self, input_sentences, batch_size=None):\n",
554 | " input = self.word_embeddings(input_sentences)\n",
555 | "\n",
556 | " input = input.permute(1, 0, 2)\n",
557 | " if batch_size is None:\n",
558 | " h_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda())\n",
559 | " c_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda())\n",
560 | " else:\n",
561 | " h_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())\n",
562 | " c_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())\n",
563 | "\n",
564 | " output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))\n",
565 | " output = output.permute(1, 0, 2)\n",
566 | " \n",
567 | " attn_output = self.attention_net(output, final_hidden_state)\n",
568 | "\n",
569 | " logits = self.label(attn_output)\n",
570 | " \n",
571 | " return logits"
572 | ]
573 | },
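{
"cell_type": "markdown",
"metadata": {},
"source": [
"A shape walk-through of `attention_net` above with toy tensors: the batched dot product between every LSTM output step and the final hidden state gives one score per time step, and the softmax-weighted sum of the outputs is the context vector handed to the classifier."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn.functional as F\n",
"\n",
"batch, seq_len, hid = 32, 200, 256\n",
"lstm_output = torch.randn(batch, seq_len, hid)  # output after permute(1, 0, 2)\n",
"hidden = torch.randn(batch, hid)                # final_hidden_state.squeeze(0)\n",
"\n",
"attn_weights = torch.bmm(lstm_output, hidden.unsqueeze(2)).squeeze(2)\n",
"print(attn_weights.shape)   # torch.Size([32, 200]): one score per time step\n",
"soft = F.softmax(attn_weights, 1)\n",
"context = torch.bmm(lstm_output.transpose(1, 2), soft.unsqueeze(2)).squeeze(2)\n",
"print(context.shape)        # torch.Size([32, 256]): the new_hidden_state fed to the classifier"
]
},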
574 | {
575 | "cell_type": "code",
576 | "execution_count": 10,
577 | "metadata": {},
578 | "outputs": [
579 | {
580 | "name": "stdout",
581 | "output_type": "stream",
582 | "text": [
583 | "Epoch: 1, Idx: 100, Training Loss: 0.7107, Training Accuracy: 46.88%\n",
584 | "Epoch: 1, Idx: 200, Training Loss: 0.6477, Training Accuracy: 65.62%\n",
585 | "Epoch: 1, Idx: 300, Training Loss: 0.6709, Training Accuracy: 59.38%\n",
586 | "Epoch: 1, Idx: 400, Training Loss: 0.6348, Training Accuracy: 71.88%\n",
587 | "Epoch: 1, Idx: 500, Training Loss: 0.6457, Training Accuracy: 71.88%\n",
588 | "Epoch: 01, Train Loss: 0.670, Train Acc: 61.15%, Val. Loss: 0.622058, Val. Acc: 69.28%\n",
589 | "Epoch: 2, Idx: 100, Training Loss: 0.5689, Training Accuracy: 81.25%\n",
590 | "Epoch: 2, Idx: 200, Training Loss: 0.7160, Training Accuracy: 59.38%\n",
591 | "Epoch: 2, Idx: 300, Training Loss: 0.5420, Training Accuracy: 78.12%\n",
592 | "Epoch: 2, Idx: 400, Training Loss: 0.4943, Training Accuracy: 78.12%\n",
593 | "Epoch: 2, Idx: 500, Training Loss: 0.4309, Training Accuracy: 87.50%\n",
594 | "Epoch: 02, Train Loss: 0.517, Train Acc: 76.50%, Val. Loss: 0.500942, Val. Acc: 76.73%\n",
595 | "Epoch: 3, Idx: 100, Training Loss: 0.4180, Training Accuracy: 78.12%\n",
596 | "Epoch: 3, Idx: 200, Training Loss: 0.1869, Training Accuracy: 93.75%\n",
597 | "Epoch: 3, Idx: 300, Training Loss: 0.4827, Training Accuracy: 75.00%\n",
598 | "Epoch: 3, Idx: 400, Training Loss: 0.3836, Training Accuracy: 84.38%\n",
599 | "Epoch: 3, Idx: 500, Training Loss: 0.6573, Training Accuracy: 68.75%\n",
600 | "Epoch: 03, Train Loss: 0.315, Train Acc: 87.20%, Val. Loss: 0.440898, Val. Acc: 79.07%\n",
601 | "Epoch: 4, Idx: 100, Training Loss: 0.1041, Training Accuracy: 96.88%\n",
602 | "Epoch: 4, Idx: 200, Training Loss: 0.0691, Training Accuracy: 100.00%\n",
603 | "Epoch: 4, Idx: 300, Training Loss: 0.0704, Training Accuracy: 96.88%\n",
604 | "Epoch: 4, Idx: 400, Training Loss: 0.1435, Training Accuracy: 93.75%\n",
605 | "Epoch: 4, Idx: 500, Training Loss: 0.1228, Training Accuracy: 96.88%\n",
606 | "Epoch: 04, Train Loss: 0.163, Train Acc: 93.80%, Val. Loss: 0.498487, Val. Acc: 81.04%\n",
607 | "Epoch: 5, Idx: 100, Training Loss: 0.0418, Training Accuracy: 96.88%\n",
608 | "Epoch: 5, Idx: 200, Training Loss: 0.0405, Training Accuracy: 96.88%\n",
609 | "Epoch: 5, Idx: 300, Training Loss: 0.1384, Training Accuracy: 90.62%\n",
610 | "Epoch: 5, Idx: 400, Training Loss: 0.2633, Training Accuracy: 90.62%\n",
611 | "Epoch: 5, Idx: 500, Training Loss: 0.0360, Training Accuracy: 100.00%\n",
612 | "Epoch: 05, Train Loss: 0.079, Train Acc: 97.15%, Val. Loss: 0.572422, Val. Acc: 81.11%\n",
613 | "Epoch: 6, Idx: 100, Training Loss: 0.0018, Training Accuracy: 100.00%\n",
614 | "Epoch: 6, Idx: 200, Training Loss: 0.0444, Training Accuracy: 96.88%\n",
615 | "Epoch: 6, Idx: 300, Training Loss: 0.1177, Training Accuracy: 96.88%\n",
616 | "Epoch: 6, Idx: 400, Training Loss: 0.1992, Training Accuracy: 96.88%\n",
617 | "Epoch: 6, Idx: 500, Training Loss: 0.0245, Training Accuracy: 100.00%\n",
618 | "Epoch: 06, Train Loss: 0.034, Train Acc: 98.77%, Val. Loss: 0.777436, Val. Acc: 81.54%\n",
619 | "Epoch: 7, Idx: 100, Training Loss: 0.0051, Training Accuracy: 100.00%\n",
620 | "Epoch: 7, Idx: 200, Training Loss: 0.0011, Training Accuracy: 100.00%\n",
621 | "Epoch: 7, Idx: 300, Training Loss: 0.0009, Training Accuracy: 100.00%\n",
622 | "Epoch: 7, Idx: 400, Training Loss: 0.0038, Training Accuracy: 100.00%\n",
623 | "Epoch: 7, Idx: 500, Training Loss: 0.0092, Training Accuracy: 100.00%\n",
624 | "Epoch: 07, Train Loss: 0.021, Train Acc: 99.27%, Val. Loss: 0.816149, Val. Acc: 81.73%\n",
625 | "Epoch: 8, Idx: 100, Training Loss: 0.0013, Training Accuracy: 100.00%\n",
626 | "Epoch: 8, Idx: 200, Training Loss: 0.0011, Training Accuracy: 100.00%\n",
627 | "Epoch: 8, Idx: 300, Training Loss: 0.0069, Training Accuracy: 100.00%\n",
628 | "Epoch: 8, Idx: 400, Training Loss: 0.0105, Training Accuracy: 100.00%\n",
629 | "Epoch: 8, Idx: 500, Training Loss: 0.0003, Training Accuracy: 100.00%\n",
630 | "Epoch: 08, Train Loss: 0.016, Train Acc: 99.34%, Val. Loss: 0.735093, Val. Acc: 80.49%\n",
631 | "Epoch: 9, Idx: 100, Training Loss: 0.0003, Training Accuracy: 100.00%\n",
632 | "Epoch: 9, Idx: 200, Training Loss: 0.0004, Training Accuracy: 100.00%\n",
633 | "Epoch: 9, Idx: 300, Training Loss: 0.0004, Training Accuracy: 100.00%\n",
634 | "Epoch: 9, Idx: 400, Training Loss: 0.0025, Training Accuracy: 100.00%\n",
635 | "Epoch: 9, Idx: 500, Training Loss: 0.0021, Training Accuracy: 100.00%\n",
636 | "Epoch: 09, Train Loss: 0.008, Train Acc: 99.58%, Val. Loss: 1.018735, Val. Acc: 81.88%\n",
637 | "Epoch: 10, Idx: 100, Training Loss: 0.0025, Training Accuracy: 100.00%\n",
638 | "Epoch: 10, Idx: 200, Training Loss: 0.0192, Training Accuracy: 100.00%\n",
639 | "Epoch: 10, Idx: 300, Training Loss: 0.0002, Training Accuracy: 100.00%\n",
640 | "Epoch: 10, Idx: 400, Training Loss: 0.0000, Training Accuracy: 100.00%\n",
641 | "Epoch: 10, Idx: 500, Training Loss: 0.0082, Training Accuracy: 100.00%\n",
642 | "Epoch: 10, Train Loss: 0.011, Train Acc: 99.55%, Val. Loss: 0.810989, Val. Acc: 82.54%\n",
643 | "Test Loss: 0.940, Test Acc: 80.09%\n"
644 | ]
645 | }
646 | ],
647 | "source": [
648 | "model = AttentionModel(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)\n",
649 | "loss_fn = F.cross_entropy\n",
650 | "\n",
651 | "for epoch in range(10):\n",
652 | " train_loss, train_acc = train_model(model, train_iter, epoch)\n",
653 | " val_loss, val_acc = eval_model(model, valid_iter)\n",
654 | " \n",
655 | " print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:3f}, Val. Acc: {val_acc:.2f}%')\n",
656 | " \n",
657 | "test_loss, test_acc = eval_model(model, test_iter)\n",
658 | "print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')"
659 | ]
660 | },
661 | {
662 | "cell_type": "code",
663 | "execution_count": 11,
664 | "metadata": {},
665 | "outputs": [
666 | {
667 | "name": "stdout",
668 | "output_type": "stream",
669 | "text": [
670 | "tensor([[3.2883e-07, 1.0000e+00]], device='cuda:0', grad_fn=)\n",
671 | "Sentiment: Positive\n"
672 | ]
673 | },
674 | {
675 | "name": "stderr",
676 | "output_type": "stream",
677 | "text": [
678 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:8: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.\n",
679 | " \n"
680 | ]
681 | }
682 | ],
683 | "source": [
684 | "test_sen1 = \"This is one of the best creation of Nolan. I can say, it's his magnum opus. Loved the soundtrack and especially those creative dialogues.\"\n",
685 | "\n",
686 | "test_sen1 = TEXT.preprocess(test_sen1)\n",
687 | "test_sen1 = [[TEXT.vocab.stoi[x] for x in test_sen1]]\n",
688 | "\n",
689 | "test_sen = np.asarray(test_sen1)\n",
690 | "test_sen = torch.LongTensor(test_sen)\n",
691 | "test_tensor = Variable(test_sen, volatile=True)\n",
692 | "test_tensor = test_tensor.cuda()\n",
693 | "model.eval()\n",
694 | "output = model(test_tensor, 1)\n",
695 | "out = F.softmax(output, 1)\n",
696 | "print(out)\n",
697 | "if (torch.argmax(out[0]) == 1):\n",
698 | " print (\"Sentiment: Positive\")\n",
699 | "else:\n",
700 | " print (\"Sentiment: Negative\")"
701 | ]
702 | },
703 | {
704 | "cell_type": "code",
705 | "execution_count": 12,
706 | "metadata": {},
707 | "outputs": [
708 | {
709 | "name": "stdout",
710 | "output_type": "stream",
711 | "text": [
712 | "tensor([[1.0000e+00, 1.0964e-06]], device='cuda:0', grad_fn=)\n",
713 | "Sentiment: Negative\n"
714 | ]
715 | },
716 | {
717 | "name": "stderr",
718 | "output_type": "stream",
719 | "text": [
720 | "c:\\users\\justin\\venv\\lib\\site-packages\\ipykernel_launcher.py:8: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.\n",
721 | " \n"
722 | ]
723 | }
724 | ],
725 | "source": [
726 | "test_sen2 = \"Ohh, such a ridiculous movie. Not gonna recommend it to anyone. Complete waste of time and money.\"\n",
727 | "test_sen2 = TEXT.preprocess(test_sen2)\n",
728 | "test_sen2 = [[TEXT.vocab.stoi[x] for x in test_sen2]]\n",
729 | "\n",
730 | "\n",
731 | "test_sen = np.asarray(test_sen2)\n",
732 | "test_sen = torch.LongTensor(test_sen)\n",
733 | "test_tensor = Variable(test_sen, volatile=True)\n",
734 | "test_tensor = test_tensor.cuda()\n",
735 | "model.eval()\n",
736 | "output = model(test_tensor, 1)\n",
737 | "out = F.softmax(output, 1)\n",
738 | "print(out)\n",
739 | "if (torch.argmax(out[0]) == 1):\n",
740 | " print (\"Sentiment: Positive\")\n",
741 | "else:\n",
742 | " print (\"Sentiment: Negative\")"
743 | ]
744 | }
745 | ],
746 | "metadata": {
747 | "kernelspec": {
748 | "display_name": "Python 3",
749 | "language": "python",
750 | "name": "python3"
751 | },
752 | "language_info": {
753 | "codemirror_mode": {
754 | "name": "ipython",
755 | "version": 3
756 | },
757 | "file_extension": ".py",
758 | "mimetype": "text/x-python",
759 | "name": "python",
760 | "nbconvert_exporter": "python",
761 | "pygments_lexer": "ipython3",
762 | "version": "3.6.8"
763 | }
764 | },
765 | "nbformat": 4,
766 | "nbformat_minor": 4
767 | }
768 |
--------------------------------------------------------------------------------
/4_NMT.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import torch\n",
10 | "import torch.nn as nn\n",
11 | "import torch.optim as optim\n",
12 | "\n",
13 | "from torchtext.datasets import TranslationDataset, Multi30k\n",
14 | "from torchtext.data import Field, BucketIterator\n",
15 | "\n",
16 | "import spacy\n",
17 | "import numpy as np\n",
18 | "\n",
19 | "import random\n",
20 | "import math\n",
21 | "import time"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "SEED = 1234\n",
31 | "\n",
32 | "random.seed(SEED)\n",
33 | "np.random.seed(SEED)\n",
34 | "torch.manual_seed(SEED)\n",
35 | "torch.cuda.manual_seed(SEED)\n",
36 | "torch.backends.cudnn.deterministic = True"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 3,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "spacy_en = spacy.load('en_core_web_sm')\n",
46 | "spacy_de = spacy.load('de_core_news_sm')"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 4,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "def tokenize_de(text):\n",
56 | " return [tok.text for tok in spacy_de.tokenizer(text)][::-1]\n",
57 | "\n",
58 | "def tokenize_en(text):\n",
59 | " return [tok.text for tok in spacy_en.tokenizer(text)]"
60 | ]
61 | },
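{
"cell_type": "markdown",
"metadata": {},
"source": [
"`tokenize_de` reverses the source tokens. Feeding the source sentence in reverse is the trick from Sutskever et al. (2014): it shortens the distance between the beginning of the source and the beginning of the target, which makes optimization easier for a plain seq2seq LSTM. A quick sanity check (assumes the spacy models loaded above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(tokenize_de('zwei junge männer'))  # expected: ['männer', 'junge', 'zwei']\n",
"print(tokenize_en('two young men'))      # expected: ['two', 'young', 'men']"
]
},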
62 | {
63 | "cell_type": "code",
64 | "execution_count": 5,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "SRC = Field(tokenize = tokenize_en, \n",
69 | " init_token = '', \n",
70 | " eos_token = '', \n",
71 | " lower = True)\n",
72 | "\n",
73 | "TRG = Field(tokenize = tokenize_de, \n",
74 | " init_token = '', \n",
75 | " eos_token = '', \n",
76 | " lower = True)"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 6,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "train_data, valid_data, test_data = Multi30k.splits(exts = ('.de', '.en'), \n",
86 | " fields = (SRC, TRG))"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 7,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "Number of training examples: 29000\n",
99 | "Number of validation examples: 1014\n",
100 | "Number of testing examples: 1000\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "print(f\"Number of training examples: {len(train_data.examples)}\")\n",
106 | "print(f\"Number of validation examples: {len(valid_data.examples)}\")\n",
107 | "print(f\"Number of testing examples: {len(test_data.examples)}\")"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 8,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "name": "stdout",
117 | "output_type": "stream",
118 | "text": [
119 | "{'src': ['zwei', 'junge', 'weiße', 'männer', 'sind', 'i', 'm', 'freien', 'in', 'der', 'nähe', 'vieler', 'büsche', '.'], 'trg': ['.', 'bushes', 'many', 'near', 'outside', 'are', 'males', 'white', ',', 'young', 'two']}\n"
120 | ]
121 | }
122 | ],
123 | "source": [
124 | "print(vars(train_data.examples[0]))"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 9,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "SRC.build_vocab(train_data, min_freq = 2)\n",
134 | "TRG.build_vocab(train_data, min_freq = 2)"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 10,
140 | "metadata": {},
141 | "outputs": [
142 | {
143 | "name": "stdout",
144 | "output_type": "stream",
145 | "text": [
146 | "Unique tokens in source (de) vocabulary: 7873\n",
147 | "Unique tokens in target (en) vocabulary: 5923\n"
148 | ]
149 | }
150 | ],
151 | "source": [
152 | "print(f\"Unique tokens in source (de) vocabulary: {len(SRC.vocab)}\")\n",
153 | "print(f\"Unique tokens in target (en) vocabulary: {len(TRG.vocab)}\")"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 11,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 12,
168 | "metadata": {},
169 | "outputs": [],
170 | "source": [
171 | "BATCH_SIZE = 128\n",
172 | "\n",
173 | "train_iterator, valid_iterator, test_iterator = BucketIterator.splits(\n",
174 | " (train_data, valid_data, test_data), \n",
175 | " batch_size = BATCH_SIZE, \n",
176 | " device = device)"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 13,
182 | "metadata": {},
183 | "outputs": [],
184 | "source": [
185 | "class Encoder(nn.Module):\n",
186 | " def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):\n",
187 | " super().__init__()\n",
188 | " \n",
189 | " self.hid_dim = hid_dim\n",
190 | " self.n_layers = n_layers\n",
191 | " self.embedding = nn.Embedding(input_dim, emb_dim)\n",
192 | " self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)\n",
193 | " self.dropout = nn.Dropout(dropout)\n",
194 | " \n",
195 | " def forward(self, src):\n",
196 | " embedded = self.dropout(self.embedding(src))\n",
197 | " outputs, (hidden, cell) = self.rnn(embedded)\n",
198 | " return hidden, cell"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 14,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "class Decoder(nn.Module):\n",
208 | " def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):\n",
209 | " super().__init__()\n",
210 | " \n",
211 | " self.output_dim = output_dim\n",
212 | " self.hid_dim = hid_dim\n",
213 | " self.n_layers = n_layers\n",
214 | " self.embedding = nn.Embedding(output_dim, emb_dim)\n",
215 | " self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)\n",
216 | " self.fc_out = nn.Linear(hid_dim, output_dim)\n",
217 | " self.dropout = nn.Dropout(dropout)\n",
218 | " \n",
219 | " def forward(self, input, hidden, cell): \n",
220 | " input = input.unsqueeze(0)\n",
221 | " embedded = self.dropout(self.embedding(input))\n",
222 | " output, (hidden, cell) = self.rnn(embedded, (hidden, cell))\n",
223 | " prediction = self.fc_out(output.squeeze(0))\n",
224 | " return prediction, hidden, cell"
225 | ]
226 | },
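{
"cell_type": "markdown",
"metadata": {},
"source": [
"A standalone shape check for the `Encoder`/`Decoder` pair above, using toy sizes: the encoder compresses the whole source into `(n_layers, batch, hid_dim)` hidden and cell states, and the decoder consumes one target token per call, returning a distribution over the target vocabulary plus updated states."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n",
"enc = Encoder(input_dim=100, emb_dim=32, hid_dim=64, n_layers=2, dropout=0.5)\n",
"dec = Decoder(output_dim=120, emb_dim=32, hid_dim=64, n_layers=2, dropout=0.5)\n",
"\n",
"src = torch.randint(0, 100, (7, 4))   # (src_len, batch)\n",
"hidden, cell = enc(src)\n",
"print(hidden.shape, cell.shape)       # torch.Size([2, 4, 64]) each: (n_layers, batch, hid_dim)\n",
"\n",
"tok = torch.randint(0, 120, (4,))     # one target token per example\n",
"pred, hidden, cell = dec(tok, hidden, cell)\n",
"print(pred.shape)                     # torch.Size([4, 120]): (batch, output_dim)"
]
},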
227 | {
228 | "cell_type": "code",
229 | "execution_count": 15,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "class Seq2Seq(nn.Module):\n",
234 | " def __init__(self, encoder, decoder, device):\n",
235 | " super().__init__()\n",
236 | " \n",
237 | " self.encoder = encoder\n",
238 | " self.decoder = decoder\n",
239 | " self.device = device\n",
240 | " \n",
241 | " assert encoder.hid_dim == decoder.hid_dim, \\\n",
242 | " \"Hidden dimensions of encoder and decoder must be equal!\"\n",
243 | " assert encoder.n_layers == decoder.n_layers, \\\n",
244 | " \"Encoder and decoder must have equal number of layers!\"\n",
245 | " \n",
246 | " def forward(self, src, trg, teacher_forcing_ratio = 0.5):\n",
247 | " \n",
248 | " batch_size = trg.shape[1]\n",
249 | " trg_len = trg.shape[0]\n",
250 | " trg_vocab_size = self.decoder.output_dim\n",
251 | " outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)\n",
252 | " hidden, cell = self.encoder(src)\n",
253 | " input = trg[0,:]\n",
254 | " \n",
255 | " for t in range(1, trg_len):\n",
256 | " output, hidden, cell = self.decoder(input, hidden, cell)\n",
257 | " outputs[t] = output\n",
258 | " teacher_force = random.random() < teacher_forcing_ratio\n",
259 | " top1 = output.argmax(1) \n",
260 | " input = trg[t] if teacher_force else top1\n",
261 | " return outputs"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 16,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "\n",
271 | "INPUT_DIM = len(SRC.vocab)\n",
272 | "OUTPUT_DIM = len(TRG.vocab)\n",
273 | "ENC_EMB_DIM = 256\n",
274 | "DEC_EMB_DIM = 256\n",
275 | "HID_DIM = 512\n",
276 | "N_LAYERS = 2\n",
277 | "ENC_DROPOUT = 0.5\n",
278 | "DEC_DROPOUT = 0.5\n",
279 | "\n",
280 | "enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)\n",
281 | "dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)\n",
282 | "\n",
283 | "model = Seq2Seq(enc, dec, device).to(device)"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 17,
289 | "metadata": {},
290 | "outputs": [
291 | {
292 | "data": {
293 | "text/plain": [
294 | "Seq2Seq(\n",
295 | " (encoder): Encoder(\n",
296 | " (embedding): Embedding(7873, 256)\n",
297 | " (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)\n",
298 | " (dropout): Dropout(p=0.5, inplace=False)\n",
299 | " )\n",
300 | " (decoder): Decoder(\n",
301 | " (embedding): Embedding(5923, 256)\n",
302 | " (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)\n",
303 | " (fc_out): Linear(in_features=512, out_features=5923, bias=True)\n",
304 | " (dropout): Dropout(p=0.5, inplace=False)\n",
305 | " )\n",
306 | ")"
307 | ]
308 | },
309 | "execution_count": 17,
310 | "metadata": {},
311 | "output_type": "execute_result"
312 | }
313 | ],
314 | "source": [
315 | "def init_weights(m):\n",
316 | " for name, param in m.named_parameters():\n",
317 | " nn.init.uniform_(param.data, -0.08, 0.08)\n",
318 | " \n",
319 | "model.apply(init_weights)"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 18,
325 | "metadata": {},
326 | "outputs": [
327 | {
328 | "name": "stdout",
329 | "output_type": "stream",
330 | "text": [
331 | "The model has 13,926,691 trainable parameters\n"
332 | ]
333 | }
334 | ],
335 | "source": [
336 | "def count_parameters(model):\n",
337 | " return sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
338 | "\n",
339 | "print(f'The model has {count_parameters(model):,} trainable parameters')"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 19,
345 | "metadata": {},
346 | "outputs": [],
347 | "source": [
348 | "optimizer = optim.Adam(model.parameters())"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 20,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]\n",
358 | "criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)"
359 | ]
360 | },
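{
"cell_type": "markdown",
"metadata": {},
"source": [
"`ignore_index=TRG_PAD_IDX` makes the loss skip every position whose target is the padding token, so padding sentences to a common length inside a batch does not dilute the loss. A toy illustration (index 0 stands in for `<pad>` here):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"\n",
"logits = torch.randn(5, 10)             # 5 positions, vocabulary of 10\n",
"target = torch.tensor([3, 1, 0, 0, 7])  # pretend index 0 is <pad>\n",
"\n",
"plain = nn.CrossEntropyLoss()\n",
"masked = nn.CrossEntropyLoss(ignore_index=0)\n",
"print(plain(logits, target))   # averages over all 5 positions\n",
"print(masked(logits, target))  # averages over the 3 non-pad positions only"
]
},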
361 | {
362 | "cell_type": "code",
363 | "execution_count": 21,
364 | "metadata": {},
365 | "outputs": [],
366 | "source": [
367 | "def train(model, iterator, optimizer, criterion, clip):\n",
368 | " \n",
369 | " model.train()\n",
370 | " epoch_loss = 0\n",
371 | " for i, batch in enumerate(iterator):\n",
372 | " src = batch.src\n",
373 | " trg = batch.trg\n",
374 | " optimizer.zero_grad()\n",
375 | " output = model(src, trg)\n",
376 | " output_dim = output.shape[-1]\n",
377 | " output = output[1:].view(-1, output_dim)\n",
378 | " trg = trg[1:].view(-1)\n",
379 | " loss = criterion(output, trg)\n",
380 | " loss.backward()\n",
381 | " torch.nn.utils.clip_grad_norm_(model.parameters(), clip)\n",
382 | " optimizer.step()\n",
383 | " epoch_loss += loss.item()\n",
384 | " return epoch_loss / len(iterator)"
385 | ]
386 | },
387 | {
388 | "cell_type": "code",
389 | "execution_count": 22,
390 | "metadata": {},
391 | "outputs": [],
392 | "source": [
393 | "def evaluate(model, iterator, criterion):\n",
394 | " \n",
395 | " model.eval()\n",
396 | " epoch_loss = 0\n",
397 | " with torch.no_grad():\n",
398 | " for i, batch in enumerate(iterator):\n",
399 | " src = batch.src\n",
400 | " trg = batch.trg\n",
401 | " output = model(src, trg, 0)\n",
402 | " output_dim = output.shape[-1]\n",
403 | " output = output[1:].view(-1, output_dim)\n",
404 | " trg = trg[1:].view(-1)\n",
405 | " loss = criterion(output, trg)\n",
406 | " epoch_loss += loss.item()\n",
407 | " return epoch_loss / len(iterator)"
408 | ]
409 | },
410 | {
411 | "cell_type": "code",
412 | "execution_count": 23,
413 | "metadata": {},
414 | "outputs": [],
415 | "source": [
416 | "def epoch_time(start_time, end_time):\n",
417 | " elapsed_time = end_time - start_time\n",
418 | " elapsed_mins = int(elapsed_time / 60)\n",
419 | " elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
420 | " return elapsed_mins, elapsed_secs"
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": 24,
426 | "metadata": {},
427 | "outputs": [
428 | {
429 | "name": "stdout",
430 | "output_type": "stream",
431 | "text": [
432 | "Epoch: 01 | Time: 0m 34s\n",
433 | "\tTrain Loss: 4.985 | Train PPL: 146.191\n",
434 | "\t Val. Loss: 4.928 | Val. PPL: 138.108\n",
435 | "Epoch: 02 | Time: 0m 34s\n",
436 | "\tTrain Loss: 4.462 | Train PPL: 86.666\n",
437 | "\t Val. Loss: 4.883 | Val. PPL: 131.987\n",
438 | "Epoch: 03 | Time: 0m 34s\n",
439 | "\tTrain Loss: 4.200 | Train PPL: 66.677\n",
440 | "\t Val. Loss: 4.602 | Val. PPL: 99.726\n",
441 | "Epoch: 04 | Time: 0m 34s\n",
442 | "\tTrain Loss: 3.999 | Train PPL: 54.560\n",
443 | "\t Val. Loss: 4.467 | Val. PPL: 87.056\n",
444 | "Epoch: 05 | Time: 0m 34s\n",
445 | "\tTrain Loss: 3.828 | Train PPL: 45.983\n",
446 | "\t Val. Loss: 4.386 | Val. PPL: 80.279\n",
447 | "Epoch: 06 | Time: 0m 34s\n",
448 | "\tTrain Loss: 3.653 | Train PPL: 38.600\n",
449 | "\t Val. Loss: 4.248 | Val. PPL: 69.934\n",
450 | "Epoch: 07 | Time: 0m 34s\n",
451 | "\tTrain Loss: 3.489 | Train PPL: 32.764\n",
452 | "\t Val. Loss: 4.083 | Val. PPL: 59.326\n",
453 | "Epoch: 08 | Time: 0m 34s\n",
454 | "\tTrain Loss: 3.339 | Train PPL: 28.182\n",
455 | "\t Val. Loss: 4.000 | Val. PPL: 54.601\n",
456 | "Epoch: 09 | Time: 0m 34s\n",
457 | "\tTrain Loss: 3.189 | Train PPL: 24.269\n",
458 | "\t Val. Loss: 3.956 | Val. PPL: 52.262\n",
459 | "Epoch: 10 | Time: 0m 34s\n",
460 | "\tTrain Loss: 3.056 | Train PPL: 21.245\n",
461 | "\t Val. Loss: 3.917 | Val. PPL: 50.249\n"
462 | ]
463 | }
464 | ],
465 | "source": [
466 | "N_EPOCHS = 10\n",
467 | "CLIP = 1\n",
468 | "\n",
469 | "best_valid_loss = float('inf')\n",
470 | "\n",
471 | "for epoch in range(N_EPOCHS):\n",
472 | " \n",
473 | " start_time = time.time()\n",
474 | " \n",
475 | " train_loss = train(model, train_iterator, optimizer, criterion, CLIP)\n",
476 | " valid_loss = evaluate(model, valid_iterator, criterion)\n",
477 | " \n",
478 | " end_time = time.time()\n",
479 | " \n",
480 | " epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
481 | " \n",
482 | " if valid_loss < best_valid_loss:\n",
483 | " best_valid_loss = valid_loss\n",
484 | " torch.save(model.state_dict(), 'tut1-model.pt')\n",
485 | " \n",
486 | " print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')\n",
487 | " print(f'\\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')\n",
488 | " print(f'\\t Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f}')"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 25,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "name": "stdout",
498 | "output_type": "stream",
499 | "text": [
500 | "| Test Loss: 4.011 | Test PPL: 55.177 |\n"
501 | ]
502 | }
503 | ],
504 | "source": [
505 | "model.load_state_dict(torch.load('tut1-model.pt'))\n",
506 | "\n",
507 | "test_loss = evaluate(model, test_iterator, criterion)\n",
508 | "\n",
509 | "print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')"
510 | ]
511 | },
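{
"cell_type": "markdown",
"metadata": {},
"source": [
"The test loss above scores teacher-forced predictions only; to actually translate, the decoder has to be run greedily from `<sos>` until it emits `<eos>`. A hedged sketch of such a loop (an illustrative helper, not part of the runs above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def translate_sentence(sentence, src_field, trg_field, model, device, max_len=50):\n",
"    model.eval()\n",
"    tokens = [t.lower() for t in tokenize_de(sentence)]  # reversed German, as in training\n",
"    tokens = [src_field.init_token] + tokens + [src_field.eos_token]\n",
"    src = torch.LongTensor([src_field.vocab.stoi[t] for t in tokens]).unsqueeze(1).to(device)\n",
"    with torch.no_grad():\n",
"        hidden, cell = model.encoder(src)\n",
"    trg_idx = [trg_field.vocab.stoi[trg_field.init_token]]\n",
"    for _ in range(max_len):\n",
"        tok = torch.LongTensor([trg_idx[-1]]).to(device)\n",
"        with torch.no_grad():\n",
"            output, hidden, cell = model.decoder(tok, hidden, cell)\n",
"        trg_idx.append(output.argmax(1).item())\n",
"        if trg_idx[-1] == trg_field.vocab.stoi[trg_field.eos_token]:\n",
"            break\n",
"    return [trg_field.vocab.itos[i] for i in trg_idx[1:]]\n",
"\n",
"# print(translate_sentence('zwei junge männer sind im freien.', SRC, TRG, model, device))"
]
},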
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "metadata": {},
516 | "outputs": [],
517 | "source": []
518 | }
519 | ],
520 | "metadata": {
521 | "kernelspec": {
522 | "display_name": "Python 3",
523 | "language": "python",
524 | "name": "python3"
525 | },
526 | "language_info": {
527 | "codemirror_mode": {
528 | "name": "ipython",
529 | "version": 3
530 | },
531 | "file_extension": ".py",
532 | "mimetype": "text/x-python",
533 | "name": "python",
534 | "nbconvert_exporter": "python",
535 | "pygments_lexer": "ipython3",
536 | "version": "3.6.8"
537 | }
538 | },
539 | "nbformat": 4,
540 | "nbformat_minor": 4
541 | }
542 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # torch_nlp_basic
2 | - Basic concepts for understanding Natural Language Processing with PyTorch
3 | - Please contact me by e-mail
4 |
--------------------------------------------------------------------------------