├── .gitignore └── .gitignore ├── Data_Pre-Processing.ipynb ├── Dataset └── readme.md ├── Embeddings └── readme.md ├── LICENSE ├── Model_Backup └── readme.md ├── Processed_Data └── readme.md ├── README.md └── Summarization.ipynb /.gitignore/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /Data_Pre-Processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Load and Process Dataset \n", 8 | "\n", 9 | "Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Processing data # 0\n", 22 | "Processing data # 10000\n", 23 | "Processing data # 20000\n", 24 | "Processing data # 30000\n", 25 | "Processing data # 40000\n", 26 | "Processing data # 50000\n", 27 | "Processing data # 60000\n", 28 | "Processing data # 70000\n", 29 | "Processing data # 80000\n", 30 | "Processing data # 90000\n", 31 | "Processing data # 100000\n", 32 | "Processing data # 110000\n", 33 | "Processing data # 120000\n", 34 | "Processing data # 130000\n", 35 | "Processing data # 140000\n", 36 | "Processing data # 150000\n", 37 | "Processing data # 160000\n", 38 | "Processing data # 170000\n", 39 | "Processing data # 180000\n", 40 | "Processing data # 190000\n", 41 | "Processing data # 200000\n", 42 | "Processing data # 210000\n", 43 | "Processing data # 220000\n", 44 | "Processing data # 230000\n", 45 | "Processing data # 240000\n", 46 | "Processing data # 250000\n", 47 | "Processing data # 260000\n", 48 | "Processing data # 270000\n", 49 | "Processing data 
# 280000\n", 50 | "Processing data # 290000\n", 51 | "Processing data # 300000\n", 52 | "Processing data # 310000\n", 53 | "Processing data # 320000\n", 54 | "Processing data # 330000\n", 55 | "\n", 56 | "# of Data: 337465\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "import csv\n", 62 | "from nltk import word_tokenize\n", 63 | "\n", 64 | "import string\n", 65 | "\n", 66 | "summaries = []\n", 67 | "texts = []\n", 68 | "\n", 69 | "def clean(text):\n", 70 | " text = text.lower()\n", 71 | " printable = set(string.printable)\n", 72 | " text = \"\".join(list(filter(lambda x: x in printable, text))) #filter funny characters, if any.\n", 73 | " return text\n", 74 | "\n", 75 | "text_max_len = 500\n", 76 | "text_min_len = 25\n", 77 | "summary_max_len = 30\n", 78 | "vocab2idx = {}\n", 79 | "\n", 80 | "#Data from https://www.kaggle.com/snap/amazon-fine-food-reviews\n", 81 | "with open('Dataset/Reviews.csv') as csvfile: \n", 82 | " \n", 83 | " Reviews = csv.DictReader(csvfile)\n", 84 | " \n", 85 | " i=0\n", 86 | " \n", 87 | " for row in Reviews:\n", 88 | " \n", 89 | " text = row['Text']\n", 90 | " summary = row['Summary']\n", 91 | " \n", 92 | " if len(text) <= text_max_len and len(text) >= text_min_len and len(summary) <= summary_max_len:\n", 93 | " #print(i)\n", 94 | "\n", 95 | " clean_text = clean(text)\n", 96 | " clean_summary = clean(summary)\n", 97 | " \n", 98 | " tokenized_summary = word_tokenize(clean_summary)\n", 99 | " tokenized_text = word_tokenize(clean_text)\n", 100 | " \n", 101 | " # BUILD VOCABULARY\n", 102 | " \n", 103 | " for word in tokenized_text:\n", 104 | " if word not in vocab2idx:\n", 105 | " vocab2idx[word]=len(vocab2idx)\n", 106 | " \n", 107 | " for word in tokenized_summary:\n", 108 | " if word not in vocab2idx:\n", 109 | " vocab2idx[word]=len(vocab2idx)\n", 110 | " \n", 111 | " ## ________________\n", 112 | "\n", 113 | " summaries.append(tokenized_summary)\n", 114 | " texts.append(tokenized_text)\n", 115 | "\n", 116 | " if i%10000==0:\n", 117 | " print(\"Processing data # {}\".format(i))\n", 118 | "\n", 119 | " i+=1\n", 120 | "\n", 121 | "print(\"\\n# of Data: {}\".format(len(texts)))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Random Sample" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 2, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "SAMPLE CLEANED & TOKENIZED TEXT: \n", 141 | "\n", 142 | "['i', 'like', 'these', 'better', 'than', 'any', 'chips', 'around', '--', 'high-potency', 'cheese', 'flavor', ',', 'crispy', '--', 'what', \"'s\", 'not', 'to', 'like', '?', 'i', 'have', 'to', 'package', 'these', 'up', 'in', '``', 'single-serve', \"''\", 'packs', 'or', 'i', 'will', 'eat', 'half', 'a', 'box', 'at', 'one', 'sitting', '!', '<', 'br', '/', '>', '<', 'br', '/', '>', 'mine', 'arrived', 'in', 'great', 'shape', ',', 'too', '--', 'no', 'more', '``', 'crumbs', \"''\", 'than', 'i', 'would', 'expect', 'to', 'find', 'in', 'a', 'box', 'purchased', 'at', 'the', 'supermarket', '.', 'and', 'these', 'are', 'much', 'cheaper', 'than', 'the', 'local', 'stores', ',', 'too', '.']\n", 143 | "\n", 144 | "\n", 145 | "SAMPLE CLEANED & TOKENIZED SUMMARY: \n", 146 | "\n", 147 | "['my', 'favorite', 'snack', 'crackers', '...']\n", 148 | "\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "import random\n", 154 | "\n", 155 | "index = random.randint(0,len(texts)-1)\n", 156 | "\n", 157 | "print(\"SAMPLE CLEANED & TOKENIZED TEXT: 
\\n\\n{}\\n\\n\".format(texts[index]))\n", 158 | "print(\"SAMPLE CLEANED & TOKENIZED SUMMARY: \\n\\n{}\\n\".format(summaries[index]))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Load Embeddings\n", 166 | "\n", 167 | "Loading pre-trained GloVe embeddings. Source of Data: https://nlp.stanford.edu/projects/glove/" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 3, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Embedding Loaded.\n", 180 | "Vocabulary Size: 43544\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "import numpy as np\n", 186 | "\n", 187 | "vocab = []\n", 188 | "embd = []\n", 189 | "special_tags = ['','','']\n", 190 | "\n", 191 | "\n", 192 | "def loadEmbeddings(filename):\n", 193 | " vocab2embd = {}\n", 194 | " \n", 195 | " with open(filename) as infile: \n", 196 | " for line in infile:\n", 197 | " row = line.strip().split(' ')\n", 198 | " word = row[0].lower()\n", 199 | " if word not in vocab2embd:\n", 200 | " vocab2embd[word]=np.asarray(row[1:],np.float32)\n", 201 | "\n", 202 | " print('Embedding Loaded.')\n", 203 | " return vocab2embd\n", 204 | "\n", 205 | "vocab2embd = loadEmbeddings('Embeddings/glove.6B.100d.txt')\n", 206 | "\n", 207 | "for word in vocab2idx:\n", 208 | " if word in vocab2embd:\n", 209 | " vocab.append(word)\n", 210 | " embd.append(vocab2embd[word])\n", 211 | " \n", 212 | "for special_tag in special_tags:\n", 213 | " vocab.append(special_tag)\n", 214 | " embd.append(np.random.rand(len(embd[0]),))\n", 215 | " \n", 216 | "vocab2idx = {word:idx for idx,word in enumerate(vocab)}\n", 217 | "embd = np.asarray(embd,np.float32)\n", 218 | "\n", 219 | "print(\"Vocabulary Size: {}\".format(len(vocab2idx)))\n", 220 | " \n", 221 | "\n" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 4, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "43543\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "print(vocab2idx[''])" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "## Vectorize Data" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 5, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "vec_texts=[]\n", 255 | "vec_summaries=[]\n", 256 | "\n", 257 | "for text,summary in zip(texts,summaries):\n", 258 | " # Replace out of vocab words with index for '' tag\n", 259 | " vec_texts.append([vocab2idx.get(word,vocab2idx['']) for word in text])\n", 260 | " vec_summaries.append([vocab2idx.get(word,vocab2idx['']) for word in summary])" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "## Shuffle Data" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 6, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "import random\n", 277 | "random.seed(101)\n", 278 | "\n", 279 | "texts_idx = [idx for idx in range(len(vec_texts))]\n", 280 | "random.shuffle(texts_idx)\n", 281 | "\n", 282 | "vec_texts = [vec_texts[idx] for idx in texts_idx]\n", 283 | "vec_summaries = [vec_summaries[idx] for idx in texts_idx]" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "## Split Data into train, validation, and test sets." 
291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 7, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "# Use first 10000 data for testing, the next 10000 data for validation, and rest for training\n", 300 | "\n", 301 | "test_summaries = vec_summaries[0:10000]\n", 302 | "test_texts = vec_texts[0:10000]\n", 303 | "\n", 304 | "val_summaries = vec_summaries[10000:20000]\n", 305 | "val_texts = vec_texts[10000:20000]\n", 306 | "\n", 307 | "train_summaries = vec_summaries[20000:]\n", 308 | "train_texts = vec_texts[20000:]" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "## Bucket And Batch Function" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 8, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "def bucket_and_batch(texts,summaries,batch_size=32):\n", 325 | " \n", 326 | " # Sort summaries and texts according to the length of text\n", 327 | " # (So that texts with similar lengths tend to remain in the same batch and thus require less padding)\n", 328 | " \n", 329 | " text_lens = [len(text) for text in texts]\n", 330 | " sortedidx = np.flip(np.argsort(text_lens),axis=0)\n", 331 | " texts=[texts[idx] for idx in sortedidx]\n", 332 | " summaries=[summaries[idx] for idx in sortedidx]\n", 333 | " \n", 334 | " batches_text=[]\n", 335 | " batches_summary=[]\n", 336 | " batches_true_text_len = []\n", 337 | " batches_true_summary_len = []\n", 338 | " \n", 339 | " i=0\n", 340 | " while i < (len(texts)-batch_size):\n", 341 | " \n", 342 | " max_len = len(texts[i])\n", 343 | " \n", 344 | " batch_text=[]\n", 345 | " batch_summary=[]\n", 346 | " batch_true_text_len=[]\n", 347 | " batch_true_summary_len=[]\n", 348 | " \n", 349 | " for j in range(batch_size):\n", 350 | " \n", 351 | " padded_text = texts[i+j]\n", 352 | " padded_summary = summaries[i+j]\n", 353 | " \n", 354 | " batch_true_text_len.append(len(texts[i+j]))\n", 355 | " batch_true_summary_len.append(len(summaries[i+j])+1)\n", 356 | " \n", 357 | " while len(padded_text) < max_len:\n", 358 | " padded_text.append(vocab2idx[''])\n", 359 | "\n", 360 | " padded_summary.append(vocab2idx['']) #End of Sentence Marker\n", 361 | " while len(padded_summary) < summary_max_len+1:\n", 362 | " padded_summary.append(vocab2idx[''])\n", 363 | " \n", 364 | " \n", 365 | " batch_text.append(padded_text)\n", 366 | " batch_summary.append(padded_summary)\n", 367 | " \n", 368 | " batches_text.append(batch_text)\n", 369 | " batches_summary.append(batch_summary)\n", 370 | " batches_true_text_len.append(batch_true_text_len)\n", 371 | " batches_true_summary_len.append(batch_true_summary_len)\n", 372 | " \n", 373 | " i+=batch_size\n", 374 | " \n", 375 | " return batches_text, batches_summary, batches_true_text_len, batches_true_summary_len" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "## Prepare Batches" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 9, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "train_batches_text, train_batches_summary, train_batches_true_text_len, train_batches_true_summary_len \\\n", 392 | "= bucket_and_batch(train_texts, train_summaries)\n", 393 | "\n", 394 | "val_batches_text, val_batches_summary, val_batches_true_text_len, val_batches_true_summary_len \\\n", 395 | "= bucket_and_batch(val_texts, val_summaries)\n", 396 | "\n", 397 | "test_batches_text, test_batches_summary, test_batches_true_text_len, 
test_batches_true_summary_len \\\n", 398 | "= bucket_and_batch(test_texts, test_summaries)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "## Save Data" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 10, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "import json\n", 415 | "\n", 416 | "d = {}\n", 417 | "\n", 418 | "d[\"vocab\"] = vocab2idx\n", 419 | "d[\"embd\"] = embd.tolist()\n", 420 | "d[\"train_batches_text\"] = train_batches_text\n", 421 | "d[\"test_batches_text\"] = test_batches_text\n", 422 | "d[\"val_batches_text\"] = val_batches_text\n", 423 | "d[\"train_batches_summary\"] = train_batches_summary\n", 424 | "d[\"test_batches_summary\"] = test_batches_summary\n", 425 | "d[\"val_batches_summary\"] = val_batches_summary\n", 426 | "d[\"train_batches_true_text_len\"] = train_batches_true_text_len\n", 427 | "d[\"val_batches_true_text_len\"] = val_batches_true_text_len\n", 428 | "d[\"test_batches_true_text_len\"] = test_batches_true_text_len\n", 429 | "d[\"train_batches_true_summary_len\"] = train_batches_true_summary_len\n", 430 | "d[\"val_batches_true_summary_len\"] = val_batches_true_summary_len\n", 431 | "d[\"test_batches_true_summary_len\"] = test_batches_true_summary_len\n", 432 | "\n", 433 | "with open('Processed_Data/Amazon_Reviews_Processed.json', 'w') as outfile:\n", 434 | " json.dump(d, outfile)\n" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [] 443 | } 444 | ], 445 | "metadata": { 446 | "kernelspec": { 447 | "display_name": "Python 3", 448 | "language": "python", 449 | "name": "python3" 450 | }, 451 | "language_info": { 452 | "codemirror_mode": { 453 | "name": "ipython", 454 | "version": 3 455 | }, 456 | "file_extension": ".py", 457 | "mimetype": "text/x-python", 458 | "name": "python", 459 | "nbconvert_exporter": "python", 460 | "pygments_lexer": "ipython3", 461 | "version": "3.6.9" 462 | } 463 | }, 464 | "nbformat": 4, 465 | "nbformat_minor": 4 466 | } 467 | -------------------------------------------------------------------------------- /Dataset/readme.md: -------------------------------------------------------------------------------- 1 | Amazon Fine Food Reviews dataset (Reviews.csv) should be put here. 2 | Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews 3 | -------------------------------------------------------------------------------- /Embeddings/readme.md: -------------------------------------------------------------------------------- 1 | Glove 100 dimensional embedding should be put here. 2 | Download 'Wikipedia 2014 + Gigaword 5 (6B tokens, 400K vocab, uncased, 50d, 100d, 200d, & 300d vectors, 822 MB download): glove.6B.zip' 3 | from https://nlp.stanford.edu/projects/glove/ and extract the files in this folder. 
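A quick sanity check after extraction (a minimal sketch; it assumes only the standard glove.6B.zip contents and the Embeddings/glove.6B.100d.txt path that Data_Pre-Processing.ipynb expects):

```python
# Check that the extracted GloVe file is where the notebooks expect it and that
# each line parses as a word followed by 100 floating-point values.
with open('Embeddings/glove.6B.100d.txt') as f:
    parts = f.readline().strip().split(' ')

print(parts[0], len(parts) - 1)  # should print the first word and 100
```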
4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jishnu Ray Chowdhury 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Model_Backup/readme.md: -------------------------------------------------------------------------------- 1 | Pre-Trained Tensorflow Model Parameters and checkpoints will be saved inside this folder. 2 | -------------------------------------------------------------------------------- /Processed_Data/readme.md: -------------------------------------------------------------------------------- 1 | Processed Data will be put here. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Abstractive Summarization 2 | 3 | Based on [Seq2seq learning](https://arxiv.org/abs/1409.3215) 4 | with [attention mechanism](https://arxiv.org/abs/1409.0473), specifically [local attention](https://nlp.stanford.edu/pubs/emnlp15_attn.pdf). 
5 | 6 | ### Loading Pre-processed Dataset 7 | 8 | The Data is preprocessed in [Data_Pre-Processing.ipynb](https://github.com/JRC1995/Abstractive-Summarization/blob/master/Data_Pre-Processing.ipynb) 9 | 10 | Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews 11 | 12 | 13 | ```python 14 | import json 15 | 16 | with open('Processed_Data/Amazon_Reviews_Processed.json') as file: 17 | 18 | for json_data in file: 19 | saved_data = json.loads(json_data) 20 | 21 | vocab2idx = saved_data["vocab"] 22 | embd = saved_data["embd"] 23 | train_batches_text = saved_data["train_batches_text"] 24 | test_batches_text = saved_data["test_batches_text"] 25 | val_batches_text = saved_data["val_batches_text"] 26 | train_batches_summary = saved_data["train_batches_summary"] 27 | test_batches_summary = saved_data["test_batches_summary"] 28 | val_batches_summary = saved_data["val_batches_summary"] 29 | train_batches_true_text_len = saved_data["train_batches_true_text_len"] 30 | val_batches_true_text_len = saved_data["val_batches_true_text_len"] 31 | test_batches_true_text_len = saved_data["test_batches_true_text_len"] 32 | train_batches_true_summary_len = saved_data["train_batches_true_summary_len"] 33 | val_batches_true_summary_len = saved_data["val_batches_true_summary_len"] 34 | test_batches_true_summary_len = saved_data["test_batches_true_summary_len"] 35 | 36 | break 37 | 38 | idx2vocab = {v:k for k,v in vocab2idx.items()} 39 | ``` 40 | 41 | ## Hyperparameters 42 | 43 | 44 | ```python 45 | hidden_size = 300 46 | learning_rate = 0.001 47 | epochs = 5 48 | max_summary_len = 31 # should be summary_max_len as used in data_preprocessing with +1 (+1 for ) 49 | D = 5 # D determines local attention window size 50 | window_len = 2*D+1 51 | l2=1e-6 52 | ``` 53 | 54 | ## Tensorflow Placeholders 55 | 56 | 57 | ```python 58 | import tensorflow.compat.v1 as tf 59 | 60 | tf.disable_v2_behavior() 61 | tf.disable_eager_execution() 62 | 63 | embd_dim = len(embd[0]) 64 | 65 | tf_text = tf.placeholder(tf.int32, [None, None]) 66 | tf_embd = tf.placeholder(tf.float32, [len(vocab2idx),embd_dim]) 67 | tf_true_summary_len = tf.placeholder(tf.int32, [None]) 68 | tf_summary = tf.placeholder(tf.int32,[None, None]) 69 | tf_train = tf.placeholder(tf.bool) 70 | ``` 71 | 72 | WARNING:tensorflow:From /home/jishnu/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. 73 | Instructions for updating: 74 | non-resource variables are not supported in the long term 75 | 76 | 77 | ## Dropout Function 78 | 79 | 80 | ```python 81 | def dropout(x,rate,training): 82 | return tf.cond(tf_train, 83 | lambda: tf.nn.dropout(x,rate=0.3), 84 | lambda: x) 85 | 86 | 87 | ``` 88 | 89 | ## Embed vectorized text 90 | 91 | Dropout used for regularization 92 | (https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf) 93 | 94 | 95 | ```python 96 | embd_text = tf.nn.embedding_lookup(tf_embd, tf_text) 97 | 98 | embd_text = dropout(embd_text,rate=0.3,training=tf_train) 99 | ``` 100 | 101 | ## LSTM function 102 | 103 | More info: 104 |
105 | https://dl.acm.org/citation.cfm?id=1246450, 106 |
107 | https://www.bioinf.jku.at/publications/older/2604.pdf, 108 |
109 | https://en.wikipedia.org/wiki/Long_short-term_memory 110 | 111 | 112 | ```python 113 | def LSTM(x,hidden_state,cell,input_dim,hidden_size,scope): 114 | 115 | with tf.variable_scope(scope,reuse=tf.AUTO_REUSE): 116 | 117 | w = tf.get_variable("w", shape=[4,input_dim,hidden_size], 118 | dtype=tf.float32, 119 | trainable=True, 120 | initializer=tf.glorot_uniform_initializer()) 121 | 122 | u = tf.get_variable("u", shape=[4,hidden_size,hidden_size], 123 | dtype=tf.float32, 124 | trainable=True, 125 | initializer=tf.glorot_uniform_initializer()) 126 | 127 | b = tf.get_variable("bias", shape=[4,1,hidden_size], 128 | dtype=tf.float32, 129 | trainable=True, 130 | initializer=tf.zeros_initializer()) 131 | 132 | input_gate = tf.nn.sigmoid( tf.matmul(x,w[0]) + tf.matmul(hidden_state,u[0]) + b[0]) 133 | forget_gate = tf.nn.sigmoid( tf.matmul(x,w[1]) + tf.matmul(hidden_state,u[1]) + b[1]) 134 | output_gate = tf.nn.sigmoid( tf.matmul(x,w[2]) + tf.matmul(hidden_state,u[2]) + b[2]) 135 | cell_ = tf.nn.tanh( tf.matmul(x,w[3]) + tf.matmul(hidden_state,u[3]) + b[3]) 136 | cell = forget_gate*cell + input_gate*cell_ 137 | hidden_state = output_gate*tf.tanh(cell) 138 | 139 | return hidden_state, cell 140 | 141 | ``` 142 | 143 | ## Bi-Directional LSTM Encoder 144 | 145 | (https://maxwell.ict.griffith.edu.au/spl/publications/papers/ieeesp97_schuster.pdf) 146 | 147 | More Info: https://machinelearningmastery.com/develop-bidirectional-lstm-sequence-classification-python-keras/ 148 | 149 | Bi-directional LSTM encoder has a forward encoder and a backward encoder. The forward encoder encodes a text sequence from start to end, and the backward encoder encodes the text sequence from end to start. 150 | The final output is a combination (in this case, a concatenation) of the forward encoded text and the backward encoded text 151 | 152 | 153 | 154 | ## Forward Encoding 155 | 156 | 157 | ```python 158 | S = tf.shape(embd_text)[1] #text sequence length 159 | N = tf.shape(embd_text)[0] #batch_size 160 | 161 | i=0 162 | hidden=tf.zeros([N, hidden_size], dtype=tf.float32) 163 | cell=tf.zeros([N, hidden_size], dtype=tf.float32) 164 | hidden_forward=tf.TensorArray(size=S, dtype=tf.float32) 165 | 166 | #shape of embd_text: [N,S,embd_dim] 167 | embd_text_t = tf.transpose(embd_text,[1,0,2]) 168 | #current shape of embd_text: [S,N,embd_dim] 169 | 170 | def cond(i, hidden, cell, hidden_forward): 171 | return i < S 172 | 173 | def body(i, hidden, cell, hidden_forward): 174 | x = embd_text_t[i] 175 | 176 | hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope="forward_encoder") 177 | hidden_forward = hidden_forward.write(i, hidden) 178 | 179 | return i+1, hidden, cell, hidden_forward 180 | 181 | _, _, _, hidden_forward = tf.while_loop(cond, body, [i, hidden, cell, hidden_forward]) 182 | ``` 183 | 184 | ## Backward Encoding 185 | 186 | 187 | ```python 188 | i=S-1 189 | hidden=tf.zeros([N, hidden_size], dtype=tf.float32) 190 | cell=tf.zeros([N, hidden_size], dtype=tf.float32) 191 | hidden_backward=tf.TensorArray(size=S, dtype=tf.float32) 192 | 193 | def cond(i, hidden, cell, hidden_backward): 194 | return i >= 0 195 | 196 | def body(i, hidden, cell, hidden_backward): 197 | x = embd_text_t[i] 198 | hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope="backward_encoder") 199 | hidden_backward = hidden_backward.write(i, hidden) 200 | 201 | return i-1, hidden, cell, hidden_backward 202 | 203 | _, _, _, hidden_backward = tf.while_loop(cond, body, [i, hidden, cell, hidden_backward]) 204 | ``` 205 | 206 | ## Merge Forward and 
Backward Encoder Hidden States 207 | 208 | 209 | ```python 210 | hidden_forward = hidden_forward.stack() 211 | hidden_backward = hidden_backward.stack() 212 | 213 | encoder_states = tf.concat([hidden_forward,hidden_backward],axis=-1) 214 | encoder_states = tf.transpose(encoder_states,[1,0,2]) 215 | 216 | encoder_states = dropout(encoder_states,rate=0.3,training=tf_train) 217 | 218 | final_encoded_state = dropout(tf.concat([hidden_forward[-1],hidden_backward[-1]],axis=-1),rate=0.3,training=tf_train) 219 | 220 | 221 | ``` 222 | 223 | ## Implementation of attention scoring function 224 | 225 | Given a sequence of encoder states ($H_s$) and the decoder hidden state ($H_t$) of current timestep $t$, the equation for computing attention score is: 226 | 227 | $$Score = (H_s.W_a).H_t^T $$ 228 | 229 | ($W_a$ = trainable parameters) 230 | 231 | (https://nlp.stanford.edu/pubs/emnlp15_attn.pdf) 232 | 233 | 234 | ```python 235 | def attention_score(encoder_states,decoder_hidden_state,scope="attention_score"): 236 | 237 | with tf.variable_scope(scope,reuse=tf.AUTO_REUSE): 238 | Wa = tf.get_variable("Wa", shape=[2*hidden_size,2*hidden_size], 239 | dtype=tf.float32, 240 | trainable=True, 241 | initializer=tf.glorot_uniform_initializer()) 242 | 243 | encoder_states = tf.reshape(encoder_states,[N*S,2*hidden_size]) 244 | 245 | encoder_states = tf.reshape(tf.matmul(encoder_states,Wa),[N,S,2*hidden_size]) 246 | decoder_hidden_state = tf.reshape(decoder_hidden_state,[N,2*hidden_size,1]) 247 | 248 | return tf.reshape(tf.matmul(encoder_states,decoder_hidden_state),[N,S]) 249 | 250 | ``` 251 | 252 | ## Local Attention Function 253 | 254 | Based on: https://nlp.stanford.edu/pubs/emnlp15_attn.pdf 255 | 256 | 257 | ```python 258 | 259 | def align(encoder_states, decoder_hidden_state,scope="attention"): 260 | 261 | with tf.variable_scope(scope,reuse=tf.AUTO_REUSE): 262 | Wp = tf.get_variable("Wp", shape=[2*hidden_size,128], 263 | dtype=tf.float32, 264 | trainable=True, 265 | initializer=tf.glorot_uniform_initializer()) 266 | 267 | Vp = tf.get_variable("Vp", shape=[128,1], 268 | dtype=tf.float32, 269 | trainable=True, 270 | initializer=tf.glorot_uniform_initializer()) 271 | 272 | positions = tf.cast(S-window_len,dtype=tf.float32) # Maximum valid attention window starting position 273 | 274 | # Predict attention window starting position 275 | ps = positions*tf.nn.sigmoid(tf.matmul(tf.tanh(tf.matmul(decoder_hidden_state,Wp)),Vp)) 276 | # ps = (soft-)predicted starting position of attention window 277 | pt = ps+D # pt = center of attention window where the whole window length is 2*D+1 278 | pt = tf.reshape(pt,[N]) 279 | 280 | i = 0 281 | gaussian_position_based_scores = tf.TensorArray(size=S,dtype=tf.float32) 282 | sigma = tf.constant(D/2,dtype=tf.float32) 283 | 284 | def cond(i,gaussian_position_based_scores): 285 | 286 | return i < S 287 | 288 | def body(i,gaussian_position_based_scores): 289 | 290 | score = tf.exp(-((tf.square(tf.cast(i,tf.float32)-pt))/(2*tf.square(sigma)))) 291 | # (equation (10) in https://nlp.stanford.edu/pubs/emnlp15_attn.pdf) 292 | gaussian_position_based_scores = gaussian_position_based_scores.write(i,score) 293 | 294 | return i+1,gaussian_position_based_scores 295 | 296 | i,gaussian_position_based_scores = tf.while_loop(cond,body,[i,gaussian_position_based_scores]) 297 | 298 | gaussian_position_based_scores = gaussian_position_based_scores.stack() 299 | gaussian_position_based_scores = tf.transpose(gaussian_position_based_scores,[1,0]) 300 | gaussian_position_based_scores = 
tf.reshape(gaussian_position_based_scores,[N,S]) 301 | 302 | scores = attention_score(encoder_states,decoder_hidden_state)*gaussian_position_based_scores 303 | scores = tf.nn.softmax(scores,axis=-1) 304 | 305 | return tf.reshape(scores,[N,S,1]) 306 | ``` 307 | 308 | ## LSTM Decoder With Local Attention 309 | 310 | 311 | ```python 312 | with tf.variable_scope("decoder",reuse=tf.AUTO_REUSE): 313 | SOS = tf.get_variable("sos", shape=[1,embd_dim], 314 | dtype=tf.float32, 315 | trainable=True, 316 | initializer=tf.glorot_uniform_initializer()) 317 | 318 | # SOS represents starting marker 319 | # It tells the decoder that it is about to decode the first word of the output 320 | # I have set SOS as a trainable parameter 321 | 322 | Wc = tf.get_variable("Wc", shape=[4*hidden_size,embd_dim], 323 | dtype=tf.float32, 324 | trainable=True, 325 | initializer=tf.glorot_uniform_initializer()) 326 | 327 | 328 | 329 | SOS = tf.tile(SOS,[N,1]) #now SOS shape: [N,embd_dim] 330 | inp = SOS 331 | hidden=final_encoded_state 332 | cell=tf.zeros([N, 2*hidden_size], dtype=tf.float32) 333 | decoder_outputs=tf.TensorArray(size=max_summary_len, dtype=tf.float32) 334 | outputs=tf.TensorArray(size=max_summary_len, dtype=tf.int32) 335 | 336 | attention_scores = align(encoder_states,hidden) 337 | encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1) 338 | 339 | for i in range(max_summary_len): 340 | 341 | inp = dropout(inp,rate=0.3,training=tf_train) 342 | 343 | inp = tf.concat([inp,encoder_context_vector],axis=-1) 344 | 345 | hidden,cell = LSTM(inp,hidden,cell,embd_dim+2*hidden_size,2*hidden_size,scope="decoder") 346 | 347 | hidden = dropout(hidden,rate=0.3,training=tf_train) 348 | 349 | attention_scores = align(encoder_states,hidden) 350 | encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1) 351 | 352 | concated = tf.concat([hidden,encoder_context_vector],axis=-1) 353 | 354 | linear_out = tf.nn.tanh(tf.matmul(concated,Wc)) 355 | decoder_output = tf.matmul(linear_out,tf.transpose(tf_embd,[1,0])) 356 | # produce unnormalized probability distribution over vocabulary 357 | 358 | 359 | decoder_outputs = decoder_outputs.write(i,decoder_output) 360 | 361 | # Pick out most probable vocab indices based on the unnormalized probability distribution 362 | 363 | next_word_vec = tf.cast(tf.argmax(decoder_output,1),tf.int32) 364 | 365 | next_word_vec = tf.reshape(next_word_vec, [N]) 366 | 367 | outputs = outputs.write(i,next_word_vec) 368 | 369 | next_word = tf.nn.embedding_lookup(tf_embd, next_word_vec) 370 | inp = tf.reshape(next_word, [N, embd_dim]) 371 | 372 | 373 | decoder_outputs = decoder_outputs.stack() 374 | outputs = outputs.stack() 375 | 376 | decoder_outputs = tf.transpose(decoder_outputs,[1,0,2]) 377 | outputs = tf.transpose(outputs,[1,0]) 378 | 379 | 380 | 381 | ``` 382 | 383 | ## Define Cross Entropy Cost Function and L2 Regularization 384 | 385 | 386 | ```python 387 | filtered_trainables = [var for var in tf.trainable_variables() if 388 | not("Bias" in var.name or "bias" in var.name 389 | or "noreg" in var.name)] 390 | 391 | regularization = tf.reduce_sum([tf.nn.l2_loss(var) for var 392 | in filtered_trainables]) 393 | 394 | with tf.variable_scope("loss"): 395 | 396 | epsilon = tf.constant(1e-9, tf.float32) 397 | 398 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 399 | labels=tf_summary, logits=decoder_outputs) 400 | 401 | pad_mask = tf.sequence_mask(tf_true_summary_len, 402 | maxlen=max_summary_len, 403 | dtype=tf.float32) 404 | 405 | 
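    # pad_mask (from tf.sequence_mask above) is 1.0 at real summary positions and
    # 0.0 at <pad> positions beyond tf_true_summary_len, so the element-wise
    # product below zeroes out the cross-entropy at padded timesteps.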
masked_cross_entropy = cross_entropy*pad_mask 406 | 407 | cost = tf.reduce_mean(masked_cross_entropy) + \ 408 | l2*regularization 409 | 410 | cross_entropy = tf.reduce_mean(masked_cross_entropy) 411 | ``` 412 | 413 | ## Accuracy 414 | 415 | 416 | ```python 417 | # Comparing predicted sequence with labels 418 | comparison = tf.cast(tf.equal(outputs, tf_summary), 419 | tf.float32) 420 | 421 | # Masking to ignore the effect of pads while calculating accuracy 422 | pad_mask = tf.sequence_mask(tf_true_summary_len, 423 | maxlen=max_summary_len, 424 | dtype=tf.bool) 425 | 426 | masked_comparison = tf.boolean_mask(comparison, pad_mask) 427 | 428 | # Accuracy 429 | accuracy = tf.reduce_mean(masked_comparison) 430 | ``` 431 | 432 | ## Define Optimizer 433 | 434 | 435 | ```python 436 | all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) 437 | 438 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 439 | 440 | gvs = optimizer.compute_gradients(cost, all_vars) 441 | 442 | capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs] # Gradient Clipping 443 | 444 | train_op = optimizer.apply_gradients(capped_gvs) 445 | ``` 446 | 447 | ## Training and Validation 448 | 449 | 450 | ```python 451 | import pickle 452 | import random 453 | 454 | with tf.Session() as sess: # Start Tensorflow Session 455 | display_step = 100 456 | patience = 5 457 | 458 | load = input("\nLoad checkpoint? y/n: ") 459 | print("") 460 | saver = tf.train.Saver() 461 | 462 | if load.lower() == 'y': 463 | 464 | print('Loading pre-trained weights for the model...') 465 | 466 | saver.restore(sess, 'Model_Backup/Seq2seq_summarization.ckpt') 467 | sess.run(tf.global_variables()) 468 | sess.run(tf.tables_initializer()) 469 | 470 | with open('Model_Backup/Seq2seq_summarization.pkl', 'rb') as fp: 471 | train_data = pickle.load(fp) 472 | 473 | covered_epochs = train_data['covered_epochs'] 474 | best_loss = train_data['best_loss'] 475 | impatience = 0 476 | 477 | print('\nRESTORATION COMPLETE\n') 478 | 479 | else: 480 | best_loss = 2**30 481 | impatience = 0 482 | covered_epochs = 0 483 | 484 | init = tf.global_variables_initializer() 485 | sess.run(init) 486 | sess.run(tf.tables_initializer()) 487 | 488 | epoch=0 489 | while (epoch+covered_epochs)") for vec in train_batches_text[j][idx]]) 529 | predicted_summary = [idx2vocab.get(vec,"") for vec in prediction[idx]] 530 | actual_summary = [idx2vocab.get(vec,"") for vec in train_batches_summary[j][idx]] 531 | 532 | print("\nSample Text\n") 533 | print(text) 534 | print("\nSample Predicted Summary\n") 535 | for word in predicted_summary: 536 | if word == '': 537 | break 538 | else: 539 | print(word,end=" ") 540 | print("\n\nSample Actual Summary\n") 541 | for word in actual_summary: 542 | if word == '': 543 | break 544 | else: 545 | print(word,end=" ") 546 | print("\n\n") 547 | 548 | print("\n\nSTARTING VALIDATION\n\n") 549 | 550 | total_val_loss=0 551 | total_val_acc=0 552 | 553 | for i in range(0, len(val_batches_text)): 554 | 555 | if i%100==0: 556 | print("Validating data # {}".format(i)) 557 | 558 | cost, prediction,\ 559 | acc = sess.run([cross_entropy, 560 | outputs, 561 | accuracy], 562 | feed_dict={tf_text: val_batches_text[i], 563 | tf_embd: embd, 564 | tf_summary: val_batches_summary[i], 565 | tf_true_summary_len: val_batches_true_summary_len[i], 566 | tf_train: False}) 567 | 568 | total_val_loss += cost 569 | total_val_acc += acc 570 | 571 | avg_val_loss = total_val_loss/len(val_batches_text) 572 | 573 | print("\n\nEpoch: 
{}\n\n".format(epoch+covered_epochs)) 574 | print("Average Training Loss: {:.3f}".format(total_train_loss/len(train_batches_text))) 575 | print("Average Training Accuracy: {:.2f}".format(100*total_train_acc/len(train_batches_text))) 576 | print("Average Validation Loss: {:.3f}".format(avg_val_loss)) 577 | print("Average Validation Accuracy: {:.2f}".format(100*total_val_acc/len(val_batches_text))) 578 | 579 | if (avg_val_loss < best_loss): 580 | best_loss = avg_val_loss 581 | save_data={'best_loss':best_loss,'covered_epochs':covered_epochs+epoch+1} 582 | impatience=0 583 | with open('Model_Backup/Seq2seq_summarization.pkl', 'wb') as fp: 584 | pickle.dump(save_data, fp) 585 | saver.save(sess, 'Model_Backup/Seq2seq_summarization.ckpt') 586 | print("\nModel saved\n") 587 | 588 | else: 589 | impatience+=1 590 | 591 | if impatience > patience: 592 | break 593 | 594 | 595 | epoch+=1 596 | 597 | ``` 598 | 599 | 600 | Load checkpoint? y/n: n 601 | 602 | 603 | 604 | 605 | 606 | STARTING TRAINING 607 | 608 | 609 | Iter 0, Cost= 1.493, Acc = 0.00% 610 | 611 | Sample Text 612 | 613 | i was given these as a gift ... they were so amazing i now order them for all occasions and sometimes just because i had n't had them in a while . a little warning ; they are completely addictive . i like the ones ; my girlfriend likes the rocky road . highly recommended ! < br / > < br / > sure to be appreciated by everyone on your gift list . 614 | 615 | Sample Predicted Summary 616 | 617 | condolence s.e. foodstuff condolence webbed poverty squarely poverty poverty assists foodstuff webbed poverty methodist foodstuff webbed poverty gephardt foodstuff ethier articulos meh rojos cols colombians webbed poverty condolence poverty condolence hourly 618 | 619 | Sample Actual Summary 620 | 621 | simply amazing brownies ... 622 | 623 | 624 | Iter 100, Cost= 0.684, Acc = 26.98% 625 | Iter 200, Cost= 0.649, Acc = 27.19% 626 | Iter 300, Cost= 0.744, Acc = 25.93% 627 | Iter 400, Cost= 0.976, Acc = 19.88% 628 | Iter 500, Cost= 0.839, Acc = 21.53% 629 | 630 | Sample Text 631 | 632 | for those looking for a water beverage and one with a neutral taste that does n't have aftertaste , this one 's for < br / > < br / > also , traditional tap water is slightly more acidic ( i believe ph 7-8 ) . 's is supposed at 9.5 ph , so if you 're very sensitive to acidic products , this might help you out . 633 | 634 | Sample Predicted Summary 635 | 636 | good 637 | 638 | Sample Actual Summary 639 | 640 | neutral taste , low ph 641 | 642 | 643 | Iter 600, Cost= 0.697, Acc = 27.82% 644 | Iter 700, Cost= 0.763, Acc = 24.24% 645 | Iter 800, Cost= 0.792, Acc = 24.82% 646 | Iter 900, Cost= 0.866, Acc = 23.13% 647 | Iter 1000, Cost= 0.838, Acc = 23.03% 648 | 649 | Sample Text 650 | 651 | i love my starbucks sumatra first thing in the morning . i was not always up early enough to take the detour to starbucks and now i do n't have to ! these are perfect and delicious . now i can have my fav coffee even before i take off my slippers ! i love this product ! it 's easy to order - arrived quickly and the price was good . 652 | 653 | Sample Predicted Summary 654 | 655 | great 656 | 657 | Sample Actual Summary 658 | 659 | no drive through at starbucks ? 
660 | 661 | 662 | Iter 1100, Cost= 0.648, Acc = 30.58% 663 | Iter 1200, Cost= 0.977, Acc = 19.08% 664 | Iter 1300, Cost= 0.788, Acc = 23.29% 665 | Iter 1400, Cost= 0.681, Acc = 28.23% 666 | Iter 1500, Cost= 0.608, Acc = 29.32% 667 | 668 | Sample Text 669 | 670 | husband loves this tea especially in the recommend using the large cup setting on your keurig brewer unless you prefer your tea extra strong . 671 | 672 | Sample Predicted Summary 673 | 674 | great tea 675 | 676 | Sample Actual Summary 677 | 678 | good substitute for coffee . 679 | 680 | 681 | Iter 1600, Cost= 0.709, Acc = 27.48% 682 | Iter 1700, Cost= 0.729, Acc = 31.11% 683 | Iter 1800, Cost= 0.627, Acc = 28.93% 684 | Iter 1900, Cost= 0.798, Acc = 26.36% 685 | Iter 2000, Cost= 0.856, Acc = 22.08% 686 | 687 | Sample Text 688 | 689 | can no longer find this product locally anymore . i purchased it previously at a warehouse club but costco , bj ` s and sam ` s club no longer stock it in my area stores . my two golden retriever ` s love this gravy when added to their mix of both dry and moist dog food . hope it stays on the market ... ! 690 | 691 | Sample Predicted Summary 692 | 693 | great 694 | 695 | Sample Actual Summary 696 | 697 | best pet food gravy 698 | 699 | 700 | Iter 2100, Cost= 0.640, Acc = 30.77% 701 | Iter 2200, Cost= 0.792, Acc = 24.49% 702 | Iter 2300, Cost= 0.735, Acc = 22.86% 703 | Iter 2400, Cost= 0.769, Acc = 21.68% 704 | Iter 2500, Cost= 0.900, Acc = 21.15% 705 | 706 | Sample Text 707 | 708 | i want to start out by saying that i thought at first that a bag with only 120 calories and 4 grams of fat ( no saturated or trans ) for every 20 chips was going to taste like crap . i must say that not only was i wrong , that this is my favorite bbq chip on the market today . they are light and you can not taste any fat or grease after eating them . that 's because they are n't baked or fried , just popped as their name suggests . these chips are very easy to dip as well . fantastic product ! 709 | 710 | Sample Predicted Summary 711 | 712 | great chips 713 | 714 | Sample Actual Summary 715 | 716 | fantastic chips ! ! ! 717 | 718 | 719 | Iter 2600, Cost= 0.740, Acc = 22.86% 720 | Iter 2700, Cost= 0.848, Acc = 24.84% 721 | Iter 2800, Cost= 0.677, Acc = 28.57% 722 | Iter 2900, Cost= 0.779, Acc = 25.90% 723 | Iter 3000, Cost= 0.718, Acc = 27.34% 724 | 725 | Sample Text 726 | 727 | this of 7-ounce `` taster 's choice french roast '' canisters , is a good buy . the coffee is flavored differently than original flavor , but the difference is very subtle , and refreshingly good . overall , this taster 's choice coffee is a bargain , and highly recommended . 
728 | 729 | Sample Predicted Summary 730 | 731 | great flavor 732 | 733 | Sample Actual Summary 734 | 735 | good buy 736 | 737 | 738 | ### Future Works 739 | 740 | * Beam Search 741 | * Pointer Mechanisms 742 | * BLEU\ROUGE evaluation 743 | * Implement Testing 744 | * Complete Training and Optimize Hyperparameters 745 | 746 | 747 | ```python 748 | 749 | ``` 750 | -------------------------------------------------------------------------------- /Summarization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Abstractive Summarization" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Based on [Seq2seq learning](https://arxiv.org/abs/1409.3215)\n", 15 | "with [attention mechanism](https://arxiv.org/abs/1409.0473), specifically [local attention](https://nlp.stanford.edu/pubs/emnlp15_attn.pdf)." 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Loading Pre-processed Dataset\n", 23 | "\n", 24 | "The Data is preprocessed in [Data_Pre-Processing.ipynb](https://github.com/JRC1995/Abstractive-Summarization/blob/master/Data_Pre-Processing.ipynb)\n", 25 | "\n", 26 | "Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import json\n", 36 | "\n", 37 | "with open('Processed_Data/Amazon_Reviews_Processed.json') as file:\n", 38 | "\n", 39 | " for json_data in file:\n", 40 | " saved_data = json.loads(json_data)\n", 41 | "\n", 42 | " vocab2idx = saved_data[\"vocab\"]\n", 43 | " embd = saved_data[\"embd\"]\n", 44 | " train_batches_text = saved_data[\"train_batches_text\"]\n", 45 | " test_batches_text = saved_data[\"test_batches_text\"]\n", 46 | " val_batches_text = saved_data[\"val_batches_text\"]\n", 47 | " train_batches_summary = saved_data[\"train_batches_summary\"]\n", 48 | " test_batches_summary = saved_data[\"test_batches_summary\"]\n", 49 | " val_batches_summary = saved_data[\"val_batches_summary\"]\n", 50 | " train_batches_true_text_len = saved_data[\"train_batches_true_text_len\"]\n", 51 | " val_batches_true_text_len = saved_data[\"val_batches_true_text_len\"]\n", 52 | " test_batches_true_text_len = saved_data[\"test_batches_true_text_len\"]\n", 53 | " train_batches_true_summary_len = saved_data[\"train_batches_true_summary_len\"]\n", 54 | " val_batches_true_summary_len = saved_data[\"val_batches_true_summary_len\"]\n", 55 | " test_batches_true_summary_len = saved_data[\"test_batches_true_summary_len\"]\n", 56 | "\n", 57 | " break\n", 58 | " \n", 59 | "idx2vocab = {v:k for k,v in vocab2idx.items()}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Hyperparameters" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 2, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "hidden_size = 300\n", 76 | "learning_rate = 0.001\n", 77 | "epochs = 5\n", 78 | "max_summary_len = 31 # should be summary_max_len as used in data_preprocessing with +1 (+1 for ) \n", 79 | "D = 5 # D determines local attention window size\n", 80 | "window_len = 2*D+1\n", 81 | "l2=1e-6" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Tensorflow Placeholders" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | 
"execution_count": 3, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "WARNING:tensorflow:From /home/jishnu/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", 101 | "Instructions for updating:\n", 102 | "non-resource variables are not supported in the long term\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "import tensorflow.compat.v1 as tf \n", 108 | "\n", 109 | "tf.disable_v2_behavior()\n", 110 | "tf.disable_eager_execution()\n", 111 | "\n", 112 | "embd_dim = len(embd[0])\n", 113 | "\n", 114 | "tf_text = tf.placeholder(tf.int32, [None, None])\n", 115 | "tf_embd = tf.placeholder(tf.float32, [len(vocab2idx),embd_dim])\n", 116 | "tf_true_summary_len = tf.placeholder(tf.int32, [None])\n", 117 | "tf_summary = tf.placeholder(tf.int32,[None, None])\n", 118 | "tf_train = tf.placeholder(tf.bool)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Dropout Function" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 4, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "def dropout(x,rate,training):\n", 135 | " return tf.cond(tf_train,\n", 136 | " lambda: tf.nn.dropout(x,rate=0.3),\n", 137 | " lambda: x)\n", 138 | "\n", 139 | " " 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "## Embed vectorized text\n", 147 | "\n", 148 | "Dropout used for regularization \n", 149 | "(https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 5, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "embd_text = tf.nn.embedding_lookup(tf_embd, tf_text)\n", 159 | "\n", 160 | "embd_text = dropout(embd_text,rate=0.3,training=tf_train)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "## LSTM function\n", 168 | "\n", 169 | "More info: \n", 170 | "
\n", 171 | "https://dl.acm.org/citation.cfm?id=1246450, \n", 172 | "
\n", 173 | "https://www.bioinf.jku.at/publications/older/2604.pdf,\n", 174 | "
\n", 175 | "https://en.wikipedia.org/wiki/Long_short-term_memory" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 6, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "def LSTM(x,hidden_state,cell,input_dim,hidden_size,scope):\n", 185 | " \n", 186 | " with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):\n", 187 | " \n", 188 | " w = tf.get_variable(\"w\", shape=[4,input_dim,hidden_size],\n", 189 | " dtype=tf.float32,\n", 190 | " trainable=True,\n", 191 | " initializer=tf.glorot_uniform_initializer())\n", 192 | " \n", 193 | " u = tf.get_variable(\"u\", shape=[4,hidden_size,hidden_size],\n", 194 | " dtype=tf.float32,\n", 195 | " trainable=True,\n", 196 | " initializer=tf.glorot_uniform_initializer())\n", 197 | " \n", 198 | " b = tf.get_variable(\"bias\", shape=[4,1,hidden_size],\n", 199 | " dtype=tf.float32,\n", 200 | " trainable=True,\n", 201 | " initializer=tf.zeros_initializer())\n", 202 | " \n", 203 | " input_gate = tf.nn.sigmoid( tf.matmul(x,w[0]) + tf.matmul(hidden_state,u[0]) + b[0])\n", 204 | " forget_gate = tf.nn.sigmoid( tf.matmul(x,w[1]) + tf.matmul(hidden_state,u[1]) + b[1])\n", 205 | " output_gate = tf.nn.sigmoid( tf.matmul(x,w[2]) + tf.matmul(hidden_state,u[2]) + b[2])\n", 206 | " cell_ = tf.nn.tanh( tf.matmul(x,w[3]) + tf.matmul(hidden_state,u[3]) + b[3])\n", 207 | " cell = forget_gate*cell + input_gate*cell_\n", 208 | " hidden_state = output_gate*tf.tanh(cell)\n", 209 | " \n", 210 | " return hidden_state, cell\n", 211 | " " 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Bi-Directional LSTM Encoder\n", 219 | "\n", 220 | "(https://maxwell.ict.griffith.edu.au/spl/publications/papers/ieeesp97_schuster.pdf)\n", 221 | "\n", 222 | "More Info: https://machinelearningmastery.com/develop-bidirectional-lstm-sequence-classification-python-keras/\n", 223 | "\n", 224 | "Bi-directional LSTM encoder has a forward encoder and a backward encoder. 
The forward encoder encodes a text sequence from start to end, and the backward encoder encodes the text sequence from end to start.\n", 225 | "The final output is a combination (in this case, a concatenation) of the forward encoded text and the backward encoded text\n", 226 | " \n" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Forward Encoding" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 7, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "S = tf.shape(embd_text)[1] #text sequence length\n", 243 | "N = tf.shape(embd_text)[0] #batch_size\n", 244 | "\n", 245 | "i=0\n", 246 | "hidden=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 247 | "cell=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 248 | "hidden_forward=tf.TensorArray(size=S, dtype=tf.float32)\n", 249 | "\n", 250 | "#shape of embd_text: [N,S,embd_dim]\n", 251 | "embd_text_t = tf.transpose(embd_text,[1,0,2]) \n", 252 | "#current shape of embd_text: [S,N,embd_dim]\n", 253 | "\n", 254 | "def cond(i, hidden, cell, hidden_forward):\n", 255 | " return i < S\n", 256 | "\n", 257 | "def body(i, hidden, cell, hidden_forward):\n", 258 | " x = embd_text_t[i]\n", 259 | " \n", 260 | " hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope=\"forward_encoder\")\n", 261 | " hidden_forward = hidden_forward.write(i, hidden)\n", 262 | "\n", 263 | " return i+1, hidden, cell, hidden_forward\n", 264 | "\n", 265 | "_, _, _, hidden_forward = tf.while_loop(cond, body, [i, hidden, cell, hidden_forward])" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "## Backward Encoding" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 8, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "i=S-1\n", 282 | "hidden=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 283 | "cell=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 284 | "hidden_backward=tf.TensorArray(size=S, dtype=tf.float32)\n", 285 | "\n", 286 | "def cond(i, hidden, cell, hidden_backward):\n", 287 | " return i >= 0\n", 288 | "\n", 289 | "def body(i, hidden, cell, hidden_backward):\n", 290 | " x = embd_text_t[i]\n", 291 | " hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope=\"backward_encoder\")\n", 292 | " hidden_backward = hidden_backward.write(i, hidden)\n", 293 | "\n", 294 | " return i-1, hidden, cell, hidden_backward\n", 295 | "\n", 296 | "_, _, _, hidden_backward = tf.while_loop(cond, body, [i, hidden, cell, hidden_backward])" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## Merge Forward and Backward Encoder Hidden States" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 9, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "hidden_forward = hidden_forward.stack()\n", 313 | "hidden_backward = hidden_backward.stack()\n", 314 | "\n", 315 | "encoder_states = tf.concat([hidden_forward,hidden_backward],axis=-1)\n", 316 | "encoder_states = tf.transpose(encoder_states,[1,0,2])\n", 317 | "\n", 318 | "encoder_states = dropout(encoder_states,rate=0.3,training=tf_train)\n", 319 | "\n", 320 | "final_encoded_state = dropout(tf.concat([hidden_forward[-1],hidden_backward[-1]],axis=-1),rate=0.3,training=tf_train)\n", 321 | "\n" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "## Implementation of attention scoring function\n", 329 | "\n", 330 | "Given 
a sequence of encoder states ($H_s$) and the decoder hidden state ($H_t$) of current timestep $t$, the equation for computing attention score is:\n", 331 | "\n", 332 | "$$Score = (H_s.W_a).H_t^T $$\n", 333 | "\n", 334 | "($W_a$ = trainable parameters)\n", 335 | "\n", 336 | "(https://nlp.stanford.edu/pubs/emnlp15_attn.pdf)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 10, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "def attention_score(encoder_states,decoder_hidden_state,scope=\"attention_score\"):\n", 346 | " \n", 347 | " with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):\n", 348 | " Wa = tf.get_variable(\"Wa\", shape=[2*hidden_size,2*hidden_size],\n", 349 | " dtype=tf.float32,\n", 350 | " trainable=True,\n", 351 | " initializer=tf.glorot_uniform_initializer())\n", 352 | " \n", 353 | " encoder_states = tf.reshape(encoder_states,[N*S,2*hidden_size])\n", 354 | " \n", 355 | " encoder_states = tf.reshape(tf.matmul(encoder_states,Wa),[N,S,2*hidden_size])\n", 356 | " decoder_hidden_state = tf.reshape(decoder_hidden_state,[N,2*hidden_size,1])\n", 357 | " \n", 358 | " return tf.reshape(tf.matmul(encoder_states,decoder_hidden_state),[N,S])\n" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "## Local Attention Function\n", 366 | "\n", 367 | "Based on: https://nlp.stanford.edu/pubs/emnlp15_attn.pdf" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 11, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "\n", 377 | "def align(encoder_states, decoder_hidden_state,scope=\"attention\"):\n", 378 | " \n", 379 | " with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):\n", 380 | " Wp = tf.get_variable(\"Wp\", shape=[2*hidden_size,128],\n", 381 | " dtype=tf.float32,\n", 382 | " trainable=True,\n", 383 | " initializer=tf.glorot_uniform_initializer())\n", 384 | " \n", 385 | " Vp = tf.get_variable(\"Vp\", shape=[128,1],\n", 386 | " dtype=tf.float32,\n", 387 | " trainable=True,\n", 388 | " initializer=tf.glorot_uniform_initializer())\n", 389 | " \n", 390 | " positions = tf.cast(S-window_len,dtype=tf.float32) # Maximum valid attention window starting position\n", 391 | " \n", 392 | " # Predict attention window starting position \n", 393 | " ps = positions*tf.nn.sigmoid(tf.matmul(tf.tanh(tf.matmul(decoder_hidden_state,Wp)),Vp))\n", 394 | " # ps = (soft-)predicted starting position of attention window\n", 395 | " pt = ps+D # pt = center of attention window where the whole window length is 2*D+1\n", 396 | " pt = tf.reshape(pt,[N])\n", 397 | " \n", 398 | " i = 0\n", 399 | " gaussian_position_based_scores = tf.TensorArray(size=S,dtype=tf.float32)\n", 400 | " sigma = tf.constant(D/2,dtype=tf.float32)\n", 401 | " \n", 402 | " def cond(i,gaussian_position_based_scores):\n", 403 | " \n", 404 | " return i < S\n", 405 | " \n", 406 | " def body(i,gaussian_position_based_scores):\n", 407 | " \n", 408 | " score = tf.exp(-((tf.square(tf.cast(i,tf.float32)-pt))/(2*tf.square(sigma)))) \n", 409 | " # (equation (10) in https://nlp.stanford.edu/pubs/emnlp15_attn.pdf)\n", 410 | " gaussian_position_based_scores = gaussian_position_based_scores.write(i,score)\n", 411 | " \n", 412 | " return i+1,gaussian_position_based_scores\n", 413 | " \n", 414 | " i,gaussian_position_based_scores = tf.while_loop(cond,body,[i,gaussian_position_based_scores])\n", 415 | " \n", 416 | " gaussian_position_based_scores = gaussian_position_based_scores.stack()\n", 417 | " gaussian_position_based_scores = 
tf.transpose(gaussian_position_based_scores,[1,0])\n", 418 | " gaussian_position_based_scores = tf.reshape(gaussian_position_based_scores,[N,S])\n", 419 | " \n", 420 | " scores = attention_score(encoder_states,decoder_hidden_state)*gaussian_position_based_scores\n", 421 | " scores = tf.nn.softmax(scores,axis=-1)\n", 422 | " \n", 423 | " return tf.reshape(scores,[N,S,1])" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## LSTM Decoder With Local Attention" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 12, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "with tf.variable_scope(\"decoder\",reuse=tf.AUTO_REUSE):\n", 440 | " SOS = tf.get_variable(\"sos\", shape=[1,embd_dim],\n", 441 | " dtype=tf.float32,\n", 442 | " trainable=True,\n", 443 | " initializer=tf.glorot_uniform_initializer())\n", 444 | " \n", 445 | " # SOS represents starting marker \n", 446 | " # It tells the decoder that it is about to decode the first word of the output\n", 447 | " # I have set SOS as a trainable parameter\n", 448 | " \n", 449 | " Wc = tf.get_variable(\"Wc\", shape=[4*hidden_size,embd_dim],\n", 450 | " dtype=tf.float32,\n", 451 | " trainable=True,\n", 452 | " initializer=tf.glorot_uniform_initializer())\n", 453 | " \n", 454 | "\n", 455 | "\n", 456 | "SOS = tf.tile(SOS,[N,1]) #now SOS shape: [N,embd_dim]\n", 457 | "inp = SOS\n", 458 | "hidden=final_encoded_state\n", 459 | "cell=tf.zeros([N, 2*hidden_size], dtype=tf.float32)\n", 460 | "decoder_outputs=tf.TensorArray(size=max_summary_len, dtype=tf.float32)\n", 461 | "outputs=tf.TensorArray(size=max_summary_len, dtype=tf.int32)\n", 462 | "\n", 463 | "attention_scores = align(encoder_states,hidden)\n", 464 | "encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1)\n", 465 | "\n", 466 | "for i in range(max_summary_len):\n", 467 | " \n", 468 | " inp = dropout(inp,rate=0.3,training=tf_train)\n", 469 | " \n", 470 | " inp = tf.concat([inp,encoder_context_vector],axis=-1)\n", 471 | " \n", 472 | " hidden,cell = LSTM(inp,hidden,cell,embd_dim+2*hidden_size,2*hidden_size,scope=\"decoder\")\n", 473 | " \n", 474 | " hidden = dropout(hidden,rate=0.3,training=tf_train)\n", 475 | " \n", 476 | " attention_scores = align(encoder_states,hidden)\n", 477 | " encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1)\n", 478 | " \n", 479 | " concated = tf.concat([hidden,encoder_context_vector],axis=-1)\n", 480 | " \n", 481 | " linear_out = tf.nn.tanh(tf.matmul(concated,Wc))\n", 482 | " decoder_output = tf.matmul(linear_out,tf.transpose(tf_embd,[1,0])) \n", 483 | " # produce unnormalized probability distribution over vocabulary\n", 484 | " \n", 485 | " \n", 486 | " decoder_outputs = decoder_outputs.write(i,decoder_output)\n", 487 | " \n", 488 | " # Pick out most probable vocab indices based on the unnormalized probability distribution\n", 489 | " \n", 490 | " next_word_vec = tf.cast(tf.argmax(decoder_output,1),tf.int32)\n", 491 | "\n", 492 | " next_word_vec = tf.reshape(next_word_vec, [N])\n", 493 | "\n", 494 | " outputs = outputs.write(i,next_word_vec)\n", 495 | "\n", 496 | " next_word = tf.nn.embedding_lookup(tf_embd, next_word_vec)\n", 497 | " inp = tf.reshape(next_word, [N, embd_dim])\n", 498 | " \n", 499 | " \n", 500 | "decoder_outputs = decoder_outputs.stack()\n", 501 | "outputs = outputs.stack()\n", 502 | "\n", 503 | "decoder_outputs = tf.transpose(decoder_outputs,[1,0,2])\n", 504 | "outputs = tf.transpose(outputs,[1,0])\n", 505 | "\n", 
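"# Added note (not part of the original cell): at this point decoder_outputs has shape\n",
"# [N, max_summary_len, V] (unnormalized logits over the vocabulary of size V) and\n",
"# outputs has shape [N, max_summary_len] (the greedily chosen word indices).\n",
"# The loop above always feeds the embedding of its own argmax prediction back in as the\n",
"# next input, even at training time (greedy decoding, no teacher forcing); beam search\n",
"# is listed under Future Works at the end of this notebook.\n",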
506 | " \n", 507 | " " 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "## Define Cross Entropy Cost Function and L2 Regularization" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 13, 520 | "metadata": {}, 521 | "outputs": [], 522 | "source": [ 523 | "filtered_trainables = [var for var in tf.trainable_variables() if\n", 524 | " not(\"Bias\" in var.name or \"bias\" in var.name\n", 525 | " or \"noreg\" in var.name)]\n", 526 | "\n", 527 | "regularization = tf.reduce_sum([tf.nn.l2_loss(var) for var\n", 528 | " in filtered_trainables])\n", 529 | "\n", 530 | "with tf.variable_scope(\"loss\"):\n", 531 | "\n", 532 | " epsilon = tf.constant(1e-9, tf.float32)\n", 533 | "\n", 534 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(\n", 535 | " labels=tf_summary, logits=decoder_outputs)\n", 536 | "\n", 537 | " pad_mask = tf.sequence_mask(tf_true_summary_len,\n", 538 | " maxlen=max_summary_len,\n", 539 | " dtype=tf.float32)\n", 540 | "\n", 541 | " masked_cross_entropy = cross_entropy*pad_mask\n", 542 | "\n", 543 | " cost = tf.reduce_mean(masked_cross_entropy) + \\\n", 544 | " l2*regularization\n", 545 | "\n", 546 | " cross_entropy = tf.reduce_mean(masked_cross_entropy)" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "metadata": {}, 552 | "source": [ 553 | "## Accuracy" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 14, 559 | "metadata": {}, 560 | "outputs": [], 561 | "source": [ 562 | "# Comparing predicted sequence with labels\n", 563 | "comparison = tf.cast(tf.equal(outputs, tf_summary),\n", 564 | " tf.float32)\n", 565 | "\n", 566 | "# Masking to ignore the effect of pads while calculating accuracy\n", 567 | "pad_mask = tf.sequence_mask(tf_true_summary_len,\n", 568 | " maxlen=max_summary_len,\n", 569 | " dtype=tf.bool)\n", 570 | "\n", 571 | "masked_comparison = tf.boolean_mask(comparison, pad_mask)\n", 572 | "\n", 573 | "# Accuracy\n", 574 | "accuracy = tf.reduce_mean(masked_comparison)" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "## Define Optimizer" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 15, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)\n", 591 | "\n", 592 | "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", 593 | "\n", 594 | "gvs = optimizer.compute_gradients(cost, all_vars)\n", 595 | "\n", 596 | "capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs] # Gradient Clipping\n", 597 | "\n", 598 | "train_op = optimizer.apply_gradients(capped_gvs)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": {}, 604 | "source": [ 605 | "## Training and Validation" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": 16, 611 | "metadata": {}, 612 | "outputs": [ 613 | { 614 | "name": "stdin", 615 | "output_type": "stream", 616 | "text": [ 617 | "\n", 618 | "Load checkpoint? y/n: n\n" 619 | ] 620 | }, 621 | { 622 | "name": "stdout", 623 | "output_type": "stream", 624 | "text": [ 625 | "\n", 626 | "\n", 627 | "\n", 628 | "STARTING TRAINING\n", 629 | "\n", 630 | "\n", 631 | "Iter 0, Cost= 1.493, Acc = 0.00%\n", 632 | "\n", 633 | "Sample Text\n", 634 | "\n", 635 | "i was given these as a gift ... they were so amazing i now order them for all occasions and sometimes just because i had n't had them in a while . 
a little warning ; they are completely addictive . i like the ones ; my girlfriend likes the rocky road . highly recommended ! < br / > < br / > sure to be appreciated by everyone on your gift list .\n", 636 | "\n", 637 | "Sample Predicted Summary\n", 638 | "\n", 639 | "condolence s.e. foodstuff condolence webbed poverty squarely poverty poverty assists foodstuff webbed poverty methodist foodstuff webbed poverty gephardt foodstuff ethier articulos meh rojos cols colombians webbed poverty condolence poverty condolence hourly \n", 640 | "\n", 641 | "Sample Actual Summary\n", 642 | "\n", 643 | "simply amazing brownies ... \n", 644 | "\n", 645 | "\n", 646 | "Iter 100, Cost= 0.684, Acc = 26.98%\n", 647 | "Iter 200, Cost= 0.649, Acc = 27.19%\n", 648 | "Iter 300, Cost= 0.744, Acc = 25.93%\n", 649 | "Iter 400, Cost= 0.976, Acc = 19.88%\n", 650 | "Iter 500, Cost= 0.839, Acc = 21.53%\n", 651 | "\n", 652 | "Sample Text\n", 653 | "\n", 654 | "for those looking for a water beverage and one with a neutral taste that does n't have aftertaste , this one 's for < br / > < br / > also , traditional tap water is slightly more acidic ( i believe ph 7-8 ) . 's is supposed at 9.5 ph , so if you 're very sensitive to acidic products , this might help you out .\n", 655 | "\n", 656 | "Sample Predicted Summary\n", 657 | "\n", 658 | "good \n", 659 | "\n", 660 | "Sample Actual Summary\n", 661 | "\n", 662 | "neutral taste , low ph \n", 663 | "\n", 664 | "\n", 665 | "Iter 600, Cost= 0.697, Acc = 27.82%\n", 666 | "Iter 700, Cost= 0.763, Acc = 24.24%\n", 667 | "Iter 800, Cost= 0.792, Acc = 24.82%\n", 668 | "Iter 900, Cost= 0.866, Acc = 23.13%\n", 669 | "Iter 1000, Cost= 0.838, Acc = 23.03%\n", 670 | "\n", 671 | "Sample Text\n", 672 | "\n", 673 | "i love my starbucks sumatra first thing in the morning . i was not always up early enough to take the detour to starbucks and now i do n't have to ! these are perfect and delicious . now i can have my fav coffee even before i take off my slippers ! i love this product ! it 's easy to order - arrived quickly and the price was good .\n", 674 | "\n", 675 | "Sample Predicted Summary\n", 676 | "\n", 677 | "great \n", 678 | "\n", 679 | "Sample Actual Summary\n", 680 | "\n", 681 | "no drive through at starbucks ? \n", 682 | "\n", 683 | "\n", 684 | "Iter 1100, Cost= 0.648, Acc = 30.58%\n", 685 | "Iter 1200, Cost= 0.977, Acc = 19.08%\n", 686 | "Iter 1300, Cost= 0.788, Acc = 23.29%\n", 687 | "Iter 1400, Cost= 0.681, Acc = 28.23%\n", 688 | "Iter 1500, Cost= 0.608, Acc = 29.32%\n", 689 | "\n", 690 | "Sample Text\n", 691 | "\n", 692 | "husband loves this tea especially in the recommend using the large cup setting on your keurig brewer unless you prefer your tea extra strong .\n", 693 | "\n", 694 | "Sample Predicted Summary\n", 695 | "\n", 696 | "great tea \n", 697 | "\n", 698 | "Sample Actual Summary\n", 699 | "\n", 700 | "good substitute for coffee . \n", 701 | "\n", 702 | "\n", 703 | "Iter 1600, Cost= 0.709, Acc = 27.48%\n", 704 | "Iter 1700, Cost= 0.729, Acc = 31.11%\n", 705 | "Iter 1800, Cost= 0.627, Acc = 28.93%\n", 706 | "Iter 1900, Cost= 0.798, Acc = 26.36%\n", 707 | "Iter 2000, Cost= 0.856, Acc = 22.08%\n", 708 | "\n", 709 | "Sample Text\n", 710 | "\n", 711 | "can no longer find this product locally anymore . i purchased it previously at a warehouse club but costco , bj ` s and sam ` s club no longer stock it in my area stores . my two golden retriever ` s love this gravy when added to their mix of both dry and moist dog food . hope it stays on the market ... 
!\n", 712 | "\n", 713 | "Sample Predicted Summary\n", 714 | "\n", 715 | "great \n", 716 | "\n", 717 | "Sample Actual Summary\n", 718 | "\n", 719 | "best pet food gravy \n", 720 | "\n", 721 | "\n", 722 | "Iter 2100, Cost= 0.640, Acc = 30.77%\n", 723 | "Iter 2200, Cost= 0.792, Acc = 24.49%\n", 724 | "Iter 2300, Cost= 0.735, Acc = 22.86%\n", 725 | "Iter 2400, Cost= 0.769, Acc = 21.68%\n", 726 | "Iter 2500, Cost= 0.900, Acc = 21.15%\n", 727 | "\n", 728 | "Sample Text\n", 729 | "\n", 730 | "i want to start out by saying that i thought at first that a bag with only 120 calories and 4 grams of fat ( no saturated or trans ) for every 20 chips was going to taste like crap . i must say that not only was i wrong , that this is my favorite bbq chip on the market today . they are light and you can not taste any fat or grease after eating them . that 's because they are n't baked or fried , just popped as their name suggests . these chips are very easy to dip as well . fantastic product !\n", 731 | "\n", 732 | "Sample Predicted Summary\n", 733 | "\n", 734 | "great chips \n", 735 | "\n", 736 | "Sample Actual Summary\n", 737 | "\n", 738 | "fantastic chips ! ! ! \n", 739 | "\n", 740 | "\n", 741 | "Iter 2600, Cost= 0.740, Acc = 22.86%\n", 742 | "Iter 2700, Cost= 0.848, Acc = 24.84%\n", 743 | "Iter 2800, Cost= 0.677, Acc = 28.57%\n", 744 | "Iter 2900, Cost= 0.779, Acc = 25.90%\n", 745 | "Iter 3000, Cost= 0.718, Acc = 27.34%\n", 746 | "\n", 747 | "Sample Text\n", 748 | "\n", 749 | "this of 7-ounce `` taster 's choice french roast '' canisters , is a good buy . the coffee is flavored differently than original flavor , but the difference is very subtle , and refreshingly good . overall , this taster 's choice coffee is a bargain , and highly recommended .\n", 750 | "\n", 751 | "Sample Predicted Summary\n", 752 | "\n", 753 | "great flavor \n", 754 | "\n", 755 | "Sample Actual Summary\n", 756 | "\n", 757 | "good buy \n", 758 | "\n", 759 | "\n" 760 | ] 761 | }, 762 | { 763 | "ename": "KeyboardInterrupt", 764 | "evalue": "", 765 | "output_type": "error", 766 | "traceback": [ 767 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 768 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 769 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mtf_summary\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtrain_batches_summary\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mtf_true_summary_len\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtrain_batches_true_summary_len\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 62\u001b[0;31m tf_train: True})\n\u001b[0m\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0mtotal_train_acc\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0macc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 770 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 954\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 955\u001b[0m result = self._run(None, fetches, feed_dict, 
options_ptr,\n\u001b[0;32m--> 956\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 957\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 958\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 771 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1178\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mfeed_dict_tensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1179\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m-> 1180\u001b[0;31m feed_dict_tensor, options, run_metadata)\n\u001b[0m\u001b[1;32m 1181\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 772 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1357\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1358\u001b[0m return self._do_call(_run_fn, feeds, fetches, targets, options,\n\u001b[0;32m-> 1359\u001b[0;31m run_metadata)\n\u001b[0m\u001b[1;32m 1360\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1361\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_prun_fn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetches\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 773 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1363\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1364\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1365\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1366\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1367\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 774 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1348\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_extend_graph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1349\u001b[0m return self._call_tf_sessionrun(options, feed_dict, fetch_list,\n\u001b[0;32m-> 1350\u001b[0;31m target_list, run_metadata)\n\u001b[0m\u001b[1;32m 1351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1352\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 775 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_call_tf_sessionrun\u001b[0;34m(self, options, feed_dict, fetch_list, target_list, run_metadata)\u001b[0m\n\u001b[1;32m 1441\u001b[0m return tf_session.TF_SessionRun_wrapper(self._session, options, feed_dict,\n\u001b[1;32m 1442\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1443\u001b[0;31m run_metadata)\n\u001b[0m\u001b[1;32m 1444\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1445\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_tf_sessionprun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 776 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 777 | ] 778 | } 779 | ], 780 | "source": [ 781 | "import pickle\n", 782 | "import random\n", 783 | "\n", 784 | "with tf.Session() as sess: # Start Tensorflow Session\n", 785 | " display_step = 100\n", 786 | " patience = 5\n", 787 | "\n", 788 | " load = input(\"\\nLoad checkpoint? 
y/n: \")\n", 789 | " print(\"\")\n", 790 | " saver = tf.train.Saver()\n", 791 | "\n", 792 | " if load.lower() == 'y':\n", 793 | "\n", 794 | " print('Loading pre-trained weights for the model...')\n", 795 | "\n", 796 | " saver.restore(sess, 'Model_Backup/Seq2seq_summarization.ckpt')\n", 797 | " sess.run(tf.global_variables())\n", 798 | " sess.run(tf.tables_initializer())\n", 799 | "\n", 800 | " with open('Model_Backup/Seq2seq_summarization.pkl', 'rb') as fp:\n", 801 | " train_data = pickle.load(fp)\n", 802 | "\n", 803 | " covered_epochs = train_data['covered_epochs']\n", 804 | " best_loss = train_data['best_loss']\n", 805 | " impatience = 0\n", 806 | " \n", 807 | " print('\\nRESTORATION COMPLETE\\n')\n", 808 | "\n", 809 | " else:\n", 810 | " best_loss = 2**30\n", 811 | " impatience = 0\n", 812 | " covered_epochs = 0\n", 813 | "\n", 814 | " init = tf.global_variables_initializer()\n", 815 | " sess.run(init)\n", 816 | " sess.run(tf.tables_initializer())\n", 817 | "\n", 818 | " epoch=0\n", 819 | " while (epoch+covered_epochs)\") for vec in train_batches_text[j][idx]])\n", 859 | " predicted_summary = [idx2vocab.get(vec,\"\") for vec in prediction[idx]]\n", 860 | " actual_summary = [idx2vocab.get(vec,\"\") for vec in train_batches_summary[j][idx]]\n", 861 | " \n", 862 | " print(\"\\nSample Text\\n\")\n", 863 | " print(text)\n", 864 | " print(\"\\nSample Predicted Summary\\n\")\n", 865 | " for word in predicted_summary:\n", 866 | " if word == '':\n", 867 | " break\n", 868 | " else:\n", 869 | " print(word,end=\" \")\n", 870 | " print(\"\\n\\nSample Actual Summary\\n\")\n", 871 | " for word in actual_summary:\n", 872 | " if word == '':\n", 873 | " break\n", 874 | " else:\n", 875 | " print(word,end=\" \")\n", 876 | " print(\"\\n\\n\")\n", 877 | " \n", 878 | " print(\"\\n\\nSTARTING VALIDATION\\n\\n\")\n", 879 | " \n", 880 | " total_val_loss=0\n", 881 | " total_val_acc=0\n", 882 | " \n", 883 | " for i in range(0, len(val_batches_text)):\n", 884 | " \n", 885 | " if i%100==0:\n", 886 | " print(\"Validating data # {}\".format(i))\n", 887 | "\n", 888 | " cost, prediction,\\\n", 889 | " acc = sess.run([cross_entropy,\n", 890 | " outputs,\n", 891 | " accuracy],\n", 892 | " feed_dict={tf_text: val_batches_text[i],\n", 893 | " tf_embd: embd,\n", 894 | " tf_summary: val_batches_summary[i],\n", 895 | " tf_true_summary_len: val_batches_true_summary_len[i],\n", 896 | " tf_train: False})\n", 897 | " \n", 898 | " total_val_loss += cost\n", 899 | " total_val_acc += acc\n", 900 | " \n", 901 | " avg_val_loss = total_val_loss/len(val_batches_text)\n", 902 | " \n", 903 | " print(\"\\n\\nEpoch: {}\\n\\n\".format(epoch+covered_epochs))\n", 904 | " print(\"Average Training Loss: {:.3f}\".format(total_train_loss/len(train_batches_text)))\n", 905 | " print(\"Average Training Accuracy: {:.2f}\".format(100*total_train_acc/len(train_batches_text)))\n", 906 | " print(\"Average Validation Loss: {:.3f}\".format(avg_val_loss))\n", 907 | " print(\"Average Validation Accuracy: {:.2f}\".format(100*total_val_acc/len(val_batches_text)))\n", 908 | " \n", 909 | " if (avg_val_loss < best_loss):\n", 910 | " best_loss = avg_val_loss\n", 911 | " save_data={'best_loss':best_loss,'covered_epochs':covered_epochs+epoch+1}\n", 912 | " impatience=0\n", 913 | " with open('Model_Backup/Seq2seq_summarization.pkl', 'wb') as fp:\n", 914 | " pickle.dump(save_data, fp)\n", 915 | " saver.save(sess, 'Model_Backup/Seq2seq_summarization.ckpt')\n", 916 | " print(\"\\nModel saved\\n\")\n", 917 | " \n", 918 | " else:\n", 919 | " impatience+=1\n", 920 | " \n", 
921 | " if impatience > patience:\n", 922 | " break\n", 923 | " \n", 924 | " \n", 925 | " epoch+=1\n", 926 | " " 927 | ] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "metadata": {}, 932 | "source": [ 933 | "### Future Works\n", 934 | "\n", 935 | "* Beam Search\n", 936 | "* Pointer Mechanisms\n", 937 | "* BLEU\\ROUGE evaluation\n", 938 | "* Implement Testing\n", 939 | "* Complete Training and Optimize Hyperparameters" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "metadata": {}, 946 | "outputs": [], 947 | "source": [] 948 | } 949 | ], 950 | "metadata": { 951 | "kernelspec": { 952 | "display_name": "Python 3", 953 | "language": "python", 954 | "name": "python3" 955 | }, 956 | "language_info": { 957 | "codemirror_mode": { 958 | "name": "ipython", 959 | "version": 3 960 | }, 961 | "file_extension": ".py", 962 | "mimetype": "text/x-python", 963 | "name": "python", 964 | "nbconvert_exporter": "python", 965 | "pygments_lexer": "ipython3", 966 | "version": "3.6.9" 967 | } 968 | }, 969 | "nbformat": 4, 970 | "nbformat_minor": 4 971 | } 972 | --------------------------------------------------------------------------------