├── .gitignore └── .gitignore ├── Data_Pre-Processing.ipynb ├── Dataset └── readme.md ├── Embeddings └── readme.md ├── LICENSE ├── Model_Backup └── readme.md ├── Processed_Data └── readme.md ├── README.md └── Summarization.ipynb /.gitignore/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /Data_Pre-Processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Load and Process Dataset \n", 8 | "\n", 9 | "Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Processing data # 0\n", 22 | "Processing data # 10000\n", 23 | "Processing data # 20000\n", 24 | "Processing data # 30000\n", 25 | "Processing data # 40000\n", 26 | "Processing data # 50000\n", 27 | "Processing data # 60000\n", 28 | "Processing data # 70000\n", 29 | "Processing data # 80000\n", 30 | "Processing data # 90000\n", 31 | "Processing data # 100000\n", 32 | "Processing data # 110000\n", 33 | "Processing data # 120000\n", 34 | "Processing data # 130000\n", 35 | "Processing data # 140000\n", 36 | "Processing data # 150000\n", 37 | "Processing data # 160000\n", 38 | "Processing data # 170000\n", 39 | "Processing data # 180000\n", 40 | "Processing data # 190000\n", 41 | "Processing data # 200000\n", 42 | "Processing data # 210000\n", 43 | "Processing data # 220000\n", 44 | "Processing data # 230000\n", 45 | "Processing data # 240000\n", 46 | "Processing data # 250000\n", 47 | "Processing data # 260000\n", 48 | "Processing data # 270000\n", 49 | "Processing data 
# 280000\n", 50 | "Processing data # 290000\n", 51 | "Processing data # 300000\n", 52 | "Processing data # 310000\n", 53 | "Processing data # 320000\n", 54 | "Processing data # 330000\n", 55 | "\n", 56 | "# of Data: 337465\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "import csv\n", 62 | "from nltk import word_tokenize\n", 63 | "\n", 64 | "import string\n", 65 | "\n", 66 | "summaries = []\n", 67 | "texts = []\n", 68 | "\n", 69 | "def clean(text):\n", 70 | " text = text.lower()\n", 71 | " printable = set(string.printable)\n", 72 | " text = \"\".join(list(filter(lambda x: x in printable, text))) #filter funny characters, if any.\n", 73 | " return text\n", 74 | "\n", 75 | "text_max_len = 500\n", 76 | "text_min_len = 25\n", 77 | "summary_max_len = 30\n", 78 | "vocab2idx = {}\n", 79 | "\n", 80 | "#Data from https://www.kaggle.com/snap/amazon-fine-food-reviews\n", 81 | "with open('Dataset/Reviews.csv') as csvfile: \n", 82 | " \n", 83 | " Reviews = csv.DictReader(csvfile)\n", 84 | " \n", 85 | " i=0\n", 86 | " \n", 87 | " for row in Reviews:\n", 88 | " \n", 89 | " text = row['Text']\n", 90 | " summary = row['Summary']\n", 91 | " \n", 92 | " if len(text) <= text_max_len and len(text) >= text_min_len and len(summary) <= summary_max_len:\n", 93 | " #print(i)\n", 94 | "\n", 95 | " clean_text = clean(text)\n", 96 | " clean_summary = clean(summary)\n", 97 | " \n", 98 | " tokenized_summary = word_tokenize(clean_summary)\n", 99 | " tokenized_text = word_tokenize(clean_text)\n", 100 | " \n", 101 | " # BUILD VOCABULARY\n", 102 | " \n", 103 | " for word in tokenized_text:\n", 104 | " if word not in vocab2idx:\n", 105 | " vocab2idx[word]=len(vocab2idx)\n", 106 | " \n", 107 | " for word in tokenized_summary:\n", 108 | " if word not in vocab2idx:\n", 109 | " vocab2idx[word]=len(vocab2idx)\n", 110 | " \n", 111 | " ## ________________\n", 112 | "\n", 113 | " summaries.append(tokenized_summary)\n", 114 | " texts.append(tokenized_text)\n", 115 | "\n", 116 | " if i%10000==0:\n", 117 | " print(\"Processing data # {}\".format(i))\n", 118 | "\n", 119 | " i+=1\n", 120 | "\n", 121 | "print(\"\\n# of Data: {}\".format(len(texts)))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Random Sample" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 2, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "SAMPLE CLEANED & TOKENIZED TEXT: \n", 141 | "\n", 142 | "['i', 'like', 'these', 'better', 'than', 'any', 'chips', 'around', '--', 'high-potency', 'cheese', 'flavor', ',', 'crispy', '--', 'what', \"'s\", 'not', 'to', 'like', '?', 'i', 'have', 'to', 'package', 'these', 'up', 'in', '``', 'single-serve', \"''\", 'packs', 'or', 'i', 'will', 'eat', 'half', 'a', 'box', 'at', 'one', 'sitting', '!', '<', 'br', '/', '>', '<', 'br', '/', '>', 'mine', 'arrived', 'in', 'great', 'shape', ',', 'too', '--', 'no', 'more', '``', 'crumbs', \"''\", 'than', 'i', 'would', 'expect', 'to', 'find', 'in', 'a', 'box', 'purchased', 'at', 'the', 'supermarket', '.', 'and', 'these', 'are', 'much', 'cheaper', 'than', 'the', 'local', 'stores', ',', 'too', '.']\n", 143 | "\n", 144 | "\n", 145 | "SAMPLE CLEANED & TOKENIZED SUMMARY: \n", 146 | "\n", 147 | "['my', 'favorite', 'snack', 'crackers', '...']\n", 148 | "\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "import random\n", 154 | "\n", 155 | "index = random.randint(0,len(texts)-1)\n", 156 | "\n", 157 | "print(\"SAMPLE CLEANED & TOKENIZED TEXT: 
\\n\\n{}\\n\\n\".format(texts[index]))\n", 158 | "print(\"SAMPLE CLEANED & TOKENIZED SUMMARY: \\n\\n{}\\n\".format(summaries[index]))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "## Load Embeddings\n", 166 | "\n", 167 | "Loading pre-trained GloVe embeddings. Source of Data: https://nlp.stanford.edu/projects/glove/" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 3, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "Embedding Loaded.\n", 180 | "Vocabulary Size: 43544\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "import numpy as np\n", 186 | "\n", 187 | "vocab = []\n", 188 | "embd = []\n", 189 | "special_tags = ['','','']\n", 190 | "\n", 191 | "\n", 192 | "def loadEmbeddings(filename):\n", 193 | " vocab2embd = {}\n", 194 | " \n", 195 | " with open(filename) as infile: \n", 196 | " for line in infile:\n", 197 | " row = line.strip().split(' ')\n", 198 | " word = row[0].lower()\n", 199 | " if word not in vocab2embd:\n", 200 | " vocab2embd[word]=np.asarray(row[1:],np.float32)\n", 201 | "\n", 202 | " print('Embedding Loaded.')\n", 203 | " return vocab2embd\n", 204 | "\n", 205 | "vocab2embd = loadEmbeddings('Embeddings/glove.6B.100d.txt')\n", 206 | "\n", 207 | "for word in vocab2idx:\n", 208 | " if word in vocab2embd:\n", 209 | " vocab.append(word)\n", 210 | " embd.append(vocab2embd[word])\n", 211 | " \n", 212 | "for special_tag in special_tags:\n", 213 | " vocab.append(special_tag)\n", 214 | " embd.append(np.random.rand(len(embd[0]),))\n", 215 | " \n", 216 | "vocab2idx = {word:idx for idx,word in enumerate(vocab)}\n", 217 | "embd = np.asarray(embd,np.float32)\n", 218 | "\n", 219 | "print(\"Vocabulary Size: {}\".format(len(vocab2idx)))\n", 220 | " \n", 221 | "\n" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 4, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "43543\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "print(vocab2idx[''])" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "## Vectorize Data" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 5, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "vec_texts=[]\n", 255 | "vec_summaries=[]\n", 256 | "\n", 257 | "for text,summary in zip(texts,summaries):\n", 258 | " # Replace out of vocab words with index for '' tag\n", 259 | " vec_texts.append([vocab2idx.get(word,vocab2idx['']) for word in text])\n", 260 | " vec_summaries.append([vocab2idx.get(word,vocab2idx['']) for word in summary])" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "## Shuffle Data" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 6, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "import random\n", 277 | "random.seed(101)\n", 278 | "\n", 279 | "texts_idx = [idx for idx in range(len(vec_texts))]\n", 280 | "random.shuffle(texts_idx)\n", 281 | "\n", 282 | "vec_texts = [vec_texts[idx] for idx in texts_idx]\n", 283 | "vec_summaries = [vec_summaries[idx] for idx in texts_idx]" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "## Split Data into train, validation, and test sets." 
291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 7, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "# Use first 10000 data for testing, the next 10000 data for validation, and rest for training\n", 300 | "\n", 301 | "test_summaries = vec_summaries[0:10000]\n", 302 | "test_texts = vec_texts[0:10000]\n", 303 | "\n", 304 | "val_summaries = vec_summaries[10000:20000]\n", 305 | "val_texts = vec_texts[10000:20000]\n", 306 | "\n", 307 | "train_summaries = vec_summaries[20000:]\n", 308 | "train_texts = vec_texts[20000:]" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "## Bucket And Batch Function" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 8, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "def bucket_and_batch(texts,summaries,batch_size=32):\n", 325 | " \n", 326 | " # Sort summaries and texts according to the length of text\n", 327 | " # (So that texts with similar lengths tend to remain in the same batch and thus require less padding)\n", 328 | " \n", 329 | " text_lens = [len(text) for text in texts]\n", 330 | " sortedidx = np.flip(np.argsort(text_lens),axis=0)\n", 331 | " texts=[texts[idx] for idx in sortedidx]\n", 332 | " summaries=[summaries[idx] for idx in sortedidx]\n", 333 | " \n", 334 | " batches_text=[]\n", 335 | " batches_summary=[]\n", 336 | " batches_true_text_len = []\n", 337 | " batches_true_summary_len = []\n", 338 | " \n", 339 | " i=0\n", 340 | " while i < (len(texts)-batch_size):\n", 341 | " \n", 342 | " max_len = len(texts[i])\n", 343 | " \n", 344 | " batch_text=[]\n", 345 | " batch_summary=[]\n", 346 | " batch_true_text_len=[]\n", 347 | " batch_true_summary_len=[]\n", 348 | " \n", 349 | " for j in range(batch_size):\n", 350 | " \n", 351 | " padded_text = texts[i+j]\n", 352 | " padded_summary = summaries[i+j]\n", 353 | " \n", 354 | " batch_true_text_len.append(len(texts[i+j]))\n", 355 | " batch_true_summary_len.append(len(summaries[i+j])+1)\n", 356 | " \n", 357 | " while len(padded_text) < max_len:\n", 358 | " padded_text.append(vocab2idx[''])\n", 359 | "\n", 360 | " padded_summary.append(vocab2idx['']) #End of Sentence Marker\n", 361 | " while len(padded_summary) < summary_max_len+1:\n", 362 | " padded_summary.append(vocab2idx[''])\n", 363 | " \n", 364 | " \n", 365 | " batch_text.append(padded_text)\n", 366 | " batch_summary.append(padded_summary)\n", 367 | " \n", 368 | " batches_text.append(batch_text)\n", 369 | " batches_summary.append(batch_summary)\n", 370 | " batches_true_text_len.append(batch_true_text_len)\n", 371 | " batches_true_summary_len.append(batch_true_summary_len)\n", 372 | " \n", 373 | " i+=batch_size\n", 374 | " \n", 375 | " return batches_text, batches_summary, batches_true_text_len, batches_true_summary_len" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "## Prepare Batches" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 9, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "train_batches_text, train_batches_summary, train_batches_true_text_len, train_batches_true_summary_len \\\n", 392 | "= bucket_and_batch(train_texts, train_summaries)\n", 393 | "\n", 394 | "val_batches_text, val_batches_summary, val_batches_true_text_len, val_batches_true_summary_len \\\n", 395 | "= bucket_and_batch(val_texts, val_summaries)\n", 396 | "\n", 397 | "test_batches_text, test_batches_summary, test_batches_true_text_len, 
test_batches_true_summary_len \\\n", 398 | "= bucket_and_batch(test_texts, test_summaries)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "## Save Data" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 10, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "import json\n", 415 | "\n", 416 | "d = {}\n", 417 | "\n", 418 | "d[\"vocab\"] = vocab2idx\n", 419 | "d[\"embd\"] = embd.tolist()\n", 420 | "d[\"train_batches_text\"] = train_batches_text\n", 421 | "d[\"test_batches_text\"] = test_batches_text\n", 422 | "d[\"val_batches_text\"] = val_batches_text\n", 423 | "d[\"train_batches_summary\"] = train_batches_summary\n", 424 | "d[\"test_batches_summary\"] = test_batches_summary\n", 425 | "d[\"val_batches_summary\"] = val_batches_summary\n", 426 | "d[\"train_batches_true_text_len\"] = train_batches_true_text_len\n", 427 | "d[\"val_batches_true_text_len\"] = val_batches_true_text_len\n", 428 | "d[\"test_batches_true_text_len\"] = test_batches_true_text_len\n", 429 | "d[\"train_batches_true_summary_len\"] = train_batches_true_summary_len\n", 430 | "d[\"val_batches_true_summary_len\"] = val_batches_true_summary_len\n", 431 | "d[\"test_batches_true_summary_len\"] = test_batches_true_summary_len\n", 432 | "\n", 433 | "with open('Processed_Data/Amazon_Reviews_Processed.json', 'w') as outfile:\n", 434 | " json.dump(d, outfile)\n" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [] 443 | } 444 | ], 445 | "metadata": { 446 | "kernelspec": { 447 | "display_name": "Python 3", 448 | "language": "python", 449 | "name": "python3" 450 | }, 451 | "language_info": { 452 | "codemirror_mode": { 453 | "name": "ipython", 454 | "version": 3 455 | }, 456 | "file_extension": ".py", 457 | "mimetype": "text/x-python", 458 | "name": "python", 459 | "nbconvert_exporter": "python", 460 | "pygments_lexer": "ipython3", 461 | "version": "3.6.9" 462 | } 463 | }, 464 | "nbformat": 4, 465 | "nbformat_minor": 4 466 | } 467 | -------------------------------------------------------------------------------- /Dataset/readme.md: -------------------------------------------------------------------------------- 1 | Amazon Fine Food Reviews dataset (Reviews.csv) should be put here. 2 | Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews 3 | -------------------------------------------------------------------------------- /Embeddings/readme.md: -------------------------------------------------------------------------------- 1 | Glove 100 dimensional embedding should be put here. 2 | Download 'Wikipedia 2014 + Gigaword 5 (6B tokens, 400K vocab, uncased, 50d, 100d, 200d, & 300d vectors, 822 MB download): glove.6B.zip' 3 | from https://nlp.stanford.edu/projects/glove/ and extract the files in this folder. 
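A quick sanity check after extraction (a minimal sketch; it assumes only the standard glove.6B.zip contents and the Embeddings/glove.6B.100d.txt path that Data_Pre-Processing.ipynb expects):

```python
# Check that the extracted GloVe file is where the notebooks expect it and that
# each line parses as a word followed by 100 floating-point values.
with open('Embeddings/glove.6B.100d.txt') as f:
    parts = f.readline().strip().split(' ')

print(parts[0], len(parts) - 1)  # should print the first word and 100
```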
4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jishnu Ray Chowdhury 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Model_Backup/readme.md: -------------------------------------------------------------------------------- 1 | Pre-Trained Tensorflow Model Parameters and checkpoints will be saved inside this folder. 2 | -------------------------------------------------------------------------------- /Processed_Data/readme.md: -------------------------------------------------------------------------------- 1 | Processed Data will be put here. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Abstractive Summarization 2 | 3 | Based on [Seq2seq learning](https://arxiv.org/abs/1409.3215) 4 | with [attention mechanism](https://arxiv.org/abs/1409.0473), specifically [local attention](https://nlp.stanford.edu/pubs/emnlp15_attn.pdf). 
5 | 6 | ### Loading Pre-processed Dataset 7 | 8 | The Data is preprocessed in [Data_Pre-Processing.ipynb](https://github.com/JRC1995/Abstractive-Summarization/blob/master/Data_Pre-Processing.ipynb) 9 | 10 | Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews 11 | 12 | 13 | ```python 14 | import json 15 | 16 | with open('Processed_Data/Amazon_Reviews_Processed.json') as file: 17 | 18 | for json_data in file: 19 | saved_data = json.loads(json_data) 20 | 21 | vocab2idx = saved_data["vocab"] 22 | embd = saved_data["embd"] 23 | train_batches_text = saved_data["train_batches_text"] 24 | test_batches_text = saved_data["test_batches_text"] 25 | val_batches_text = saved_data["val_batches_text"] 26 | train_batches_summary = saved_data["train_batches_summary"] 27 | test_batches_summary = saved_data["test_batches_summary"] 28 | val_batches_summary = saved_data["val_batches_summary"] 29 | train_batches_true_text_len = saved_data["train_batches_true_text_len"] 30 | val_batches_true_text_len = saved_data["val_batches_true_text_len"] 31 | test_batches_true_text_len = saved_data["test_batches_true_text_len"] 32 | train_batches_true_summary_len = saved_data["train_batches_true_summary_len"] 33 | val_batches_true_summary_len = saved_data["val_batches_true_summary_len"] 34 | test_batches_true_summary_len = saved_data["test_batches_true_summary_len"] 35 | 36 | break 37 | 38 | idx2vocab = {v:k for k,v in vocab2idx.items()} 39 | ``` 40 | 41 | ## Hyperparameters 42 | 43 | 44 | ```python 45 | hidden_size = 300 46 | learning_rate = 0.001 47 | epochs = 5 48 | max_summary_len = 31 # should be summary_max_len as used in data_preprocessing with +1 (+1 for ) 49 | D = 5 # D determines local attention window size 50 | window_len = 2*D+1 51 | l2=1e-6 52 | ``` 53 | 54 | ## Tensorflow Placeholders 55 | 56 | 57 | ```python 58 | import tensorflow.compat.v1 as tf 59 | 60 | tf.disable_v2_behavior() 61 | tf.disable_eager_execution() 62 | 63 | embd_dim = len(embd[0]) 64 | 65 | tf_text = tf.placeholder(tf.int32, [None, None]) 66 | tf_embd = tf.placeholder(tf.float32, [len(vocab2idx),embd_dim]) 67 | tf_true_summary_len = tf.placeholder(tf.int32, [None]) 68 | tf_summary = tf.placeholder(tf.int32,[None, None]) 69 | tf_train = tf.placeholder(tf.bool) 70 | ``` 71 | 72 | WARNING:tensorflow:From /home/jishnu/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version. 73 | Instructions for updating: 74 | non-resource variables are not supported in the long term 75 | 76 | 77 | ## Dropout Function 78 | 79 | 80 | ```python 81 | def dropout(x,rate,training): 82 | return tf.cond(tf_train, 83 | lambda: tf.nn.dropout(x,rate=0.3), 84 | lambda: x) 85 | 86 | 87 | ``` 88 | 89 | ## Embed vectorized text 90 | 91 | Dropout used for regularization 92 | (https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf) 93 | 94 | 95 | ```python 96 | embd_text = tf.nn.embedding_lookup(tf_embd, tf_text) 97 | 98 | embd_text = dropout(embd_text,rate=0.3,training=tf_train) 99 | ``` 100 | 101 | ## LSTM function 102 | 103 | More info: 104 |
105 | https://dl.acm.org/citation.cfm?id=1246450, 106 |
107 | https://www.bioinf.jku.at/publications/older/2604.pdf, 108 |
109 | https://en.wikipedia.org/wiki/Long_short-term_memory 110 | 111 | 112 | ```python 113 | def LSTM(x,hidden_state,cell,input_dim,hidden_size,scope): 114 | 115 | with tf.variable_scope(scope,reuse=tf.AUTO_REUSE): 116 | 117 | w = tf.get_variable("w", shape=[4,input_dim,hidden_size], 118 | dtype=tf.float32, 119 | trainable=True, 120 | initializer=tf.glorot_uniform_initializer()) 121 | 122 | u = tf.get_variable("u", shape=[4,hidden_size,hidden_size], 123 | dtype=tf.float32, 124 | trainable=True, 125 | initializer=tf.glorot_uniform_initializer()) 126 | 127 | b = tf.get_variable("bias", shape=[4,1,hidden_size], 128 | dtype=tf.float32, 129 | trainable=True, 130 | initializer=tf.zeros_initializer()) 131 | 132 | input_gate = tf.nn.sigmoid( tf.matmul(x,w[0]) + tf.matmul(hidden_state,u[0]) + b[0]) 133 | forget_gate = tf.nn.sigmoid( tf.matmul(x,w[1]) + tf.matmul(hidden_state,u[1]) + b[1]) 134 | output_gate = tf.nn.sigmoid( tf.matmul(x,w[2]) + tf.matmul(hidden_state,u[2]) + b[2]) 135 | cell_ = tf.nn.tanh( tf.matmul(x,w[3]) + tf.matmul(hidden_state,u[3]) + b[3]) 136 | cell = forget_gate*cell + input_gate*cell_ 137 | hidden_state = output_gate*tf.tanh(cell) 138 | 139 | return hidden_state, cell 140 | 141 | ``` 142 | 143 | ## Bi-Directional LSTM Encoder 144 | 145 | (https://maxwell.ict.griffith.edu.au/spl/publications/papers/ieeesp97_schuster.pdf) 146 | 147 | More Info: https://machinelearningmastery.com/develop-bidirectional-lstm-sequence-classification-python-keras/ 148 | 149 | Bi-directional LSTM encoder has a forward encoder and a backward encoder. The forward encoder encodes a text sequence from start to end, and the backward encoder encodes the text sequence from end to start. 150 | The final output is a combination (in this case, a concatenation) of the forward encoded text and the backward encoded text 151 | 152 | 153 | 154 | ## Forward Encoding 155 | 156 | 157 | ```python 158 | S = tf.shape(embd_text)[1] #text sequence length 159 | N = tf.shape(embd_text)[0] #batch_size 160 | 161 | i=0 162 | hidden=tf.zeros([N, hidden_size], dtype=tf.float32) 163 | cell=tf.zeros([N, hidden_size], dtype=tf.float32) 164 | hidden_forward=tf.TensorArray(size=S, dtype=tf.float32) 165 | 166 | #shape of embd_text: [N,S,embd_dim] 167 | embd_text_t = tf.transpose(embd_text,[1,0,2]) 168 | #current shape of embd_text: [S,N,embd_dim] 169 | 170 | def cond(i, hidden, cell, hidden_forward): 171 | return i < S 172 | 173 | def body(i, hidden, cell, hidden_forward): 174 | x = embd_text_t[i] 175 | 176 | hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope="forward_encoder") 177 | hidden_forward = hidden_forward.write(i, hidden) 178 | 179 | return i+1, hidden, cell, hidden_forward 180 | 181 | _, _, _, hidden_forward = tf.while_loop(cond, body, [i, hidden, cell, hidden_forward]) 182 | ``` 183 | 184 | ## Backward Encoding 185 | 186 | 187 | ```python 188 | i=S-1 189 | hidden=tf.zeros([N, hidden_size], dtype=tf.float32) 190 | cell=tf.zeros([N, hidden_size], dtype=tf.float32) 191 | hidden_backward=tf.TensorArray(size=S, dtype=tf.float32) 192 | 193 | def cond(i, hidden, cell, hidden_backward): 194 | return i >= 0 195 | 196 | def body(i, hidden, cell, hidden_backward): 197 | x = embd_text_t[i] 198 | hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope="backward_encoder") 199 | hidden_backward = hidden_backward.write(i, hidden) 200 | 201 | return i-1, hidden, cell, hidden_backward 202 | 203 | _, _, _, hidden_backward = tf.while_loop(cond, body, [i, hidden, cell, hidden_backward]) 204 | ``` 205 | 206 | ## Merge Forward and 
Backward Encoder Hidden States 207 | 208 | 209 | ```python 210 | hidden_forward = hidden_forward.stack() 211 | hidden_backward = hidden_backward.stack() 212 | 213 | encoder_states = tf.concat([hidden_forward,hidden_backward],axis=-1) 214 | encoder_states = tf.transpose(encoder_states,[1,0,2]) 215 | 216 | encoder_states = dropout(encoder_states,rate=0.3,training=tf_train) 217 | 218 | final_encoded_state = dropout(tf.concat([hidden_forward[-1],hidden_backward[-1]],axis=-1),rate=0.3,training=tf_train) 219 | 220 | 221 | ``` 222 | 223 | ## Implementation of attention scoring function 224 | 225 | Given a sequence of encoder states ($H_s$) and the decoder hidden state ($H_t$) of current timestep $t$, the equation for computing attention score is: 226 | 227 | $$Score = (H_s.W_a).H_t^T $$ 228 | 229 | ($W_a$ = trainable parameters) 230 | 231 | (https://nlp.stanford.edu/pubs/emnlp15_attn.pdf) 232 | 233 | 234 | ```python 235 | def attention_score(encoder_states,decoder_hidden_state,scope="attention_score"): 236 | 237 | with tf.variable_scope(scope,reuse=tf.AUTO_REUSE): 238 | Wa = tf.get_variable("Wa", shape=[2*hidden_size,2*hidden_size], 239 | dtype=tf.float32, 240 | trainable=True, 241 | initializer=tf.glorot_uniform_initializer()) 242 | 243 | encoder_states = tf.reshape(encoder_states,[N*S,2*hidden_size]) 244 | 245 | encoder_states = tf.reshape(tf.matmul(encoder_states,Wa),[N,S,2*hidden_size]) 246 | decoder_hidden_state = tf.reshape(decoder_hidden_state,[N,2*hidden_size,1]) 247 | 248 | return tf.reshape(tf.matmul(encoder_states,decoder_hidden_state),[N,S]) 249 | 250 | ``` 251 | 252 | ## Local Attention Function 253 | 254 | Based on: https://nlp.stanford.edu/pubs/emnlp15_attn.pdf 255 | 256 | 257 | ```python 258 | 259 | def align(encoder_states, decoder_hidden_state,scope="attention"): 260 | 261 | with tf.variable_scope(scope,reuse=tf.AUTO_REUSE): 262 | Wp = tf.get_variable("Wp", shape=[2*hidden_size,128], 263 | dtype=tf.float32, 264 | trainable=True, 265 | initializer=tf.glorot_uniform_initializer()) 266 | 267 | Vp = tf.get_variable("Vp", shape=[128,1], 268 | dtype=tf.float32, 269 | trainable=True, 270 | initializer=tf.glorot_uniform_initializer()) 271 | 272 | positions = tf.cast(S-window_len,dtype=tf.float32) # Maximum valid attention window starting position 273 | 274 | # Predict attention window starting position 275 | ps = positions*tf.nn.sigmoid(tf.matmul(tf.tanh(tf.matmul(decoder_hidden_state,Wp)),Vp)) 276 | # ps = (soft-)predicted starting position of attention window 277 | pt = ps+D # pt = center of attention window where the whole window length is 2*D+1 278 | pt = tf.reshape(pt,[N]) 279 | 280 | i = 0 281 | gaussian_position_based_scores = tf.TensorArray(size=S,dtype=tf.float32) 282 | sigma = tf.constant(D/2,dtype=tf.float32) 283 | 284 | def cond(i,gaussian_position_based_scores): 285 | 286 | return i < S 287 | 288 | def body(i,gaussian_position_based_scores): 289 | 290 | score = tf.exp(-((tf.square(tf.cast(i,tf.float32)-pt))/(2*tf.square(sigma)))) 291 | # (equation (10) in https://nlp.stanford.edu/pubs/emnlp15_attn.pdf) 292 | gaussian_position_based_scores = gaussian_position_based_scores.write(i,score) 293 | 294 | return i+1,gaussian_position_based_scores 295 | 296 | i,gaussian_position_based_scores = tf.while_loop(cond,body,[i,gaussian_position_based_scores]) 297 | 298 | gaussian_position_based_scores = gaussian_position_based_scores.stack() 299 | gaussian_position_based_scores = tf.transpose(gaussian_position_based_scores,[1,0]) 300 | gaussian_position_based_scores = 
tf.reshape(gaussian_position_based_scores,[N,S]) 301 | 302 | scores = attention_score(encoder_states,decoder_hidden_state)*gaussian_position_based_scores 303 | scores = tf.nn.softmax(scores,axis=-1) 304 | 305 | return tf.reshape(scores,[N,S,1]) 306 | ``` 307 | 308 | ## LSTM Decoder With Local Attention 309 | 310 | 311 | ```python 312 | with tf.variable_scope("decoder",reuse=tf.AUTO_REUSE): 313 | SOS = tf.get_variable("sos", shape=[1,embd_dim], 314 | dtype=tf.float32, 315 | trainable=True, 316 | initializer=tf.glorot_uniform_initializer()) 317 | 318 | # SOS represents starting marker 319 | # It tells the decoder that it is about to decode the first word of the output 320 | # I have set SOS as a trainable parameter 321 | 322 | Wc = tf.get_variable("Wc", shape=[4*hidden_size,embd_dim], 323 | dtype=tf.float32, 324 | trainable=True, 325 | initializer=tf.glorot_uniform_initializer()) 326 | 327 | 328 | 329 | SOS = tf.tile(SOS,[N,1]) #now SOS shape: [N,embd_dim] 330 | inp = SOS 331 | hidden=final_encoded_state 332 | cell=tf.zeros([N, 2*hidden_size], dtype=tf.float32) 333 | decoder_outputs=tf.TensorArray(size=max_summary_len, dtype=tf.float32) 334 | outputs=tf.TensorArray(size=max_summary_len, dtype=tf.int32) 335 | 336 | attention_scores = align(encoder_states,hidden) 337 | encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1) 338 | 339 | for i in range(max_summary_len): 340 | 341 | inp = dropout(inp,rate=0.3,training=tf_train) 342 | 343 | inp = tf.concat([inp,encoder_context_vector],axis=-1) 344 | 345 | hidden,cell = LSTM(inp,hidden,cell,embd_dim+2*hidden_size,2*hidden_size,scope="decoder") 346 | 347 | hidden = dropout(hidden,rate=0.3,training=tf_train) 348 | 349 | attention_scores = align(encoder_states,hidden) 350 | encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1) 351 | 352 | concated = tf.concat([hidden,encoder_context_vector],axis=-1) 353 | 354 | linear_out = tf.nn.tanh(tf.matmul(concated,Wc)) 355 | decoder_output = tf.matmul(linear_out,tf.transpose(tf_embd,[1,0])) 356 | # produce unnormalized probability distribution over vocabulary 357 | 358 | 359 | decoder_outputs = decoder_outputs.write(i,decoder_output) 360 | 361 | # Pick out most probable vocab indices based on the unnormalized probability distribution 362 | 363 | next_word_vec = tf.cast(tf.argmax(decoder_output,1),tf.int32) 364 | 365 | next_word_vec = tf.reshape(next_word_vec, [N]) 366 | 367 | outputs = outputs.write(i,next_word_vec) 368 | 369 | next_word = tf.nn.embedding_lookup(tf_embd, next_word_vec) 370 | inp = tf.reshape(next_word, [N, embd_dim]) 371 | 372 | 373 | decoder_outputs = decoder_outputs.stack() 374 | outputs = outputs.stack() 375 | 376 | decoder_outputs = tf.transpose(decoder_outputs,[1,0,2]) 377 | outputs = tf.transpose(outputs,[1,0]) 378 | 379 | 380 | 381 | ``` 382 | 383 | ## Define Cross Entropy Cost Function and L2 Regularization 384 | 385 | 386 | ```python 387 | filtered_trainables = [var for var in tf.trainable_variables() if 388 | not("Bias" in var.name or "bias" in var.name 389 | or "noreg" in var.name)] 390 | 391 | regularization = tf.reduce_sum([tf.nn.l2_loss(var) for var 392 | in filtered_trainables]) 393 | 394 | with tf.variable_scope("loss"): 395 | 396 | epsilon = tf.constant(1e-9, tf.float32) 397 | 398 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 399 | labels=tf_summary, logits=decoder_outputs) 400 | 401 | pad_mask = tf.sequence_mask(tf_true_summary_len, 402 | maxlen=max_summary_len, 403 | dtype=tf.float32) 404 | 405 | 
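    # pad_mask (from tf.sequence_mask above) is 1.0 at real summary positions and
    # 0.0 at <pad> positions beyond tf_true_summary_len, so the element-wise
    # product below zeroes out the cross-entropy at padded timesteps.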
masked_cross_entropy = cross_entropy*pad_mask 406 | 407 | cost = tf.reduce_mean(masked_cross_entropy) + \ 408 | l2*regularization 409 | 410 | cross_entropy = tf.reduce_mean(masked_cross_entropy) 411 | ``` 412 | 413 | ## Accuracy 414 | 415 | 416 | ```python 417 | # Comparing predicted sequence with labels 418 | comparison = tf.cast(tf.equal(outputs, tf_summary), 419 | tf.float32) 420 | 421 | # Masking to ignore the effect of pads while calculating accuracy 422 | pad_mask = tf.sequence_mask(tf_true_summary_len, 423 | maxlen=max_summary_len, 424 | dtype=tf.bool) 425 | 426 | masked_comparison = tf.boolean_mask(comparison, pad_mask) 427 | 428 | # Accuracy 429 | accuracy = tf.reduce_mean(masked_comparison) 430 | ``` 431 | 432 | ## Define Optimizer 433 | 434 | 435 | ```python 436 | all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) 437 | 438 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 439 | 440 | gvs = optimizer.compute_gradients(cost, all_vars) 441 | 442 | capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs] # Gradient Clipping 443 | 444 | train_op = optimizer.apply_gradients(capped_gvs) 445 | ``` 446 | 447 | ## Training and Validation 448 | 449 | 450 | ```python 451 | import pickle 452 | import random 453 | 454 | with tf.Session() as sess: # Start Tensorflow Session 455 | display_step = 100 456 | patience = 5 457 | 458 | load = input("\nLoad checkpoint? y/n: ") 459 | print("") 460 | saver = tf.train.Saver() 461 | 462 | if load.lower() == 'y': 463 | 464 | print('Loading pre-trained weights for the model...') 465 | 466 | saver.restore(sess, 'Model_Backup/Seq2seq_summarization.ckpt') 467 | sess.run(tf.global_variables()) 468 | sess.run(tf.tables_initializer()) 469 | 470 | with open('Model_Backup/Seq2seq_summarization.pkl', 'rb') as fp: 471 | train_data = pickle.load(fp) 472 | 473 | covered_epochs = train_data['covered_epochs'] 474 | best_loss = train_data['best_loss'] 475 | impatience = 0 476 | 477 | print('\nRESTORATION COMPLETE\n') 478 | 479 | else: 480 | best_loss = 2**30 481 | impatience = 0 482 | covered_epochs = 0 483 | 484 | init = tf.global_variables_initializer() 485 | sess.run(init) 486 | sess.run(tf.tables_initializer()) 487 | 488 | epoch=0 489 | while (epoch+covered_epochs)") for vec in train_batches_text[j][idx]]) 529 | predicted_summary = [idx2vocab.get(vec,"") for vec in prediction[idx]] 530 | actual_summary = [idx2vocab.get(vec,"") for vec in train_batches_summary[j][idx]] 531 | 532 | print("\nSample Text\n") 533 | print(text) 534 | print("\nSample Predicted Summary\n") 535 | for word in predicted_summary: 536 | if word == '': 537 | break 538 | else: 539 | print(word,end=" ") 540 | print("\n\nSample Actual Summary\n") 541 | for word in actual_summary: 542 | if word == '': 543 | break 544 | else: 545 | print(word,end=" ") 546 | print("\n\n") 547 | 548 | print("\n\nSTARTING VALIDATION\n\n") 549 | 550 | total_val_loss=0 551 | total_val_acc=0 552 | 553 | for i in range(0, len(val_batches_text)): 554 | 555 | if i%100==0: 556 | print("Validating data # {}".format(i)) 557 | 558 | cost, prediction,\ 559 | acc = sess.run([cross_entropy, 560 | outputs, 561 | accuracy], 562 | feed_dict={tf_text: val_batches_text[i], 563 | tf_embd: embd, 564 | tf_summary: val_batches_summary[i], 565 | tf_true_summary_len: val_batches_true_summary_len[i], 566 | tf_train: False}) 567 | 568 | total_val_loss += cost 569 | total_val_acc += acc 570 | 571 | avg_val_loss = total_val_loss/len(val_batches_text) 572 | 573 | print("\n\nEpoch: 
{}\n\n".format(epoch+covered_epochs)) 574 | print("Average Training Loss: {:.3f}".format(total_train_loss/len(train_batches_text))) 575 | print("Average Training Accuracy: {:.2f}".format(100*total_train_acc/len(train_batches_text))) 576 | print("Average Validation Loss: {:.3f}".format(avg_val_loss)) 577 | print("Average Validation Accuracy: {:.2f}".format(100*total_val_acc/len(val_batches_text))) 578 | 579 | if (avg_val_loss < best_loss): 580 | best_loss = avg_val_loss 581 | save_data={'best_loss':best_loss,'covered_epochs':covered_epochs+epoch+1} 582 | impatience=0 583 | with open('Model_Backup/Seq2seq_summarization.pkl', 'wb') as fp: 584 | pickle.dump(save_data, fp) 585 | saver.save(sess, 'Model_Backup/Seq2seq_summarization.ckpt') 586 | print("\nModel saved\n") 587 | 588 | else: 589 | impatience+=1 590 | 591 | if impatience > patience: 592 | break 593 | 594 | 595 | epoch+=1 596 | 597 | ``` 598 | 599 | 600 | Load checkpoint? y/n: n 601 | 602 | 603 | 604 | 605 | 606 | STARTING TRAINING 607 | 608 | 609 | Iter 0, Cost= 1.493, Acc = 0.00% 610 | 611 | Sample Text 612 | 613 | i was given these as a gift ... they were so amazing i now order them for all occasions and sometimes just because i had n't had them in a while . a little warning ; they are completely addictive . i like the ones ; my girlfriend likes the rocky road . highly recommended ! < br / > < br / > sure to be appreciated by everyone on your gift list . 614 | 615 | Sample Predicted Summary 616 | 617 | condolence s.e. foodstuff condolence webbed poverty squarely poverty poverty assists foodstuff webbed poverty methodist foodstuff webbed poverty gephardt foodstuff ethier articulos meh rojos cols colombians webbed poverty condolence poverty condolence hourly 618 | 619 | Sample Actual Summary 620 | 621 | simply amazing brownies ... 622 | 623 | 624 | Iter 100, Cost= 0.684, Acc = 26.98% 625 | Iter 200, Cost= 0.649, Acc = 27.19% 626 | Iter 300, Cost= 0.744, Acc = 25.93% 627 | Iter 400, Cost= 0.976, Acc = 19.88% 628 | Iter 500, Cost= 0.839, Acc = 21.53% 629 | 630 | Sample Text 631 | 632 | for those looking for a water beverage and one with a neutral taste that does n't have aftertaste , this one 's for < br / > < br / > also , traditional tap water is slightly more acidic ( i believe ph 7-8 ) . 's is supposed at 9.5 ph , so if you 're very sensitive to acidic products , this might help you out . 633 | 634 | Sample Predicted Summary 635 | 636 | good 637 | 638 | Sample Actual Summary 639 | 640 | neutral taste , low ph 641 | 642 | 643 | Iter 600, Cost= 0.697, Acc = 27.82% 644 | Iter 700, Cost= 0.763, Acc = 24.24% 645 | Iter 800, Cost= 0.792, Acc = 24.82% 646 | Iter 900, Cost= 0.866, Acc = 23.13% 647 | Iter 1000, Cost= 0.838, Acc = 23.03% 648 | 649 | Sample Text 650 | 651 | i love my starbucks sumatra first thing in the morning . i was not always up early enough to take the detour to starbucks and now i do n't have to ! these are perfect and delicious . now i can have my fav coffee even before i take off my slippers ! i love this product ! it 's easy to order - arrived quickly and the price was good . 652 | 653 | Sample Predicted Summary 654 | 655 | great 656 | 657 | Sample Actual Summary 658 | 659 | no drive through at starbucks ? 
660 | 661 | 662 | Iter 1100, Cost= 0.648, Acc = 30.58% 663 | Iter 1200, Cost= 0.977, Acc = 19.08% 664 | Iter 1300, Cost= 0.788, Acc = 23.29% 665 | Iter 1400, Cost= 0.681, Acc = 28.23% 666 | Iter 1500, Cost= 0.608, Acc = 29.32% 667 | 668 | Sample Text 669 | 670 | husband loves this tea especially in the recommend using the large cup setting on your keurig brewer unless you prefer your tea extra strong . 671 | 672 | Sample Predicted Summary 673 | 674 | great tea 675 | 676 | Sample Actual Summary 677 | 678 | good substitute for coffee . 679 | 680 | 681 | Iter 1600, Cost= 0.709, Acc = 27.48% 682 | Iter 1700, Cost= 0.729, Acc = 31.11% 683 | Iter 1800, Cost= 0.627, Acc = 28.93% 684 | Iter 1900, Cost= 0.798, Acc = 26.36% 685 | Iter 2000, Cost= 0.856, Acc = 22.08% 686 | 687 | Sample Text 688 | 689 | can no longer find this product locally anymore . i purchased it previously at a warehouse club but costco , bj ` s and sam ` s club no longer stock it in my area stores . my two golden retriever ` s love this gravy when added to their mix of both dry and moist dog food . hope it stays on the market ... ! 690 | 691 | Sample Predicted Summary 692 | 693 | great 694 | 695 | Sample Actual Summary 696 | 697 | best pet food gravy 698 | 699 | 700 | Iter 2100, Cost= 0.640, Acc = 30.77% 701 | Iter 2200, Cost= 0.792, Acc = 24.49% 702 | Iter 2300, Cost= 0.735, Acc = 22.86% 703 | Iter 2400, Cost= 0.769, Acc = 21.68% 704 | Iter 2500, Cost= 0.900, Acc = 21.15% 705 | 706 | Sample Text 707 | 708 | i want to start out by saying that i thought at first that a bag with only 120 calories and 4 grams of fat ( no saturated or trans ) for every 20 chips was going to taste like crap . i must say that not only was i wrong , that this is my favorite bbq chip on the market today . they are light and you can not taste any fat or grease after eating them . that 's because they are n't baked or fried , just popped as their name suggests . these chips are very easy to dip as well . fantastic product ! 709 | 710 | Sample Predicted Summary 711 | 712 | great chips 713 | 714 | Sample Actual Summary 715 | 716 | fantastic chips ! ! ! 717 | 718 | 719 | Iter 2600, Cost= 0.740, Acc = 22.86% 720 | Iter 2700, Cost= 0.848, Acc = 24.84% 721 | Iter 2800, Cost= 0.677, Acc = 28.57% 722 | Iter 2900, Cost= 0.779, Acc = 25.90% 723 | Iter 3000, Cost= 0.718, Acc = 27.34% 724 | 725 | Sample Text 726 | 727 | this of 7-ounce `` taster 's choice french roast '' canisters , is a good buy . the coffee is flavored differently than original flavor , but the difference is very subtle , and refreshingly good . overall , this taster 's choice coffee is a bargain , and highly recommended . 
728 | 729 | Sample Predicted Summary 730 | 731 | great flavor 732 | 733 | Sample Actual Summary 734 | 735 | good buy 736 | 737 | 738 | ### Future Works 739 | 740 | * Beam Search 741 | * Pointer Mechanisms 742 | * BLEU\ROUGE evaluation 743 | * Implement Testing 744 | * Complete Training and Optimize Hyperparameters 745 | 746 | 747 | ```python 748 | 749 | ``` 750 | -------------------------------------------------------------------------------- /Summarization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Abstractive Summarization" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Based on [Seq2seq learning](https://arxiv.org/abs/1409.3215)\n", 15 | "with [attention mechanism](https://arxiv.org/abs/1409.0473), specifically [local attention](https://nlp.stanford.edu/pubs/emnlp15_attn.pdf)." 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Loading Pre-processed Dataset\n", 23 | "\n", 24 | "The Data is preprocessed in [Data_Pre-Processing.ipynb](https://github.com/JRC1995/Abstractive-Summarization/blob/master/Data_Pre-Processing.ipynb)\n", 25 | "\n", 26 | "Dataset source: https://www.kaggle.com/snap/amazon-fine-food-reviews" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import json\n", 36 | "\n", 37 | "with open('Processed_Data/Amazon_Reviews_Processed.json') as file:\n", 38 | "\n", 39 | " for json_data in file:\n", 40 | " saved_data = json.loads(json_data)\n", 41 | "\n", 42 | " vocab2idx = saved_data[\"vocab\"]\n", 43 | " embd = saved_data[\"embd\"]\n", 44 | " train_batches_text = saved_data[\"train_batches_text\"]\n", 45 | " test_batches_text = saved_data[\"test_batches_text\"]\n", 46 | " val_batches_text = saved_data[\"val_batches_text\"]\n", 47 | " train_batches_summary = saved_data[\"train_batches_summary\"]\n", 48 | " test_batches_summary = saved_data[\"test_batches_summary\"]\n", 49 | " val_batches_summary = saved_data[\"val_batches_summary\"]\n", 50 | " train_batches_true_text_len = saved_data[\"train_batches_true_text_len\"]\n", 51 | " val_batches_true_text_len = saved_data[\"val_batches_true_text_len\"]\n", 52 | " test_batches_true_text_len = saved_data[\"test_batches_true_text_len\"]\n", 53 | " train_batches_true_summary_len = saved_data[\"train_batches_true_summary_len\"]\n", 54 | " val_batches_true_summary_len = saved_data[\"val_batches_true_summary_len\"]\n", 55 | " test_batches_true_summary_len = saved_data[\"test_batches_true_summary_len\"]\n", 56 | "\n", 57 | " break\n", 58 | " \n", 59 | "idx2vocab = {v:k for k,v in vocab2idx.items()}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Hyperparameters" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 2, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "hidden_size = 300\n", 76 | "learning_rate = 0.001\n", 77 | "epochs = 5\n", 78 | "max_summary_len = 31 # should be summary_max_len as used in data_preprocessing with +1 (+1 for ) \n", 79 | "D = 5 # D determines local attention window size\n", 80 | "window_len = 2*D+1\n", 81 | "l2=1e-6" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Tensorflow Placeholders" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | 
"execution_count": 3, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "WARNING:tensorflow:From /home/jishnu/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n", 101 | "Instructions for updating:\n", 102 | "non-resource variables are not supported in the long term\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "import tensorflow.compat.v1 as tf \n", 108 | "\n", 109 | "tf.disable_v2_behavior()\n", 110 | "tf.disable_eager_execution()\n", 111 | "\n", 112 | "embd_dim = len(embd[0])\n", 113 | "\n", 114 | "tf_text = tf.placeholder(tf.int32, [None, None])\n", 115 | "tf_embd = tf.placeholder(tf.float32, [len(vocab2idx),embd_dim])\n", 116 | "tf_true_summary_len = tf.placeholder(tf.int32, [None])\n", 117 | "tf_summary = tf.placeholder(tf.int32,[None, None])\n", 118 | "tf_train = tf.placeholder(tf.bool)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Dropout Function" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 4, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "def dropout(x,rate,training):\n", 135 | " return tf.cond(tf_train,\n", 136 | " lambda: tf.nn.dropout(x,rate=0.3),\n", 137 | " lambda: x)\n", 138 | "\n", 139 | " " 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "## Embed vectorized text\n", 147 | "\n", 148 | "Dropout used for regularization \n", 149 | "(https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 5, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "embd_text = tf.nn.embedding_lookup(tf_embd, tf_text)\n", 159 | "\n", 160 | "embd_text = dropout(embd_text,rate=0.3,training=tf_train)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "## LSTM function\n", 168 | "\n", 169 | "More info: \n", 170 | "
\n", 171 | "https://dl.acm.org/citation.cfm?id=1246450, \n", 172 | "
\n", 173 | "https://www.bioinf.jku.at/publications/older/2604.pdf,\n", 174 | "
\n", 175 | "https://en.wikipedia.org/wiki/Long_short-term_memory" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 6, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "def LSTM(x,hidden_state,cell,input_dim,hidden_size,scope):\n", 185 | " \n", 186 | " with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):\n", 187 | " \n", 188 | " w = tf.get_variable(\"w\", shape=[4,input_dim,hidden_size],\n", 189 | " dtype=tf.float32,\n", 190 | " trainable=True,\n", 191 | " initializer=tf.glorot_uniform_initializer())\n", 192 | " \n", 193 | " u = tf.get_variable(\"u\", shape=[4,hidden_size,hidden_size],\n", 194 | " dtype=tf.float32,\n", 195 | " trainable=True,\n", 196 | " initializer=tf.glorot_uniform_initializer())\n", 197 | " \n", 198 | " b = tf.get_variable(\"bias\", shape=[4,1,hidden_size],\n", 199 | " dtype=tf.float32,\n", 200 | " trainable=True,\n", 201 | " initializer=tf.zeros_initializer())\n", 202 | " \n", 203 | " input_gate = tf.nn.sigmoid( tf.matmul(x,w[0]) + tf.matmul(hidden_state,u[0]) + b[0])\n", 204 | " forget_gate = tf.nn.sigmoid( tf.matmul(x,w[1]) + tf.matmul(hidden_state,u[1]) + b[1])\n", 205 | " output_gate = tf.nn.sigmoid( tf.matmul(x,w[2]) + tf.matmul(hidden_state,u[2]) + b[2])\n", 206 | " cell_ = tf.nn.tanh( tf.matmul(x,w[3]) + tf.matmul(hidden_state,u[3]) + b[3])\n", 207 | " cell = forget_gate*cell + input_gate*cell_\n", 208 | " hidden_state = output_gate*tf.tanh(cell)\n", 209 | " \n", 210 | " return hidden_state, cell\n", 211 | " " 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Bi-Directional LSTM Encoder\n", 219 | "\n", 220 | "(https://maxwell.ict.griffith.edu.au/spl/publications/papers/ieeesp97_schuster.pdf)\n", 221 | "\n", 222 | "More Info: https://machinelearningmastery.com/develop-bidirectional-lstm-sequence-classification-python-keras/\n", 223 | "\n", 224 | "Bi-directional LSTM encoder has a forward encoder and a backward encoder. 
The forward encoder encodes a text sequence from start to end, and the backward encoder encodes the text sequence from end to start.\n", 225 | "The final output is a combination (in this case, a concatenation) of the forward encoded text and the backward encoded text\n", 226 | " \n" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Forward Encoding" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 7, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "S = tf.shape(embd_text)[1] #text sequence length\n", 243 | "N = tf.shape(embd_text)[0] #batch_size\n", 244 | "\n", 245 | "i=0\n", 246 | "hidden=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 247 | "cell=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 248 | "hidden_forward=tf.TensorArray(size=S, dtype=tf.float32)\n", 249 | "\n", 250 | "#shape of embd_text: [N,S,embd_dim]\n", 251 | "embd_text_t = tf.transpose(embd_text,[1,0,2]) \n", 252 | "#current shape of embd_text: [S,N,embd_dim]\n", 253 | "\n", 254 | "def cond(i, hidden, cell, hidden_forward):\n", 255 | " return i < S\n", 256 | "\n", 257 | "def body(i, hidden, cell, hidden_forward):\n", 258 | " x = embd_text_t[i]\n", 259 | " \n", 260 | " hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope=\"forward_encoder\")\n", 261 | " hidden_forward = hidden_forward.write(i, hidden)\n", 262 | "\n", 263 | " return i+1, hidden, cell, hidden_forward\n", 264 | "\n", 265 | "_, _, _, hidden_forward = tf.while_loop(cond, body, [i, hidden, cell, hidden_forward])" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "## Backward Encoding" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 8, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "i=S-1\n", 282 | "hidden=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 283 | "cell=tf.zeros([N, hidden_size], dtype=tf.float32)\n", 284 | "hidden_backward=tf.TensorArray(size=S, dtype=tf.float32)\n", 285 | "\n", 286 | "def cond(i, hidden, cell, hidden_backward):\n", 287 | " return i >= 0\n", 288 | "\n", 289 | "def body(i, hidden, cell, hidden_backward):\n", 290 | " x = embd_text_t[i]\n", 291 | " hidden,cell = LSTM(x,hidden,cell,embd_dim,hidden_size,scope=\"backward_encoder\")\n", 292 | " hidden_backward = hidden_backward.write(i, hidden)\n", 293 | "\n", 294 | " return i-1, hidden, cell, hidden_backward\n", 295 | "\n", 296 | "_, _, _, hidden_backward = tf.while_loop(cond, body, [i, hidden, cell, hidden_backward])" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "## Merge Forward and Backward Encoder Hidden States" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 9, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "hidden_forward = hidden_forward.stack()\n", 313 | "hidden_backward = hidden_backward.stack()\n", 314 | "\n", 315 | "encoder_states = tf.concat([hidden_forward,hidden_backward],axis=-1)\n", 316 | "encoder_states = tf.transpose(encoder_states,[1,0,2])\n", 317 | "\n", 318 | "encoder_states = dropout(encoder_states,rate=0.3,training=tf_train)\n", 319 | "\n", 320 | "final_encoded_state = dropout(tf.concat([hidden_forward[-1],hidden_backward[-1]],axis=-1),rate=0.3,training=tf_train)\n", 321 | "\n" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "## Implementation of attention scoring function\n", 329 | "\n", 330 | "Given 
a sequence of encoder states ($H_s$) and the decoder hidden state ($H_t$) of current timestep $t$, the equation for computing attention score is:\n", 331 | "\n", 332 | "$$Score = (H_s.W_a).H_t^T $$\n", 333 | "\n", 334 | "($W_a$ = trainable parameters)\n", 335 | "\n", 336 | "(https://nlp.stanford.edu/pubs/emnlp15_attn.pdf)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 10, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "def attention_score(encoder_states,decoder_hidden_state,scope=\"attention_score\"):\n", 346 | " \n", 347 | " with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):\n", 348 | " Wa = tf.get_variable(\"Wa\", shape=[2*hidden_size,2*hidden_size],\n", 349 | " dtype=tf.float32,\n", 350 | " trainable=True,\n", 351 | " initializer=tf.glorot_uniform_initializer())\n", 352 | " \n", 353 | " encoder_states = tf.reshape(encoder_states,[N*S,2*hidden_size])\n", 354 | " \n", 355 | " encoder_states = tf.reshape(tf.matmul(encoder_states,Wa),[N,S,2*hidden_size])\n", 356 | " decoder_hidden_state = tf.reshape(decoder_hidden_state,[N,2*hidden_size,1])\n", 357 | " \n", 358 | " return tf.reshape(tf.matmul(encoder_states,decoder_hidden_state),[N,S])\n" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "## Local Attention Function\n", 366 | "\n", 367 | "Based on: https://nlp.stanford.edu/pubs/emnlp15_attn.pdf" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 11, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "\n", 377 | "def align(encoder_states, decoder_hidden_state,scope=\"attention\"):\n", 378 | " \n", 379 | " with tf.variable_scope(scope,reuse=tf.AUTO_REUSE):\n", 380 | " Wp = tf.get_variable(\"Wp\", shape=[2*hidden_size,128],\n", 381 | " dtype=tf.float32,\n", 382 | " trainable=True,\n", 383 | " initializer=tf.glorot_uniform_initializer())\n", 384 | " \n", 385 | " Vp = tf.get_variable(\"Vp\", shape=[128,1],\n", 386 | " dtype=tf.float32,\n", 387 | " trainable=True,\n", 388 | " initializer=tf.glorot_uniform_initializer())\n", 389 | " \n", 390 | " positions = tf.cast(S-window_len,dtype=tf.float32) # Maximum valid attention window starting position\n", 391 | " \n", 392 | " # Predict attention window starting position \n", 393 | " ps = positions*tf.nn.sigmoid(tf.matmul(tf.tanh(tf.matmul(decoder_hidden_state,Wp)),Vp))\n", 394 | " # ps = (soft-)predicted starting position of attention window\n", 395 | " pt = ps+D # pt = center of attention window where the whole window length is 2*D+1\n", 396 | " pt = tf.reshape(pt,[N])\n", 397 | " \n", 398 | " i = 0\n", 399 | " gaussian_position_based_scores = tf.TensorArray(size=S,dtype=tf.float32)\n", 400 | " sigma = tf.constant(D/2,dtype=tf.float32)\n", 401 | " \n", 402 | " def cond(i,gaussian_position_based_scores):\n", 403 | " \n", 404 | " return i < S\n", 405 | " \n", 406 | " def body(i,gaussian_position_based_scores):\n", 407 | " \n", 408 | " score = tf.exp(-((tf.square(tf.cast(i,tf.float32)-pt))/(2*tf.square(sigma)))) \n", 409 | " # (equation (10) in https://nlp.stanford.edu/pubs/emnlp15_attn.pdf)\n", 410 | " gaussian_position_based_scores = gaussian_position_based_scores.write(i,score)\n", 411 | " \n", 412 | " return i+1,gaussian_position_based_scores\n", 413 | " \n", 414 | " i,gaussian_position_based_scores = tf.while_loop(cond,body,[i,gaussian_position_based_scores])\n", 415 | " \n", 416 | " gaussian_position_based_scores = gaussian_position_based_scores.stack()\n", 417 | " gaussian_position_based_scores = 
tf.transpose(gaussian_position_based_scores,[1,0])\n", 418 | " gaussian_position_based_scores = tf.reshape(gaussian_position_based_scores,[N,S])\n", 419 | " \n", 420 | " scores = attention_score(encoder_states,decoder_hidden_state)*gaussian_position_based_scores\n", 421 | " scores = tf.nn.softmax(scores,axis=-1)\n", 422 | " \n", 423 | " return tf.reshape(scores,[N,S,1])" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "## LSTM Decoder With Local Attention" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 12, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "with tf.variable_scope(\"decoder\",reuse=tf.AUTO_REUSE):\n", 440 | " SOS = tf.get_variable(\"sos\", shape=[1,embd_dim],\n", 441 | " dtype=tf.float32,\n", 442 | " trainable=True,\n", 443 | " initializer=tf.glorot_uniform_initializer())\n", 444 | " \n", 445 | " # SOS represents starting marker \n", 446 | " # It tells the decoder that it is about to decode the first word of the output\n", 447 | " # I have set SOS as a trainable parameter\n", 448 | " \n", 449 | " Wc = tf.get_variable(\"Wc\", shape=[4*hidden_size,embd_dim],\n", 450 | " dtype=tf.float32,\n", 451 | " trainable=True,\n", 452 | " initializer=tf.glorot_uniform_initializer())\n", 453 | " \n", 454 | "\n", 455 | "\n", 456 | "SOS = tf.tile(SOS,[N,1]) #now SOS shape: [N,embd_dim]\n", 457 | "inp = SOS\n", 458 | "hidden=final_encoded_state\n", 459 | "cell=tf.zeros([N, 2*hidden_size], dtype=tf.float32)\n", 460 | "decoder_outputs=tf.TensorArray(size=max_summary_len, dtype=tf.float32)\n", 461 | "outputs=tf.TensorArray(size=max_summary_len, dtype=tf.int32)\n", 462 | "\n", 463 | "attention_scores = align(encoder_states,hidden)\n", 464 | "encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1)\n", 465 | "\n", 466 | "for i in range(max_summary_len):\n", 467 | " \n", 468 | " inp = dropout(inp,rate=0.3,training=tf_train)\n", 469 | " \n", 470 | " inp = tf.concat([inp,encoder_context_vector],axis=-1)\n", 471 | " \n", 472 | " hidden,cell = LSTM(inp,hidden,cell,embd_dim+2*hidden_size,2*hidden_size,scope=\"decoder\")\n", 473 | " \n", 474 | " hidden = dropout(hidden,rate=0.3,training=tf_train)\n", 475 | " \n", 476 | " attention_scores = align(encoder_states,hidden)\n", 477 | " encoder_context_vector = tf.reduce_sum(encoder_states*attention_scores,axis=1)\n", 478 | " \n", 479 | " concated = tf.concat([hidden,encoder_context_vector],axis=-1)\n", 480 | " \n", 481 | " linear_out = tf.nn.tanh(tf.matmul(concated,Wc))\n", 482 | " decoder_output = tf.matmul(linear_out,tf.transpose(tf_embd,[1,0])) \n", 483 | " # produce unnormalized probability distribution over vocabulary\n", 484 | " \n", 485 | " \n", 486 | " decoder_outputs = decoder_outputs.write(i,decoder_output)\n", 487 | " \n", 488 | " # Pick out most probable vocab indices based on the unnormalized probability distribution\n", 489 | " \n", 490 | " next_word_vec = tf.cast(tf.argmax(decoder_output,1),tf.int32)\n", 491 | "\n", 492 | " next_word_vec = tf.reshape(next_word_vec, [N])\n", 493 | "\n", 494 | " outputs = outputs.write(i,next_word_vec)\n", 495 | "\n", 496 | " next_word = tf.nn.embedding_lookup(tf_embd, next_word_vec)\n", 497 | " inp = tf.reshape(next_word, [N, embd_dim])\n", 498 | " \n", 499 | " \n", 500 | "decoder_outputs = decoder_outputs.stack()\n", 501 | "outputs = outputs.stack()\n", 502 | "\n", 503 | "decoder_outputs = tf.transpose(decoder_outputs,[1,0,2])\n", 504 | "outputs = tf.transpose(outputs,[1,0])\n", 505 | "\n", 
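"# Added note (not part of the original cell): at this point decoder_outputs has shape\n",
"# [N, max_summary_len, V] (unnormalized logits over the vocabulary of size V) and\n",
"# outputs has shape [N, max_summary_len] (the greedily chosen word indices).\n",
"# The loop above always feeds the embedding of its own argmax prediction back in as the\n",
"# next input, even at training time (greedy decoding, no teacher forcing); beam search\n",
"# is listed under Future Works at the end of this notebook.\n",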
506 | " \n", 507 | " " 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "## Define Cross Entropy Cost Function and L2 Regularization" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 13, 520 | "metadata": {}, 521 | "outputs": [], 522 | "source": [ 523 | "filtered_trainables = [var for var in tf.trainable_variables() if\n", 524 | " not(\"Bias\" in var.name or \"bias\" in var.name\n", 525 | " or \"noreg\" in var.name)]\n", 526 | "\n", 527 | "regularization = tf.reduce_sum([tf.nn.l2_loss(var) for var\n", 528 | " in filtered_trainables])\n", 529 | "\n", 530 | "with tf.variable_scope(\"loss\"):\n", 531 | "\n", 532 | " epsilon = tf.constant(1e-9, tf.float32)\n", 533 | "\n", 534 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(\n", 535 | " labels=tf_summary, logits=decoder_outputs)\n", 536 | "\n", 537 | " pad_mask = tf.sequence_mask(tf_true_summary_len,\n", 538 | " maxlen=max_summary_len,\n", 539 | " dtype=tf.float32)\n", 540 | "\n", 541 | " masked_cross_entropy = cross_entropy*pad_mask\n", 542 | "\n", 543 | " cost = tf.reduce_mean(masked_cross_entropy) + \\\n", 544 | " l2*regularization\n", 545 | "\n", 546 | " cross_entropy = tf.reduce_mean(masked_cross_entropy)" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "metadata": {}, 552 | "source": [ 553 | "## Accuracy" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 14, 559 | "metadata": {}, 560 | "outputs": [], 561 | "source": [ 562 | "# Comparing predicted sequence with labels\n", 563 | "comparison = tf.cast(tf.equal(outputs, tf_summary),\n", 564 | " tf.float32)\n", 565 | "\n", 566 | "# Masking to ignore the effect of pads while calculating accuracy\n", 567 | "pad_mask = tf.sequence_mask(tf_true_summary_len,\n", 568 | " maxlen=max_summary_len,\n", 569 | " dtype=tf.bool)\n", 570 | "\n", 571 | "masked_comparison = tf.boolean_mask(comparison, pad_mask)\n", 572 | "\n", 573 | "# Accuracy\n", 574 | "accuracy = tf.reduce_mean(masked_comparison)" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "## Define Optimizer" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 15, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "all_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)\n", 591 | "\n", 592 | "optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n", 593 | "\n", 594 | "gvs = optimizer.compute_gradients(cost, all_vars)\n", 595 | "\n", 596 | "capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs] # Gradient Clipping\n", 597 | "\n", 598 | "train_op = optimizer.apply_gradients(capped_gvs)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": {}, 604 | "source": [ 605 | "## Training and Validation" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": 16, 611 | "metadata": {}, 612 | "outputs": [ 613 | { 614 | "name": "stdin", 615 | "output_type": "stream", 616 | "text": [ 617 | "\n", 618 | "Load checkpoint? y/n: n\n" 619 | ] 620 | }, 621 | { 622 | "name": "stdout", 623 | "output_type": "stream", 624 | "text": [ 625 | "\n", 626 | "\n", 627 | "\n", 628 | "STARTING TRAINING\n", 629 | "\n", 630 | "\n", 631 | "Iter 0, Cost= 1.493, Acc = 0.00%\n", 632 | "\n", 633 | "Sample Text\n", 634 | "\n", 635 | "i was given these as a gift ... they were so amazing i now order them for all occasions and sometimes just because i had n't had them in a while . 
a little warning ; they are completely addictive . i like the ones ; my girlfriend likes the rocky road . highly recommended ! < br / > < br / > sure to be appreciated by everyone on your gift list .\n", 636 | "\n", 637 | "Sample Predicted Summary\n", 638 | "\n", 639 | "condolence s.e. foodstuff condolence webbed poverty squarely poverty poverty assists foodstuff webbed poverty methodist foodstuff webbed poverty gephardt foodstuff ethier articulos meh rojos cols colombians webbed poverty condolence poverty condolence hourly \n", 640 | "\n", 641 | "Sample Actual Summary\n", 642 | "\n", 643 | "simply amazing brownies ... \n", 644 | "\n", 645 | "\n", 646 | "Iter 100, Cost= 0.684, Acc = 26.98%\n", 647 | "Iter 200, Cost= 0.649, Acc = 27.19%\n", 648 | "Iter 300, Cost= 0.744, Acc = 25.93%\n", 649 | "Iter 400, Cost= 0.976, Acc = 19.88%\n", 650 | "Iter 500, Cost= 0.839, Acc = 21.53%\n", 651 | "\n", 652 | "Sample Text\n", 653 | "\n", 654 | "for those looking for a water beverage and one with a neutral taste that does n't have aftertaste , this one 's for < br / > < br / > also , traditional tap water is slightly more acidic ( i believe ph 7-8 ) . 's is supposed at 9.5 ph , so if you 're very sensitive to acidic products , this might help you out .\n", 655 | "\n", 656 | "Sample Predicted Summary\n", 657 | "\n", 658 | "good \n", 659 | "\n", 660 | "Sample Actual Summary\n", 661 | "\n", 662 | "neutral taste , low ph \n", 663 | "\n", 664 | "\n", 665 | "Iter 600, Cost= 0.697, Acc = 27.82%\n", 666 | "Iter 700, Cost= 0.763, Acc = 24.24%\n", 667 | "Iter 800, Cost= 0.792, Acc = 24.82%\n", 668 | "Iter 900, Cost= 0.866, Acc = 23.13%\n", 669 | "Iter 1000, Cost= 0.838, Acc = 23.03%\n", 670 | "\n", 671 | "Sample Text\n", 672 | "\n", 673 | "i love my starbucks sumatra first thing in the morning . i was not always up early enough to take the detour to starbucks and now i do n't have to ! these are perfect and delicious . now i can have my fav coffee even before i take off my slippers ! i love this product ! it 's easy to order - arrived quickly and the price was good .\n", 674 | "\n", 675 | "Sample Predicted Summary\n", 676 | "\n", 677 | "great \n", 678 | "\n", 679 | "Sample Actual Summary\n", 680 | "\n", 681 | "no drive through at starbucks ? \n", 682 | "\n", 683 | "\n", 684 | "Iter 1100, Cost= 0.648, Acc = 30.58%\n", 685 | "Iter 1200, Cost= 0.977, Acc = 19.08%\n", 686 | "Iter 1300, Cost= 0.788, Acc = 23.29%\n", 687 | "Iter 1400, Cost= 0.681, Acc = 28.23%\n", 688 | "Iter 1500, Cost= 0.608, Acc = 29.32%\n", 689 | "\n", 690 | "Sample Text\n", 691 | "\n", 692 | "husband loves this tea especially in the recommend using the large cup setting on your keurig brewer unless you prefer your tea extra strong .\n", 693 | "\n", 694 | "Sample Predicted Summary\n", 695 | "\n", 696 | "great tea \n", 697 | "\n", 698 | "Sample Actual Summary\n", 699 | "\n", 700 | "good substitute for coffee . \n", 701 | "\n", 702 | "\n", 703 | "Iter 1600, Cost= 0.709, Acc = 27.48%\n", 704 | "Iter 1700, Cost= 0.729, Acc = 31.11%\n", 705 | "Iter 1800, Cost= 0.627, Acc = 28.93%\n", 706 | "Iter 1900, Cost= 0.798, Acc = 26.36%\n", 707 | "Iter 2000, Cost= 0.856, Acc = 22.08%\n", 708 | "\n", 709 | "Sample Text\n", 710 | "\n", 711 | "can no longer find this product locally anymore . i purchased it previously at a warehouse club but costco , bj ` s and sam ` s club no longer stock it in my area stores . my two golden retriever ` s love this gravy when added to their mix of both dry and moist dog food . hope it stays on the market ... 
!\n", 712 | "\n", 713 | "Sample Predicted Summary\n", 714 | "\n", 715 | "great \n", 716 | "\n", 717 | "Sample Actual Summary\n", 718 | "\n", 719 | "best pet food gravy \n", 720 | "\n", 721 | "\n", 722 | "Iter 2100, Cost= 0.640, Acc = 30.77%\n", 723 | "Iter 2200, Cost= 0.792, Acc = 24.49%\n", 724 | "Iter 2300, Cost= 0.735, Acc = 22.86%\n", 725 | "Iter 2400, Cost= 0.769, Acc = 21.68%\n", 726 | "Iter 2500, Cost= 0.900, Acc = 21.15%\n", 727 | "\n", 728 | "Sample Text\n", 729 | "\n", 730 | "i want to start out by saying that i thought at first that a bag with only 120 calories and 4 grams of fat ( no saturated or trans ) for every 20 chips was going to taste like crap . i must say that not only was i wrong , that this is my favorite bbq chip on the market today . they are light and you can not taste any fat or grease after eating them . that 's because they are n't baked or fried , just popped as their name suggests . these chips are very easy to dip as well . fantastic product !\n", 731 | "\n", 732 | "Sample Predicted Summary\n", 733 | "\n", 734 | "great chips \n", 735 | "\n", 736 | "Sample Actual Summary\n", 737 | "\n", 738 | "fantastic chips ! ! ! \n", 739 | "\n", 740 | "\n", 741 | "Iter 2600, Cost= 0.740, Acc = 22.86%\n", 742 | "Iter 2700, Cost= 0.848, Acc = 24.84%\n", 743 | "Iter 2800, Cost= 0.677, Acc = 28.57%\n", 744 | "Iter 2900, Cost= 0.779, Acc = 25.90%\n", 745 | "Iter 3000, Cost= 0.718, Acc = 27.34%\n", 746 | "\n", 747 | "Sample Text\n", 748 | "\n", 749 | "this of 7-ounce `` taster 's choice french roast '' canisters , is a good buy . the coffee is flavored differently than original flavor , but the difference is very subtle , and refreshingly good . overall , this taster 's choice coffee is a bargain , and highly recommended .\n", 750 | "\n", 751 | "Sample Predicted Summary\n", 752 | "\n", 753 | "great flavor \n", 754 | "\n", 755 | "Sample Actual Summary\n", 756 | "\n", 757 | "good buy \n", 758 | "\n", 759 | "\n" 760 | ] 761 | }, 762 | { 763 | "ename": "KeyboardInterrupt", 764 | "evalue": "", 765 | "output_type": "error", 766 | "traceback": [ 767 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 768 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 769 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mtf_summary\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtrain_batches_summary\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mtf_true_summary_len\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtrain_batches_true_summary_len\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 62\u001b[0;31m tf_train: True})\n\u001b[0m\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0mtotal_train_acc\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0macc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 770 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 954\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 955\u001b[0m result = self._run(None, fetches, feed_dict, 
options_ptr,\n\u001b[0;32m--> 956\u001b[0;31m run_metadata_ptr)\n\u001b[0m\u001b[1;32m 957\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrun_metadata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 958\u001b[0m \u001b[0mproto_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTF_GetBuffer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_metadata_ptr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 771 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_run\u001b[0;34m(self, handle, fetches, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1178\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfinal_fetches\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mfinal_targets\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mfeed_dict_tensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1179\u001b[0m results = self._do_run(handle, final_targets, final_fetches,\n\u001b[0;32m-> 1180\u001b[0;31m feed_dict_tensor, options, run_metadata)\n\u001b[0m\u001b[1;32m 1181\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 772 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_do_run\u001b[0;34m(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)\u001b[0m\n\u001b[1;32m 1357\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhandle\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1358\u001b[0m return self._do_call(_run_fn, feeds, fetches, targets, options,\n\u001b[0;32m-> 1359\u001b[0;31m run_metadata)\n\u001b[0m\u001b[1;32m 1360\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1361\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_prun_fn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeeds\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetches\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 773 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1363\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1364\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1365\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1366\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1367\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 774 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1348\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_extend_graph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1349\u001b[0m return self._call_tf_sessionrun(options, feed_dict, fetch_list,\n\u001b[0;32m-> 1350\u001b[0;31m target_list, run_metadata)\n\u001b[0m\u001b[1;32m 1351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1352\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 775 | "\u001b[0;32m~/miniconda3/envs/ML/lib/python3.6/site-packages/tensorflow_core/python/client/session.py\u001b[0m in \u001b[0;36m_call_tf_sessionrun\u001b[0;34m(self, options, feed_dict, fetch_list, target_list, run_metadata)\u001b[0m\n\u001b[1;32m 1441\u001b[0m return tf_session.TF_SessionRun_wrapper(self._session, options, feed_dict,\n\u001b[1;32m 1442\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_list\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1443\u001b[0;31m run_metadata)\n\u001b[0m\u001b[1;32m 1444\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1445\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_call_tf_sessionprun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 776 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 777 | ] 778 | } 779 | ], 780 | "source": [ 781 | "import pickle\n", 782 | "import random\n", 783 | "\n", 784 | "with tf.Session() as sess: # Start Tensorflow Session\n", 785 | " display_step = 100\n", 786 | " patience = 5\n", 787 | "\n", 788 | " load = input(\"\\nLoad checkpoint? 
y/n: \")\n", 789 | " print(\"\")\n", 790 | " saver = tf.train.Saver()\n", 791 | "\n", 792 | " if load.lower() == 'y':\n", 793 | "\n", 794 | " print('Loading pre-trained weights for the model...')\n", 795 | "\n", 796 | " saver.restore(sess, 'Model_Backup/Seq2seq_summarization.ckpt')\n", 797 | " sess.run(tf.global_variables())\n", 798 | " sess.run(tf.tables_initializer())\n", 799 | "\n", 800 | " with open('Model_Backup/Seq2seq_summarization.pkl', 'rb') as fp:\n", 801 | " train_data = pickle.load(fp)\n", 802 | "\n", 803 | " covered_epochs = train_data['covered_epochs']\n", 804 | " best_loss = train_data['best_loss']\n", 805 | " impatience = 0\n", 806 | " \n", 807 | " print('\\nRESTORATION COMPLETE\\n')\n", 808 | "\n", 809 | " else:\n", 810 | " best_loss = 2**30\n", 811 | " impatience = 0\n", 812 | " covered_epochs = 0\n", 813 | "\n", 814 | " init = tf.global_variables_initializer()\n", 815 | " sess.run(init)\n", 816 | " sess.run(tf.tables_initializer())\n", 817 | "\n", 818 | " epoch=0\n", 819 | " while (epoch+covered_epochs)\") for vec in train_batches_text[j][idx]])\n", 859 | " predicted_summary = [idx2vocab.get(vec,\"\") for vec in prediction[idx]]\n", 860 | " actual_summary = [idx2vocab.get(vec,\"\") for vec in train_batches_summary[j][idx]]\n", 861 | " \n", 862 | " print(\"\\nSample Text\\n\")\n", 863 | " print(text)\n", 864 | " print(\"\\nSample Predicted Summary\\n\")\n", 865 | " for word in predicted_summary:\n", 866 | " if word == '':\n", 867 | " break\n", 868 | " else:\n", 869 | " print(word,end=\" \")\n", 870 | " print(\"\\n\\nSample Actual Summary\\n\")\n", 871 | " for word in actual_summary:\n", 872 | " if word == '':\n", 873 | " break\n", 874 | " else:\n", 875 | " print(word,end=\" \")\n", 876 | " print(\"\\n\\n\")\n", 877 | " \n", 878 | " print(\"\\n\\nSTARTING VALIDATION\\n\\n\")\n", 879 | " \n", 880 | " total_val_loss=0\n", 881 | " total_val_acc=0\n", 882 | " \n", 883 | " for i in range(0, len(val_batches_text)):\n", 884 | " \n", 885 | " if i%100==0:\n", 886 | " print(\"Validating data # {}\".format(i))\n", 887 | "\n", 888 | " cost, prediction,\\\n", 889 | " acc = sess.run([cross_entropy,\n", 890 | " outputs,\n", 891 | " accuracy],\n", 892 | " feed_dict={tf_text: val_batches_text[i],\n", 893 | " tf_embd: embd,\n", 894 | " tf_summary: val_batches_summary[i],\n", 895 | " tf_true_summary_len: val_batches_true_summary_len[i],\n", 896 | " tf_train: False})\n", 897 | " \n", 898 | " total_val_loss += cost\n", 899 | " total_val_acc += acc\n", 900 | " \n", 901 | " avg_val_loss = total_val_loss/len(val_batches_text)\n", 902 | " \n", 903 | " print(\"\\n\\nEpoch: {}\\n\\n\".format(epoch+covered_epochs))\n", 904 | " print(\"Average Training Loss: {:.3f}\".format(total_train_loss/len(train_batches_text)))\n", 905 | " print(\"Average Training Accuracy: {:.2f}\".format(100*total_train_acc/len(train_batches_text)))\n", 906 | " print(\"Average Validation Loss: {:.3f}\".format(avg_val_loss))\n", 907 | " print(\"Average Validation Accuracy: {:.2f}\".format(100*total_val_acc/len(val_batches_text)))\n", 908 | " \n", 909 | " if (avg_val_loss < best_loss):\n", 910 | " best_loss = avg_val_loss\n", 911 | " save_data={'best_loss':best_loss,'covered_epochs':covered_epochs+epoch+1}\n", 912 | " impatience=0\n", 913 | " with open('Model_Backup/Seq2seq_summarization.pkl', 'wb') as fp:\n", 914 | " pickle.dump(save_data, fp)\n", 915 | " saver.save(sess, 'Model_Backup/Seq2seq_summarization.ckpt')\n", 916 | " print(\"\\nModel saved\\n\")\n", 917 | " \n", 918 | " else:\n", 919 | " impatience+=1\n", 920 | " \n", 
921 | " if impatience > patience:\n", 922 | " break\n", 923 | " \n", 924 | " \n", 925 | " epoch+=1\n", 926 | " " 927 | ] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "metadata": {}, 932 | "source": [ 933 | "### Future Works\n", 934 | "\n", 935 | "* Beam Search\n", 936 | "* Pointer Mechanisms\n", 937 | "* BLEU\\ROUGE evaluation\n", 938 | "* Implement Testing\n", 939 | "* Complete Training and Optimize Hyperparameters" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "metadata": {}, 946 | "outputs": [], 947 | "source": [] 948 | } 949 | ], 950 | "metadata": { 951 | "kernelspec": { 952 | "display_name": "Python 3", 953 | "language": "python", 954 | "name": "python3" 955 | }, 956 | "language_info": { 957 | "codemirror_mode": { 958 | "name": "ipython", 959 | "version": 3 960 | }, 961 | "file_extension": ".py", 962 | "mimetype": "text/x-python", 963 | "name": "python", 964 | "nbconvert_exporter": "python", 965 | "pygments_lexer": "ipython3", 966 | "version": "3.6.9" 967 | } 968 | }, 969 | "nbformat": 4, 970 | "nbformat_minor": 4 971 | } 972 | --------------------------------------------------------------------------------