├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── assets └── encoderdecoder.png ├── checkpoint ├── .gitkeep ├── bidirectional │ ├── .gitkeep │ ├── gru │ │ ├── .gitkeep │ │ ├── Attention │ │ │ └── .gitkeep │ │ └── noAttention │ │ │ └── .gitkeep │ └── lstm │ │ ├── .gitkeep │ │ ├── Attention │ │ └── .gitkeep │ │ └── noAttention │ │ └── .gitkeep ├── data │ └── .gitkeep ├── simple │ ├── .gitkeep │ ├── gru │ │ ├── .gitkeep │ │ ├── Attention │ │ │ └── .gitkeep │ │ └── noAttention │ │ │ └── .gitkeep │ └── lstm │ │ ├── .gitkeep │ │ ├── Attention │ │ └── .gitkeep │ │ └── noAttention │ │ └── .gitkeep ├── stackedBidirectional │ ├── .gitkeep │ ├── gru │ │ ├── .gitkeep │ │ ├── Attention │ │ │ └── .gitkeep │ │ └── noAttention │ │ │ └── .gitkeep │ └── lstm │ │ ├── .gitkeep │ │ ├── Attention │ │ └── .gitkeep │ │ └── noAttention │ │ └── .gitkeep └── stackedSimple │ ├── .gitkeep │ ├── gru │ ├── .gitkeep │ ├── Attention │ │ └── .gitkeep │ └── noAttention │ │ └── .gitkeep │ └── lstm │ ├── .gitkeep │ ├── Attention │ └── .gitkeep │ └── noAttention │ └── .gitkeep ├── condor_logs ├── output_gru_attention.txt ├── output_gru_no_attention.txt ├── output_lstm_attention.txt └── output_lstm_no_attention.txt ├── condor_rest.sh ├── condor_seed.sh ├── docs ├── Makefile ├── build │ ├── doctrees │ │ ├── environment.pickle │ │ ├── helpers.doctree │ │ ├── index.doctree │ │ ├── metrics.bleu.doctree │ │ ├── metrics.doctree │ │ ├── metrics.rouge.doctree │ │ ├── metrics.tokenizer.doctree │ │ ├── models.doctree │ │ └── modules.doctree │ └── html │ │ ├── .buildinfo │ │ ├── _sources │ │ ├── helpers.txt │ │ ├── index.txt │ │ ├── metrics.bleu.txt │ │ ├── metrics.rouge.txt │ │ ├── metrics.tokenizer.txt │ │ ├── metrics.txt │ │ ├── models.txt │ │ └── modules.txt │ │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── alabaster.css │ │ ├── basic.css │ │ ├── comment-bright.png │ │ ├── comment-close.png │ │ ├── comment.png │ │ ├── doctools.js │ │ ├── down-pressed.png │ │ ├── down.png │ │ ├── file.png │ │ ├── jquery-1.11.1.js │ │ ├── jquery.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ ├── underscore.js │ │ ├── up-pressed.png │ │ ├── up.png │ │ └── websupport.js │ │ ├── genindex.html │ │ ├── helpers.html │ │ ├── index.html │ │ ├── metrics.bleu.html │ │ ├── metrics.html │ │ ├── metrics.rouge.html │ │ ├── metrics.tokenizer.html │ │ ├── models.html │ │ ├── modules.html │ │ ├── objects.inv │ │ ├── py-modindex.html │ │ ├── search.html │ │ └── searchindex.js └── source │ ├── conf.py │ ├── helpers.rst │ ├── index.rst │ ├── metrics.bleu.rst │ ├── metrics.rouge.rst │ ├── metrics.rst │ ├── metrics.tokenizer.rst │ ├── models.rst │ └── modules.rst ├── evaluation_plot_script.py ├── extracter_script.py ├── helpers ├── __init__.py ├── checkpoint.py ├── data2tensor.py ├── extracter.py ├── metric.py └── plotter.py ├── metrics ├── __init__.py ├── bleu │ ├── __init__.py │ ├── bleu.py │ └── bleu_scorer.py ├── hypothesis │ ├── gen1.txt │ ├── gen2.txt │ └── gen3.txt ├── reference │ ├── ref1.txt │ ├── ref2.txt │ └── ref3.txt ├── rouge │ ├── __init__.py │ └── rouge.py ├── tester.py └── tokenizer │ ├── __init__.py │ ├── ptbtokenizer.py │ └── stanford-corenlp-3.4.1.jar ├── models ├── __init__.py ├── bidirectional.py ├── gru_bidirectional.py ├── gru_simple.py ├── gru_stacked_bidirectional.py ├── gru_stacked_simple.py ├── lstm_bidirectional.py ├── lstm_simple.py ├── lstm_stacked_bidirectional.py ├── lstm_stacked_simple.py ├── sequenceNet.py ├── simple.py ├── stacked_bidirectional.py └── 
stacked_simple.py ├── raw_data └── food_raw.txt ├── requirements.txt ├── result ├── .gitkeep ├── bidirectional │ ├── .gitkeep │ ├── gru │ │ └── .gitkeep │ └── lstm │ │ └── .gitkeep ├── simple │ ├── .gitkeep │ ├── gru │ │ └── .gitkeep │ └── lstm │ │ └── .gitkeep ├── stacked_bidirectional │ ├── .gitkeep │ ├── gru │ │ └── .gitkeep │ └── lstm │ │ └── .gitkeep └── stacked_simple │ ├── .gitkeep │ ├── gru │ └── .gitkeep │ └── lstm │ └── .gitkeep └── train_scripts ├── __init__.py ├── train_script_gru_bidirectional_attn.py ├── train_script_gru_bidirectional_no_attn.py ├── train_script_gru_simple_attn.py ├── train_script_gru_simple_no_attn.py ├── train_script_gru_stacked_bidirectional_attn.py ├── train_script_gru_stacked_bidirectional_no_attn.py ├── train_script_gru_stacked_simple_attn.py ├── train_script_gru_stacked_simple_no_attn.py ├── train_script_lstm_bidirectional_attn.py ├── train_script_lstm_bidirectional_no_attn.py ├── train_script_lstm_simple_attn.py ├── train_script_lstm_simple_no_attn.py ├── train_script_lstm_stacked_bidirectional_attention.py ├── train_script_lstm_stacked_bidirectional_no_attention.py ├── train_script_lstm_stacked_simple_attention.py └── train_script_lstm_stacked_simple_no_attention.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.csv filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # MAC_extensions 10 | .DS_Store 11 | 12 | # Raw and extracted files 13 | raw_data/foods.txt 14 | 15 | 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest
39 | *.spec
40 | 
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 | 
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *,cover
54 | .hypothesis/
55 | 
56 | # Translations
57 | *.mo
58 | *.pot
59 | 
60 | # Django stuff:
61 | *.log
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | #Ipython Notebook
70 | .ipynb_checkpoints
71 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Harshal Priyadarshi
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Summarization
2 | Uses recurrent neural networks (LSTM and GRU units) to build Seq2Seq encoder-decoder models, with and without an attention mechanism, for summarizing Amazon food reviews into abstractive tips.
3 | 
4 | ## Contents
5 | - [Encoder Decoder Model](#encoder-decoder-model)
6 | - [DataSet](#dataset)
7 | - [Installation Requirements](#installation-requirements)
8 | - [Run Instructions](#run-instructions)
9 | - [Documentation](#documentation)
10 | - [References](#references)
11 | 
12 | ## Encoder Decoder Model
13 | ![Model](/assets/encoderdecoder.png)
14 | 
15 | ## DataSet
16 | - **DataSet Information** - This dataset consists of reviews of fine foods from Amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review.
17 | 
18 | The dataset can be downloaded from [here](https://snap.stanford.edu/data/web-FineFoods.html)
19 | 
20 | A sample record looks like this:
21 | ```
22 | product/productId: B001E4KFG0
23 | review/userId: A3SGXH7AUHU8GW
24 | review/profileName: delmartian
25 | review/helpfulness: 1/1
26 | review/score: 5.0
27 | review/time: 1303862400
28 | review/summary: Good Quality Dog Food
29 | review/text: I have bought several of the Vitality canned dog food products and have
30 | found them all to be of good quality. The product looks more like a stew than a
31 | processed meat and it smells better. My Labrador is finicky and she appreciates this
32 | product better than most.
33 | ```
34 | 
35 | The input review is stored under the key `review/text`, and the target summary we wish to generate is stored under `review/summary`. For this project all other fields are ignored; only these two fields are extracted by the provided extracter script (a rough sketch of this step follows).
36 | 
37 | 
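As a rough sketch of what the extraction step does, assuming one `key: value` field per line as in the sample above (the real logic lives in `helpers/extracter.py` and `extracter_script.py` and may differ; all names here are illustrative):

```
# Hedged sketch of the review/summary extraction -- illustrative only;
# see helpers/extracter.py for the actual implementation.
import csv

def extract_pairs(raw_path, csv_path):
    review, summary = None, None
    with open(raw_path) as raw, open(csv_path, 'w') as out:
        writer = csv.writer(out)
        writer.writerow(['review', 'summary'])
        for line in raw:
            line = line.strip()
            if line.startswith('review/summary:'):
                summary = line.split(':', 1)[1].strip()
            elif line.startswith('review/text:'):
                review = line.split(':', 1)[1].strip()
            elif not line and review is not None and summary is not None:
                writer.writerow([review, summary])  # blank line ends a record
                review, summary = None, None
```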
38 | ## Installation Requirements
39 | 1) Create a bare-bones virtual environment and activate it:
40 | ```
41 | virtualenv deepsum --no-site-packages
42 | source deepsum/bin/activate
43 | ```
44 | 
45 | 2) Install the project requirements:
46 | ```
47 | pip install -r requirements.txt
48 | ```
49 | 
50 | ## Run Instructions
51 | 
52 | 1) Extract the reviews and target tips using the following command:
53 | ```
54 | python extracter_script.py raw_data/finefoods.txt extracted_data/review_summary.csv
55 | ```
56 | NOTE: Don't forget to extract the dataset and place it in the raw_data directory before running the above command.
57 | 
58 | 2) Then run the seed script, which creates the required permuted training and testing datasets and also trains and evaluates the first model:
59 | ```
60 | # Simple - No Attention
61 | python train_scripts/train_script_gru_simple_no_attn.py
62 | ```
63 | This runs the simple GRU-cell-based encoder-decoder model (without the attention mechanism).
64 | 
65 | 3) Once the above script has finished executing, run the following scripts in whatever order you like (a sketch of what the attention variants add appears after these lists).
66 | 
67 | - For models without the attention mechanism:
68 | 
69 | ```
70 | # Simple - No Attention
71 | python train_scripts/train_script_lstm_simple_no_attn.py
72 | 
73 | # Stacked Simple - No Attention
74 | python train_scripts/train_script_gru_stacked_simple_no_attn.py
75 | python train_scripts/train_script_lstm_stacked_simple_no_attention.py
76 | 
77 | # Bidirectional - No Attention
78 | python train_scripts/train_script_gru_bidirectional_no_attn.py
79 | python train_scripts/train_script_lstm_bidirectional_no_attn.py
80 | 
81 | # Stacked Bidirectional - No Attention
82 | python train_scripts/train_script_gru_stacked_bidirectional_no_attn.py
83 | python train_scripts/train_script_lstm_stacked_bidirectional_no_attention.py
84 | 
85 | ```
86 | 
87 | - For models with the attention mechanism:
88 | 
89 | ```
90 | # Simple - Attention
91 | python train_scripts/train_script_gru_simple_attn.py
92 | python train_scripts/train_script_lstm_simple_attn.py
93 | 
94 | # Stacked Simple - Attention
95 | python train_scripts/train_script_gru_stacked_simple_attn.py
96 | python train_scripts/train_script_lstm_stacked_simple_attention.py
97 | 
98 | # Bidirectional - Attention
99 | python train_scripts/train_script_gru_bidirectional_attn.py
100 | python train_scripts/train_script_lstm_bidirectional_attn.py
101 | 
102 | # Stacked Bidirectional - Attention
103 | python train_scripts/train_script_gru_stacked_bidirectional_attn.py
104 | python train_scripts/train_script_lstm_stacked_bidirectional_attention.py
105 | ```
106 | 
107 | 4) Finally, exit the virtual environment once you are done with the project. You can reactivate it later:
108 | ```
109 | deactivate
110 | ```
111 | 
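The only architectural difference between the `attn` and `no_attn` variants above is whether the decoder attends over the encoder's hidden states at each output step. Here is a minimal NumPy sketch of dot-product attention under simplifying assumptions; it is illustrative only, not the actual code in the `models/` package:

```
# Hedged sketch of dot-product attention -- NOT the models/ implementation.
# encoder_states holds one hidden state per source token;
# decoder_state is the current decoder hidden state.
import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))          # subtract max for numerical stability
    return e / e.sum()

def attention_context(encoder_states, decoder_state):
    scores = np.dot(encoder_states, decoder_state)  # (T,) similarity per token
    weights = softmax(scores)                       # attention distribution
    return np.dot(weights, encoder_states)          # (H,) context vector

enc_states = np.random.randn(5, 4)     # toy encoder output: T=5 tokens, H=4
dec_state = np.random.randn(4)
context = attention_context(enc_states, dec_state)  # fed back to the decoder
```

The no-attention variants instead condition the decoder only on the encoder's final hidden state, so the whole review must be compressed into a single fixed-size vector.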
112 | ## Documentation
113 | The documentation was generated automatically, so it can be error-prone. Please report any errors in the issue tracker. Some methods are missing documentation; this is not an error, just laziness on my part. I will add that documentation when I get some free time.
114 | 
115 | To access the documentation, just open the index.html located at
116 | ```
117 | docs/build/html/index.html
118 | ```
119 | in your favorite browser. For now the docs can only be opened locally; I will try hosting them on GitHub Pages once I get time.
120 | ## References
121 | 1) McAuley, J., and J. Leskovec. "From amateurs to connoisseurs: modeling the evolution of user expertise through online reviews." WWW, 2013.
122 | 
123 | 2) Sutskever, Ilya, Oriol Vinyals, and Quoc V. Le. "Sequence to sequence learning with neural networks." Advances in neural information processing systems. 2014.
124 | 
125 | 3) Cho, Kyunghyun, et al. "Learning phrase representations using RNN encoder-decoder for statistical machine translation." arXiv preprint arXiv:1406.1078 (2014).
126 | 
--------------------------------------------------------------------------------
/assets/encoderdecoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/assets/encoderdecoder.png
--------------------------------------------------------------------------------
/checkpoint/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/.gitkeep
--------------------------------------------------------------------------------
/checkpoint/bidirectional/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/bidirectional/.gitkeep
--------------------------------------------------------------------------------
/checkpoint/bidirectional/gru/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/bidirectional/gru/.gitkeep
--------------------------------------------------------------------------------
/checkpoint/bidirectional/gru/Attention/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/bidirectional/gru/Attention/.gitkeep
--------------------------------------------------------------------------------
/checkpoint/bidirectional/gru/noAttention/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/bidirectional/gru/noAttention/.gitkeep
--------------------------------------------------------------------------------
/checkpoint/bidirectional/lstm/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/bidirectional/lstm/.gitkeep
--------------------------------------------------------------------------------
/checkpoint/bidirectional/lstm/Attention/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/bidirectional/lstm/Attention/.gitkeep
-------------------------------------------------------------------------------- /checkpoint/bidirectional/lstm/noAttention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/bidirectional/lstm/noAttention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/data/.gitkeep -------------------------------------------------------------------------------- /checkpoint/simple/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/simple/.gitkeep -------------------------------------------------------------------------------- /checkpoint/simple/gru/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/simple/gru/.gitkeep -------------------------------------------------------------------------------- /checkpoint/simple/gru/Attention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/simple/gru/Attention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/simple/gru/noAttention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/simple/gru/noAttention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/simple/lstm/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/simple/lstm/.gitkeep -------------------------------------------------------------------------------- /checkpoint/simple/lstm/Attention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/simple/lstm/Attention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/simple/lstm/noAttention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/simple/lstm/noAttention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedBidirectional/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedBidirectional/.gitkeep -------------------------------------------------------------------------------- 
/checkpoint/stackedBidirectional/gru/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedBidirectional/gru/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedBidirectional/gru/Attention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedBidirectional/gru/Attention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedBidirectional/gru/noAttention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedBidirectional/gru/noAttention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedBidirectional/lstm/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedBidirectional/lstm/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedBidirectional/lstm/Attention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedBidirectional/lstm/Attention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedBidirectional/lstm/noAttention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedBidirectional/lstm/noAttention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedSimple/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedSimple/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedSimple/gru/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedSimple/gru/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedSimple/gru/Attention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedSimple/gru/Attention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedSimple/gru/noAttention/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedSimple/gru/noAttention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedSimple/lstm/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedSimple/lstm/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedSimple/lstm/Attention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedSimple/lstm/Attention/.gitkeep -------------------------------------------------------------------------------- /checkpoint/stackedSimple/lstm/noAttention/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/checkpoint/stackedSimple/lstm/noAttention/.gitkeep -------------------------------------------------------------------------------- /condor_logs/output_gru_attention.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/condor_logs/output_gru_attention.txt -------------------------------------------------------------------------------- /condor_logs/output_lstm_attention.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/condor_logs/output_lstm_attention.txt -------------------------------------------------------------------------------- /condor_logs/output_lstm_no_attention.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/condor_logs/output_lstm_no_attention.txt -------------------------------------------------------------------------------- /condor_rest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Simple - No Attention 3 | condorify_gpu_email python train_scripts/train_script_lstm_simple_no_attn.py condor_logs/lstm_simple_no_attn.txt 4 | 5 | # Stacked Simple - No Attention 6 | condorify_gpu_email python train_scripts/train_script_gru_stacked_simple_no_attn.py condor_logs/gru_stacked_simple_no_attn.txt 7 | condorify_gpu_email python train_scripts/train_script_lstm_stacked_simple_no_attention.py condor_logs/lstm_stacked_simple_no_attn.txt 8 | 9 | # Bidirectional - No Attention 10 | condorify_gpu_email python train_scripts/train_script_gru_bidirectional_no_attn.py condor_logs/gru_bidirectional_no_attn.txt 11 | condorify_gpu_email python train_scripts/train_script_lstm_bidirectional_no_attn.py condor_logs/lstm_bidirectional_no_attn.txt 12 | 13 | # Stacked Bidirectional - No Attention 14 | condorify_gpu_email python train_scripts/train_script_gru_stacked_bidirectional_no_attn.py condor_logs/gru_stacked_bidirectional_no_attn.txt 15 | condorify_gpu_email python train_scripts/train_script_lstm_stacked_bidirectional_no_attention.py 
condor_logs/lstm_stacked_bidirectional_no_attn.txt 16 | -------------------------------------------------------------------------------- /condor_seed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Simple - No Attention - GRU (for seeding) 4 | condorify_gpu_email python train_scripts/train_script_gru_simple_no_attn.py condor_logs/gru_simple_no_attn.txt 5 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/DeepSummarization.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/DeepSummarization.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/DeepSummarization" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/DeepSummarization" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 
149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/doctrees/helpers.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/helpers.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/metrics.bleu.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/metrics.bleu.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/metrics.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/metrics.doctree -------------------------------------------------------------------------------- 
/docs/build/doctrees/metrics.rouge.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/metrics.rouge.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/metrics.tokenizer.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/metrics.tokenizer.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/models.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/models.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/doctrees/modules.doctree -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: ac6a6d92901e21365dcd0f546e216f99 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/_sources/helpers.txt: -------------------------------------------------------------------------------- 1 | helpers package 2 | =============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | helpers.checkpoint module 8 | ------------------------- 9 | 10 | .. automodule:: helpers.checkpoint 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | helpers.data2tensor module 16 | -------------------------- 17 | 18 | .. automodule:: helpers.data2tensor 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | helpers.extracter module 24 | ------------------------ 25 | 26 | .. automodule:: helpers.extracter 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | helpers.metric module 32 | --------------------- 33 | 34 | .. automodule:: helpers.metric 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | helpers.plotter module 40 | ---------------------- 41 | 42 | .. automodule:: helpers.plotter 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: helpers 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.txt: -------------------------------------------------------------------------------- 1 | .. Deep Summarization documentation master file, created by 2 | sphinx-quickstart on Tue Apr 11 18:20:37 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Deep Summarization's documentation! 
7 | ============================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/metrics.bleu.txt: -------------------------------------------------------------------------------- 1 | metrics.bleu package 2 | ==================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | metrics.bleu.bleu module 8 | ------------------------ 9 | 10 | .. automodule:: metrics.bleu.bleu 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | metrics.bleu.bleu_scorer module 16 | ------------------------------- 17 | 18 | .. automodule:: metrics.bleu.bleu_scorer 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: metrics.bleu 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/build/html/_sources/metrics.rouge.txt: -------------------------------------------------------------------------------- 1 | metrics.rouge package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | metrics.rouge.rouge module 8 | -------------------------- 9 | 10 | .. automodule:: metrics.rouge.rouge 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: metrics.rouge 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/metrics.tokenizer.txt: -------------------------------------------------------------------------------- 1 | metrics.tokenizer package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | metrics.tokenizer.ptbtokenizer module 8 | ------------------------------------- 9 | 10 | .. automodule:: metrics.tokenizer.ptbtokenizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: metrics.tokenizer 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/metrics.txt: -------------------------------------------------------------------------------- 1 | metrics package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | metrics.bleu 10 | metrics.rouge 11 | metrics.tokenizer 12 | 13 | Submodules 14 | ---------- 15 | 16 | metrics.tester module 17 | --------------------- 18 | 19 | .. automodule:: metrics.tester 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: metrics 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /docs/build/html/_sources/models.txt: -------------------------------------------------------------------------------- 1 | models package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | models.bidirectional module 8 | --------------------------- 9 | 10 | .. 
automodule:: models.bidirectional 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | models.gru_bidirectional module 16 | ------------------------------- 17 | 18 | .. automodule:: models.gru_bidirectional 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | models.gru_simple module 24 | ------------------------ 25 | 26 | .. automodule:: models.gru_simple 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | models.gru_stacked_bidirectional module 32 | --------------------------------------- 33 | 34 | .. automodule:: models.gru_stacked_bidirectional 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | models.gru_stacked_simple module 40 | -------------------------------- 41 | 42 | .. automodule:: models.gru_stacked_simple 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | models.lstm_bidirectional module 48 | -------------------------------- 49 | 50 | .. automodule:: models.lstm_bidirectional 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | models.lstm_simple module 56 | ------------------------- 57 | 58 | .. automodule:: models.lstm_simple 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | models.lstm_stacked_bidirectional module 64 | ---------------------------------------- 65 | 66 | .. automodule:: models.lstm_stacked_bidirectional 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | models.lstm_stacked_simple module 72 | --------------------------------- 73 | 74 | .. automodule:: models.lstm_stacked_simple 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | models.sequenceNet module 80 | ------------------------- 81 | 82 | .. automodule:: models.sequenceNet 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | models.simple module 88 | -------------------- 89 | 90 | .. automodule:: models.simple 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | models.stacked_bidirectional module 96 | ----------------------------------- 97 | 98 | .. automodule:: models.stacked_bidirectional 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | models.stacked_simple module 104 | ---------------------------- 105 | 106 | .. automodule:: models.stacked_simple 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | 112 | Module contents 113 | --------------- 114 | 115 | .. automodule:: models 116 | :members: 117 | :undoc-members: 118 | :show-inheritance: 119 | -------------------------------------------------------------------------------- /docs/build/html/_sources/modules.txt: -------------------------------------------------------------------------------- 1 | .. 2 | == 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | evaluation_plot_script 8 | extracter_script 9 | helpers 10 | metrics 11 | models 12 | train_scripts 13 | -------------------------------------------------------------------------------- /docs/build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/build/html/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/comment-close.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/comment.png -------------------------------------------------------------------------------- /docs/build/html/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Sphinx JavaScript utilities for all documentation. 6 | * 7 | * :copyright: Copyright 2007-2015 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | 12 | /** 13 | * select a different prefix for underscore 14 | */ 15 | $u = _.noConflict(); 16 | 17 | /** 18 | * make the code below compatible with browsers without 19 | * an installed firebug like debugger 20 | if (!window.console || !console.firebug) { 21 | var names = ["log", "debug", "info", "warn", "error", "assert", "dir", 22 | "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace", 23 | "profile", "profileEnd"]; 24 | window.console = {}; 25 | for (var i = 0; i < names.length; ++i) 26 | window.console[names[i]] = function() {}; 27 | } 28 | */ 29 | 30 | /** 31 | * small helper function to urldecode strings 32 | */ 33 | jQuery.urldecode = function(x) { 34 | return decodeURIComponent(x).replace(/\+/g, ' '); 35 | }; 36 | 37 | /** 38 | * small helper function to urlencode strings 39 | */ 40 | jQuery.urlencode = encodeURIComponent; 41 | 42 | /** 43 | * This function returns the parsed url parameters of the 44 | * current request. Multiple values per key are supported, 45 | * it will always return arrays of strings for the value parts. 
46 | */ 47 | jQuery.getQueryParameters = function(s) { 48 | if (typeof s == 'undefined') 49 | s = document.location.search; 50 | var parts = s.substr(s.indexOf('?') + 1).split('&'); 51 | var result = {}; 52 | for (var i = 0; i < parts.length; i++) { 53 | var tmp = parts[i].split('=', 2); 54 | var key = jQuery.urldecode(tmp[0]); 55 | var value = jQuery.urldecode(tmp[1]); 56 | if (key in result) 57 | result[key].push(value); 58 | else 59 | result[key] = [value]; 60 | } 61 | return result; 62 | }; 63 | 64 | /** 65 | * highlight a given string on a jquery object by wrapping it in 66 | * span elements with the given class name. 67 | */ 68 | jQuery.fn.highlightText = function(text, className) { 69 | function highlight(node) { 70 | if (node.nodeType == 3) { 71 | var val = node.nodeValue; 72 | var pos = val.toLowerCase().indexOf(text); 73 | if (pos >= 0 && !jQuery(node.parentNode).hasClass(className)) { 74 | var span = document.createElement("span"); 75 | span.className = className; 76 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 77 | node.parentNode.insertBefore(span, node.parentNode.insertBefore( 78 | document.createTextNode(val.substr(pos + text.length)), 79 | node.nextSibling)); 80 | node.nodeValue = val.substr(0, pos); 81 | } 82 | } 83 | else if (!jQuery(node).is("button, select, textarea")) { 84 | jQuery.each(node.childNodes, function() { 85 | highlight(this); 86 | }); 87 | } 88 | } 89 | return this.each(function() { 90 | highlight(this); 91 | }); 92 | }; 93 | 94 | /* 95 | * backward compatibility for jQuery.browser 96 | * This will be supported until firefox bug is fixed. 97 | */ 98 | if (!jQuery.browser) { 99 | jQuery.uaMatch = function(ua) { 100 | ua = ua.toLowerCase(); 101 | 102 | var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || 103 | /(webkit)[ \/]([\w.]+)/.exec(ua) || 104 | /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || 105 | /(msie) ([\w.]+)/.exec(ua) || 106 | ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || 107 | []; 108 | 109 | return { 110 | browser: match[ 1 ] || "", 111 | version: match[ 2 ] || "0" 112 | }; 113 | }; 114 | jQuery.browser = {}; 115 | jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; 116 | } 117 | 118 | /** 119 | * Small JavaScript module for the documentation. 120 | */ 121 | var Documentation = { 122 | 123 | init : function() { 124 | this.fixFirefoxAnchorBug(); 125 | this.highlightSearchWords(); 126 | this.initIndexTable(); 127 | }, 128 | 129 | /** 130 | * i18n support 131 | */ 132 | TRANSLATIONS : {}, 133 | PLURAL_EXPR : function(n) { return n == 1 ? 0 : 1; }, 134 | LOCALE : 'unknown', 135 | 136 | // gettext and ngettext don't access this so that the functions 137 | // can safely bound to a different name (_ = Documentation.gettext) 138 | gettext : function(string) { 139 | var translated = Documentation.TRANSLATIONS[string]; 140 | if (typeof translated == 'undefined') 141 | return string; 142 | return (typeof translated == 'string') ? translated : translated[0]; 143 | }, 144 | 145 | ngettext : function(singular, plural, n) { 146 | var translated = Documentation.TRANSLATIONS[singular]; 147 | if (typeof translated == 'undefined') 148 | return (n == 1) ? 
singular : plural; 149 | return translated[Documentation.PLURALEXPR(n)]; 150 | }, 151 | 152 | addTranslations : function(catalog) { 153 | for (var key in catalog.messages) 154 | this.TRANSLATIONS[key] = catalog.messages[key]; 155 | this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); 156 | this.LOCALE = catalog.locale; 157 | }, 158 | 159 | /** 160 | * add context elements like header anchor links 161 | */ 162 | addContextElements : function() { 163 | $('div[id] > :header:first').each(function() { 164 | $('\u00B6'). 165 | attr('href', '#' + this.id). 166 | attr('title', _('Permalink to this headline')). 167 | appendTo(this); 168 | }); 169 | $('dt[id]').each(function() { 170 | $('\u00B6'). 171 | attr('href', '#' + this.id). 172 | attr('title', _('Permalink to this definition')). 173 | appendTo(this); 174 | }); 175 | }, 176 | 177 | /** 178 | * workaround a firefox stupidity 179 | * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075 180 | */ 181 | fixFirefoxAnchorBug : function() { 182 | if (document.location.hash) 183 | window.setTimeout(function() { 184 | document.location.href += ''; 185 | }, 10); 186 | }, 187 | 188 | /** 189 | * highlight the search words provided in the url in the text 190 | */ 191 | highlightSearchWords : function() { 192 | var params = $.getQueryParameters(); 193 | var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : []; 194 | if (terms.length) { 195 | var body = $('div.body'); 196 | if (!body.length) { 197 | body = $('body'); 198 | } 199 | window.setTimeout(function() { 200 | $.each(terms, function() { 201 | body.highlightText(this.toLowerCase(), 'highlighted'); 202 | }); 203 | }, 10); 204 | $('') 206 | .appendTo($('#searchbox')); 207 | } 208 | }, 209 | 210 | /** 211 | * init the domain index toggle buttons 212 | */ 213 | initIndexTable : function() { 214 | var togglers = $('img.toggler').click(function() { 215 | var src = $(this).attr('src'); 216 | var idnum = $(this).attr('id').substr(7); 217 | $('tr.cg-' + idnum).toggle(); 218 | if (src.substr(-9) == 'minus.png') 219 | $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); 220 | else 221 | $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); 222 | }).css('display', ''); 223 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { 224 | togglers.click(); 225 | } 226 | }, 227 | 228 | /** 229 | * helper function to hide the search marks again 230 | */ 231 | hideSearchWords : function() { 232 | $('#searchbox .highlight-link').fadeOut(300); 233 | $('span.highlighted').removeClass('highlighted'); 234 | }, 235 | 236 | /** 237 | * make the url absolute 238 | */ 239 | makeURL : function(relativeURL) { 240 | return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; 241 | }, 242 | 243 | /** 244 | * get the current relative url 245 | */ 246 | getCurrentURL : function() { 247 | var path = document.location.pathname; 248 | var parts = path.split(/\//); 249 | $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { 250 | if (this == '..') 251 | parts.pop(); 252 | }); 253 | var url = parts.join('/'); 254 | return path.substring(url.lastIndexOf('/') + 1, path.length - 1); 255 | } 256 | }; 257 | 258 | // quick alias for translations 259 | _ = Documentation.gettext; 260 | 261 | $(document).ready(function() { 262 | Documentation.init(); 263 | }); 264 | -------------------------------------------------------------------------------- /docs/build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/build/html/_static/down.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/down.png -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 8 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 9 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 10 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 11 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 12 | .highlight .ge { font-style: italic } /* Generic.Emph */ 13 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 14 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 15 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 16 | .highlight .go { color: #333333 } /* Generic.Output */ 17 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 18 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 19 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 20 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 21 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 22 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 23 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 24 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 25 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 26 | .highlight .kt { color: #902000 } /* Keyword.Type */ 27 | .highlight .m { color: #208050 } /* Literal.Number */ 28 | .highlight .s { color: #4070a0 } 
/* Literal.String */ 29 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 30 | .highlight .nb { color: #007020 } /* Name.Builtin */ 31 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 32 | .highlight .no { color: #60add5 } /* Name.Constant */ 33 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 34 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 35 | .highlight .ne { color: #007020 } /* Name.Exception */ 36 | .highlight .nf { color: #06287e } /* Name.Function */ 37 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 38 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 39 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 40 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 41 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 42 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 43 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */ 44 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 45 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 46 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 47 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 48 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 49 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 50 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 51 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 52 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 53 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 54 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 55 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 56 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 57 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 58 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 59 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 60 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 61 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 62 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 63 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
-------------------------------------------------------------------------------- /docs/build/html/_static/up-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/up-pressed.png -------------------------------------------------------------------------------- /docs/build/html/_static/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/_static/up.png -------------------------------------------------------------------------------- /docs/build/html/index.html: --------------------------------------------------------------------------------
[Sphinx build artifact: the rendered "Welcome to Deep Summarization's documentation!" landing page, with a contents toctree and the "Indices and tables" links (genindex, py-modindex, search). The HTML markup was mangled in this export, so the page body is not reproduced; its source is docs/source/index.rst below.]
-------------------------------------------------------------------------------- /docs/build/html/metrics.html: --------------------------------------------------------------------------------
[Sphinx build artifact: the rendered "metrics package" API page. Recoverable content: the metrics.tester module "Computes the BLEU, ROUGE using the COCO metrics scripts" and exposes metrics.tester.load_textfiles(references, hypothesis), metrics.tester.main(), and metrics.tester.score(ref, hypo), where ref is a dictionary of reference sentences (id, sentence), hypo is a dictionary of hypothesis sentences (id, sentence), and the return value is a dictionary of scores. The HTML markup was mangled in this export; the page's source is docs/source/metrics.rst below.]
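[Illustration — not a file from this repository: the BLEU scores that metrics.tester reports are built from clipped n-gram precisions. A minimal, self-contained sketch of the unigram (BLEU-1) building block; the repository's metrics.bleu.bleu_scorer additionally handles higher-order n-grams, multiple references, and the brevity penalty:

    from collections import Counter

    def clipped_unigram_precision(candidate, reference):
        # Count each candidate token, clip by its count in the reference,
        # and divide by the candidate length.
        cand, ref = Counter(candidate.split()), Counter(reference.split())
        clipped = sum(min(n, ref[w]) for w, n in cand.items())
        return clipped / float(sum(cand.values()))

    print(clipped_unigram_precision('great tasty food', 'great food , tasty and cheap'))  # 1.0
]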
-------------------------------------------------------------------------------- /docs/build/html/metrics.rouge.html: --------------------------------------------------------------------------------
[Sphinx build artifact: the rendered "metrics.rouge package" API page. Recoverable content: class metrics.rouge.rouge.Rouge, "Class for computing ROUGE-L score for a set of candidate sentences for the MS COCO test set", with calc_score(candidate, refs) — candidate: str, refs: list of str, returning the ROUGE-L score of the candidate evaluated against the references — compute_score(gts, res) — returning average_score, the mean ROUGE-L computed by averaging scores over all images; invoked by evaluate_captions.py — and method(); plus the helper metrics.rouge.rouge.my_lcs(string, sub), which takes two whitespace-tokenized strings (sub being the shorter) and returns only the length of their longest common subsequence, not the actual LCS. The HTML markup was mangled in this export; the page's source is docs/source/metrics.rouge.rst below.]
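[Illustration — not a file from this repository: ROUGE-L rests on the longest-common-subsequence length that my_lcs returns. A minimal, self-contained dynamic-programming equivalent over token lists:

    def lcs_length(a, b):
        # dp[i][j] = LCS length of a[:i] and b[:j]
        dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
        for i in range(1, len(a) + 1):
            for j in range(1, len(b) + 1):
                if a[i - 1] == b[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
        return dp[len(a)][len(b)]

    print(lcs_length('the food was very good'.split(), 'the food is good'.split()))  # 3

ROUGE-L then combines LCS-based precision (LCS / candidate length) and recall (LCS / reference length) into an F-measure.]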
-------------------------------------------------------------------------------- /docs/build/html/metrics.tokenizer.html: --------------------------------------------------------------------------------
[Sphinx build artifact: the rendered "metrics.tokenizer package" API page. Recoverable content: class metrics.tokenizer.ptbtokenizer.PTBTokenizer, "Python wrapper of Stanford PTBTokenizer", with tokenize(captions_for_image). The HTML markup was mangled in this export; the page's source is docs/source/metrics.tokenizer.rst below.]
-------------------------------------------------------------------------------- /docs/build/html/modules.html: --------------------------------------------------------------------------------
[Sphinx build artifact: the rendered "<no title>" module table-of-contents page linking the helpers, metrics and models pages. The HTML markup was mangled in this export; the page's source is docs/source/modules.rst below.]
-------------------------------------------------------------------------------- /docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/docs/build/html/objects.inv
-------------------------------------------------------------------------------- /docs/build/html/py-modindex.html: --------------------------------------------------------------------------------
[Sphinx build artifact: the rendered "Python Module Index" page, listing helpers (checkpoint, data2tensor, extracter, metric, plotter), metrics (bleu, bleu.bleu, bleu.bleu_scorer, rouge, rouge.rouge, tester, tokenizer, tokenizer.ptbtokenizer) and models (bidirectional, gru_bidirectional, gru_simple, gru_stacked_bidirectional, gru_stacked_simple, lstm_bidirectional, lstm_simple, lstm_stacked_bidirectional, lstm_stacked_simple, sequenceNet, simple, stacked_bidirectional, stacked_simple). The HTML markup was mangled in this export and is not reproduced.]
-------------------------------------------------------------------------------- /docs/build/html/search.html: --------------------------------------------------------------------------------
[Sphinx build artifact: the rendered "Search" page ("Please activate JavaScript to enable the search functionality"). The HTML markup was mangled in this export and is not reproduced.]
-------------------------------------------------------------------------------- /docs/build/html/searchindex.js: --------------------------------------------------------------------------------
[Sphinx build artifact: the generated full-text search index, a single minified Search.setIndex({...}) blob covering the helpers, metrics and models pages; not reproduced.]
-------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Deep Summarization documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Apr 11 18:20:37 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | import shlex 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('../../')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.intersphinx', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix(es) of source filenames. 41 | # You can specify multiple suffix as a list of string: 42 | # source_suffix = ['.rst', '.md'] 43 | source_suffix = '.rst' 44 | 45 | # The encoding of source files. 46 | #source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'Deep Summarization' 53 | copyright = u'2017, Harshal Priyadarshi' 54 | author = u'Harshal Priyadarshi' 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The short X.Y version. 61 | version = '1.0' 62 | # The full version, including alpha/beta/rc tags. 63 | release = '1.0' 64 | 65 | # The language for content autogenerated by Sphinx. Refer to documentation 66 | # for a list of supported languages. 67 | # 68 | # This is also used if you do content translation via gettext catalogs. 69 | # Usually you set "language" from the command line for these cases.
70 | language = None 71 | 72 | # There are two options for replacing |today|: either, you set today to some 73 | # non-false value, then it is used: 74 | #today = '' 75 | # Else, today_fmt is used as the format for a strftime call. 76 | #today_fmt = '%B %d, %Y' 77 | 78 | # List of patterns, relative to source directory, that match files and 79 | # directories to ignore when looking for source files. 80 | exclude_patterns = [] 81 | 82 | # The reST default role (used for this markup: `text`) to use for all 83 | # documents. 84 | #default_role = None 85 | 86 | # If true, '()' will be appended to :func: etc. cross-reference text. 87 | #add_function_parentheses = True 88 | 89 | # If true, the current module name will be prepended to all description 90 | # unit titles (such as .. function::). 91 | #add_module_names = True 92 | 93 | # If true, sectionauthor and moduleauthor directives will be shown in the 94 | # output. They are ignored by default. 95 | #show_authors = False 96 | 97 | # The name of the Pygments (syntax highlighting) style to use. 98 | pygments_style = 'sphinx' 99 | 100 | # A list of ignored prefixes for module index sorting. 101 | #modindex_common_prefix = [] 102 | 103 | # If true, keep warnings as "system message" paragraphs in the built documents. 104 | #keep_warnings = False 105 | 106 | # If true, `todo` and `todoList` produce output, else they produce nothing. 107 | todo_include_todos = False 108 | 109 | 110 | # -- Options for HTML output ---------------------------------------------- 111 | 112 | # The theme to use for HTML and HTML Help pages. See the documentation for 113 | # a list of builtin themes. 114 | html_theme = 'alabaster' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a theme 117 | # further. For a list of options available for each theme, see the 118 | # documentation. 119 | #html_theme_options = {} 120 | 121 | # Add any paths that contain custom themes here, relative to this directory. 122 | #html_theme_path = [] 123 | 124 | # The name for this set of Sphinx documents. If None, it defaults to 125 | # " v documentation". 126 | #html_title = None 127 | 128 | # A shorter title for the navigation bar. Default is the same as html_title. 129 | #html_short_title = None 130 | 131 | # The name of an image file (relative to this directory) to place at the top 132 | # of the sidebar. 133 | #html_logo = None 134 | 135 | # The name of an image file (within the static path) to use as favicon of the 136 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 137 | # pixels large. 138 | #html_favicon = None 139 | 140 | # Add any paths that contain custom static files (such as style sheets) here, 141 | # relative to this directory. They are copied after the builtin static files, 142 | # so a file named "default.css" will overwrite the builtin "default.css". 143 | html_static_path = ['_static'] 144 | 145 | # Add any extra paths that contain custom files (such as robots.txt or 146 | # .htaccess) here, relative to this directory. These files are copied 147 | # directly to the root of the documentation. 148 | #html_extra_path = [] 149 | 150 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 151 | # using the given strftime format. 152 | #html_last_updated_fmt = '%b %d, %Y' 153 | 154 | # If true, SmartyPants will be used to convert quotes and dashes to 155 | # typographically correct entities. 
156 | #html_use_smartypants = True 157 | 158 | # Custom sidebar templates, maps document names to template names. 159 | #html_sidebars = {} 160 | 161 | # Additional templates that should be rendered to pages, maps page names to 162 | # template names. 163 | #html_additional_pages = {} 164 | 165 | # If false, no module index is generated. 166 | #html_domain_indices = True 167 | 168 | # If false, no index is generated. 169 | #html_use_index = True 170 | 171 | # If true, the index is split into individual pages for each letter. 172 | #html_split_index = False 173 | 174 | # If true, links to the reST sources are added to the pages. 175 | #html_show_sourcelink = True 176 | 177 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 178 | #html_show_sphinx = True 179 | 180 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 181 | #html_show_copyright = True 182 | 183 | # If true, an OpenSearch description file will be output, and all pages will 184 | # contain a tag referring to it. The value of this option must be the 185 | # base URL from which the finished HTML is served. 186 | #html_use_opensearch = '' 187 | 188 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 189 | #html_file_suffix = None 190 | 191 | # Language to be used for generating the HTML full-text search index. 192 | # Sphinx supports the following languages: 193 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 194 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 195 | #html_search_language = 'en' 196 | 197 | # A dictionary with options for the search language support, empty by default. 198 | # Now only 'ja' uses this config value 199 | #html_search_options = {'type': 'default'} 200 | 201 | # The name of a javascript file (relative to the configuration directory) that 202 | # implements a search results scorer. If empty, the default will be used. 203 | #html_search_scorer = 'scorer.js' 204 | 205 | # Output file base name for HTML help builder. 206 | htmlhelp_basename = 'DeepSummarizationdoc' 207 | 208 | # -- Options for LaTeX output --------------------------------------------- 209 | 210 | latex_elements = { 211 | # The paper size ('letterpaper' or 'a4paper'). 212 | #'papersize': 'letterpaper', 213 | 214 | # The font size ('10pt', '11pt' or '12pt'). 215 | #'pointsize': '10pt', 216 | 217 | # Additional stuff for the LaTeX preamble. 218 | #'preamble': '', 219 | 220 | # Latex figure (float) alignment 221 | #'figure_align': 'htbp', 222 | } 223 | 224 | # Grouping the document tree into LaTeX files. List of tuples 225 | # (source start file, target name, title, 226 | # author, documentclass [howto, manual, or own class]). 227 | latex_documents = [ 228 | (master_doc, 'DeepSummarization.tex', u'Deep Summarization Documentation', 229 | u'Harshal Priyadarshi', 'manual'), 230 | ] 231 | 232 | # The name of an image file (relative to this directory) to place at the top of 233 | # the title page. 234 | #latex_logo = None 235 | 236 | # For "manual" documents, if this is true, then toplevel headings are parts, 237 | # not chapters. 238 | #latex_use_parts = False 239 | 240 | # If true, show page references after internal links. 241 | #latex_show_pagerefs = False 242 | 243 | # If true, show URL addresses after external links. 244 | #latex_show_urls = False 245 | 246 | # Documents to append as an appendix to all manuals. 247 | #latex_appendices = [] 248 | 249 | # If false, no module index is generated. 
250 | #latex_domain_indices = True 251 | 252 | 253 | # -- Options for manual page output --------------------------------------- 254 | 255 | # One entry per manual page. List of tuples 256 | # (source start file, name, description, authors, manual section). 257 | man_pages = [ 258 | (master_doc, 'deepsummarization', u'Deep Summarization Documentation', 259 | [author], 1) 260 | ] 261 | 262 | # If true, show URL addresses after external links. 263 | #man_show_urls = False 264 | 265 | 266 | # -- Options for Texinfo output ------------------------------------------- 267 | 268 | # Grouping the document tree into Texinfo files. List of tuples 269 | # (source start file, target name, title, author, 270 | # dir menu entry, description, category) 271 | texinfo_documents = [ 272 | (master_doc, 'DeepSummarization', u'Deep Summarization Documentation', 273 | author, 'DeepSummarization', 'One line description of project.', 274 | 'Miscellaneous'), 275 | ] 276 | 277 | # Documents to append as an appendix to all manuals. 278 | #texinfo_appendices = [] 279 | 280 | # If false, no module index is generated. 281 | #texinfo_domain_indices = True 282 | 283 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 284 | #texinfo_show_urls = 'footnote' 285 | 286 | # If true, do not generate a @detailmenu in the "Top" node's menu. 287 | #texinfo_no_detailmenu = False 288 | 289 | 290 | # Example configuration for intersphinx: refer to the Python standard library. 291 | intersphinx_mapping = {'https://docs.python.org/': None} 292 | -------------------------------------------------------------------------------- /docs/source/helpers.rst: -------------------------------------------------------------------------------- 1 | helpers package 2 | =============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | helpers.checkpoint module 8 | ------------------------- 9 | 10 | .. automodule:: helpers.checkpoint 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | helpers.data2tensor module 16 | -------------------------- 17 | 18 | .. automodule:: helpers.data2tensor 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | helpers.extracter module 24 | ------------------------ 25 | 26 | .. automodule:: helpers.extracter 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | helpers.metric module 32 | --------------------- 33 | 34 | .. automodule:: helpers.metric 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | helpers.plotter module 40 | ---------------------- 41 | 42 | .. automodule:: helpers.plotter 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: helpers 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Deep Summarization documentation master file, created by 2 | sphinx-quickstart on Tue Apr 11 18:20:37 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Deep Summarization's documentation! 7 | ============================================== 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | -------------------------------------------------------------------------------- /docs/source/metrics.bleu.rst: -------------------------------------------------------------------------------- 1 | metrics.bleu package 2 | ==================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | metrics.bleu.bleu module 8 | ------------------------ 9 | 10 | .. automodule:: metrics.bleu.bleu 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | metrics.bleu.bleu_scorer module 16 | ------------------------------- 17 | 18 | .. automodule:: metrics.bleu.bleu_scorer 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: metrics.bleu 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/metrics.rouge.rst: -------------------------------------------------------------------------------- 1 | metrics.rouge package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | metrics.rouge.rouge module 8 | -------------------------- 9 | 10 | .. automodule:: metrics.rouge.rouge 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: metrics.rouge 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/metrics.rst: -------------------------------------------------------------------------------- 1 | metrics package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | metrics.bleu 10 | metrics.rouge 11 | metrics.tokenizer 12 | 13 | Submodules 14 | ---------- 15 | 16 | metrics.tester module 17 | --------------------- 18 | 19 | .. automodule:: metrics.tester 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | Module contents 26 | --------------- 27 | 28 | .. automodule:: metrics 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | -------------------------------------------------------------------------------- /docs/source/metrics.tokenizer.rst: -------------------------------------------------------------------------------- 1 | metrics.tokenizer package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | metrics.tokenizer.ptbtokenizer module 8 | ------------------------------------- 9 | 10 | .. automodule:: metrics.tokenizer.ptbtokenizer 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: metrics.tokenizer 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/models.rst: -------------------------------------------------------------------------------- 1 | models package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | models.bidirectional module 8 | --------------------------- 9 | 10 | .. automodule:: models.bidirectional 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | models.gru_bidirectional module 16 | ------------------------------- 17 | 18 | .. 
automodule:: models.gru_bidirectional 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | models.gru_simple module 24 | ------------------------ 25 | 26 | .. automodule:: models.gru_simple 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | models.gru_stacked_bidirectional module 32 | --------------------------------------- 33 | 34 | .. automodule:: models.gru_stacked_bidirectional 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | models.gru_stacked_simple module 40 | -------------------------------- 41 | 42 | .. automodule:: models.gru_stacked_simple 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | models.lstm_bidirectional module 48 | -------------------------------- 49 | 50 | .. automodule:: models.lstm_bidirectional 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | models.lstm_simple module 56 | ------------------------- 57 | 58 | .. automodule:: models.lstm_simple 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | models.lstm_stacked_bidirectional module 64 | ---------------------------------------- 65 | 66 | .. automodule:: models.lstm_stacked_bidirectional 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | models.lstm_stacked_simple module 72 | --------------------------------- 73 | 74 | .. automodule:: models.lstm_stacked_simple 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | models.sequenceNet module 80 | ------------------------- 81 | 82 | .. automodule:: models.sequenceNet 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | models.simple module 88 | -------------------- 89 | 90 | .. automodule:: models.simple 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | models.stacked_bidirectional module 96 | ----------------------------------- 97 | 98 | .. automodule:: models.stacked_bidirectional 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | models.stacked_simple module 104 | ---------------------------- 105 | 106 | .. automodule:: models.stacked_simple 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | 112 | Module contents 113 | --------------- 114 | 115 | .. automodule:: models 116 | :members: 117 | :undoc-members: 118 | :show-inheritance: 119 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | .. 2 | == 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | evaluation_plot_script 8 | extracter_script 9 | helpers 10 | metrics 11 | models 12 | train_scripts 13 | -------------------------------------------------------------------------------- /evaluation_plot_script.py: -------------------------------------------------------------------------------- 1 | from helpers.plotter import Plotter 2 | from helpers.metric import Calculator 3 | import matplotlib.pyplot as plt 4 | 5 | ############## ALL GRU PLOTS ############################ 6 | result_file_1 = 'result/simple/gru/no_attention.csv' 7 | result_file_2 = 'result/bidirectional/gru/no_attention.csv' 8 | result_file_3 = 'result/stacked_simple/gru/no_attention.csv' 9 | result_file_4 = 'result/stacked_bidirectional/gru/no_attention.csv' 10 | 11 | 12 | result_file_description = ['gru_smpl', 'gru_bidr', 'gru_stack_smpl', 'gru_stack_bidr'] 13 | hypothesis_dir = 'metrics/hypothesis' 14 | reference_dir = 'metrics/reference' 15 | 16 | bleu_1 = [] 17 | bleu_2 = [] 18 | bleu_3 = [] 19 | bleu_4 = [] 20 | rouge = [] 21 | 22 | 23 | calculator = Calculator(3,hypothesis_dir,reference_dir) 24 | calculator.load_result(result_file_1) 25 | calculator.evaluate_all_ref_hyp_pairs() 26 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 27 | bleu_1.append(bleu_1_val) 28 | bleu_2.append(bleu_2_val) 29 | bleu_3.append(bleu_3_val) 30 | bleu_4.append(bleu_4_val) 31 | rouge.append(rouge_val) 32 | 33 | calculator = Calculator(3,hypothesis_dir,reference_dir) 34 | calculator.load_result(result_file_2) 35 | calculator.evaluate_all_ref_hyp_pairs() 36 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 37 | bleu_1.append(bleu_1_val) 38 | bleu_2.append(bleu_2_val) 39 | bleu_3.append(bleu_3_val) 40 | bleu_4.append(bleu_4_val) 41 | rouge.append(rouge_val) 42 | 43 | calculator = Calculator(3,hypothesis_dir,reference_dir) 44 | calculator.load_result(result_file_3) 45 | calculator.evaluate_all_ref_hyp_pairs() 46 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 47 | bleu_1.append(bleu_1_val) 48 | bleu_2.append(bleu_2_val) 49 | bleu_3.append(bleu_3_val) 50 | bleu_4.append(bleu_4_val) 51 | rouge.append(rouge_val) 52 | 53 | calculator = Calculator(3,hypothesis_dir,reference_dir) 54 | calculator.load_result(result_file_4) 55 | calculator.evaluate_all_ref_hyp_pairs() 56 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 57 | bleu_1.append(bleu_1_val) 58 | bleu_2.append(bleu_2_val) 59 | bleu_3.append(bleu_3_val) 60 | bleu_4.append(bleu_4_val) 61 | rouge.append(rouge_val) 62 | 63 | steps = calculator.get_steps() 64 | 65 | plotter = Plotter() 66 | plotter.set_metrics(bleu_1,bleu_2,bleu_3,bleu_4,rouge) 67 | plotter.set_file_description(result_file_description) 68 | plotter.set_steps(steps) 69 | plotter.plot_all_metrics() 70 | 71 | 72 | ########## ALL LSTM PLOTS #################### 73 | result_file_1 = 'result/simple/lstm/no_attention.csv' 74 | result_file_2 = 'result/bidirectional/lstm/no_attention.csv' 75 | result_file_3 = 'result/stacked_simple/lstm/no_attention.csv' 76 | result_file_4 = 'result/stacked_bidirectional/lstm/no_attention.csv' 77 | 78 | 79 | result_file_description = ['lstm_smpl','lstm_bidr','lstm_stack_smpl','lstm_stack_bidr'] 80 | hypothesis_dir = 'metrics/hypothesis' 81 | reference_dir = 'metrics/reference' 82 | 83 | bleu_1 = [] 84 | bleu_2 = [] 85 | bleu_3 = [] 86 | bleu_4 = [] 87 | rouge = [] 88 | 89 | 90 | calculator = 
Calculator(3,hypothesis_dir,reference_dir) 91 | calculator.load_result(result_file_1) 92 | calculator.evaluate_all_ref_hyp_pairs() 93 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 94 | bleu_1.append(bleu_1_val) 95 | bleu_2.append(bleu_2_val) 96 | bleu_3.append(bleu_3_val) 97 | bleu_4.append(bleu_4_val) 98 | rouge.append(rouge_val) 99 | 100 | calculator = Calculator(3,hypothesis_dir,reference_dir) 101 | calculator.load_result(result_file_2) 102 | calculator.evaluate_all_ref_hyp_pairs() 103 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 104 | bleu_1.append(bleu_1_val) 105 | bleu_2.append(bleu_2_val) 106 | bleu_3.append(bleu_3_val) 107 | bleu_4.append(bleu_4_val) 108 | rouge.append(rouge_val) 109 | 110 | calculator = Calculator(3,hypothesis_dir,reference_dir) 111 | calculator.load_result(result_file_3) 112 | calculator.evaluate_all_ref_hyp_pairs() 113 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 114 | bleu_1.append(bleu_1_val) 115 | bleu_2.append(bleu_2_val) 116 | bleu_3.append(bleu_3_val) 117 | bleu_4.append(bleu_4_val) 118 | rouge.append(rouge_val) 119 | 120 | calculator = Calculator(3,hypothesis_dir,reference_dir) 121 | calculator.load_result(result_file_4) 122 | calculator.evaluate_all_ref_hyp_pairs() 123 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 124 | bleu_1.append(bleu_1_val) 125 | bleu_2.append(bleu_2_val) 126 | bleu_3.append(bleu_3_val) 127 | bleu_4.append(bleu_4_val) 128 | rouge.append(rouge_val) 129 | 130 | steps = calculator.get_steps() 131 | 132 | plotter = Plotter() 133 | plotter.set_metrics(bleu_1,bleu_2,bleu_3,bleu_4,rouge) 134 | plotter.set_file_description(result_file_description) 135 | plotter.set_steps(steps) 136 | plotter.plot_all_metrics() 137 | 138 | #### GRU and LSTM Comparison plots ##### 139 | 140 | ## SIMPLE 141 | result_file_1 = 'result/simple/gru/no_attention.csv' 142 | result_file_2 = 'result/simple/lstm/no_attention.csv' 143 | 144 | result_file_description = ['gru_simple','lstm_simple'] 145 | 146 | bleu_1 = [] 147 | bleu_2 = [] 148 | bleu_3 = [] 149 | bleu_4 = [] 150 | rouge = [] 151 | 152 | 153 | calculator = Calculator(3,hypothesis_dir,reference_dir) 154 | calculator.load_result(result_file_1) 155 | calculator.evaluate_all_ref_hyp_pairs() 156 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 157 | bleu_1.append(bleu_1_val) 158 | bleu_2.append(bleu_2_val) 159 | bleu_3.append(bleu_3_val) 160 | bleu_4.append(bleu_4_val) 161 | rouge.append(rouge_val) 162 | 163 | calculator = Calculator(3,hypothesis_dir,reference_dir) 164 | calculator.load_result(result_file_2) 165 | calculator.evaluate_all_ref_hyp_pairs() 166 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 167 | bleu_1.append(bleu_1_val) 168 | bleu_2.append(bleu_2_val) 169 | bleu_3.append(bleu_3_val) 170 | bleu_4.append(bleu_4_val) 171 | rouge.append(rouge_val) 172 | 173 | steps = calculator.get_steps() 174 | 175 | plotter = Plotter() 176 | plotter.set_metrics(bleu_1,bleu_2,bleu_3,bleu_4,rouge) 177 | plotter.set_file_description(result_file_description) 178 | plotter.set_steps(steps) 179 | plotter.plot_all_metrics() 180 | 181 | ## BIDIRECTIONAL 182 | result_file_1 = 'result/bidirectional/gru/no_attention.csv' 183 | result_file_2 = 'result/bidirectional/lstm/no_attention.csv' 184 | 185 | result_file_description = ['gru_bidir','lstm_bidir'] 186 | 187 | bleu_1 = [] 188 | bleu_2 
= [] 189 | bleu_3 = [] 190 | bleu_4 = [] 191 | rouge = [] 192 | 193 | 194 | calculator = Calculator(3,hypothesis_dir,reference_dir) 195 | calculator.load_result(result_file_1) 196 | calculator.evaluate_all_ref_hyp_pairs() 197 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 198 | bleu_1.append(bleu_1_val) 199 | bleu_2.append(bleu_2_val) 200 | bleu_3.append(bleu_3_val) 201 | bleu_4.append(bleu_4_val) 202 | rouge.append(rouge_val) 203 | 204 | calculator = Calculator(3,hypothesis_dir,reference_dir) 205 | calculator.load_result(result_file_2) 206 | calculator.evaluate_all_ref_hyp_pairs() 207 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 208 | bleu_1.append(bleu_1_val) 209 | bleu_2.append(bleu_2_val) 210 | bleu_3.append(bleu_3_val) 211 | bleu_4.append(bleu_4_val) 212 | rouge.append(rouge_val) 213 | 214 | steps = calculator.get_steps() 215 | 216 | plotter = Plotter() 217 | plotter.set_metrics(bleu_1,bleu_2,bleu_3,bleu_4,rouge) 218 | plotter.set_file_description(result_file_description) 219 | plotter.set_steps(steps) 220 | plotter.plot_all_metrics() 221 | 222 | ## STACKED_SIMPLE 223 | result_file_1 = 'result/stacked_simple/gru/no_attention.csv' 224 | result_file_2 = 'result/stacked_simple/lstm/no_attention.csv' 225 | 226 | result_file_description = ['gru_stacked','lstm_stacked'] 227 | 228 | bleu_1 = [] 229 | bleu_2 = [] 230 | bleu_3 = [] 231 | bleu_4 = [] 232 | rouge = [] 233 | 234 | 235 | calculator = Calculator(3,hypothesis_dir,reference_dir) 236 | calculator.load_result(result_file_1) 237 | calculator.evaluate_all_ref_hyp_pairs() 238 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 239 | bleu_1.append(bleu_1_val) 240 | bleu_2.append(bleu_2_val) 241 | bleu_3.append(bleu_3_val) 242 | bleu_4.append(bleu_4_val) 243 | rouge.append(rouge_val) 244 | 245 | calculator = Calculator(3,hypothesis_dir,reference_dir) 246 | calculator.load_result(result_file_2) 247 | calculator.evaluate_all_ref_hyp_pairs() 248 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 249 | bleu_1.append(bleu_1_val) 250 | bleu_2.append(bleu_2_val) 251 | bleu_3.append(bleu_3_val) 252 | bleu_4.append(bleu_4_val) 253 | rouge.append(rouge_val) 254 | 255 | steps = calculator.get_steps() 256 | 257 | plotter = Plotter() 258 | plotter.set_metrics(bleu_1,bleu_2,bleu_3,bleu_4,rouge) 259 | plotter.set_file_description(result_file_description) 260 | plotter.set_steps(steps) 261 | plotter.plot_all_metrics() 262 | 263 | ## STACKED BIDIRECTIONAL 264 | result_file_1 = 'result/stacked_bidirectional/gru/no_attention.csv' 265 | result_file_2 = 'result/stacked_bidirectional/lstm/no_attention.csv' 266 | 267 | result_file_description = ['gru_stack_bidir','lstm_stack_bidir'] 268 | 269 | bleu_1 = [] 270 | bleu_2 = [] 271 | bleu_3 = [] 272 | bleu_4 = [] 273 | rouge = [] 274 | 275 | 276 | calculator = Calculator(3,hypothesis_dir,reference_dir) 277 | calculator.load_result(result_file_1) 278 | calculator.evaluate_all_ref_hyp_pairs() 279 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = calculator.get_all_metrics() 280 | bleu_1.append(bleu_1_val) 281 | bleu_2.append(bleu_2_val) 282 | bleu_3.append(bleu_3_val) 283 | bleu_4.append(bleu_4_val) 284 | rouge.append(rouge_val) 285 | 286 | calculator = Calculator(3,hypothesis_dir,reference_dir) 287 | calculator.load_result(result_file_2) 288 | calculator.evaluate_all_ref_hyp_pairs() 289 | bleu_1_val,bleu_2_val,bleu_3_val,bleu_4_val,rouge_val = 
calculator.get_all_metrics() 290 | bleu_1.append(bleu_1_val) 291 | bleu_2.append(bleu_2_val) 292 | bleu_3.append(bleu_3_val) 293 | bleu_4.append(bleu_4_val) 294 | rouge.append(rouge_val) 295 | 296 | steps = calculator.get_steps() 297 | 298 | plotter = Plotter() 299 | plotter.set_metrics(bleu_1,bleu_2,bleu_3,bleu_4,rouge) 300 | plotter.set_file_description(result_file_description) 301 | plotter.set_steps(steps) 302 | plotter.plot_all_metrics() 303 | 304 | # SHOW ALL PLOTS 305 | plt.show() 306 | -------------------------------------------------------------------------------- /extracter_script.py: -------------------------------------------------------------------------------- 1 | from helpers.extracter import Spider 2 | import sys 3 | 4 | # Download the data from https://snap.stanford.edu/data/web-FineFoods.html and save it as raw_data/food_raw.txt. 5 | # The provided food_raw.txt is a placeholder. Also make sure that the extracted_data directory exists. 6 | # python extracter_script.py raw_data/food_raw.txt extracted_data/review_summary.csv 7 | args = sys.argv 8 | inputfile = args[1] 9 | outputfile = args[2] 10 | 11 | num_reviews = 200000 12 | spider = Spider(num_reviews) 13 | spider.crawl_for_reviews_and_summary(inputfile) 14 | spider.save_review_summary_frame(outputfile) 15 | -------------------------------------------------------------------------------- /helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/helpers/__init__.py -------------------------------------------------------------------------------- /helpers/checkpoint.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import cPickle as pickle 3 | import os 4 | 5 | 6 | class Checkpointer: 7 | def __init__(self, model_nm, cell_nm, attention_type): 8 | """ 9 | Tracks, saves and restores checkpoints for one model/cell/attention combination. 10 | :param model_nm: model name, e.g. 'simple' or 'bidirectional' 11 | :param cell_nm: RNN cell name, 'gru' or 'lstm' 12 | :param attention_type: 'Attention' or 'noAttention' 13 | """ 14 | self.model_nm = model_nm 15 | self.cell_nm = cell_nm 16 | self.attention_type = attention_type 17 | self.last_ckpt = None 18 | self.last_id = 0 19 | self.step_save_location = 'steps.p' 20 | self.data_save_location = 'data' 21 | self.mapper_save_location = 'mapper.p' 22 | self.steps_per_ckpt = None 23 | self.num_steps_per_prediction = None 24 | self.present_checkpoints = None 25 | self.outfile = None 26 | # initialize the steps if not initialized 27 | if self.step_save_location not in os.listdir(self.get_checkpoint_location()): 28 | pickle.dump(0,open(self.get_step_file(), 'wb')) 29 | 30 | def steps_per_checkpoint(self, num_steps): 31 | """ 32 | Set the number of training steps between successive checkpoints. 33 | :param num_steps: steps between checkpoints 34 | :return: None 35 | """ 36 | self.steps_per_ckpt = num_steps 37 | 38 | def get_checkpoint_steps(self): 39 | """ 40 | Number of training steps between successive checkpoints. 41 | :return: the configured steps per checkpoint 42 | """ 43 | return self.steps_per_ckpt 44 | 45 | def steps_per_prediction(self, num_steps): 46 | """ 47 | Set the number of training steps between successive test predictions. 48 | :param num_steps: steps between predictions 49 | :return: None 50 | """ 51 | self.num_steps_per_prediction = num_steps 52 | 53 | def get_prediction_checkpoint_steps(self): 54 | """ 55 | Number of training steps between successive test predictions. 56 | :return: the configured steps per prediction 57 | """ 58 | return self.num_steps_per_prediction 59 | 60 | def get_checkpoint_location(self): 61 | """ 62 | Checkpoint directory for this model/cell/attention combination. 63 | :return: e.g. 'checkpoint/simple/gru/Attention' 64 | """ 65 | return 'checkpoint/' + self.model_nm + '/' + self.cell_nm + '/' + self.attention_type 66 | 67 | def get_last_checkpoint(self): 68 | """ 69 | Assumes that the last checkpoint has a higher checkpoint id. 
Checkpoint will be saved in this exact format 70 | model_<id>.ckpt, e.g. model_100.ckpt 71 | 72 | :return: path of the latest checkpoint file, or None if no checkpoint exists 73 | """ 74 | ''' 75 | 76 | ''' 77 | self.present_checkpoints = glob.glob(self.get_checkpoint_location() + '/*.ckpt') 78 | if len(self.present_checkpoints) != 0: 79 | present_ids = [self.__get_id(ckpt) for ckpt in self.present_checkpoints] 80 | # sort the ID's and return the model for the last ID 81 | present_ids.sort() 82 | self.last_id = present_ids[-1] 83 | self.last_ckpt = self.get_checkpoint_location() + '/model_' +\ 84 | str(self.last_id) + '.ckpt' 85 | 86 | return self.last_ckpt 87 | 88 | def __get_id(self, ckpt_file): 89 | """ 90 | Extracts the integer checkpoint id from a model_<id>.ckpt path. 91 | :param ckpt_file: checkpoint file path 92 | :return: the integer checkpoint id 93 | """ 94 | return int(ckpt_file.split('.')[0].split('_')[1]) 95 | 96 | def delete_previous_checkpoints(self, num_previous=5): 97 | """ 98 | Deletes all previous checkpoints that are before the present checkpoint. 99 | This is done to prevent blowing out of memory due to too many checkpoints 100 | 101 | :param num_previous: number of most recent checkpoints to keep 102 | :return: None 103 | """ 104 | self.present_checkpoints = glob.glob(self.get_checkpoint_location() + '/*.ckpt') 105 | if len(self.present_checkpoints) > num_previous: 106 | present_ids = [self.__get_id(ckpt) for ckpt in self.present_checkpoints] 107 | present_ids.sort() 108 | ids_2_delete = present_ids[0:len(present_ids) - num_previous] 109 | for ckpt_id in ids_2_delete: 110 | ckpt_file_nm = self.get_checkpoint_location() + '/model_' + str(ckpt_id) + '.ckpt' 111 | os.remove(ckpt_file_nm) 112 | 113 | def get_save_address(self): 114 | """ 115 | Builds the path under which the next checkpoint should be saved. 116 | :return: checkpoint path carrying the next checkpoint id 117 | """ 118 | _ = self.get_last_checkpoint() 119 | next_id = self.last_id + 1 120 | return self.get_checkpoint_location() + '/model_' + str(next_id) + '.ckpt' 121 | 122 | def is_checkpointed(self): 123 | """ 124 | Checks whether at least one checkpoint has been saved. 125 | :return: True if a checkpoint exists 126 | """ 127 | return self.last_id > 0 128 | 129 | def get_data_file_location(self): 130 | """ 131 | Location of the checkpointed data directory. 132 | :return: 'checkpoint/data' 133 | """ 134 | return 'checkpoint/' + self.data_save_location 135 | 136 | def get_mapper_file_location(self): 137 | """ 138 | Location of the pickled vocabulary mapper. 139 | :return: 'checkpoint/data/mapper.p' 140 | """ 141 | return 'checkpoint/' + self.data_save_location + '/' + self.mapper_save_location 142 | 143 | def get_mapper_folder_location(self): 144 | """ 145 | Folder that holds the pickled vocabulary mapper. 146 | :return: 'checkpoint/data' 147 | """ 148 | return 'checkpoint/' + self.data_save_location 149 | 150 | def get_step_file(self): 151 | """ 152 | Location of the pickled step counter for this model. 153 | :return: path of the steps.p file 154 | """ 155 | return self.get_checkpoint_location() + '/' + self.step_save_location 156 | 157 | def is_mapper_checkpointed(self): 158 | """ 159 | Checks whether the vocabulary mapper has already been pickled. 160 | :return: True if mapper.p exists 161 | """ 162 | if self.mapper_save_location in os.listdir(self.get_mapper_folder_location()): 163 | return True 164 | else: 165 | return False 166 | 167 | def is_output_file_present(self): 168 | """ 169 | Checks whether the result file for this model already exists. 170 | :return: True if the output file is present 171 | """ 172 | out_loc = self.outfile.split('/') 173 | file_nm = out_loc[3] 174 | dir_nm = out_loc[0] + '/' + out_loc[1] + '/' + out_loc[2] + '/' 175 | 176 | return file_nm in os.listdir(dir_nm) 177 | 178 | def set_result_location(self, outfile): 179 | """ 180 | Sets the csv file into which test predictions are written. 181 | :param outfile: result file path 182 | :return: None 183 | """ 184 | self.outfile = outfile 185 | 186 | def get_result_location(self): 187 | """ 188 | Configured result file. 189 | :return: the result file path 190 | """ 191 | return self.outfile 192 | -------------------------------------------------------------------------------- /helpers/data2tensor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from nltk.tokenize import wordpunct_tokenize 4 | 5 | 6 | class Mapper: 7 | def __init__(self): 8 | 
""" 9 | 10 | """ 11 | self.map = dict() 12 | self.map["GO"] = 0 13 | self.revmap = dict() 14 | self.revmap[0] = "GO" 15 | self.counter = 1 16 | self.review_max_words = 100 17 | self.summary_max_words = 100 18 | self.rev_sum_pair = None 19 | self.review_tensor = None 20 | self.summary_tensor = None 21 | self.review_tensor_reverse = None 22 | 23 | def generate_vocabulary(self, review_summary_file): 24 | """ 25 | 26 | :param review_summary_file: 27 | :return: 28 | """ 29 | self.rev_sum_pair = pd.read_csv(review_summary_file, header=0).values 30 | 31 | for review,summary in self.rev_sum_pair: 32 | rev_lst = wordpunct_tokenize(review) 33 | sum_lst = wordpunct_tokenize(summary) 34 | self.__add_list_to_dict(rev_lst) 35 | self.__add_list_to_dict(sum_lst) 36 | 37 | # Now store the "" empty string as the last word of the voacabulary 38 | self.map[""] = len(self.map) 39 | self.revmap[len(self.map)] = "" 40 | 41 | def __add_list_to_dict(self, word_lst): 42 | """ 43 | 44 | :param word_lst: 45 | :return: 46 | """ 47 | for word in word_lst: 48 | word = word.lower() 49 | if word not in self.map: 50 | self.map[word] = self.counter 51 | self.revmap[self.counter] = word 52 | self.counter += 1 53 | 54 | def get_tensor(self, reverseflag=False): 55 | """ 56 | 57 | :param reverseflag: 58 | :return: 59 | """ 60 | self.review_tensor = self.__generate_tensor(is_review=True) 61 | if reverseflag: 62 | self.review_tensor_reverse = self.__generate_tensor(is_review=True, reverse=True) 63 | 64 | self.summary_tensor = self.__generate_tensor(is_review=False) 65 | 66 | if reverseflag: 67 | return self.review_tensor,self.review_tensor_reverse,self.summary_tensor 68 | else: 69 | return self.review_tensor, self.summary_tensor 70 | 71 | def __generate_tensor(self, is_review, reverse=False): 72 | """ 73 | 74 | :param is_review: 75 | :param reverse: 76 | :return: 77 | """ 78 | seq_length = self.review_max_words if is_review else self.summary_max_words 79 | total_rev_summary_pairs = self.rev_sum_pair.shape[0] 80 | data_tensor = np.zeros([total_rev_summary_pairs,seq_length]) 81 | 82 | sample = self.rev_sum_pair[0::, 0] if is_review else self.rev_sum_pair[0::, 1] 83 | 84 | for index, entry in enumerate(sample.tolist()): 85 | index_lst = np.array([self.map[word.lower()] for word in wordpunct_tokenize(entry)]) 86 | # reverse if want to get backward form 87 | if reverse: 88 | index_lst = index_lst[::-1] 89 | # Pad the list 90 | if len(index_lst) <= seq_length: 91 | index_lst = np.lib.pad(index_lst, (0,seq_length - index_lst.size), 'constant', constant_values=(0, 0)) 92 | else: 93 | index_lst = index_lst[0:seq_length] 94 | 95 | data_tensor[index] = index_lst 96 | 97 | return data_tensor 98 | 99 | def get_seq_length(self): 100 | """ 101 | 102 | :return: 103 | """ 104 | return self.review_max_words 105 | 106 | def get_vocabulary_size(self): 107 | """ 108 | 109 | :return: 110 | """ 111 | return len(self.map) 112 | 113 | def get_reverse_map(self): 114 | """ 115 | 116 | :return: 117 | """ 118 | return self.revmap 119 | -------------------------------------------------------------------------------- /helpers/extracter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | class Spider: 6 | def __init__(self,num_reviews): 7 | """ 8 | Simple Spider to crawl the JSON script dataset and load reviews and summary 9 | 10 | :param num_reviews: Number of (review, summary) samples to be extracted 11 | """ 12 | self.num_reviews = num_reviews 13 | self.raw_data_file 
= None 14 | self.df = None 15 | 16 | def crawl_for_reviews_and_summary(self, input_file): 17 | """ 18 | Crawl the input dataset 19 | 20 | :param input_file: The location of the file containing the txt file dataset 21 | :return: None 22 | """ 23 | self.raw_data_file = input_file 24 | self.df = pd.DataFrame() 25 | self.df['Review'] = self.__crawl_review() 26 | self.df['Summary'] = self.__crawl_summary() 27 | 28 | def __crawl_review(self): 29 | """ 30 | Crawl review 31 | 32 | :return: review [numpy array] 33 | """ 34 | review_list = [] 35 | print 'Crawling Reviews....' 36 | num_lines = 0 37 | with open(self.raw_data_file) as infile: 38 | for line in infile: 39 | if line.startswith('review/text'): 40 | if num_lines >= self.num_reviews: 41 | break 42 | num_lines += 1 43 | _,review = line.split('/text: ') 44 | review_list.append(review) 45 | 46 | return np.array(review_list) 47 | 48 | def __crawl_summary(self): 49 | """ 50 | Crawl summary 51 | 52 | :return: summary [numpy array] 53 | """ 54 | summary_list = [] 55 | print 'Crawling Summary....' 56 | num_lines = 0 57 | with open(self.raw_data_file) as infile: 58 | for line in infile: 59 | if line.startswith('review/summary'): 60 | if num_lines >= self.num_reviews: 61 | break 62 | num_lines += 1 63 | _,summary = line.split('/summary: ') 64 | summary_list.append(summary) 65 | 66 | return np.array(summary_list) 67 | 68 | def save_review_summary_frame(self, output_file): 69 | """ 70 | save (review, summary) pair in CSV file 71 | 72 | :param output_file: The location where CSV file is to be saved. 73 | :return: None 74 | """ 75 | self.df.to_csv(output_file, index=False) 76 | -------------------------------------------------------------------------------- /helpers/metric.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from metrics import tester 3 | 4 | 5 | class Calculator: 6 | def __init__(self,steps_per_prediction,hypothesis_store_loc, reference_store_loc): 7 | """ 8 | 9 | :param steps_per_prediction: 10 | :param hypothesis_store_loc: 11 | :param reference_store_loc: 12 | """ 13 | self.steps_per_prediction = steps_per_prediction 14 | self.hypothesis_store_loc = hypothesis_store_loc 15 | self.reference_store_loc = reference_store_loc 16 | self.result = None 17 | self.steps = None 18 | 19 | def load_result(self,result_file): 20 | """ 21 | 22 | :param result_file: 23 | :return: 24 | """ 25 | self.result = pd.read_csv(result_file, header=0) 26 | self.__scrape_reference() 27 | self.__scrape_all_hypotheses() 28 | 29 | def __scrape_reference(self): 30 | """ 31 | 32 | :return: 33 | """ 34 | self.reference = self.result['true_summary'].values 35 | 36 | def __scrape_all_hypotheses(self): 37 | """ 38 | 39 | :return: 40 | """ 41 | # Drop review and true summary 42 | self.hypotheses = self.result.drop(self.result.columns[[0, 1]], axis=1) 43 | self.num_hypothesis = self.hypotheses.shape[1] 44 | self.hypotheses = self.hypotheses.values 45 | 46 | def evaluate_all_ref_hyp_pairs(self): 47 | """ 48 | 49 | :return: 50 | """ 51 | self.bleu_1 = [] 52 | self.bleu_2 = [] 53 | self.bleu_3 = [] 54 | self.bleu_4 = [] 55 | self.rouge = [] 56 | self.steps = range(0, 57 | self.num_hypothesis * self.steps_per_prediction, 58 | self.steps_per_prediction) 59 | 60 | for hypothesis in self.hypotheses.T: 61 | bleu_1,bleu_2, bleu_3, bleu_4, rouge = self.__evaluate_one_ref_hypothesis_pair(self.reference,hypothesis) 62 | self.bleu_1.append(bleu_1) 63 | self.bleu_2.append(bleu_2) 64 | self.bleu_3.append(bleu_3) 65 | 
self.bleu_4.append(bleu_4)
66 | self.rouge.append(rouge)
67 | 
68 | def __evaluate_one_ref_hypothesis_pair(self, refs, hyps):
69 | """
70 | Write each (reference, hypothesis) pair to its files and score the pairs.
71 | :param refs: reference (true) summaries
72 | :param hyps: hypothesis (generated) summaries for one prediction checkpoint
73 | :return: the (bleu_1, bleu_2, bleu_3, bleu_4, rouge) averages from tester.main()
74 | """
75 | # Dump the data into the corresponding files
76 | for index, pair in enumerate(zip(refs, hyps)):
77 | file_ref_nm = self.reference_store_loc + '/ref' + str(index) + '.txt'
78 | file_hyp_nm = self.hypothesis_store_loc + '/gen' + str(index) + '.txt'
79 | with open(file_ref_nm, 'w') as ref_file, open(file_hyp_nm, 'w') as hyp_file:
80 | ref_file.write(str(pair[0]))
81 | # empty predictions come back from pandas as NaN, so compare the string form
82 | if str(pair[1]) != 'nan':
83 | hyp_file.write(str(pair[1]))
84 | else:
85 | hyp_file.write('')
86 | # Call the tester function to get the evaluations
87 | return tester.main()
88 | 
89 | def get_all_metrics(self):
90 | """
91 | 
92 | :return: the bleu_1, bleu_2, bleu_3, bleu_4 and rouge metric lists
93 | """
94 | return self.bleu_1, self.bleu_2, self.bleu_3, self.bleu_4, self.rouge
95 | 
96 | def get_steps(self):
97 | """
98 | 
99 | :return: the training-step positions corresponding to each prediction
100 | """
101 | return self.steps
102 | 
-------------------------------------------------------------------------------- /helpers/plotter.py: --------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | 
3 | class Plotter:
4 | def __init__(self):
5 | """
6 | Plots BLEU and ROUGE curves for several models against training steps.
7 | """
8 | self.steps = None
9 | self.file_desc = None
10 | self.bleu_1 = self.bleu_2 = self.bleu_3 = self.bleu_4 = self.rouge = None
11 | 
12 | def set_metrics(self, bleu_1, bleu_2, bleu_3, bleu_4, rouge):
13 | """
14 | 
15 | :param bleu_1:
16 | :param bleu_2:
17 | :param bleu_3:
18 | :param bleu_4:
19 | :param rouge:
20 | :return: None
21 | """
22 | self.bleu_1 = bleu_1
23 | self.bleu_2 = bleu_2
24 | self.bleu_3 = bleu_3
25 | self.bleu_4 = bleu_4
26 | self.rouge = rouge
27 | 
28 | def set_steps(self, steps):
29 | """
30 | 
31 | :param steps:
32 | :return: None
33 | """
34 | self.steps = steps
35 | 
36 | def set_file_description(self, file_desc):
37 | """
38 | 
39 | :param file_desc: per-model labels used in the plot legends
40 | :return: None
41 | """
42 | self.file_desc = file_desc
43 | 
44 | def plot_all_metrics(self):
45 | """
46 | 
47 | :return: None
48 | """
49 | plt.figure()
50 | self.plot_one_metric(self.bleu_1, 'BLEU Score - 1-Gram')
51 | plt.figure()
52 | self.plot_one_metric(self.bleu_2, 'BLEU Score - 2-Gram')
53 | plt.figure()
54 | self.plot_one_metric(self.bleu_3, 'BLEU Score - 3-Gram')
55 | plt.figure()
56 | self.plot_one_metric(self.bleu_4, 'BLEU Score - 4-Gram')
57 | plt.figure()
58 | self.plot_one_metric(self.rouge, 'ROUGE Score')
59 | 
60 | def plot_one_metric(self, models_metric, title):
61 | """
62 | 
63 | :param models_metric: one metric curve per model
64 | :param title: plot title
65 | :return: None
66 | """
67 | for index, model_metric in enumerate(models_metric):
68 | plt.plot(self.steps, model_metric, label=self.file_desc[index])
69 | plt.title(title)
70 | plt.legend()
71 | plt.xlabel('Number of batches')
72 | plt.ylabel('Score')
73 | 
74 | def show_plots(self):
75 | """
76 | 
77 | :return: None
78 | """
79 | plt.show()
80 | 
-------------------------------------------------------------------------------- /metrics/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/metrics/__init__.py
-------------------------------------------------------------------------------- /metrics/bleu/__init__.py: --------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 
-------------------------------------------------------------------------------- /metrics/bleu/bleu.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # File Name : bleu.py 4 | # 5 | # Description : Wrapper for BLEU scorer. 6 | # 7 | # Creation Date : 06-01-2015 8 | # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT 9 | # Authors : Hao Fang and Tsung-Yi Lin 10 | 11 | from bleu_scorer import BleuScorer 12 | 13 | 14 | class Bleu: 15 | def __init__(self, n=4): 16 | """ 17 | 18 | :param n: 19 | """ 20 | # default compute Blue score up to 4 - grams 21 | self._n = n 22 | self._hypo_for_image = {} 23 | self.ref_for_image = {} 24 | 25 | def compute_score(self, gts, res): 26 | """ 27 | 28 | :param gts: 29 | :param res: 30 | :return: 31 | """ 32 | assert(gts.keys() == res.keys()) 33 | imgIds = gts.keys() 34 | 35 | bleu_scorer = BleuScorer(n=self._n) 36 | for id in imgIds: 37 | hypo = res[id] 38 | ref = gts[id] 39 | 40 | # Sanity check. 41 | assert(type(hypo) is list) 42 | assert(len(hypo) == 1) 43 | assert(type(ref) is list) 44 | assert(len(ref) > 0) 45 | 46 | bleu_scorer += (hypo[0], ref) 47 | 48 | score, scores = bleu_scorer.compute_score(option='closest', verbose=1) 49 | 50 | return score, scores 51 | 52 | def method(self): 53 | """ 54 | 55 | :return: 56 | """ 57 | return "Bleu" 58 | -------------------------------------------------------------------------------- /metrics/bleu/bleu_scorer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # bleu_scorer.py 4 | # David Chiang 5 | 6 | # Copyright (c) 2004-2006 University of Maryland. All rights 7 | # reserved. Do not redistribute without permission from the 8 | # author. Not for commercial use. 9 | 10 | # Modified by: 11 | # Hao Fang 12 | # Tsung-Yi Lin 13 | 14 | '''Provides: 15 | cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test(). 16 | cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked(). 17 | ''' 18 | 19 | import copy 20 | import math 21 | from collections import defaultdict 22 | 23 | 24 | def precook(s, n=4, out=False): 25 | """ 26 | Takes a string as input and returns an object that can be given to 27 | either cook_refs or cook_test. This is optional: cook_refs and cook_test 28 | can take string arguments as well. 29 | 30 | :param s: 31 | :param n: 32 | :param out: 33 | :return: 34 | """ 35 | words = s.split() 36 | counts = defaultdict(int) 37 | for k in xrange(1,n+1): 38 | for i in xrange(len(words)-k+1): 39 | ngram = tuple(words[i:i+k]) 40 | counts[ngram] += 1 41 | return (len(words), counts) 42 | 43 | 44 | def cook_refs(refs, eff=None, n=4): 45 | """ 46 | Takes a list of reference sentences for a single segment 47 | and returns an object that encapsulates everything that BLEU 48 | needs to know about them. 49 | 50 | :param refs: 51 | :param eff: 52 | :param n: 53 | :return: 54 | """ 55 | reflen = [] 56 | maxcounts = {} 57 | for ref in refs: 58 | rl, counts = precook(ref, n) 59 | reflen.append(rl) 60 | for (ngram,count) in counts.iteritems(): 61 | maxcounts[ngram] = max(maxcounts.get(ngram,0), count) 62 | 63 | # Calculate effective reference sentence length. 
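# Note on the eff options: "shortest" collapses reflen to the minimum reference
# length and "average" to the mean, while the default (None) keeps the full list
# so that cook_test below can pick the "closest" reference length per hypothesis.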
64 | if eff == "shortest": 65 | reflen = min(reflen) 66 | elif eff == "average": 67 | reflen = float(sum(reflen))/len(reflen) 68 | 69 | return (reflen, maxcounts) 70 | 71 | 72 | def cook_test(test, (reflen, refmaxcounts), eff=None, n=4): 73 | """ 74 | Takes a test sentence and returns an object that 75 | encapsulates everything that BLEU needs to know about it. 76 | 77 | :param test: 78 | :param eff: 79 | :param n: 80 | :return: 81 | """ 82 | testlen, counts = precook(test, n, True) 83 | 84 | result = {} 85 | 86 | # Calculate effective reference sentence length. 87 | 88 | if eff == "closest": 89 | result["reflen"] = min((abs(l-testlen), l) for l in reflen)[1] 90 | else: 91 | result["reflen"] = reflen 92 | 93 | result["testlen"] = testlen 94 | 95 | result["guess"] = [max(0,testlen-k+1) for k in xrange(1,n+1)] 96 | 97 | result['correct'] = [0]*n 98 | for (ngram, count) in counts.iteritems(): 99 | result["correct"][len(ngram)-1] += min(refmaxcounts.get(ngram,0), count) 100 | 101 | return result 102 | 103 | 104 | class BleuScorer(object): 105 | """ 106 | Bleu scorer. 107 | """ 108 | __slots__ = "n", "crefs", "ctest", "_score", "_ratio", "_testlen", "_reflen", "special_reflen" 109 | # special_reflen is used in oracle (proportional effective ref len for a node). 110 | 111 | def copy(self): 112 | """ 113 | copy the refs. 114 | :return: 115 | """ 116 | new = BleuScorer(n=self.n) 117 | new.ctest = copy.copy(self.ctest) 118 | new.crefs = copy.copy(self.crefs) 119 | new._score = None 120 | return new 121 | 122 | def __init__(self, test=None, refs=None, n=4, special_reflen=None): 123 | """ 124 | singular instance 125 | 126 | :param test: 127 | :param refs: 128 | :param n: 129 | :param special_reflen: 130 | """ 131 | self.n = n 132 | self.crefs = [] 133 | self.ctest = [] 134 | self.cook_append(test, refs) 135 | self.special_reflen = special_reflen 136 | 137 | def cook_append(self, test, refs): 138 | """ 139 | called by constructor and __iadd__ to avoid creating new instances. 
140 | 141 | :param test: 142 | :param refs: 143 | :return: 144 | """ 145 | if refs is not None: 146 | self.crefs.append(cook_refs(refs)) 147 | if test is not None: 148 | cooked_test = cook_test(test, self.crefs[-1]) 149 | self.ctest.append(cooked_test) # N.B.: -1 150 | else: 151 | self.ctest.append(None) # lens of crefs and ctest have to match 152 | 153 | self._score = None # need to recompute 154 | 155 | def ratio(self, option=None): 156 | """ 157 | 158 | :param option: 159 | :return: 160 | """ 161 | self.compute_score(option=option) 162 | return self._ratio 163 | 164 | def score_ratio(self, option=None): 165 | """ 166 | return (bleu, len_ratio) pair 167 | 168 | :param option: 169 | :return: 170 | """ 171 | return self.fscore(option=option), self.ratio(option=option) 172 | 173 | def score_ratio_str(self, option=None): 174 | """ 175 | 176 | :param option: 177 | :return: 178 | """ 179 | return "%.4f (%.2f)" % self.score_ratio(option) 180 | 181 | def reflen(self, option=None): 182 | """ 183 | 184 | :param option: 185 | :return: 186 | """ 187 | self.compute_score(option=option) 188 | return self._reflen 189 | 190 | def testlen(self, option=None): 191 | """ 192 | 193 | :param option: 194 | :return: 195 | """ 196 | self.compute_score(option=option) 197 | return self._testlen 198 | 199 | def retest(self, new_test): 200 | """ 201 | 202 | :param new_test: 203 | :return: 204 | """ 205 | if type(new_test) is str: 206 | new_test = [new_test] 207 | assert len(new_test) == len(self.crefs), new_test 208 | self.ctest = [] 209 | for t, rs in zip(new_test, self.crefs): 210 | self.ctest.append(cook_test(t, rs)) 211 | self._score = None 212 | 213 | return self 214 | 215 | def rescore(self, new_test): 216 | """ 217 | replace test(s) with new test(s), and returns the new score. 218 | 219 | :param new_test: 220 | :return: 221 | """ 222 | return self.retest(new_test).compute_score() 223 | 224 | def size(self): 225 | """ 226 | 227 | :return: 228 | """ 229 | assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest)) 230 | return len(self.crefs) 231 | 232 | def __iadd__(self, other): 233 | """ 234 | add an instance (e.g., from another sentence). 235 | 236 | :param other: 237 | :return: 238 | """ 239 | 240 | if type(other) is tuple: 241 | ## avoid creating new BleuScorer instances 242 | self.cook_append(other[0], other[1]) 243 | else: 244 | assert self.compatible(other), "incompatible BLEUs." 
245 | self.ctest.extend(other.ctest) 246 | self.crefs.extend(other.crefs) 247 | self._score = None ## need to recompute 248 | 249 | return self 250 | 251 | def compatible(self, other): 252 | """ 253 | 254 | :param other: 255 | :return: 256 | """ 257 | return isinstance(other, BleuScorer) and self.n == other.n 258 | 259 | def single_reflen(self, option="average"): 260 | """ 261 | 262 | :param option: 263 | :return: 264 | """ 265 | return self._single_reflen(self.crefs[0][0], option) 266 | 267 | def _single_reflen(self, reflens, option=None, testlen=None): 268 | """ 269 | 270 | :param reflens: 271 | :param option: 272 | :param testlen: 273 | :return: 274 | """ 275 | if option == "shortest": 276 | reflen = min(reflens) 277 | elif option == "average": 278 | reflen = float(sum(reflens))/len(reflens) 279 | elif option == "closest": 280 | reflen = min((abs(l-testlen), l) for l in reflens)[1] 281 | else: 282 | assert False, "unsupported reflen option %s" % option 283 | 284 | return reflen 285 | 286 | def recompute_score(self, option=None, verbose=0): 287 | """ 288 | 289 | :param option: 290 | :param verbose: 291 | :return: 292 | """ 293 | self._score = None 294 | return self.compute_score(option, verbose) 295 | 296 | def compute_score(self, option=None, verbose=0): 297 | """ 298 | 299 | :param option: 300 | :param verbose: 301 | :return: 302 | """ 303 | n = self.n 304 | small = 1e-9 305 | tiny = 1e-15 # so that if guess is 0 still return 0 306 | bleu_list = [[] for _ in range(n)] 307 | 308 | if self._score is not None: 309 | return self._score 310 | 311 | if option is None: 312 | option = "average" if len(self.crefs) == 1 else "closest" 313 | 314 | self._testlen = 0 315 | self._reflen = 0 316 | totalcomps = {'testlen':0, 'reflen':0, 'guess':[0]*n, 'correct':[0]*n} 317 | 318 | # for each sentence 319 | for comps in self.ctest: 320 | testlen = comps['testlen'] 321 | self._testlen += testlen 322 | 323 | if self.special_reflen is None: # need computation 324 | reflen = self._single_reflen(comps['reflen'], option, testlen) 325 | else: 326 | reflen = self.special_reflen 327 | 328 | self._reflen += reflen 329 | 330 | for key in ['guess','correct']: 331 | for k in xrange(n): 332 | totalcomps[key][k] += comps[key][k] 333 | 334 | # append per image bleu score 335 | bleu = 1. 336 | for k in xrange(n): 337 | bleu *= (float(comps['correct'][k]) + tiny) \ 338 | /(float(comps['guess'][k]) + small) 339 | bleu_list[k].append(bleu ** (1./(k+1))) 340 | ratio = (testlen + tiny) / (reflen + small) # N.B.: avoid zero division 341 | if ratio < 1: 342 | for k in xrange(n): 343 | bleu_list[k][-1] *= math.exp(1 - 1/ratio) 344 | 345 | if verbose > 1: 346 | print comps, reflen 347 | 348 | totalcomps['reflen'] = self._reflen 349 | totalcomps['testlen'] = self._testlen 350 | 351 | bleus = [] 352 | bleu = 1. 353 | for k in xrange(n): 354 | bleu *= float(totalcomps['correct'][k] + tiny) \ 355 | / (totalcomps['guess'][k] + small) 356 | bleus.append(bleu ** (1./(k+1))) 357 | ratio = (self._testlen + tiny) / (self._reflen + small) # N.B.: avoid zero division 358 | if ratio < 1: 359 | for k in xrange(n): 360 | bleus[k] *= math.exp(1 - 1/ratio) 361 | 362 | if verbose > 0: 363 | print totalcomps 364 | print "ratio:", ratio 365 | 366 | self._score = bleus 367 | return self._score, bleu_list 368 | -------------------------------------------------------------------------------- /metrics/hypothesis/gen1.txt: -------------------------------------------------------------------------------- 1 | Harshal is a good boy . 
2 | -------------------------------------------------------------------------------- /metrics/hypothesis/gen2.txt: -------------------------------------------------------------------------------- 1 | NLP Project is hard but very satisfying 2 | -------------------------------------------------------------------------------- /metrics/hypothesis/gen3.txt: -------------------------------------------------------------------------------- 1 | Mary no slap witch green . 2 | -------------------------------------------------------------------------------- /metrics/reference/ref1.txt: -------------------------------------------------------------------------------- 1 | Harshal is a bad boy . 2 | -------------------------------------------------------------------------------- /metrics/reference/ref2.txt: -------------------------------------------------------------------------------- 1 | Definitely it is very satisfying . 2 | -------------------------------------------------------------------------------- /metrics/reference/ref3.txt: -------------------------------------------------------------------------------- 1 | Mary did not slap the green witch . 2 | -------------------------------------------------------------------------------- /metrics/rouge/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'vrama91' 2 | -------------------------------------------------------------------------------- /metrics/rouge/rouge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # File Name : rouge.py 4 | # 5 | # Description : Computes ROUGE-L metric as described by Lin and Hovey (2004) 6 | # 7 | # Creation Date : 2015-01-07 06:03 8 | # Author : Ramakrishna Vedantam 9 | 10 | import numpy as np 11 | import pdb 12 | 13 | 14 | def my_lcs(string, sub): 15 | """ 16 | Calculates longest common subsequence for a pair of tokenized strings 17 | 18 | :param string : list of str : tokens from a string split using whitespace 19 | :param sub : list of str : shorter string, also split using whitespace 20 | :returns: length (list of int): length of the longest common subsequence between the two strings 21 | 22 | Note: my_lcs only gives length of the longest common subsequence, not the actual LCS 23 | """ 24 | if len(string)< len(sub): 25 | sub, string = string, sub 26 | 27 | lengths = [[0 for _ in range(0,len(sub)+1)] for _ in range(0,len(string)+1)] 28 | 29 | for j in range(1,len(sub)+1): 30 | for i in range(1,len(string)+1): 31 | if string[i-1] == sub[j-1]: 32 | lengths[i][j] = lengths[i-1][j-1] + 1 33 | else: 34 | lengths[i][j] = max(lengths[i-1][j], lengths[i][j-1]) 35 | 36 | return lengths[len(string)][len(sub)] 37 | 38 | 39 | class Rouge: 40 | """ 41 | Class for computing ROUGE-L score for a set of candidate sentences for the MS COCO test set 42 | 43 | """ 44 | def __init__(self): 45 | self.beta = 1.2 46 | 47 | def calc_score(self, candidate, refs): 48 | """ 49 | Compute ROUGE-L score given one candidate and references for an image 50 | 51 | :param candidate: str : candidate sentence to be evaluated 52 | :param refs: list of str : COCO reference sentences for the particular image to be evaluated 53 | :returns score: int (ROUGE-L score for the candidate evaluated against references) 54 | """ 55 | assert(len(candidate)==1) 56 | assert(len(refs)>0) 57 | prec = [] 58 | rec = [] 59 | 60 | # split into tokens 61 | token_c = candidate[0].split(" ") 62 | 63 | for reference in refs: 64 | # split into tokens 
65 | token_r = reference.split(" ") 66 | # compute the longest common subsequence 67 | lcs = my_lcs(token_r, token_c) 68 | prec.append(lcs/float(len(token_c))) 69 | rec.append(lcs/float(len(token_r))) 70 | 71 | prec_max = max(prec) 72 | rec_max = max(rec) 73 | 74 | if prec_max!=0 and rec_max !=0: 75 | score = ((1 + self.beta**2)*prec_max*rec_max)/float(rec_max + self.beta**2*prec_max) 76 | else: 77 | score = 0.0 78 | return score 79 | 80 | def compute_score(self, gts, res): 81 | """ 82 | Computes Rouge-L score given a set of reference and candidate sentences for the dataset 83 | Invoked by evaluate_captions.py 84 | 85 | :param hypo_for_image: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values 86 | :param ref_for_image: dict : reference MS-COCO sentences with "image name" key and "tokenized sentences" as values 87 | :returns: average_score: float (mean ROUGE-L score computed by averaging scores for all the images) 88 | """ 89 | assert(gts.keys() == res.keys()) 90 | imgIds = gts.keys() 91 | 92 | score = [] 93 | for id in imgIds: 94 | hypo = res[id] 95 | ref = gts[id] 96 | 97 | score.append(self.calc_score(hypo, ref)) 98 | 99 | # Sanity check. 100 | assert(type(hypo) is list) 101 | assert(len(hypo) == 1) 102 | assert(type(ref) is list) 103 | assert(len(ref) > 0) 104 | 105 | average_score = np.mean(np.array(score)) 106 | return average_score, np.array(score) 107 | 108 | def method(self): 109 | """ 110 | 111 | :return: 112 | """ 113 | return "Rouge" 114 | -------------------------------------------------------------------------------- /metrics/tester.py: -------------------------------------------------------------------------------- 1 | """ 2 | Computes the BLEU, ROUGE 3 | using the COCO metrics scripts 4 | """ 5 | from bleu.bleu import Bleu 6 | from rouge.rouge import Rouge 7 | import glob 8 | 9 | 10 | def load_textfiles(references, hypothesis): 11 | hypo = {idx: [lines.strip()] for (idx, lines) in enumerate(hypothesis)} 12 | # take out newlines before creating dictionary 13 | raw_refs = [map(str.strip, r) for r in zip(references)] 14 | refs = {idx: rr for idx, rr in enumerate(raw_refs)} 15 | # sanity check that we have the same number of references as hypothesis 16 | if len(hypo) != len(refs): 17 | raise ValueError("There is a sentence number mismatch between the inputs") 18 | return refs, hypo 19 | 20 | 21 | def score(ref, hypo): 22 | """ 23 | ref, dictionary of reference sentences (id, sentence) 24 | hypo, dictionary of hypothesis sentences (id, sentence) 25 | score, dictionary of scores 26 | """ 27 | scorers = [ 28 | (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), 29 | (Rouge(), "ROUGE_L"), 30 | ] 31 | final_scores = {} 32 | for scorer, method in scorers: 33 | score, scores = scorer.compute_score(ref, hypo) 34 | if type(score) == list: 35 | for m, s in zip(method, score): 36 | final_scores[m] = s 37 | else: 38 | final_scores[method] = score 39 | return final_scores 40 | 41 | def main(): 42 | # Feed in the directory where the hypothesis summary and true summary is stored 43 | hyp_file = glob.glob('metrics/hypothesis/*') 44 | ref_file = glob.glob('metrics/reference/*') 45 | 46 | BLEU_1 = 0. 47 | BLEU_2 = 0. 48 | BLEU_3 = 0. 49 | BLEU_4 = 0. 50 | ROUGE_L = 0. 
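# Each (reference, hypothesis) file pair scored below yields corpus-level values
# from compute_score; the running sums are macro-averaged over files after the loop.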
51 | num_files = 0 52 | for reference_file, hypothesis_file in zip(ref_file, hyp_file): 53 | num_files += 1 54 | with open(reference_file) as rf: 55 | reference = rf.readlines() 56 | 57 | with open(hypothesis_file) as hf: 58 | hypothesis = hf.readlines() 59 | 60 | ref, hypo = load_textfiles(reference, hypothesis) 61 | score_map = score(ref, hypo) 62 | BLEU_1 += score_map['Bleu_1'] 63 | BLEU_2 += score_map['Bleu_2'] 64 | BLEU_3 += score_map['Bleu_3'] 65 | BLEU_4 += score_map['Bleu_4'] 66 | ROUGE_L += score_map['ROUGE_L'] 67 | 68 | 69 | BLEU_1 = BLEU_1/num_files 70 | BLEU_2 = BLEU_2/num_files 71 | BLEU_3 = BLEU_3/num_files 72 | BLEU_4 = BLEU_4/num_files 73 | ROUGE_L = ROUGE_L/num_files 74 | 75 | print 'Average Metric Score for All Review Summary Pairs:' 76 | print 'Bleu - 1gram:', BLEU_1 77 | print 'Bleu - 2gram:', BLEU_2 78 | print 'Bleu - 3gram:', BLEU_3 79 | print 'Bleu - 4gram:', BLEU_4 80 | print 'Rouge:', ROUGE_L 81 | 82 | return BLEU_1,BLEU_2,BLEU_3, BLEU_4, ROUGE_L 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /metrics/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'hfang' 2 | -------------------------------------------------------------------------------- /metrics/tokenizer/ptbtokenizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # File Name : ptbtokenizer.py 4 | # 5 | # Description : Do the PTB Tokenization and remove punctuations. 6 | # 7 | # Creation Date : 29-12-2014 8 | # Last Modified : Thu Mar 19 09:53:35 2015 9 | # Authors : Hao Fang and Tsung-Yi Lin 10 | 11 | import os 12 | import sys 13 | import subprocess 14 | import tempfile 15 | import itertools 16 | 17 | # path to the stanford corenlp jar 18 | STANFORD_CORENLP_3_4_1_JAR = 'stanford-corenlp-3.4.1.jar' 19 | 20 | # punctuations to be removed from the sentences 21 | PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-", ".", "?", "!", ",", ":", "-", "--", "...", ";"] 22 | 23 | class PTBTokenizer: 24 | """Python wrapper of Stanford PTBTokenizer""" 25 | 26 | def tokenize(self, captions_for_image): 27 | """ 28 | 29 | :param captions_for_image: 30 | :return: 31 | """ 32 | cmd = ['java', '-cp', STANFORD_CORENLP_3_4_1_JAR, 'edu.stanford.nlp.process.PTBTokenizer', '-preserveLines', 33 | '-lowerCase'] 34 | 35 | # ====================================================== 36 | # prepare data for PTB Tokenizer 37 | # ====================================================== 38 | final_tokenized_captions_for_image = {} 39 | image_id = [k for k, v in captions_for_image.items() for _ in range(len(v))] 40 | sentences = '\n'.join([c['caption'].replace('\n', ' ') for k, v in captions_for_image.items() for c in v]) 41 | 42 | # ====================================================== 43 | # save sentences to temporary file 44 | # ====================================================== 45 | path_to_jar_dirname=os.path.dirname(os.path.abspath(__file__)) 46 | tmp_file = tempfile.NamedTemporaryFile(delete=False, dir=path_to_jar_dirname) 47 | tmp_file.write(sentences) 48 | tmp_file.close() 49 | 50 | # ====================================================== 51 | # tokenize sentence 52 | # ====================================================== 53 | cmd.append(os.path.basename(tmp_file.name)) 54 | p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dirname, stdout=subprocess.PIPE) 55 | token_lines = 
p_tokenizer.communicate(input=sentences.rstrip())[0] 56 | lines = token_lines.split('\n') 57 | # remove temp file 58 | os.remove(tmp_file.name) 59 | 60 | # ====================================================== 61 | # create dictionary for tokenized captions 62 | # ====================================================== 63 | for k, line in zip(image_id, lines): 64 | if not k in final_tokenized_captions_for_image: 65 | final_tokenized_captions_for_image[k] = [] 66 | tokenized_caption = ' '.join([w for w in line.rstrip().split(' ') if w not in PUNCTUATIONS]) 67 | final_tokenized_captions_for_image[k].append(tokenized_caption) 68 | 69 | return final_tokenized_captions_for_image 70 | -------------------------------------------------------------------------------- /metrics/tokenizer/stanford-corenlp-3.4.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/metrics/tokenizer/stanford-corenlp-3.4.1.jar -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/models/__init__.py -------------------------------------------------------------------------------- /models/gru_bidirectional.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.bidirectional import Bidirectional 3 | 4 | 5 | class GruBidirectional(Bidirectional): 6 | def __init__(self, review_summary_file, checkpointer, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param attention: 12 | """ 13 | super(GruBidirectional, self).__init__(review_summary_file, checkpointer, attention) 14 | 15 | def get_cell(self): 16 | """ 17 | Return the atomic RNN cell type used for this model 18 | 19 | :return: The atomic RNN Cell 20 | """ 21 | return tf.nn.rnn_cell.GRUCell(self.memory_dim) 22 | -------------------------------------------------------------------------------- /models/gru_simple.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.simple import Simple 3 | 4 | 5 | class GruSimple(Simple): 6 | def __init__(self, review_summary_file, checkpointer, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param attention: 12 | """ 13 | super(GruSimple, self).__init__(review_summary_file, checkpointer, attention) 14 | 15 | def get_cell(self): 16 | """ 17 | Return the atomic RNN cell type used for this model 18 | 19 | :return: The atomic RNN Cell 20 | """ 21 | return tf.nn.rnn_cell.GRUCell(self.memory_dim) 22 | -------------------------------------------------------------------------------- /models/gru_stacked_bidirectional.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.stacked_bidirectional import StackedBidirectional 3 | 4 | 5 | class GruStackedBidirectional(StackedBidirectional): 6 | def __init__(self, review_summary_file, checkpointer, num_layers, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param num_layers: 12 | :param attention: 13 | """ 14 | super(GruStackedBidirectional, self).__init__(review_summary_file, checkpointer, 
num_layers, attention) 15 | 16 | def get_cell(self): 17 | """ 18 | Return the atomic RNN cell type used for this model 19 | 20 | :return: The atomic RNN Cell 21 | """ 22 | return tf.nn.rnn_cell.GRUCell(self.memory_dim) 23 | -------------------------------------------------------------------------------- /models/gru_stacked_simple.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.stacked_simple import StackedSimple 3 | 4 | 5 | class GruStackedSimple(StackedSimple): 6 | def __init__(self, review_summary_file, checkpointer, num_layers, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param num_layers: 12 | :param attention: 13 | """ 14 | self.num_layers = num_layers 15 | super(GruStackedSimple, self).__init__(review_summary_file, checkpointer, num_layers, attention) 16 | 17 | def get_cell(self): 18 | """ 19 | Return the atomic RNN cell type used for this model 20 | 21 | :return: The atomic RNN Cell 22 | """ 23 | return tf.nn.rnn_cell.GRUCell(self.memory_dim) 24 | -------------------------------------------------------------------------------- /models/lstm_bidirectional.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.bidirectional import Bidirectional 3 | 4 | 5 | class LstmBidirectional(Bidirectional): 6 | def __init__(self, review_summary_file, checkpointer, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param attention: 12 | """ 13 | super(LstmBidirectional, self).__init__(review_summary_file, checkpointer, attention) 14 | 15 | def get_cell(self): 16 | """ 17 | Return the atomic RNN cell type used for this model 18 | 19 | :return: The atomic RNN Cell 20 | """ 21 | return tf.nn.rnn_cell.LSTMCell(self.memory_dim) 22 | -------------------------------------------------------------------------------- /models/lstm_simple.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.simple import Simple 3 | 4 | 5 | class LstmSimple(Simple): 6 | def __init__(self, review_summary_file, checkpointer, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param attention: 12 | """ 13 | super(LstmSimple, self).__init__(review_summary_file, checkpointer, attention) 14 | 15 | def get_cell(self): 16 | """ 17 | Return the atomic RNN cell type used for this model 18 | 19 | :return: The atomic RNN Cell 20 | """ 21 | return tf.nn.rnn_cell.LSTMCell(self.memory_dim) 22 | -------------------------------------------------------------------------------- /models/lstm_stacked_bidirectional.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.stacked_bidirectional import StackedBidirectional 3 | 4 | 5 | class LstmStackedBidirectional(StackedBidirectional): 6 | def __init__(self, review_summary_file, checkpointer, num_layers, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param num_layers: 12 | :param attention: 13 | """ 14 | super(LstmStackedBidirectional, self).__init__(review_summary_file, checkpointer, num_layers, attention) 15 | 16 | def get_cell(self): 17 | """ 18 | Return the atomic RNN cell type used for this model 19 | 20 | :return: The atomic RNN Cell 21 | """ 22 | return tf.nn.rnn_cell.LSTMCell(self.memory_dim) 23 | 
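# All eight concrete models above differ only in get_cell(); the encoder-decoder
# wiring lives in the Simple/Bidirectional/Stacked* base classes. As a minimal
# sketch (not a file in this repository) of how a new cell type would plug in,
# assuming tf.nn.rnn_cell.BasicRNNCell as shipped with this TensorFlow version:
import tensorflow as tf
from models.simple import Simple


class RnnSimple(Simple):
    def __init__(self, review_summary_file, checkpointer, attention=False):
        super(RnnSimple, self).__init__(review_summary_file, checkpointer, attention)

    def get_cell(self):
        # the only swap-in point: every model subclass overrides this factory
        return tf.nn.rnn_cell.BasicRNNCell(self.memory_dim)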
-------------------------------------------------------------------------------- /models/lstm_stacked_simple.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from models.stacked_simple import StackedSimple 3 | 4 | 5 | class LstmStackedSimple(StackedSimple): 6 | def __init__(self, review_summary_file, checkpointer, num_layers, attention=False): 7 | """ 8 | 9 | :param review_summary_file: 10 | :param checkpointer: 11 | :param num_layers: 12 | :param attention: 13 | """ 14 | super(LstmStackedSimple, self).__init__(review_summary_file, checkpointer, num_layers, attention) 15 | 16 | def get_cell(self): 17 | """ 18 | Return the atomic RNN cell type used for this model 19 | 20 | :return: The atomic RNN Cell 21 | """ 22 | return tf.nn.rnn_cell.LSTMCell(self.memory_dim) 23 | -------------------------------------------------------------------------------- /models/sequenceNet.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.framework import ops 2 | import tensorflow as tf 3 | import numpy as np 4 | import pandas as pd 5 | from abc import abstractmethod, ABCMeta 6 | 7 | 8 | class NeuralNet(object): 9 | __metaclass__ = ABCMeta 10 | 11 | def __init__(self): 12 | """ 13 | Seq2Seq Neural Network 14 | 15 | This is an abstract class and can't be called directly. 16 | """ 17 | # parameters 18 | self.train_batch_size = None 19 | self.test_batch_size = None 20 | self.memory_dim = None 21 | self.learning_rate = None 22 | self.saver = None 23 | self.sess = None 24 | self.test_size = self.test_size 25 | self.checkpointer = self.checkpointer 26 | self.mapper_dict = self.mapper_dict 27 | self.test_review = self.test_review 28 | self.true_summary = self.true_summary 29 | self.predicted_test_summary = self.predicted_test_summary 30 | 31 | # Load all the parameters 32 | self._load_model_params() 33 | 34 | def set_parameters(self, train_batch_size, test_batch_size, memory_dim, learning_rate): 35 | """ 36 | Set the parameters for the model and training. 
37 | 38 | :param train_batch_size: The batch size of examples used for batch training 39 | :param test_batch_size: The batch size of test examples used for testing 40 | :param memory_dim: The length of the hidden vector produced by the encoder 41 | :param learning_rate: The learning rate for Stochastic Gradient Descent 42 | :return: None 43 | """ 44 | self.train_batch_size = train_batch_size 45 | self.test_batch_size = test_batch_size 46 | self.memory_dim = memory_dim 47 | self.learning_rate = learning_rate 48 | 49 | @abstractmethod 50 | def _load_data(self): 51 | pass 52 | 53 | @abstractmethod 54 | def _split_train_tst(self): 55 | pass 56 | 57 | def _load_model_params(self): 58 | """ 59 | Load model parameters 60 | 61 | self.seq_length -> The length of the input sequence (Length of input sentence fed to the encoder-decoder model) 62 | self.vocab_size -> The size of the data vocabulary 63 | self.momentum -> The momentum parameter in the update rule for SGD 64 | 65 | :return: None 66 | """ 67 | # parameters 68 | self.seq_length = self.mapper_dict['seq_length'] 69 | self.vocab_size = self.mapper_dict['vocab_size'] 70 | self.momentum = 0.9 71 | 72 | def begin_session(self): 73 | """ 74 | Begins the session 75 | 76 | :return: None 77 | """ 78 | # start the tensorflow session 79 | ops.reset_default_graph() 80 | # initialize interactive session 81 | self.sess = tf.Session() 82 | 83 | def form_model_graph(self): 84 | """ 85 | Creates the data graph, loads the model and optimizer and then starts the session. 86 | 87 | :return: None 88 | """ 89 | self._load_data_graph() 90 | self._load_model() 91 | self._load_optimizer() 92 | self._start_session() 93 | 94 | @abstractmethod 95 | def _load_data_graph(self): 96 | pass 97 | 98 | @abstractmethod 99 | def _load_model(self): 100 | pass 101 | 102 | @abstractmethod 103 | def _load_optimizer(self): 104 | pass 105 | 106 | def _start_session(self): 107 | """ 108 | Starts the Tensorflow Session 109 | 110 | :return: None 111 | """ 112 | self.sess.run(tf.global_variables_initializer()) 113 | # initialize the saver node 114 | # print tf.GraphKeys.GLOBAL_VARIABLES 115 | self.saver = tf.train.Saver(tf.global_variables()) 116 | # get the latest checkpoint 117 | last_checkpoint_path = self.checkpointer.get_last_checkpoint() 118 | if last_checkpoint_path is not None: 119 | print 'Previous saved tensorflow objects found... Extracting...' 120 | # restore the tensorflow variables 121 | self.saver.restore(self.sess, last_checkpoint_path) 122 | print 'Extraction Complete. Moving Forward....' 123 | 124 | @abstractmethod 125 | def fit(self): 126 | pass 127 | 128 | def _index2sentence(self, list_): 129 | """ 130 | Converts the indexed sentence to the actual sentence 131 | 132 | :param list_: The list of the index of the words in the output sentence (in order) 133 | :return: Output Sentence [String] 134 | """ 135 | rev_map = self.mapper_dict['rev_map'] # rev_map is reverse mapping from index in vocabulary to actual word 136 | sentence = "" 137 | for entry in list_: 138 | if entry != 0: 139 | sentence += (rev_map[entry] + " ") 140 | 141 | return sentence 142 | 143 | def store_test_predictions(self, prediction_id='_final'): 144 | """ 145 | Stores the test predictions in a CSV file 146 | 147 | :param prediction_id: A simple id appended to the name of the summary for uniqueness 148 | :return: None 149 | """ 150 | # prediction id is usually the step count 151 | print 'Storing predictions on Test Data...' 
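# Each call appends one 'generated_summary<prediction_id>' column to the result CSV,
# so predictions from successive checkpoints accumulate side by side; helpers/metric.py
# later treats every column after review and true_summary as one hypothesis set.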
152 | review = [] 153 | true_summary = [] 154 | generated_summary = [] 155 | for i in range(self.test_size): 156 | if not self.checkpointer.is_output_file_present(): 157 | review.append(self._index2sentence(self.test_review[i])) 158 | true_summary.append(self._index2sentence(self.true_summary[i])) 159 | if i < (self.test_batch_size * (self.test_size // self.test_batch_size)): 160 | generated_summary.append(self._index2sentence(self.predicted_test_summary[i])) 161 | else: 162 | generated_summary.append('') 163 | 164 | prediction_nm = 'generated_summary' + prediction_id 165 | if self.checkpointer.is_output_file_present(): 166 | df = pd.read_csv(self.checkpointer.get_result_location(), header=0) 167 | df[prediction_nm] = np.array(generated_summary) 168 | else: 169 | df = pd.DataFrame() 170 | df['review'] = np.array(review) 171 | df['true_summary'] = np.array(true_summary) 172 | df[prediction_nm] = np.array(generated_summary) 173 | df.to_csv(self.checkpointer.get_result_location(), index=False) 174 | print 'Stored the predictions. Moving Forward' 175 | if prediction_id == '_final': 176 | print 'All done. Exiting..' 177 | print 'Exited' 178 | -------------------------------------------------------------------------------- /raw_data/food_raw.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/raw_data/food_raw.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==1.5.1 2 | nltk==3.2.1 3 | numpy==1.11.0 4 | pandas==0.18.0 5 | pytest==2.8.1 6 | scipy==0.17.0 7 | tensorflow==0.12.0rc0 8 | -------------------------------------------------------------------------------- /result/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/.gitkeep -------------------------------------------------------------------------------- /result/bidirectional/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/bidirectional/.gitkeep -------------------------------------------------------------------------------- /result/bidirectional/gru/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/bidirectional/gru/.gitkeep -------------------------------------------------------------------------------- /result/bidirectional/lstm/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/bidirectional/lstm/.gitkeep -------------------------------------------------------------------------------- /result/simple/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/simple/.gitkeep -------------------------------------------------------------------------------- /result/simple/gru/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/simple/gru/.gitkeep -------------------------------------------------------------------------------- /result/simple/lstm/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/simple/lstm/.gitkeep -------------------------------------------------------------------------------- /result/stacked_bidirectional/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/stacked_bidirectional/.gitkeep -------------------------------------------------------------------------------- /result/stacked_bidirectional/gru/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/stacked_bidirectional/gru/.gitkeep -------------------------------------------------------------------------------- /result/stacked_bidirectional/lstm/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/stacked_bidirectional/lstm/.gitkeep -------------------------------------------------------------------------------- /result/stacked_simple/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/stacked_simple/.gitkeep -------------------------------------------------------------------------------- /result/stacked_simple/gru/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/stacked_simple/gru/.gitkeep -------------------------------------------------------------------------------- /result/stacked_simple/lstm/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/result/stacked_simple/lstm/.gitkeep -------------------------------------------------------------------------------- /train_scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harpribot/deep-summarization/9b3bb1daae11a1db2386dbe4a71848714e6127f8/train_scripts/__init__.py -------------------------------------------------------------------------------- /train_scripts/train_script_gru_bidirectional_attn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..')) 4 | from models import gru_bidirectional 5 | from helpers import checkpoint 6 | 7 | # Get the review summary file 8 | review_summary_file = 'extracted_data/review_summary.csv' 9 | 10 | # Initialize Checkpointer to ensure checkpointing 11 | checkpointer = checkpoint.Checkpointer('bidirectional', 'gru', 'Attention') 
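# The (model, cell, attention) triple selects the checkpoint/<model>/<cell>/<attention>
# directory returned by Checkpointer.get_checkpoint_location(), so each configuration
# trained by these scripts checkpoints into its own folder.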
12 | checkpointer.steps_per_checkpoint(1000) 13 | checkpointer.steps_per_prediction(1000) 14 | 15 | # Do using GRU cell - without attention mechanism 16 | out_file = 'result/bidirectional/gru/attention.csv' 17 | checkpointer.set_result_location(out_file) 18 | gru_net = gru_bidirectional.GruBidirectional(review_summary_file, checkpointer, attention=True) 19 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05) 20 | gru_net.begin_session() 21 | gru_net.form_model_graph() 22 | gru_net.fit() 23 | gru_net.predict() 24 | gru_net.store_test_predictions() 25 | -------------------------------------------------------------------------------- /train_scripts/train_script_gru_bidirectional_no_attn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..')) 4 | from models import gru_bidirectional 5 | from helpers import checkpoint 6 | 7 | # Get the review summary file 8 | review_summary_file = 'extracted_data/review_summary.csv' 9 | 10 | # Initialize Checkpointer to ensure checkpointing 11 | checkpointer = checkpoint.Checkpointer('bidirectional', 'gru', 'noAttention') 12 | checkpointer.steps_per_checkpoint(1000) 13 | checkpointer.steps_per_prediction(1000) 14 | # Do using GRU cell - without attention mechanism 15 | out_file = 'result/bidirectional/gru/no_attention.csv' 16 | checkpointer.set_result_location(out_file) 17 | gru_net = gru_bidirectional.GruBidirectional(review_summary_file, checkpointer) 18 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05) 19 | gru_net.begin_session() 20 | gru_net.form_model_graph() 21 | gru_net.fit() 22 | gru_net.predict() 23 | gru_net.store_test_predictions() 24 | -------------------------------------------------------------------------------- /train_scripts/train_script_gru_simple_attn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..')) 4 | from models import gru_simple 5 | from helpers import checkpoint 6 | 7 | # Get the review summary file 8 | review_summary_file = 'extracted_data/review_summary.csv' 9 | 10 | # Initialize Checkpointer to ensure checkpointing 11 | checkpointer = checkpoint.Checkpointer('simple', 'gru', 'Attention') 12 | checkpointer.steps_per_checkpoint(1000) 13 | checkpointer.steps_per_prediction(1000) 14 | # Do using GRU cell - with attention mechanism 15 | out_file = 'result/simple/gru/attention.csv' 16 | checkpointer.set_result_location(out_file) 17 | gru_net = gru_simple.GruSimple(review_summary_file, checkpointer, attention=True) 18 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05) 19 | gru_net.begin_session() 20 | gru_net.form_model_graph() 21 | gru_net.fit() 22 | gru_net.predict() 23 | gru_net.store_test_predictions() 24 | -------------------------------------------------------------------------------- /train_scripts/train_script_gru_simple_no_attn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..')) 4 | from models import gru_simple 5 | from helpers import checkpoint 6 | 7 | # Get the review summary file 8 | review_summary_file = 'extracted_data/review_summary.csv' 9 | 10 | # Initialize 
Checkpointer to ensure checkpointing 11 | checkpointer = checkpoint.Checkpointer('simple', 'gru', 'noAttention') 12 | checkpointer.steps_per_checkpoint(1000) 13 | checkpointer.steps_per_prediction(1000) 14 | # Do using GRU cell - without attention mechanism 15 | out_file = 'result/simple/gru/no_attention.csv' 16 | checkpointer.set_result_location(out_file) 17 | gru_net = gru_simple.GruSimple(review_summary_file, checkpointer) 18 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05) 19 | gru_net.begin_session() 20 | gru_net.form_model_graph() 21 | gru_net.fit() 22 | gru_net.predict() 23 | gru_net.store_test_predictions() 24 | -------------------------------------------------------------------------------- /train_scripts/train_script_gru_stacked_bidirectional_attn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..')) 4 | from models import gru_stacked_bidirectional 5 | from helpers import checkpoint 6 | 7 | # Get the review summary file 8 | review_summary_file = 'extracted_data/review_summary.csv' 9 | 10 | # Initialize Checkpointer to ensure checkpointing 11 | checkpointer = checkpoint.Checkpointer('stackedBidirectional', 'gru', 'Attention') 12 | checkpointer.steps_per_checkpoint(1000) 13 | checkpointer.steps_per_prediction(1000) 14 | # Do using GRU cell - without attention mechanism 15 | out_file = 'result/stacked_bidirectional/gru/attention.csv' 16 | checkpointer.set_result_location(out_file) 17 | gru_net = gru_stacked_bidirectional.GruStackedBidirectional(review_summary_file, checkpointer, 18 | attention=True, num_layers=2) 19 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05) 20 | gru_net.begin_session() 21 | gru_net.form_model_graph() 22 | gru_net.fit() 23 | gru_net.predict() 24 | gru_net.store_test_predictions() 25 | -------------------------------------------------------------------------------- /train_scripts/train_script_gru_stacked_bidirectional_no_attn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..')) 4 | from models import gru_stacked_bidirectional 5 | from helpers import checkpoint 6 | 7 | # Get the review summary file 8 | review_summary_file = 'extracted_data/review_summary.csv' 9 | 10 | # Initialize Checkpointer to ensure checkpointing 11 | checkpointer = checkpoint.Checkpointer('stackedBidirectional', 'gru', 'noAttention') 12 | checkpointer.steps_per_checkpoint(1000) 13 | checkpointer.steps_per_prediction(1000) 14 | # Do using GRU cell - without attention mechanism 15 | out_file = 'result/stacked_bidirectional/gru/no_attention.csv' 16 | checkpointer.set_result_location(out_file) 17 | gru_net = gru_stacked_bidirectional.GruStackedBidirectional(review_summary_file, checkpointer, num_layers=2) 18 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05) 19 | gru_net.begin_session() 20 | gru_net.form_model_graph() 21 | gru_net.fit() 22 | gru_net.predict() 23 | gru_net.store_test_predictions() 24 | -------------------------------------------------------------------------------- /train_scripts/train_script_gru_stacked_simple_attn.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 
-------------------------------------------------------------------------------- /train_scripts/train_script_gru_stacked_simple_attn.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import gru_stacked_simple
5 | from helpers import checkpoint
6 | 
7 | # Get the review summary file
8 | review_summary_file = 'extracted_data/review_summary.csv'
9 | 
10 | # Initialize Checkpointer to ensure checkpointing
11 | checkpointer = checkpoint.Checkpointer('stackedSimple', 'gru', 'Attention')
12 | checkpointer.steps_per_checkpoint(1000)
13 | checkpointer.steps_per_prediction(1000)
14 | # Train using GRU cell - with attention mechanism
15 | out_file = 'result/stacked_simple/gru/attention.csv'
16 | checkpointer.set_result_location(out_file)
17 | gru_net = gru_stacked_simple.GruStackedSimple(review_summary_file, checkpointer, attention=True, num_layers=2)
18 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
19 | gru_net.begin_session()
20 | gru_net.form_model_graph()
21 | gru_net.fit()
22 | gru_net.predict()
23 | gru_net.store_test_predictions()
24 | 
-------------------------------------------------------------------------------- /train_scripts/train_script_gru_stacked_simple_no_attn.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import gru_stacked_simple
5 | from helpers import checkpoint
6 | 
7 | # Get the review summary file
8 | review_summary_file = 'extracted_data/review_summary.csv'
9 | 
10 | # Initialize Checkpointer to ensure checkpointing
11 | checkpointer = checkpoint.Checkpointer('stackedSimple', 'gru', 'noAttention')
12 | checkpointer.steps_per_checkpoint(1000)
13 | checkpointer.steps_per_prediction(1000)
14 | # Train using GRU cell - without attention mechanism
15 | out_file = 'result/stacked_simple/gru/no_attention.csv'
16 | checkpointer.set_result_location(out_file)
17 | gru_net = gru_stacked_simple.GruStackedSimple(review_summary_file, checkpointer, num_layers=2)
18 | gru_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
19 | gru_net.begin_session()
20 | gru_net.form_model_graph()
21 | gru_net.fit()
22 | gru_net.predict()
23 | gru_net.store_test_predictions()
24 | 
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_bidirectional_attn.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_bidirectional
5 | from helpers import checkpoint
6 | 
7 | # Get the review summary file
8 | review_summary_file = 'extracted_data/review_summary.csv'
9 | 
10 | # Initialize Checkpointer to ensure checkpointing
11 | checkpointer = checkpoint.Checkpointer('bidirectional', 'lstm', 'Attention')
12 | checkpointer.steps_per_checkpoint(1000)
13 | checkpointer.steps_per_prediction(1000)
14 | # Train using LSTM cell - with attention mechanism
15 | out_file = 'result/bidirectional/lstm/attention.csv'
16 | checkpointer.set_result_location(out_file)
17 | lstm_net = lstm_bidirectional.LstmBidirectional(review_summary_file, checkpointer, attention=True)
18 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
19 | lstm_net.begin_session()
20 | lstm_net.form_model_graph()
21 | lstm_net.fit()
22 | lstm_net.predict()
23 | lstm_net.store_test_predictions()
24 | 
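Each script bootstraps the import path with os.path.dirname(__file__) + '/' + '..' so that the models and helpers packages resolve when the script is run directly. An equivalent spelling with os.path.join avoids the manual separator and skips duplicate entries; a sketch using only the standard library:

    import os
    import sys

    # Absolute path of the repository root, i.e. the parent of train_scripts/.
    REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
    if REPO_ROOT not in sys.path:
        sys.path.insert(0, REPO_ROOT)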
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_bidirectional_no_attn.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_bidirectional
5 | from helpers import checkpoint
6 | 
7 | # Get the review summary file
8 | review_summary_file = 'extracted_data/review_summary.csv'
9 | 
10 | # Initialize Checkpointer to ensure checkpointing
11 | checkpointer = checkpoint.Checkpointer('bidirectional', 'lstm', 'noAttention')
12 | checkpointer.steps_per_checkpoint(1000)
13 | checkpointer.steps_per_prediction(1000)
14 | # Train using LSTM cell - without attention mechanism
15 | out_file = 'result/bidirectional/lstm/no_attention.csv'
16 | checkpointer.set_result_location(out_file)
17 | lstm_net = lstm_bidirectional.LstmBidirectional(review_summary_file, checkpointer)
18 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
19 | lstm_net.begin_session()
20 | lstm_net.form_model_graph()
21 | lstm_net.fit()
22 | lstm_net.predict()
23 | lstm_net.store_test_predictions()
24 | 
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_simple_attn.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_simple
5 | from helpers import checkpoint
6 | # Get the review summary file
7 | review_summary_file = 'extracted_data/review_summary.csv'
8 | 
9 | # Initialize Checkpointer to ensure checkpointing
10 | checkpointer = checkpoint.Checkpointer('simple', 'lstm', 'Attention')
11 | checkpointer.steps_per_checkpoint(1000)
12 | checkpointer.steps_per_prediction(1000)
13 | # Train using LSTM cell - with attention mechanism
14 | out_file = 'result/simple/lstm/attention.csv'
15 | checkpointer.set_result_location(out_file)
16 | lstm_net = lstm_simple.LstmSimple(review_summary_file, checkpointer, attention=True)
17 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
18 | lstm_net.begin_session()
19 | lstm_net.form_model_graph()
20 | lstm_net.fit()
21 | lstm_net.predict()
22 | lstm_net.store_test_predictions()
23 | 
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_simple_no_attn.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_simple
5 | from helpers import checkpoint
6 | # Get the review summary file
7 | review_summary_file = 'extracted_data/review_summary.csv'
8 | 
9 | # Initialize Checkpointer to ensure checkpointing
10 | checkpointer = checkpoint.Checkpointer('simple', 'lstm', 'noAttention')
11 | checkpointer.steps_per_checkpoint(1000)
12 | checkpointer.steps_per_prediction(1000)
13 | # Train using LSTM cell - without attention mechanism
14 | out_file = 'result/simple/lstm/no_attention.csv'
15 | checkpointer.set_result_location(out_file)
16 | lstm_net = lstm_simple.LstmSimple(review_summary_file, checkpointer)
17 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
18 | lstm_net.begin_session()
19 | lstm_net.form_model_graph()
20 | lstm_net.fit()
21 | lstm_net.predict()
22 | lstm_net.store_test_predictions()
23 | 
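All sixteen train scripts pass the same four values to set_parameters: train and test batch size 128, memory_dim 128, and learning rate 0.05. Because set_parameters takes them as keyword arguments, they could live in a single shared mapping; the module and constant names below are hypothetical, not part of this repository:

    # e.g. train_scripts/config.py (hypothetical module)
    HYPERPARAMS = {
        'train_batch_size': 128,
        'test_batch_size': 128,
        'memory_dim': 128,
        'learning_rate': 0.05,
    }

    # In any train script:
    # from train_scripts.config import HYPERPARAMS
    lstm_net.set_parameters(**HYPERPARAMS)

A single edit to HYPERPARAMS would then apply uniformly to every cell type and architecture.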
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_stacked_bidirectional_attention.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_stacked_bidirectional
5 | from helpers import checkpoint
6 | # Get the review summary file
7 | review_summary_file = 'extracted_data/review_summary.csv'
8 | 
9 | # Initialize Checkpointer to ensure checkpointing
10 | checkpointer = checkpoint.Checkpointer('stackedBidirectional', 'lstm', 'Attention')
11 | checkpointer.steps_per_checkpoint(1000)
12 | checkpointer.steps_per_prediction(1000)
13 | # Train using LSTM cell - with attention mechanism
14 | out_file = 'result/stacked_bidirectional/lstm/attention.csv'
15 | checkpointer.set_result_location(out_file)
16 | lstm_net = lstm_stacked_bidirectional.LstmStackedBidirectional(review_summary_file,
17 |                                                                checkpointer, attention=True, num_layers=2)
18 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
19 | lstm_net.begin_session()
20 | lstm_net.form_model_graph()
21 | lstm_net.fit()
22 | lstm_net.predict()
23 | lstm_net.store_test_predictions()
24 | 
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_stacked_bidirectional_no_attention.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_stacked_bidirectional
5 | from helpers import checkpoint
6 | # Get the review summary file
7 | review_summary_file = 'extracted_data/review_summary.csv'
8 | 
9 | # Initialize Checkpointer to ensure checkpointing
10 | checkpointer = checkpoint.Checkpointer('stackedBidirectional', 'lstm', 'noAttention')
11 | checkpointer.steps_per_checkpoint(1000)
12 | checkpointer.steps_per_prediction(1000)
13 | # Train using LSTM cell - without attention mechanism
14 | out_file = 'result/stacked_bidirectional/lstm/no_attention.csv'
15 | checkpointer.set_result_location(out_file)
16 | lstm_net = lstm_stacked_bidirectional.LstmStackedBidirectional(review_summary_file, checkpointer, num_layers=2)
17 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
18 | lstm_net.begin_session()
19 | lstm_net.form_model_graph()
20 | lstm_net.fit()
21 | lstm_net.predict()
22 | lstm_net.store_test_predictions()
23 | 
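The input path ('extracted_data/review_summary.csv') and all result paths are relative, so the scripts assume the current working directory is the repository root. A small guard, sketched here with only the standard library, fails fast with a readable message when that assumption does not hold:

    import os
    import sys

    review_summary_file = 'extracted_data/review_summary.csv'
    if not os.path.isfile(review_summary_file):
        sys.exit('Cannot find %s - run this script from the repository root '
                 'after extracting the data.' % review_summary_file)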
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_stacked_simple_attention.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_stacked_simple
5 | from helpers import checkpoint
6 | # Get the review summary file
7 | review_summary_file = 'extracted_data/review_summary.csv'
8 | 
9 | # Initialize Checkpointer to ensure checkpointing
10 | checkpointer = checkpoint.Checkpointer('stackedSimple', 'lstm', 'Attention')
11 | checkpointer.steps_per_checkpoint(1000)
12 | checkpointer.steps_per_prediction(1000)
13 | # Train using LSTM cell - with attention mechanism
14 | out_file = 'result/stacked_simple/lstm/attention.csv'
15 | checkpointer.set_result_location(out_file)
16 | lstm_net = lstm_stacked_simple.LstmStackedSimple(review_summary_file, checkpointer, attention=True, num_layers=2)
17 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
18 | lstm_net.begin_session()
19 | lstm_net.form_model_graph()
20 | lstm_net.fit()
21 | lstm_net.predict()
22 | lstm_net.store_test_predictions()
23 | 
-------------------------------------------------------------------------------- /train_scripts/train_script_lstm_stacked_simple_no_attention.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '..'))
4 | from models import lstm_stacked_simple
5 | from helpers import checkpoint
6 | # Get the review summary file
7 | review_summary_file = 'extracted_data/review_summary.csv'
8 | 
9 | # Initialize Checkpointer to ensure checkpointing
10 | checkpointer = checkpoint.Checkpointer('stackedSimple', 'lstm', 'noAttention')
11 | checkpointer.steps_per_checkpoint(1000)
12 | checkpointer.steps_per_prediction(1000)
13 | # Train using LSTM cell - without attention mechanism
14 | out_file = 'result/stacked_simple/lstm/no_attention.csv'
15 | checkpointer.set_result_location(out_file)
16 | lstm_net = lstm_stacked_simple.LstmStackedSimple(review_summary_file, checkpointer, num_layers=2)
17 | lstm_net.set_parameters(train_batch_size=128, test_batch_size=128, memory_dim=128, learning_rate=0.05)
18 | lstm_net.begin_session()
19 | lstm_net.form_model_graph()
20 | lstm_net.fit()
21 | lstm_net.predict()
22 | lstm_net.store_test_predictions()
23 | 
--------------------------------------------------------------------------------