├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── demo.py ├── doc └── top.png ├── requirements-test.txt ├── requirements.txt ├── setup.py ├── sumeval ├── __init__.py ├── cli │ ├── __init__.py │ └── sum_eval.py └── metrics │ ├── __init__.py │ ├── bleu.py │ ├── lang │ ├── __init__.py │ ├── base_lang.py │ ├── data │ │ ├── en │ │ │ ├── stemming.txt │ │ │ └── stop_words.txt │ │ ├── ja │ │ │ └── stop_words.txt │ │ └── zh │ │ │ └── stop_words.txt │ ├── lang_en.py │ ├── lang_ja.py │ └── lang_zh.py │ └── rouge.py └── tests ├── __init__.py ├── data ├── .gitkeep └── rouge │ ├── ROUGE-test-ja.json │ ├── ROUGE-test-zh.json │ ├── ROUGE-test.json │ ├── verify-spl.json │ └── verify.json ├── rouge_test_to_json.py ├── test_be_rouge.py ├── test_be_rouge_ja.py ├── test_bleu.py ├── test_custom_lang.py ├── test_lang_en.py ├── test_lang_ja.py ├── test_lang_zh.py ├── test_rouge.py ├── test_rouge_ja.py ├── test_rouge_zh.py └── test_sum_eval.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | .DS_Store 103 | .vscode 104 | tmp*/ 105 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | before_install: 6 | - python --version 7 | - sudo apt-get install -y libxml-parser-perl 8 | install: 9 | - pip install -r requirements-test.txt 10 | - python -m spacy download en 11 | - pip install "https://github.com/megagonlabs/ginza/releases/download/latest/ginza-latest.tar.gz" 12 | script: 13 | - pytest --cov=sumeval tests/test_* 14 | after_success: 15 | codecov 16 | deploy: 17 | provider: pypi 18 | user: "icoxfog417" 19 | password: 20 | secure: 
O3zmKefgHI+UNvgtZ/Fe/htgnP+yLXbMFfLIOF3VN9ofCyTXhjueBG+t6l9cetzICPfOMf2LjYCeamgr8DiV9xbtT5IWPOsU5qJYq8guOpYZ3Qpy0jIE7jE8uURCLBM1vdfpf6Gc+FI+NKj01K/xrX2Fh5kT90TMXN2lPFrd7SkYBWTdyZsOheUFSE8YinSx+TSaiWBohc+IaqGuUvmIA12i2a1narvaB86WWhAIY+BqJYCPdZg+++xqsMixSUDhwZgi30k1LDxzUKvz+fFUCyvTkyr8hO+CeE+d+jyY+GuD0XMQMM0OcWK0gKYgO89kvYv7h8bIKc03jPESECEGQTXYqrt486Notkm+v4DOrT52Owgx2ZsuNicb+5v6u5Mb7aD36kMrgO9BvTvtPlgiGAB+UUY1kzcgFRYgsSN5mvz9EAnq6Efxq2/1aQV3MwJE1FD6EUuzzJaWSR5+8pfe9NL8vjle7qGp/aSxEzbJPuzVMe6n/1+z26RNozkHysZRnaKGvh0WypvkjoU2lJB8Lx8buOBURy0K778/PLljEDxVwB8HriW92EwyjcfaCitekCGTjGicYWj0lmyJJo05CaNAMGqGXx03Q1zZth1ilnXXpoBez37Dx4Q4oxR1UpsIp0POKfX1cg8MCUiN3xYVSGD+znH4uNazs9vHkrNEiII= 21 | on: 22 | tags: true 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 chakki 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 4 |

5 | Well tested & Multi-language
6 | evaluation framework for Text Summarization. 7 |

8 |

9 | 10 | [![PyPI version](https://badge.fury.io/py/sumeval.svg)](https://badge.fury.io/py/sumeval) 11 | [![Build Status](https://travis-ci.org/chakki-works/sumeval.svg?branch=master)](https://travis-ci.org/chakki-works/sumeval) 12 | [![codecov](https://codecov.io/gh/chakki-works/sumeval/branch/master/graph/badge.svg)](https://codecov.io/gh/chakki-works/sumeval) 13 | 14 | 15 | * Well tested 16 | * The ROUGE-X scores are tested by comparing them with the [original Perl script (ROUGE-1.5.5.pl)](https://github.com/summanlp/evaluation). 17 | * The BLEU score is calculated by [SacréBLEU](https://github.com/mjpost/sacrebleu), which produces the same values as the official script (`mteval-v13a.pl`) used by WMT. 18 | * Multi-language 19 | * Not only English but also Japanese and Chinese are supported. Other languages can be added [easily](https://github.com/chakki-works/sumeval#welcome-contribution-tada). 20 | 21 | Of course, the implementation is pure Python! 22 | 23 | ## How to use 24 | 25 | ```py 26 | from sumeval.metrics.rouge import RougeCalculator 27 | 28 | 29 | rouge = RougeCalculator(stopwords=True, lang="en") 30 | 31 | rouge_1 = rouge.rouge_n( 32 | summary="I went to the Mars from my living town.", 33 | references="I went to Mars", 34 | n=1) 35 | 36 | rouge_2 = rouge.rouge_n( 37 | summary="I went to the Mars from my living town.", 38 | references=["I went to Mars", "It's my living town"], 39 | n=2) 40 | 41 | rouge_l = rouge.rouge_l( 42 | summary="I went to the Mars from my living town.", 43 | references=["I went to Mars", "It's my living town"]) 44 | 45 | # You need spaCy to calculate ROUGE-BE 46 | 47 | rouge_be = rouge.rouge_be( 48 | summary="I went to the Mars from my living town.", 49 | references=["I went to Mars", "It's my living town"]) 50 | 51 | print("ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}".format( 52 | rouge_1, rouge_2, rouge_l, rouge_be 53 | ).replace(", ", "\n")) 54 | ``` 55 | 56 | ```py 57 | from sumeval.metrics.bleu import BLEUCalculator 58 | 59 | 60 | bleu = 
BLEUCalculator() 61 | score = bleu.bleu("I am waiting on the beach", 62 | "He is walking on the beach") 63 | 64 | bleu_ja = BLEUCalculator(lang="ja") 65 | score_ja = bleu_ja.bleu("私はビーチで待ってる", "彼がベンチで待ってる") 66 | ``` 67 | 68 | ### From the command line 69 | 70 | ``` 71 | sumeval r-nlb "I'm living New York its my home town so awesome" "My home town is awesome" 72 | ``` 73 | 74 | Output: 75 | 76 | ``` 77 | { 78 | "options": { 79 | "stopwords": true, 80 | "stemming": false, 81 | "word_limit": -1, 82 | "length_limit": -1, 83 | "alpha": 0.5, 84 | "input-summary": "I'm living New York its my home town so awesome", 85 | "input-references": [ 86 | "My home town is awesome" 87 | ] 88 | }, 89 | "averages": { 90 | "ROUGE-1": 0.7499999999999999, 91 | "ROUGE-2": 0.6666666666666666, 92 | "ROUGE-L": 0.7499999999999999, 93 | "ROUGE-BE": 0 94 | }, 95 | "scores": [ 96 | { 97 | "ROUGE-1": 0.7499999999999999, 98 | "ROUGE-2": 0.6666666666666666, 99 | "ROUGE-L": 0.7499999999999999, 100 | "ROUGE-BE": 0 101 | } 102 | ] 103 | } 104 | ``` 105 | 106 | You can also use file input. Run `sumeval -h` for more details. 107 | 108 | ## Install 109 | 110 | ``` 111 | pip install sumeval 112 | ``` 113 | 114 | ## Dependencies 115 | 116 | * BLEU depends on [SacréBLEU](https://github.com/mjpost/sacrebleu) 117 | * To calculate `ROUGE-BE`, [`spaCy`](https://github.com/explosion/spaCy) is required. 118 | * To use lang `ja`, [`janome`](https://github.com/mocobeta/janome) or [`MeCab`](https://github.com/taku910/mecab) is required. 119 | * To get the `ROUGE-BE` score, [`GiNZA`](https://github.com/megagonlabs/ginza) is additionally required. 120 | * To use lang `zh`, [`jieba`](https://github.com/fxsjy/jieba) is required. 121 | * To get the `ROUGE-BE` score, [`pyhanlp`](https://github.com/hankcs/pyhanlp) is additionally required. 122 | 123 | ## Test 124 | 125 | `sumeval` uses two packages to test the scores. 
126 | 127 | * [pythonrouge](https://github.com/tagucci/pythonrouge) 128 | * It calls the original Perl script 129 | * `pip install git+https://github.com/tagucci/pythonrouge.git` 130 | * [rougescore](https://github.com/bdusell/rougescore) 131 | * It's a simple Python implementation of the ROUGE score 132 | * `pip install git+https://github.com/bdusell/rougescore.git` 133 | 134 | ## Welcome Contribution :tada: 135 | 136 | ### Add supported language 137 | 138 | The tokenization and dependency-parsing logic for each language is located in `sumeval/metrics/lang`. 139 | 140 | You can create a language class by inheriting from [`BaseLang`](https://github.com/chakki-works/sumeval/blob/master/sumeval/metrics/lang/base_lang.py). 141 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from sumeval.metrics.rouge import RougeCalculator 2 | 3 | 4 | rouge = RougeCalculator(stopwords=True, lang="en") 5 | 6 | rouge_1 = rouge.rouge_n( 7 | summary="I went to the Mars from my living town.", 8 | references="I went to Mars", 9 | n=1) 10 | 11 | rouge_2 = rouge.rouge_n( 12 | summary="I went to the Mars from my living town.", 13 | references=["I went to Mars", "It's my living town"], 14 | n=2) 15 | 16 | rouge_l = rouge.rouge_l( 17 | summary="I went to the Mars from my living town.", 18 | references=["I went to Mars", "It's my living town"]) 19 | 20 | # You need spaCy to calculate ROUGE-BE 21 | 22 | rouge_be = rouge.rouge_be( 23 | summary="I went to the Mars from my living town.", 24 | references=["I went to Mars", "It's my living town"]) 25 | 26 | print("ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}".format( 27 | rouge_1, rouge_2, rouge_l, rouge_be 28 | ).replace(", ", "\n")) 29 | -------------------------------------------------------------------------------- /doc/top.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/doc/top.png -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | plac>=0.9.6 2 | sacrebleu>=1.3.2 3 | # For Test 4 | pytest==4.4.2 5 | codecov==2.0.15 6 | pytest-cov==2.7.1 7 | beautifulsoup4>=4.7.1 8 | -e git+https://github.com/tagucci/pythonrouge.git#egg=pythonrouge 9 | -e git+https://github.com/bdusell/rougescore.git#egg=rougescore 10 | # Basic Element 11 | spacy>=2.0.0,<3.0.0 12 | # For Chinese 13 | jieba>=0.39 14 | pyhanlp>=0.1.45 15 | # For Japanese 16 | janome>=0.3.9 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | plac>=0.9.6 2 | sacrebleu>=1.3.2,<2.0.0 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 5 | requires = ["plac>=0.9.6", "sacrebleu>=1.3.2"] 6 | 7 | 8 | def get_lang_data(): 9 | static_files = [] 10 | root = "sumeval/metrics/lang/data/" 11 | for _dir in os.listdir(root): 12 | lang_dir = os.path.join(root, _dir) 13 | if not os.path.isdir(lang_dir): 14 | continue 15 | for content in os.listdir(lang_dir): 16 | f = os.path.join(lang_dir, content) 17 | if os.path.isfile(f) and not content.startswith("."): 18 | static_files.append(os.path.join("data/" + _dir, content)) 19 | return static_files 20 | 21 | 22 | setup( 23 | name="sumeval", 24 | description="Well tested evaluation framework for Text summarization", 25 | url="https://github.com/chakki-works/sumeval", 26 | author="icoxfog417", 27 | author_email="icoxfog417@yahoo.co.jp", 28 | license="Apache License 2.0", 29 | keywords="text summarization machine 
learning", 30 | use_scm_version=True, 31 | setup_requires=["setuptools_scm"], 32 | packages=[ 33 | "sumeval", 34 | "sumeval.cli", 35 | "sumeval.metrics", 36 | "sumeval.metrics.lang", 37 | ], 38 | package_data={ 39 | "sumeval.metrics.lang": get_lang_data() 40 | }, 41 | entry_points={ 42 | "console_scripts": ["sumeval=sumeval.cli.sum_eval:entry_point"], 43 | }, 44 | install_requires=requires, 45 | classifiers=[ 46 | "Programming Language :: Python :: 3.7", 47 | "Programming Language :: Python :: 3.6" 48 | ], 49 | ) 50 | -------------------------------------------------------------------------------- /sumeval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/sumeval/__init__.py -------------------------------------------------------------------------------- /sumeval/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/sumeval/cli/__init__.py -------------------------------------------------------------------------------- /sumeval/cli/sum_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | import plac 3 | from itertools import groupby 4 | from statistics import mean 5 | from sumeval.metrics.rouge import RougeCalculator 6 | from sumeval.metrics.bleu import BLEUCalculator 7 | 8 | 9 | def main( 10 | score_desc: ("score kind. 
ROUGE: r (-nlb means ROUGE-N, L, BE), BLEU: b."), 11 | use_file: ("read data from file", "flag", "f"), 12 | include_stopwords: ("don't ignore stop words", "flag", "in"), 13 | stemming: ("use stemming", "flag", "st"), 14 | word_limit: ("word limit count", "option", "wl") = -1, 15 | length_limit: ("sentence limit length", "option", "ll") = -1, 16 | alpha: ("alpha for f1-score", "option") = 0.5, 17 | language: ("word limit count", "option", "la") = "en", 18 | *params): 19 | """ 20 | Calculate ROUGE/BLEU score. 21 | summary: Your generated summary. 22 | references: A Reference or references to evaluate. 23 | 24 | Ex: summary: "my summary is awesome" 25 | reference: "summaries are awesome" 26 | score kind: ROUGE-N 27 | 28 | Then: 29 | sumeval r-n "my summary is awesome" "summaries are awesome" 30 | """ 31 | 32 | if "-" in score_desc: 33 | score_type, score_kinds = score_desc.lower().split("-") 34 | else: 35 | score_type = score_desc.lower() 36 | score_kinds = "" 37 | 38 | if len(params) < 2: 39 | print("You have to specify at least one summary and reference.") 40 | return 41 | 42 | summary = params[0] 43 | references = params[1:] 44 | if isinstance(references, tuple): 45 | references = list(references) 46 | stopwords = not include_stopwords 47 | 48 | generator = None 49 | if use_file: 50 | generator = file_generator(summary, references) 51 | else: 52 | generator = sentence_to_generator(summary, references) 53 | 54 | scores = [] 55 | keys = [] 56 | if score_type == "r": 57 | scorer = RougeCalculator( 58 | stopwords=stopwords, stemming=stemming, 59 | word_limit=word_limit, length_limit=length_limit, 60 | lang=language) 61 | 62 | for s, rs in generator: 63 | score = {} 64 | for k in score_kinds: 65 | if k == "n": 66 | score["ROUGE-1"] = scorer.rouge_1(s, rs, alpha) 67 | score["ROUGE-2"] = scorer.rouge_2(s, rs, alpha) 68 | elif k == "l": 69 | score["ROUGE-L"] = scorer.rouge_l(s, rs, alpha) 70 | elif k == "b": 71 | score["ROUGE-BE"] = scorer.rouge_be(s, rs, "HMR", alpha) 72 
| if len(keys) == 0: 73 | keys = list(score.keys()) 74 | scores.append(score) 75 | 76 | elif score_type == "b": 77 | scorer = BLEUCalculator(lang=language) 78 | for s, rs in generator: 79 | score = {} 80 | print(s, rs) 81 | score["BLEU"] = scorer.bleu(s, rs) 82 | if len(keys) == 0: 83 | keys = list(score.keys()) 84 | scores.append(score) 85 | 86 | avgs = {} 87 | for k in keys: 88 | avg = mean([s[k] for s in scores]) 89 | avgs[k] = avg 90 | 91 | result = { 92 | "options": { 93 | "stopwords": stopwords, 94 | "stemming": stemming, 95 | "word_limit": word_limit, 96 | "length_limit": length_limit, 97 | "alpha": alpha, 98 | "input-summary": summary, 99 | "input-references": references 100 | }, 101 | "averages": avgs, 102 | "scores": scores 103 | } 104 | 105 | output = json.dumps(result, indent=2, ensure_ascii=False) 106 | print(output) 107 | 108 | 109 | def file_generator(s_file_path, r_file_paths): 110 | s_file = open(s_file_path, encoding="utf-8") 111 | r_files = [open(r, encoding="utf-8") for r in r_file_paths] 112 | for lines in zip(s_file, *r_files): 113 | lines = [ln.strip() for ln in lines] 114 | yield lines[0], lines[1:] 115 | else: 116 | s_file.close() 117 | for r in r_files: 118 | r.close() 119 | 120 | 121 | def sentence_to_generator(summary, references): 122 | yield summary, references 123 | 124 | 125 | def entry_point(): 126 | plac.call(main) 127 | 128 | 129 | if __name__ == "__main__": 130 | entry_point() 131 | -------------------------------------------------------------------------------- /sumeval/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/sumeval/metrics/__init__.py -------------------------------------------------------------------------------- /sumeval/metrics/bleu.py: -------------------------------------------------------------------------------- 1 | from sacrebleu import corpus_bleu, TOKENIZERS, 
# NOTE: this span of the dump held several flattened files. They are restored
# below in their original order; dump separators and data-file text are kept
# verbatim on comment lines.
#
# Tail of an import statement that begins before this span:
# DEFAULT_TOKENIZER
from sumeval.metrics.lang.base_lang import BaseLang
from sumeval.metrics.lang import get_lang


class BLEUCalculator():
    """
    BLEU scorer that delegates the computation to ``corpus_bleu``.

    ``corpus_bleu``, ``TOKENIZERS`` and ``DEFAULT_TOKENIZER`` come from the
    import that starts before this span (presumably sacrebleu, as the
    ``bleu`` docstring says - TODO confirm).

    Parameters
    ----------
    smooth_method: str
        forwarded to ``corpus_bleu`` as ``smooth_method``
    smooth_value: float
        forwarded to ``corpus_bleu`` as ``smooth_value``
    lowercase: bool
        forwarded to ``corpus_bleu`` as ``lowercase``
    use_effective_order: bool
        forwarded to ``corpus_bleu`` as ``use_effective_order``
    lang: str or BaseLang
        language code resolved through ``get_lang``, or a ready-made
        ``BaseLang`` instance

    Raises
    ------
    TypeError
        when ``lang`` is neither a str nor a ``BaseLang`` instance
    """

    def __init__(self,
                 smooth_method="floor", smooth_value=0.01,
                 lowercase=False, use_effective_order=True,
                 lang="en"):
        self.smooth_method = smooth_method
        self.smooth_value = smooth_value
        self.lowercase = lowercase
        self.use_effective_order = use_effective_order
        if isinstance(lang, str):
            self.lang = lang
            self._lang = get_lang(lang)
        elif isinstance(lang, BaseLang):
            self.lang = lang.lang
            self._lang = lang
        else:
            # Without this branch self.lang stayed unset and the check
            # below failed with an AttributeError that hid the real cause.
            raise TypeError(
                "lang must be a language code (str) or a BaseLang "
                "instance, got {}".format(type(lang).__name__))

        self._tokenizer = DEFAULT_TOKENIZER
        if self.lang == "ja":
            def tokenizer_ja(text):
                words = self._lang.tokenize_with_preprocess(text)
                return " ".join(words)

            # The entry lives in the shared TOKENIZERS mapping, so a later
            # "ja" calculator replaces the function registered here.
            TOKENIZERS["ja"] = tokenizer_ja
            self._tokenizer = "ja"
        elif self.lang == "zh":
            self._tokenizer = "zh"

    def bleu(self, summary, references, score_only=True):
        """
        Calculate BLEU score by sacrebleu.

        Parameters
        ----------
        summary: str
            summary text
        references: str or str[]
            reference or references to evaluate summary
        score_only: bool
            when True, return only score

        Returns
        -------
        ``bleu.score`` of the ``corpus_bleu`` result when ``score_only``
        is True, otherwise the result object itself.

        See Also
        --------
        https://github.com/mjpost/sacreBLEU
        """
        # Options shared by both call paths below.
        options = dict(
            smooth_method=self.smooth_method,
            smooth_value=self.smooth_value,
            lowercase=self.lowercase,
            use_effective_order=self.use_effective_order)

        if isinstance(summary, str):
            _s = summary
            _refs = references
            if isinstance(references, list):
                _s = [_s]
                _refs = [references]
            bleu = corpus_bleu(
                _s, _refs,
                force=False, tokenize=self._tokenizer,
                **options)
        else:
            # already tokenized summary and references
            _s = " ".join(summary)
            _refs = [[" ".join(r) for r in references]]
            bleu = corpus_bleu(
                _s, _refs,
                force=True, tokenize="none",
                **options)

        return bleu.score if score_only else bleu


# --------------------------------------------------------------------------------
# /sumeval/metrics/lang/__init__.py:
# --------------------------------------------------------------------------------
def get_lang(lang=""):
    """
    Return a language helper instance for a language code.

    "ja" gives ``LangJA``, "zh" gives ``LangZH``; every other value,
    including the default "", gives ``LangEN``. Each import sits inside
    its branch, so only the selected language module is imported.
    """
    if lang == "ja":
        from .lang_ja import LangJA
        return LangJA()
    elif lang == "zh":
        from .lang_zh import LangZH
        return LangZH()
    else:
        from .lang_en import LangEN
        return LangEN()


# --------------------------------------------------------------------------------
# /sumeval/metrics/lang/base_lang.py:
# --------------------------------------------------------------------------------
import os
from pathlib import Path


class BaseLang():
    """
    Base class of the language helpers.

    Provides tokenization hooks, stop-word lookup, dictionary based
    stemming and extraction of basic elements. Stop words and the
    stemming dictionary are read lazily from ``data/<lang>/`` next to
    this module.
    """
    _PARSER = None

    def __init__(self, lang):
        self.lang = lang
        self._stopwords = []
        self._stemming = {}

    def load_parser(self):
        """
        Return the spaCy pipeline for ``self.lang``, loading it on first use.

        The assignment goes through ``self``, so the loaded pipeline is
        stored on the instance and the class attribute stays ``None``.
        """
        if self._PARSER is None:
            import spacy
            self._PARSER = spacy.load(self.lang)
        return self._PARSER

    def tokenize(self, text):
        """Split text into words. Subclasses have to override this."""
        raise Exception("Have to implement tokenize in subclass")

    def tokenize_with_preprocess(self, text):
        """Tokenize text; the base implementation does no preprocessing."""
        return self.tokenize(text)

    def join(self, words):
        """Join words with a single space."""
        return " ".join(words)

    def parse_to_be(self, text):
        """
        Extract basic elements from the dependency parse of text.

        A NOUN whose head is a VERB or ADJ gives
        ``BasicElement(noun text, head lemma, dep)``; a VERB or ADJ whose
        head is a NOUN gives ``BasicElement(head text, token lemma, dep)``.

        Returns
        -------
        list of BasicElement, in token order
        """
        from spacy.symbols import VERB, ADJ, NOUN
        doc = self.load_parser()(text)
        bes = []

        # The debugging print() calls that used to be here were removed:
        # a library call should not write to stdout.
        for token in doc:
            # chunk level dependencies
            if token.pos == NOUN and token.head.pos in (VERB, ADJ):
                bes.append(BasicElement(token.text, token.head.lemma_,
                                        token.dep_))
            elif token.pos in (VERB, ADJ) and token.head.pos == NOUN:
                bes.append(BasicElement(token.head.text, token.lemma_,
                                        token.dep_))

        return bes

    def is_stop_word(self, word):
        """Return True when word is in the stop-word list of this language."""
        if not self._stopwords:
            self.load_stopwords()
        return word in self._stopwords

    def stemming(self, word, min_length=-1):
        """
        Return the stem of word according to the stemming dictionary.

        Parameters
        ----------
        word: str
            word to stem
        min_length: int
            when positive, words shorter than this are returned unchanged

        Returns
        -------
        the dictionary entry for word, or word itself when it has none
        """
        if not self._stemming:
            self.load_stemming_dict()

        if min_length > 0 and len(word) < min_length:
            return word
        return self._stemming.get(word, word)

    def _read_data_lines(self, filename):
        """
        Read ``data/<lang>/<filename>`` located next to this module.

        Returns the stripped, non-empty lines, or None when the file does
        not exist.
        """
        p = Path(os.path.dirname(__file__))
        p = p.joinpath("data", self.lang, filename)
        if not p.is_file():
            return None
        with p.open(encoding="utf-8") as f:
            stripped = (ln.strip() for ln in f)
            return [ln for ln in stripped if ln]

    def load_stopwords(self):
        """Load stop_words.txt; the list is left untouched if it is missing."""
        lines = self._read_data_lines("stop_words.txt")
        if lines is not None:
            self._stopwords = lines

    def load_stemming_dict(self):
        """
        Load stemming.txt; the dictionary is left untouched if it is missing.

        Each line is split at its first space into a key and a value.
        """
        lines = self._read_data_lines("stemming.txt")
        if lines is not None:
            pairs = [ln.split(" ", 1) for ln in lines]
            self._stemming = dict(pairs)


class BasicElement():
    """A head / modifier / relation triple."""

    # option letter -> attribute it selects
    _FIELDS = {"H": "head", "M": "modifier", "R": "relation"}

    def __init__(self, head, modifier, relation):
        self.head = head
        self.modifier = modifier
        self.relation = relation

    def equals(self, other, option="HMR"):
        """
        Compare with other on the parts selected by option.

        Parameters
        ----------
        other: BasicElement
            element to compare with
        option: str
            letters H (head), M (modifier), R (relation) in any case;
            other letters are ignored

        Returns
        -------
        True when every selected part is equal
        """
        for c in option:
            name = self._FIELDS.get(c.upper())
            if name and getattr(self, name) != getattr(other, name):
                return False
        return True

    def as_key(self, option="HMR"):
        """
        Return the parts selected by option joined with "|".

        The parts appear in the order of the letters in option; letters
        other than H, M and R are ignored.
        """
        names = (self._FIELDS.get(c.upper()) for c in option)
        return "|".join(getattr(self, name) for name in names if name)

    def __repr__(self):
        # The previous format string had a single placeholder for three
        # arguments, so relation and modifier never appeared.
        return "<BasicElement: {}-[{}]->{}>".format(
            self.head, self.relation, self.modifier)


# Non-code text of the dump that shared the last physical line of this span,
# kept verbatim (the data file continues on the next line of the dump):
# -------------------------------------------------------------------------------- /sumeval/metrics/lang/data/en/stop_words.txt: -------------------------------------------------------------------------------- 1 | reuters 2 | ap 3 | jan 4 | feb 5 | mar 6 | apr 7 | may 8 | jun 9 | jul 10 | aug 11 | sep 12 | oct 13 | nov 14 | dec 15 | tech 16 | news 17 | index 18 | mon 19 | tue 20 | wed 21 | thu 22 | fri 23 | sat 24 | 's 25 | a 26 | a's 27 | able 28 | about 29 | above 30 | according 31 | accordingly 32 | across 33 | actually 34 | after 35 | afterwards 36 | again 37 | against 38 | ain't 39 | all 40 | allow 41 | allows 42 | almost 43 | alone 44 | along 45 | already 46 | also 47 | although 48 | always 49 | am 50 | amid 51 | among 52 | amongst 53 | an 54 | and 55 | another 56 | any 57 | anybody 58 | anyhow 59 | anyone 60 | anything 61 | anyway 62 | anyways 63 |
anywhere 64 | apart 65 | appear 66 | appreciate 67 | appropriate 68 | are 69 | aren't 70 | around 71 | as 72 | aside 73 | ask 74 | asking 75 | associated 76 | at 77 | available 78 | away 79 | awfully 80 | b 81 | be 82 | became 83 | because 84 | become 85 | becomes 86 | becoming 87 | been 88 | before 89 | beforehand 90 | behind 91 | being 92 | believe 93 | below 94 | beside 95 | besides 96 | best 97 | better 98 | between 99 | beyond 100 | both 101 | brief 102 | but 103 | by 104 | c 105 | c'mon 106 | c's 107 | came 108 | can 109 | can't 110 | cannot 111 | cant 112 | cause 113 | causes 114 | certain 115 | certainly 116 | changes 117 | clearly 118 | co 119 | com 120 | come 121 | comes 122 | concerning 123 | consequently 124 | consider 125 | considering 126 | contain 127 | containing 128 | contains 129 | corresponding 130 | could 131 | couldn't 132 | course 133 | currently 134 | d 135 | definitely 136 | described 137 | despite 138 | did 139 | didn't 140 | different 141 | do 142 | does 143 | doesn't 144 | doing 145 | don't 146 | done 147 | down 148 | downwards 149 | during 150 | e 151 | each 152 | edu 153 | eg 154 | e.g. 155 | eight 156 | either 157 | else 158 | elsewhere 159 | enough 160 | entirely 161 | especially 162 | et 163 | etc 164 | etc. 
165 | even 166 | ever 167 | every 168 | everybody 169 | everyone 170 | everything 171 | everywhere 172 | ex 173 | exactly 174 | example 175 | except 176 | f 177 | far 178 | few 179 | fifth 180 | five 181 | followed 182 | following 183 | follows 184 | for 185 | former 186 | formerly 187 | forth 188 | four 189 | from 190 | further 191 | furthermore 192 | g 193 | get 194 | gets 195 | getting 196 | given 197 | gives 198 | go 199 | goes 200 | going 201 | gone 202 | got 203 | gotten 204 | greetings 205 | h 206 | had 207 | hadn't 208 | happens 209 | hardly 210 | has 211 | hasn't 212 | have 213 | haven't 214 | having 215 | he 216 | he's 217 | hello 218 | help 219 | hence 220 | her 221 | here 222 | here's 223 | hereafter 224 | hereby 225 | herein 226 | hereupon 227 | hers 228 | herself 229 | hi 230 | him 231 | himself 232 | his 233 | hither 234 | hopefully 235 | how 236 | howbeit 237 | however 238 | i 239 | i'd 240 | i'll 241 | i'm 242 | i've 243 | ie 244 | i.e. 245 | if 246 | ignored 247 | immediate 248 | in 249 | inasmuch 250 | inc 251 | indeed 252 | indicate 253 | indicated 254 | indicates 255 | inner 256 | insofar 257 | instead 258 | into 259 | inward 260 | is 261 | isn't 262 | it 263 | it'd 264 | it'll 265 | it's 266 | its 267 | itself 268 | j 269 | just 270 | k 271 | keep 272 | keeps 273 | kept 274 | know 275 | knows 276 | known 277 | l 278 | lately 279 | later 280 | latter 281 | latterly 282 | least 283 | less 284 | lest 285 | let 286 | let's 287 | like 288 | liked 289 | likely 290 | little 291 | look 292 | looking 293 | looks 294 | ltd 295 | m 296 | mainly 297 | many 298 | may 299 | maybe 300 | me 301 | mean 302 | meanwhile 303 | merely 304 | might 305 | more 306 | moreover 307 | most 308 | mostly 309 | mr. 310 | ms. 
311 | much 312 | must 313 | my 314 | myself 315 | n 316 | namely 317 | nd 318 | near 319 | nearly 320 | necessary 321 | need 322 | needs 323 | neither 324 | never 325 | nevertheless 326 | new 327 | next 328 | nine 329 | no 330 | nobody 331 | non 332 | none 333 | noone 334 | nor 335 | normally 336 | not 337 | nothing 338 | novel 339 | now 340 | nowhere 341 | o 342 | obviously 343 | of 344 | off 345 | often 346 | oh 347 | ok 348 | okay 349 | old 350 | on 351 | once 352 | one 353 | ones 354 | only 355 | onto 356 | or 357 | other 358 | others 359 | otherwise 360 | ought 361 | our 362 | ours 363 | ourselves 364 | out 365 | outside 366 | over 367 | overall 368 | own 369 | p 370 | particular 371 | particularly 372 | per 373 | perhaps 374 | placed 375 | please 376 | plus 377 | possible 378 | presumably 379 | probably 380 | provides 381 | q 382 | que 383 | quite 384 | qv 385 | r 386 | rather 387 | rd 388 | re 389 | really 390 | reasonably 391 | regarding 392 | regardless 393 | regards 394 | relatively 395 | respectively 396 | right 397 | s 398 | said 399 | same 400 | saw 401 | say 402 | saying 403 | says 404 | second 405 | secondly 406 | see 407 | seeing 408 | seem 409 | seemed 410 | seeming 411 | seems 412 | seen 413 | self 414 | selves 415 | sensible 416 | sent 417 | serious 418 | seriously 419 | seven 420 | several 421 | shall 422 | she 423 | should 424 | shouldn't 425 | since 426 | six 427 | so 428 | some 429 | somebody 430 | somehow 431 | someone 432 | something 433 | sometime 434 | sometimes 435 | somewhat 436 | somewhere 437 | soon 438 | sorry 439 | specified 440 | specify 441 | specifying 442 | still 443 | sub 444 | such 445 | sup 446 | sure 447 | t 448 | t's 449 | take 450 | taken 451 | tell 452 | tends 453 | th 454 | than 455 | thank 456 | thanks 457 | thanx 458 | that 459 | that's 460 | thats 461 | the 462 | their 463 | theirs 464 | them 465 | themselves 466 | then 467 | thence 468 | there 469 | there's 470 | thereafter 471 | thereby 472 | therefore 473 | therein 
474 | theres 475 | thereupon 476 | these 477 | they 478 | they'd 479 | they'll 480 | they're 481 | they've 482 | think 483 | third 484 | this 485 | thorough 486 | thoroughly 487 | those 488 | though 489 | three 490 | through 491 | throughout 492 | thru 493 | thus 494 | to 495 | together 496 | too 497 | took 498 | toward 499 | towards 500 | tried 501 | tries 502 | truly 503 | try 504 | trying 505 | twice 506 | two 507 | u 508 | un 509 | under 510 | unfortunately 511 | unless 512 | unlikely 513 | until 514 | unto 515 | up 516 | upon 517 | us 518 | use 519 | used 520 | useful 521 | uses 522 | using 523 | usually 524 | uucp 525 | v 526 | value 527 | various 528 | very 529 | via 530 | viz 531 | vs 532 | w 533 | want 534 | wants 535 | was 536 | wasn't 537 | way 538 | we 539 | we'd 540 | we'll 541 | we're 542 | we've 543 | welcome 544 | well 545 | went 546 | were 547 | weren't 548 | what 549 | what's 550 | whatever 551 | when 552 | whence 553 | whenever 554 | where 555 | where's 556 | whereafter 557 | whereas 558 | whereby 559 | wherein 560 | whereupon 561 | wherever 562 | whether 563 | which 564 | while 565 | whither 566 | who 567 | who's 568 | whoever 569 | whole 570 | whom 571 | whose 572 | why 573 | will 574 | willing 575 | wish 576 | with 577 | within 578 | without 579 | won't 580 | wonder 581 | would 582 | would 583 | wouldn't 584 | x 585 | y 586 | yes 587 | yet 588 | you 589 | you'd 590 | you'll 591 | you're 592 | you've 593 | your 594 | yours 595 | yourself 596 | yourselves 597 | z 598 | zero 599 | -------------------------------------------------------------------------------- /sumeval/metrics/lang/data/ja/stop_words.txt: -------------------------------------------------------------------------------- 1 | あそこ 2 | あたり 3 | あちら 4 | あっち 5 | あと 6 | あな 7 | あなた 8 | あれ 9 | いくつ 10 | いつ 11 | いま 12 | いや 13 | いろいろ 14 | うち 15 | おおまか 16 | おまえ 17 | おれ 18 | がい 19 | かく 20 | かたち 21 | かやの 22 | から 23 | がら 24 | きた 25 | くせ 26 | ここ 27 | こっち 28 | こと 29 | ごと 30 | こちら 31 | ごっちゃ 32 | これ 33 
| これら 34 | ごろ 35 | さまざま 36 | さらい 37 | さん 38 | しかた 39 | しよう 40 | すか 41 | ずつ 42 | すね 43 | すべて 44 | ぜんぶ 45 | そう 46 | そこ 47 | そちら 48 | そっち 49 | そで 50 | それ 51 | それぞれ 52 | それなり 53 | たくさん 54 | たち 55 | たび 56 | ため 57 | だめ 58 | ちゃ 59 | ちゃん 60 | てん 61 | とおり 62 | とき 63 | どこ 64 | どこか 65 | ところ 66 | どちら 67 | どっか 68 | どっち 69 | どれ 70 | なか 71 | なかば 72 | なに 73 | など 74 | なん 75 | はじめ 76 | はず 77 | はるか 78 | ひと 79 | ひとつ 80 | ふく 81 | ぶり 82 | べつ 83 | へん 84 | ぺん 85 | ほう 86 | ほか 87 | まさ 88 | まし 89 | まとも 90 | まま 91 | みたい 92 | みつ 93 | みなさん 94 | みんな 95 | もと 96 | もの 97 | もん 98 | やつ 99 | よう 100 | よそ 101 | わけ 102 | わたし 103 | ハイ 104 | 上 105 | 中 106 | 下 107 | 字 108 | 年 109 | 月 110 | 日 111 | 時 112 | 分 113 | 秒 114 | 週 115 | 火 116 | 水 117 | 木 118 | 金 119 | 土 120 | 国 121 | 都 122 | 道 123 | 府 124 | 県 125 | 市 126 | 区 127 | 町 128 | 村 129 | 各 130 | 第 131 | 方 132 | 何 133 | 的 134 | 度 135 | 文 136 | 者 137 | 性 138 | 体 139 | 人 140 | 他 141 | 今 142 | 部 143 | 課 144 | 係 145 | 外 146 | 類 147 | 達 148 | 気 149 | 室 150 | 口 151 | 誰 152 | 用 153 | 界 154 | 会 155 | 首 156 | 男 157 | 女 158 | 別 159 | 話 160 | 私 161 | 屋 162 | 店 163 | 家 164 | 場 165 | 等 166 | 見 167 | 際 168 | 観 169 | 段 170 | 略 171 | 例 172 | 系 173 | 論 174 | 形 175 | 間 176 | 地 177 | 員 178 | 線 179 | 点 180 | 書 181 | 品 182 | 力 183 | 法 184 | 感 185 | 作 186 | 元 187 | 手 188 | 数 189 | 彼 190 | 彼女 191 | 子 192 | 内 193 | 楽 194 | 喜 195 | 怒 196 | 哀 197 | 輪 198 | 頃 199 | 化 200 | 境 201 | 俺 202 | 奴 203 | 高 204 | 校 205 | 婦 206 | 伸 207 | 紀 208 | 誌 209 | レ 210 | 行 211 | 列 212 | 事 213 | 士 214 | 台 215 | 集 216 | 様 217 | 所 218 | 歴 219 | 器 220 | 名 221 | 情 222 | 連 223 | 毎 224 | 式 225 | 簿 226 | 回 227 | 匹 228 | 個 229 | 席 230 | 束 231 | 歳 232 | 目 233 | 通 234 | 面 235 | 円 236 | 玉 237 | 枚 238 | 前 239 | 後 240 | 左 241 | 右 242 | 次 243 | 先 244 | 春 245 | 夏 246 | 秋 247 | 冬 248 | 一 249 | 二 250 | 三 251 | 四 252 | 五 253 | 六 254 | 七 255 | 八 256 | 九 257 | 十 258 | 百 259 | 千 260 | 万 261 | 億 262 | 兆 263 | 下記 264 | 上記 265 | 時間 266 | 今回 267 | 前回 268 | 場合 269 | 一つ 270 | 年生 271 | 自分 272 | ヶ所 273 | ヵ所 274 | カ所 275 | 箇所 276 | 
ヶ月 277 | ヵ月 278 | カ月 279 | 箇月 280 | 名前 281 | 本当 282 | 確か 283 | 時点 284 | 全部 285 | 関係 286 | 近く 287 | 方法 288 | 我々 289 | 違い 290 | 多く 291 | 扱い 292 | 新た 293 | その後 294 | 半ば 295 | 結局 296 | 様々 297 | 以前 298 | 以後 299 | 以降 300 | 未満 301 | 以上 302 | 以下 303 | 幾つ 304 | 毎日 305 | 自体 306 | 向こう 307 | 何人 308 | 手段 309 | 同じ 310 | 感じ -------------------------------------------------------------------------------- /sumeval/metrics/lang/data/zh/stop_words.txt: -------------------------------------------------------------------------------- 1 | ! 2 | " 3 | # 4 | $ 5 | % 6 | & 7 | ' 8 | ( 9 | ) 10 | * 11 | + 12 | , 13 | - 14 | -- 15 | . 16 | .. 17 | ... 18 | ...... 19 | ................... 20 | ./ 21 | .一 22 | .数 23 | .日 24 | / 25 | // 26 | 0 27 | 1 28 | 2 29 | 3 30 | 4 31 | 5 32 | 6 33 | 7 34 | 8 35 | 9 36 | : 37 | :// 38 | :: 39 | ; 40 | < 41 | = 42 | > 43 | >> 44 | ? 45 | @ 46 | A 47 | Lex 48 | [ 49 | \ 50 | ] 51 | ^ 52 | _ 53 | ` 54 | exp 55 | sub 56 | sup 57 | | 58 | } 59 | ~ 60 | ~~~~ 61 | · 62 | × 63 | ××× 64 | Δ 65 | Ψ 66 | γ 67 | μ 68 | φ 69 | φ. 70 | В 71 | — 72 | —— 73 | ——— 74 | ‘ 75 | ’ 76 | ’‘ 77 | “ 78 | ” 79 | ”, 80 | … 81 | …… 82 | …………………………………………………③ 83 | ′∈ 84 | ′| 85 | ℃ 86 | Ⅲ 87 | ↑ 88 | → 89 | ∈[ 90 | ∪φ∈ 91 | ≈ 92 | ① 93 | ② 94 | ②c 95 | ③ 96 | ③] 97 | ④ 98 | ⑤ 99 | ⑥ 100 | ⑦ 101 | ⑧ 102 | ⑨ 103 | ⑩ 104 | ── 105 | ■ 106 | ▲ 107 |   108 | 、 109 | 。 110 | 〈 111 | 〉 112 | 《 113 | 》 114 | 》), 115 | 」 116 | 『 117 | 』 118 | 【 119 | 】 120 | 〔 121 | 〕 122 | 〕〔 123 | ㈧ 124 | 一 125 | 一. 
126 | 一一 127 | 一下 128 | 一个 129 | 一些 130 | 一何 131 | 一切 132 | 一则 133 | 一则通过 134 | 一天 135 | 一定 136 | 一方面 137 | 一旦 138 | 一时 139 | 一来 140 | 一样 141 | 一次 142 | 一片 143 | 一番 144 | 一直 145 | 一致 146 | 一般 147 | 一起 148 | 一转眼 149 | 一边 150 | 一面 151 | 七 152 | 万一 153 | 三 154 | 三天两头 155 | 三番两次 156 | 三番五次 157 | 上 158 | 上下 159 | 上升 160 | 上去 161 | 上来 162 | 上述 163 | 上面 164 | 下 165 | 下列 166 | 下去 167 | 下来 168 | 下面 169 | 不 170 | 不一 171 | 不下 172 | 不久 173 | 不了 174 | 不亦乐乎 175 | 不仅 176 | 不仅...而且 177 | 不仅仅 178 | 不仅仅是 179 | 不会 180 | 不但 181 | 不但...而且 182 | 不光 183 | 不免 184 | 不再 185 | 不力 186 | 不单 187 | 不变 188 | 不只 189 | 不可 190 | 不可开交 191 | 不可抗拒 192 | 不同 193 | 不外 194 | 不外乎 195 | 不够 196 | 不大 197 | 不如 198 | 不妨 199 | 不定 200 | 不对 201 | 不少 202 | 不尽 203 | 不尽然 204 | 不巧 205 | 不已 206 | 不常 207 | 不得 208 | 不得不 209 | 不得了 210 | 不得已 211 | 不必 212 | 不怎么 213 | 不怕 214 | 不惟 215 | 不成 216 | 不拘 217 | 不择手段 218 | 不敢 219 | 不料 220 | 不断 221 | 不日 222 | 不时 223 | 不是 224 | 不曾 225 | 不止 226 | 不止一次 227 | 不比 228 | 不消 229 | 不满 230 | 不然 231 | 不然的话 232 | 不特 233 | 不独 234 | 不由得 235 | 不知不觉 236 | 不管 237 | 不管怎样 238 | 不经意 239 | 不胜 240 | 不能 241 | 不能不 242 | 不至于 243 | 不若 244 | 不要 245 | 不论 246 | 不起 247 | 不足 248 | 不过 249 | 不迭 250 | 不问 251 | 不限 252 | 与 253 | 与其 254 | 与其说 255 | 与否 256 | 与此同时 257 | 专门 258 | 且 259 | 且不说 260 | 且说 261 | 两者 262 | 严格 263 | 严重 264 | 个 265 | 个人 266 | 个别 267 | 中小 268 | 中间 269 | 丰富 270 | 串行 271 | 临 272 | 临到 273 | 为 274 | 为主 275 | 为了 276 | 为什么 277 | 为什麽 278 | 为何 279 | 为止 280 | 为此 281 | 为着 282 | 主张 283 | 主要 284 | 举凡 285 | 举行 286 | 乃 287 | 乃至 288 | 乃至于 289 | 么 290 | 之 291 | 之一 292 | 之前 293 | 之后 294 | 之後 295 | 之所以 296 | 之类 297 | 乌乎 298 | 乎 299 | 乒 300 | 乘 301 | 乘势 302 | 乘机 303 | 乘胜 304 | 乘虚 305 | 乘隙 306 | 九 307 | 也 308 | 也好 309 | 也就是说 310 | 也是 311 | 也罢 312 | 了 313 | 了解 314 | 争取 315 | 二 316 | 二来 317 | 二话不说 318 | 二话没说 319 | 于 320 | 于是 321 | 于是乎 322 | 云云 323 | 云尔 324 | 互 325 | 互相 326 | 五 327 | 些 328 | 交口 329 | 亦 330 | 产生 331 | 亲口 332 | 亲手 333 | 亲眼 334 | 亲自 335 | 亲身 336 | 人 337 | 人人 338 | 人们 339 | 人家 340 | 人民 341 | 什么 342 | 什么样 343 | 
什麽 344 | 仅 345 | 仅仅 346 | 今 347 | 今后 348 | 今天 349 | 今年 350 | 今後 351 | 介于 352 | 仍 353 | 仍旧 354 | 仍然 355 | 从 356 | 从不 357 | 从严 358 | 从中 359 | 从事 360 | 从今以后 361 | 从优 362 | 从古到今 363 | 从古至今 364 | 从头 365 | 从宽 366 | 从小 367 | 从新 368 | 从无到有 369 | 从早到晚 370 | 从未 371 | 从来 372 | 从此 373 | 从此以后 374 | 从而 375 | 从轻 376 | 从速 377 | 从重 378 | 他 379 | 他人 380 | 他们 381 | 他是 382 | 他的 383 | 代替 384 | 以 385 | 以上 386 | 以下 387 | 以为 388 | 以便 389 | 以免 390 | 以前 391 | 以及 392 | 以后 393 | 以外 394 | 以後 395 | 以故 396 | 以期 397 | 以来 398 | 以至 399 | 以至于 400 | 以致 401 | 们 402 | 任 403 | 任何 404 | 任凭 405 | 任务 406 | 企图 407 | 伙同 408 | 会 409 | 伟大 410 | 传 411 | 传说 412 | 传闻 413 | 似乎 414 | 似的 415 | 但 416 | 但凡 417 | 但愿 418 | 但是 419 | 何 420 | 何乐而不为 421 | 何以 422 | 何况 423 | 何处 424 | 何妨 425 | 何尝 426 | 何必 427 | 何时 428 | 何止 429 | 何苦 430 | 何须 431 | 余外 432 | 作为 433 | 你 434 | 你们 435 | 你是 436 | 你的 437 | 使 438 | 使得 439 | 使用 440 | 例如 441 | 依 442 | 依据 443 | 依照 444 | 依靠 445 | 便 446 | 便于 447 | 促进 448 | 保持 449 | 保管 450 | 保险 451 | 俺 452 | 俺们 453 | 倍加 454 | 倍感 455 | 倒不如 456 | 倒不如说 457 | 倒是 458 | 倘 459 | 倘使 460 | 倘或 461 | 倘然 462 | 倘若 463 | 借 464 | 借以 465 | 借此 466 | 假使 467 | 假如 468 | 假若 469 | 偏偏 470 | 做到 471 | 偶尔 472 | 偶而 473 | 傥然 474 | 像 475 | 儿 476 | 允许 477 | 元/吨 478 | 充其极 479 | 充其量 480 | 充分 481 | 先不先 482 | 先后 483 | 先後 484 | 先生 485 | 光 486 | 光是 487 | 全体 488 | 全力 489 | 全年 490 | 全然 491 | 全身心 492 | 全部 493 | 全都 494 | 全面 495 | 八 496 | 八成 497 | 公然 498 | 六 499 | 兮 500 | 共 501 | 共同 502 | 共总 503 | 关于 504 | 其 505 | 其一 506 | 其中 507 | 其二 508 | 其他 509 | 其余 510 | 其后 511 | 其它 512 | 其实 513 | 其次 514 | 具体 515 | 具体地说 516 | 具体来说 517 | 具体说来 518 | 具有 519 | 兼之 520 | 内 521 | 再 522 | 再其次 523 | 再则 524 | 再有 525 | 再次 526 | 再者 527 | 再者说 528 | 再说 529 | 冒 530 | 冲 531 | 决不 532 | 决定 533 | 决非 534 | 况且 535 | 准备 536 | 凑巧 537 | 凝神 538 | 几 539 | 几乎 540 | 几度 541 | 几时 542 | 几番 543 | 几经 544 | 凡 545 | 凡是 546 | 凭 547 | 凭借 548 | 出 549 | 出于 550 | 出去 551 | 出来 552 | 出现 553 | 分别 554 | 分头 555 | 分期 556 | 分期分批 557 | 切 558 | 切不可 559 | 切切 560 | 切勿 561 | 切莫 562 | 则 563 | 则甚 564 | 刚 565 | 刚好 
566 | 刚巧 567 | 刚才 568 | 初 569 | 别 570 | 别人 571 | 别处 572 | 别是 573 | 别的 574 | 别管 575 | 别说 576 | 到 577 | 到了儿 578 | 到处 579 | 到头 580 | 到头来 581 | 到底 582 | 到目前为止 583 | 前后 584 | 前此 585 | 前者 586 | 前进 587 | 前面 588 | 加上 589 | 加之 590 | 加以 591 | 加入 592 | 加强 593 | 动不动 594 | 动辄 595 | 勃然 596 | 匆匆 597 | 十分 598 | 千 599 | 千万 600 | 千万千万 601 | 半 602 | 单 603 | 单单 604 | 单纯 605 | 即 606 | 即令 607 | 即使 608 | 即便 609 | 即刻 610 | 即如 611 | 即将 612 | 即或 613 | 即是说 614 | 即若 615 | 却 616 | 却不 617 | 历 618 | 原来 619 | 去 620 | 又 621 | 又及 622 | 及 623 | 及其 624 | 及时 625 | 及至 626 | 双方 627 | 反之 628 | 反之亦然 629 | 反之则 630 | 反倒 631 | 反倒是 632 | 反应 633 | 反手 634 | 反映 635 | 反而 636 | 反过来 637 | 反过来说 638 | 取得 639 | 取道 640 | 受到 641 | 变成 642 | 古来 643 | 另 644 | 另一个 645 | 另一方面 646 | 另外 647 | 另悉 648 | 另方面 649 | 另行 650 | 只 651 | 只当 652 | 只怕 653 | 只是 654 | 只有 655 | 只消 656 | 只要 657 | 只限 658 | 叫 659 | 叫做 660 | 召开 661 | 叮咚 662 | 叮当 663 | 可 664 | 可以 665 | 可好 666 | 可是 667 | 可能 668 | 可见 669 | 各 670 | 各个 671 | 各人 672 | 各位 673 | 各地 674 | 各式 675 | 各种 676 | 各级 677 | 各自 678 | 合理 679 | 同 680 | 同一 681 | 同时 682 | 同样 683 | 后 684 | 后来 685 | 后者 686 | 后面 687 | 向 688 | 向使 689 | 向着 690 | 吓 691 | 吗 692 | 否则 693 | 吧 694 | 吧哒 695 | 吱 696 | 呀 697 | 呃 698 | 呆呆地 699 | 呐 700 | 呕 701 | 呗 702 | 呜 703 | 呜呼 704 | 呢 705 | 周围 706 | 呵 707 | 呵呵 708 | 呸 709 | 呼哧 710 | 呼啦 711 | 咋 712 | 和 713 | 咚 714 | 咦 715 | 咧 716 | 咱 717 | 咱们 718 | 咳 719 | 哇 720 | 哈 721 | 哈哈 722 | 哉 723 | 哎 724 | 哎呀 725 | 哎哟 726 | 哗 727 | 哗啦 728 | 哟 729 | 哦 730 | 哩 731 | 哪 732 | 哪个 733 | 哪些 734 | 哪儿 735 | 哪天 736 | 哪年 737 | 哪怕 738 | 哪样 739 | 哪边 740 | 哪里 741 | 哼 742 | 哼唷 743 | 唉 744 | 唯有 745 | 啊 746 | 啊呀 747 | 啊哈 748 | 啊哟 749 | 啐 750 | 啥 751 | 啦 752 | 啪达 753 | 啷当 754 | 喀 755 | 喂 756 | 喏 757 | 喔唷 758 | 喽 759 | 嗡 760 | 嗡嗡 761 | 嗬 762 | 嗯 763 | 嗳 764 | 嘎 765 | 嘎嘎 766 | 嘎登 767 | 嘘 768 | 嘛 769 | 嘻 770 | 嘿 771 | 嘿嘿 772 | 四 773 | 因 774 | 因为 775 | 因了 776 | 因此 777 | 因着 778 | 因而 779 | 固 780 | 固然 781 | 在 782 | 在下 783 | 在于 784 | 地 785 | 均 786 | 坚决 787 | 坚持 788 | 基于 789 | 基本 790 | 基本上 791 | 处在 792 | 处处 793 | 处理 
794 | 复杂 795 | 多 796 | 多么 797 | 多亏 798 | 多多 799 | 多多少少 800 | 多多益善 801 | 多少 802 | 多年前 803 | 多年来 804 | 多数 805 | 多次 806 | 够瞧的 807 | 大 808 | 大不了 809 | 大举 810 | 大事 811 | 大体 812 | 大体上 813 | 大凡 814 | 大力 815 | 大多 816 | 大多数 817 | 大大 818 | 大家 819 | 大张旗鼓 820 | 大批 821 | 大抵 822 | 大概 823 | 大略 824 | 大约 825 | 大致 826 | 大都 827 | 大量 828 | 大面儿上 829 | 失去 830 | 奇 831 | 奈 832 | 奋勇 833 | 她 834 | 她们 835 | 她是 836 | 她的 837 | 好 838 | 好在 839 | 好的 840 | 好象 841 | 如 842 | 如上 843 | 如上所述 844 | 如下 845 | 如今 846 | 如何 847 | 如其 848 | 如前所述 849 | 如同 850 | 如常 851 | 如是 852 | 如期 853 | 如果 854 | 如次 855 | 如此 856 | 如此等等 857 | 如若 858 | 始而 859 | 姑且 860 | 存在 861 | 存心 862 | 孰料 863 | 孰知 864 | 宁 865 | 宁可 866 | 宁愿 867 | 宁肯 868 | 它 869 | 它们 870 | 它们的 871 | 它是 872 | 它的 873 | 安全 874 | 完全 875 | 完成 876 | 定 877 | 实现 878 | 实际 879 | 宣布 880 | 容易 881 | 密切 882 | 对 883 | 对于 884 | 对应 885 | 对待 886 | 对方 887 | 对比 888 | 将 889 | 将才 890 | 将要 891 | 将近 892 | 小 893 | 少数 894 | 尔 895 | 尔后 896 | 尔尔 897 | 尔等 898 | 尚且 899 | 尤其 900 | 就 901 | 就地 902 | 就是 903 | 就是了 904 | 就是说 905 | 就此 906 | 就算 907 | 就要 908 | 尽 909 | 尽可能 910 | 尽如人意 911 | 尽心尽力 912 | 尽心竭力 913 | 尽快 914 | 尽早 915 | 尽然 916 | 尽管 917 | 尽管如此 918 | 尽量 919 | 局外 920 | 居然 921 | 届时 922 | 属于 923 | 屡 924 | 屡屡 925 | 屡次 926 | 屡次三番 927 | 岂 928 | 岂但 929 | 岂止 930 | 岂非 931 | 川流不息 932 | 左右 933 | 巨大 934 | 巩固 935 | 差一点 936 | 差不多 937 | 己 938 | 已 939 | 已矣 940 | 已经 941 | 巴 942 | 巴巴 943 | 带 944 | 帮助 945 | 常 946 | 常常 947 | 常言说 948 | 常言说得好 949 | 常言道 950 | 平素 951 | 年复一年 952 | 并 953 | 并不 954 | 并不是 955 | 并且 956 | 并排 957 | 并无 958 | 并没 959 | 并没有 960 | 并肩 961 | 并非 962 | 广大 963 | 广泛 964 | 应当 965 | 应用 966 | 应该 967 | 庶乎 968 | 庶几 969 | 开外 970 | 开始 971 | 开展 972 | 引起 973 | 弗 974 | 弹指之间 975 | 强烈 976 | 强调 977 | 归 978 | 归根到底 979 | 归根结底 980 | 归齐 981 | 当 982 | 当下 983 | 当中 984 | 当儿 985 | 当前 986 | 当即 987 | 当口儿 988 | 当地 989 | 当场 990 | 当头 991 | 当庭 992 | 当时 993 | 当然 994 | 当真 995 | 当着 996 | 形成 997 | 彻夜 998 | 彻底 999 | 彼 1000 | 彼时 1001 | 彼此 1002 | 往 1003 | 往往 1004 | 待 1005 | 待到 1006 | 很 1007 | 很多 1008 | 很少 1009 | 後来 1010 | 後面 1011 | 得 
1012 | 得了 1013 | 得出 1014 | 得到 1015 | 得天独厚 1016 | 得起 1017 | 心里 1018 | 必 1019 | 必定 1020 | 必将 1021 | 必然 1022 | 必要 1023 | 必须 1024 | 快 1025 | 快要 1026 | 忽地 1027 | 忽然 1028 | 怎 1029 | 怎么 1030 | 怎么办 1031 | 怎么样 1032 | 怎奈 1033 | 怎样 1034 | 怎麽 1035 | 怕 1036 | 急匆匆 1037 | 怪 1038 | 怪不得 1039 | 总之 1040 | 总是 1041 | 总的来看 1042 | 总的来说 1043 | 总的说来 1044 | 总结 1045 | 总而言之 1046 | 恍然 1047 | 恐怕 1048 | 恰似 1049 | 恰好 1050 | 恰如 1051 | 恰巧 1052 | 恰恰 1053 | 恰恰相反 1054 | 恰逢 1055 | 您 1056 | 您们 1057 | 您是 1058 | 惟其 1059 | 惯常 1060 | 意思 1061 | 愤然 1062 | 愿意 1063 | 慢说 1064 | 成为 1065 | 成年 1066 | 成年累月 1067 | 成心 1068 | 我 1069 | 我们 1070 | 我是 1071 | 我的 1072 | 或 1073 | 或则 1074 | 或多或少 1075 | 或是 1076 | 或曰 1077 | 或者 1078 | 或许 1079 | 战斗 1080 | 截然 1081 | 截至 1082 | 所 1083 | 所以 1084 | 所在 1085 | 所幸 1086 | 所有 1087 | 所谓 1088 | 才 1089 | 才能 1090 | 扑通 1091 | 打 1092 | 打从 1093 | 打开天窗说亮话 1094 | 扩大 1095 | 把 1096 | 抑或 1097 | 抽冷子 1098 | 拦腰 1099 | 拿 1100 | 按 1101 | 按时 1102 | 按期 1103 | 按照 1104 | 按理 1105 | 按说 1106 | 挨个 1107 | 挨家挨户 1108 | 挨次 1109 | 挨着 1110 | 挨门挨户 1111 | 挨门逐户 1112 | 换句话说 1113 | 换言之 1114 | 据 1115 | 据实 1116 | 据悉 1117 | 据我所知 1118 | 据此 1119 | 据称 1120 | 据说 1121 | 掌握 1122 | 接下来 1123 | 接着 1124 | 接著 1125 | 接连不断 1126 | 放量 1127 | 故 1128 | 故意 1129 | 故此 1130 | 故而 1131 | 敞开儿 1132 | 敢 1133 | 敢于 1134 | 敢情 1135 | 数/ 1136 | 整个 1137 | 断然 1138 | 方 1139 | 方便 1140 | 方才 1141 | 方能 1142 | 方面 1143 | 旁人 1144 | 无 1145 | 无宁 1146 | 无法 1147 | 无论 1148 | 既 1149 | 既...又 1150 | 既往 1151 | 既是 1152 | 既然 1153 | 日复一日 1154 | 日渐 1155 | 日益 1156 | 日臻 1157 | 日见 1158 | 时候 1159 | 昂然 1160 | 明显 1161 | 明确 1162 | 是 1163 | 是不是 1164 | 是以 1165 | 是否 1166 | 是的 1167 | 显然 1168 | 显著 1169 | 普通 1170 | 普遍 1171 | 暗中 1172 | 暗地里 1173 | 暗自 1174 | 更 1175 | 更为 1176 | 更加 1177 | 更进一步 1178 | 曾 1179 | 曾经 1180 | 替 1181 | 替代 1182 | 最 1183 | 最后 1184 | 最大 1185 | 最好 1186 | 最後 1187 | 最近 1188 | 最高 1189 | 有 1190 | 有些 1191 | 有关 1192 | 有利 1193 | 有力 1194 | 有及 1195 | 有所 1196 | 有效 1197 | 有时 1198 | 有点 1199 | 有的 1200 | 有的是 1201 | 有着 1202 | 有著 1203 | 望 1204 | 朝 1205 | 朝着 1206 | 末##末 1207 | 本 1208 | 本人 1209 
| 本地 1210 | 本着 1211 | 本身 1212 | 权时 1213 | 来 1214 | 来不及 1215 | 来得及 1216 | 来看 1217 | 来着 1218 | 来自 1219 | 来讲 1220 | 来说 1221 | 极 1222 | 极为 1223 | 极了 1224 | 极其 1225 | 极力 1226 | 极大 1227 | 极度 1228 | 极端 1229 | 构成 1230 | 果然 1231 | 果真 1232 | 某 1233 | 某个 1234 | 某些 1235 | 某某 1236 | 根据 1237 | 根本 1238 | 格外 1239 | 梆 1240 | 概 1241 | 次第 1242 | 欢迎 1243 | 欤 1244 | 正值 1245 | 正在 1246 | 正如 1247 | 正巧 1248 | 正常 1249 | 正是 1250 | 此 1251 | 此中 1252 | 此后 1253 | 此地 1254 | 此处 1255 | 此外 1256 | 此时 1257 | 此次 1258 | 此间 1259 | 殆 1260 | 毋宁 1261 | 每 1262 | 每个 1263 | 每天 1264 | 每年 1265 | 每当 1266 | 每时每刻 1267 | 每每 1268 | 每逢 1269 | 比 1270 | 比及 1271 | 比如 1272 | 比如说 1273 | 比方 1274 | 比照 1275 | 比起 1276 | 比较 1277 | 毕竟 1278 | 毫不 1279 | 毫无 1280 | 毫无例外 1281 | 毫无保留地 1282 | 汝 1283 | 沙沙 1284 | 没 1285 | 没奈何 1286 | 没有 1287 | 沿 1288 | 沿着 1289 | 注意 1290 | 活 1291 | 深入 1292 | 清楚 1293 | 满 1294 | 满足 1295 | 漫说 1296 | 焉 1297 | 然 1298 | 然则 1299 | 然后 1300 | 然後 1301 | 然而 1302 | 照 1303 | 照着 1304 | 牢牢 1305 | 特别是 1306 | 特殊 1307 | 特点 1308 | 犹且 1309 | 犹自 1310 | 独 1311 | 独自 1312 | 猛然 1313 | 猛然间 1314 | 率尔 1315 | 率然 1316 | 现代 1317 | 现在 1318 | 理应 1319 | 理当 1320 | 理该 1321 | 瑟瑟 1322 | 甚且 1323 | 甚么 1324 | 甚或 1325 | 甚而 1326 | 甚至 1327 | 甚至于 1328 | 用 1329 | 用来 1330 | 甫 1331 | 甭 1332 | 由 1333 | 由于 1334 | 由是 1335 | 由此 1336 | 由此可见 1337 | 略 1338 | 略为 1339 | 略加 1340 | 略微 1341 | 白 1342 | 白白 1343 | 的 1344 | 的确 1345 | 的话 1346 | 皆可 1347 | 目前 1348 | 直到 1349 | 直接 1350 | 相似 1351 | 相信 1352 | 相反 1353 | 相同 1354 | 相对 1355 | 相对而言 1356 | 相应 1357 | 相当 1358 | 相等 1359 | 省得 1360 | 看 1361 | 看上去 1362 | 看出 1363 | 看到 1364 | 看来 1365 | 看样子 1366 | 看看 1367 | 看见 1368 | 看起来 1369 | 真是 1370 | 真正 1371 | 眨眼 1372 | 着 1373 | 着呢 1374 | 矣 1375 | 矣乎 1376 | 矣哉 1377 | 知道 1378 | 砰 1379 | 确定 1380 | 碰巧 1381 | 社会主义 1382 | 离 1383 | 种 1384 | 积极 1385 | 移动 1386 | 究竟 1387 | 穷年累月 1388 | 突出 1389 | 突然 1390 | 窃 1391 | 立 1392 | 立刻 1393 | 立即 1394 | 立地 1395 | 立时 1396 | 立马 1397 | 竟 1398 | 竟然 1399 | 竟而 1400 | 第 1401 | 第二 1402 | 等 1403 | 等到 1404 | 等等 1405 | 策略地 1406 | 简直 1407 | 简而言之 1408 | 简言之 1409 | 管 
1410 | 类如 1411 | 粗 1412 | 精光 1413 | 紧接着 1414 | 累年 1415 | 累次 1416 | 纯 1417 | 纯粹 1418 | 纵 1419 | 纵令 1420 | 纵使 1421 | 纵然 1422 | 练习 1423 | 组成 1424 | 经 1425 | 经常 1426 | 经过 1427 | 结合 1428 | 结果 1429 | 给 1430 | 绝 1431 | 绝不 1432 | 绝对 1433 | 绝非 1434 | 绝顶 1435 | 继之 1436 | 继后 1437 | 继续 1438 | 继而 1439 | 维持 1440 | 综上所述 1441 | 缕缕 1442 | 罢了 1443 | 老 1444 | 老大 1445 | 老是 1446 | 老老实实 1447 | 考虑 1448 | 者 1449 | 而 1450 | 而且 1451 | 而况 1452 | 而又 1453 | 而后 1454 | 而外 1455 | 而已 1456 | 而是 1457 | 而言 1458 | 而论 1459 | 联系 1460 | 联袂 1461 | 背地里 1462 | 背靠背 1463 | 能 1464 | 能否 1465 | 能够 1466 | 腾 1467 | 自 1468 | 自个儿 1469 | 自从 1470 | 自各儿 1471 | 自后 1472 | 自家 1473 | 自己 1474 | 自打 1475 | 自身 1476 | 臭 1477 | 至 1478 | 至于 1479 | 至今 1480 | 至若 1481 | 致 1482 | 般的 1483 | 良好 1484 | 若 1485 | 若夫 1486 | 若是 1487 | 若果 1488 | 若非 1489 | 范围 1490 | 莫 1491 | 莫不 1492 | 莫不然 1493 | 莫如 1494 | 莫若 1495 | 莫非 1496 | 获得 1497 | 藉以 1498 | 虽 1499 | 虽则 1500 | 虽然 1501 | 虽说 1502 | 蛮 1503 | 行为 1504 | 行动 1505 | 表明 1506 | 表示 1507 | 被 1508 | 要 1509 | 要不 1510 | 要不是 1511 | 要不然 1512 | 要么 1513 | 要是 1514 | 要求 1515 | 见 1516 | 规定 1517 | 觉得 1518 | 譬喻 1519 | 譬如 1520 | 认为 1521 | 认真 1522 | 认识 1523 | 让 1524 | 许多 1525 | 论 1526 | 论说 1527 | 设使 1528 | 设或 1529 | 设若 1530 | 诚如 1531 | 诚然 1532 | 话说 1533 | 该 1534 | 该当 1535 | 说明 1536 | 说来 1537 | 说说 1538 | 请勿 1539 | 诸 1540 | 诸位 1541 | 诸如 1542 | 谁 1543 | 谁人 1544 | 谁料 1545 | 谁知 1546 | 谨 1547 | 豁然 1548 | 贼死 1549 | 赖以 1550 | 赶 1551 | 赶快 1552 | 赶早不赶晚 1553 | 起 1554 | 起先 1555 | 起初 1556 | 起头 1557 | 起来 1558 | 起见 1559 | 起首 1560 | 趁 1561 | 趁便 1562 | 趁势 1563 | 趁早 1564 | 趁机 1565 | 趁热 1566 | 趁着 1567 | 越是 1568 | 距 1569 | 跟 1570 | 路经 1571 | 转动 1572 | 转变 1573 | 转贴 1574 | 轰然 1575 | 较 1576 | 较为 1577 | 较之 1578 | 较比 1579 | 边 1580 | 达到 1581 | 达旦 1582 | 迄 1583 | 迅速 1584 | 过 1585 | 过于 1586 | 过去 1587 | 过来 1588 | 运用 1589 | 近 1590 | 近几年来 1591 | 近年来 1592 | 近来 1593 | 还 1594 | 还是 1595 | 还有 1596 | 还要 1597 | 这 1598 | 这一来 1599 | 这个 1600 | 这么 1601 | 这么些 1602 | 这么样 1603 | 这么点儿 1604 | 这些 1605 | 这会儿 1606 | 这儿 1607 | 这就是说 1608 | 这时 1609 | 这样 1610 | 这次 
1611 | 这点 1612 | 这种 1613 | 这般 1614 | 这边 1615 | 这里 1616 | 这麽 1617 | 进入 1618 | 进去 1619 | 进来 1620 | 进步 1621 | 进而 1622 | 进行 1623 | 连 1624 | 连同 1625 | 连声 1626 | 连日 1627 | 连日来 1628 | 连袂 1629 | 连连 1630 | 迟早 1631 | 迫于 1632 | 适应 1633 | 适当 1634 | 适用 1635 | 逐步 1636 | 逐渐 1637 | 通常 1638 | 通过 1639 | 造成 1640 | 逢 1641 | 遇到 1642 | 遭到 1643 | 遵循 1644 | 遵照 1645 | 避免 1646 | 那 1647 | 那个 1648 | 那么 1649 | 那么些 1650 | 那么样 1651 | 那些 1652 | 那会儿 1653 | 那儿 1654 | 那时 1655 | 那末 1656 | 那样 1657 | 那般 1658 | 那边 1659 | 那里 1660 | 那麽 1661 | 部分 1662 | 都 1663 | 鄙人 1664 | 采取 1665 | 里面 1666 | 重大 1667 | 重新 1668 | 重要 1669 | 鉴于 1670 | 针对 1671 | 长期以来 1672 | 长此下去 1673 | 长线 1674 | 长话短说 1675 | 问题 1676 | 间或 1677 | 防止 1678 | 阿 1679 | 附近 1680 | 陈年 1681 | 限制 1682 | 陡然 1683 | 除 1684 | 除了 1685 | 除却 1686 | 除去 1687 | 除外 1688 | 除开 1689 | 除此 1690 | 除此之外 1691 | 除此以外 1692 | 除此而外 1693 | 除非 1694 | 随 1695 | 随后 1696 | 随时 1697 | 随着 1698 | 随著 1699 | 隔夜 1700 | 隔日 1701 | 难得 1702 | 难怪 1703 | 难说 1704 | 难道 1705 | 难道说 1706 | 集中 1707 | 零 1708 | 需要 1709 | 非但 1710 | 非常 1711 | 非徒 1712 | 非得 1713 | 非特 1714 | 非独 1715 | 靠 1716 | 顶多 1717 | 顷 1718 | 顷刻 1719 | 顷刻之间 1720 | 顷刻间 1721 | 顺 1722 | 顺着 1723 | 顿时 1724 | 颇 1725 | 风雨无阻 1726 | 饱 1727 | 首先 1728 | 马上 1729 | 高低 1730 | 高兴 1731 | 默然 1732 | 默默地 1733 | 齐 1734 | ︿ 1735 | ! 1736 | # 1737 | $ 1738 | % 1739 | & 1740 | ' 1741 | ( 1742 | ) 1743 | )÷(1- 1744 | )、 1745 | * 1746 | + 1747 | +ξ 1748 | ++ 1749 | , 1750 | ,也 1751 | - 1752 | -β 1753 | -- 1754 | -[*]- 1755 | . 1756 | / 1757 | 0 1758 | 0:2 1759 | 1 1760 | 1. 1761 | 12% 1762 | 2 1763 | 2.3% 1764 | 3 1765 | 4 1766 | 5 1767 | 5:0 1768 | 6 1769 | 7 1770 | 8 1771 | 9 1772 | : 1773 | ; 1774 | < 1775 | <± 1776 | <Δ 1777 | <λ 1778 | <φ 1779 | << 1780 | = 1781 | =″ 1782 | =☆ 1783 | =( 1784 | =- 1785 | =[ 1786 | ={ 1787 | > 1788 | >λ 1789 | ? 1790 | @ 1791 | A 1792 | LI 1793 | R.L. 
# ---- sumeval/metrics/lang/lang_en.py ----
class LangEN(BaseLang):
    """English language support: space tokenization plus ASCII clean-up."""

    def __init__(self):
        super().__init__("en")
        # Anything that is not an ASCII letter, digit or "-" becomes a space.
        self._symbol_replace = re.compile(r"[^A-Za-z0-9-]")
        # A token is kept only when it starts with a letter, digit or "$".
        self._valid_word = re.compile(r"^[A-Za-z0-9$]")

    def tokenize(self, text):
        """Split ``text`` on single spaces (empty strings are not removed)."""
        return text.split(" ")

    def tokenize_with_preprocess(self, text):
        """
        Clean ``text`` and return only the tokens accepted as words.

        Parameters
        ----------
        text: str
            raw text

        Returns
        -------
        words: str[]
            stripped, non-empty tokens matching ``self._valid_word``
        """
        stripped = (w.strip() for w in self.tokenize(self._preprocess(text)))
        return [w for w in stripped if w and self._valid_word.match(w)]

    def _preprocess(self, text):
        """Isolate hyphens, blank out other symbols and trim the text."""
        # Pad "-" so a hyphenated word splits into separate tokens.
        _text = text.replace("-", " - ")
        _text = self._symbol_replace.sub(" ", _text)
        return _text.strip()

    def parse_to_be(self, text):
        """
        Parse cleaned ``text`` into basic elements via the base class.

        Returns
        -------
        bes: list
            basic elements whose head and modifier both match
            ``self._valid_word``
        """
        is_word = self._valid_word.match
        bes = super().parse_to_be(self._preprocess(text))
        return [be for be in bes if is_word(be.head) and is_word(be.modifier)]


# ---- sumeval/metrics/lang/lang_ja.py ----
class LangJA(BaseLang):
    """Japanese language support backed by MeCab, or janome as fallback."""

    def __init__(self):
        super().__init__("ja")
        self._set_tokenizer()
        # Characters outside the listed ranges are replaced by a space.
        self._symbol_replace = re.compile(r"[^ぁ-んァ-ン一-龥ーa-zA-Za-zA-Z0-90-9]")

    def load_parser(self):
        """Load the spaCy ``ja_ginza`` model on first use and return it."""
        if self._PARSER is None:
            import spacy
            self._PARSER = spacy.load("ja_ginza")
        return self._PARSER

    def _set_tokenizer(self):
        """
        Set ``self.tokenizer`` to an object whose ``tokenize(text)`` yields
        items with a ``surface`` attribute.

        MeCab is tried first; janome is used when that fails.
        """
        try:
            import MeCab

            class Tokenizer():
                """Adapter giving MeCab a ``tokenize`` method."""

                def __init__(self):
                    self.tagger = MeCab.Tagger("-Ochasen")

                def tokenize(self, text):
                    # NOTE(review): the empty parse looks like the usual
                    # MeCab workaround for broken node surfaces - confirm.
                    self.tagger.parse("")
                    node = self.tagger.parseToNode(text)
                    tokens = []
                    while node:
                        # Nodes with an empty surface are skipped.
                        if node.surface:
                            tokens.append(node)
                        node = node.next
                    return tokens

            self.tokenizer = Tokenizer()

        except Exception:
            # Deliberately broad: any failure to import or initialise MeCab
            # falls back to janome.
            from janome.tokenizer import Tokenizer
            self.tokenizer = Tokenizer()

    def tokenize(self, text):
        """Return the surface form of every token in ``text``."""
        return [t.surface for t in self.tokenizer.tokenize(text)]

    def tokenize_with_preprocess(self, text):
        """Blank out symbols, tokenize and drop whitespace-only tokens."""
        _text = self._symbol_replace.sub(" ", text)
        stripped = (w.strip() for w in self.tokenize(_text))
        return [w for w in stripped if w]

    def join(self, words):
        """Concatenate ``words`` without any separator."""
        return "".join(words)

    def parse_to_be(self, text):
        """Blank out symbols, then parse basic elements via the base class."""
        _text = self._symbol_replace.sub(" ", text)
        return super().parse_to_be(_text)


# ---- sumeval/metrics/lang/lang_zh.py ----
class LangZH(BaseLang):
    """Chinese language support: jieba tokenization, HanLP dependencies."""

    def __init__(self):
        super().__init__("zh")
        # Punctuation listed here is replaced by a space before processing.
        self._symbol_replace = re.compile(r"[\.\!/_,$%\^\*\(\)\+\“\’\—\!。:?、,::~@#¥&()【】「」《》·]")
        import jieba
        self.tokenizer = jieba

    def load_parser(self):
        """Bind ``HanLP.parseDependency`` on first use and return it."""
        if self._PARSER is None:
            from pyhanlp import HanLP
            self._PARSER = HanLP.parseDependency
        return self._PARSER

    def tokenize(self, text):
        """Blank out punctuation and segment ``text`` with jieba."""
        _text = self._preprocess(text)
        return list(self.tokenizer.cut(_text, cut_all=False))

    def _preprocess(self, text):
        """Replace every character matched by the symbol pattern by a space."""
        return self._symbol_replace.sub(" ", text)

    def parse_to_be(self, text):
        """
        Extract basic elements from the dependency parse of ``text``.

        A pair is emitted when a token tagged "n" depends on a head tagged
        "v" or "a", or when a token tagged "v"/"a" depends on a head
        tagged "n". In both cases the "n" side is passed first.

        Returns
        -------
        bes: BasicElement[]
            extracted basic elements, in parse order
        """
        parsed = self.load_parser()(self._preprocess(text))
        predicate_tags = ["v", "a"]
        bes = []
        for token in parsed.iterator():
            head = token.HEAD
            if token.POSTAG == "n" and head.POSTAG in predicate_tags:
                bes.append(BasicElement(token.NAME, head.LEMMA,
                                        token.DEPREL))
            elif token.POSTAG in predicate_tags and head.POSTAG == "n":
                bes.append(BasicElement(head.NAME, token.LEMMA,
                                        token.DEPREL))

        return bes
class RougeCalculator:
    """
    Calculate ROUGE-N, ROUGE-L and ROUGE-BE scores for a summary.

    Parameters
    ----------
    stopwords: bool
        remove the language's stop words while tokenizing
    stemming: bool
        stem tokens (applied to references only)
    word_limit: int
        when > 0, keep only the first ``word_limit`` tokens of each text
    length_limit: int
        when > 0 (and ``word_limit`` is not), keep only the first
        ``length_limit`` characters of each text
    lang: str or BaseLang
        language code resolved through ``get_lang``, or a language object

    Raises
    ------
    TypeError
        if ``lang`` is neither a str nor a BaseLang instance
    """

    def __init__(self,
                 stopwords=True, stemming=False,
                 word_limit=-1, length_limit=-1, lang="en"):
        self.stemming = stemming
        self.stopwords = stopwords
        self.word_limit = word_limit
        self.length_limit = length_limit
        if isinstance(lang, str):
            self.lang = lang
            self._lang = get_lang(lang)
        elif isinstance(lang, BaseLang):
            self.lang = lang.lang
            self._lang = lang
        else:
            # Fail here instead of leaving the instance half-initialised and
            # raising an AttributeError on the first scoring call.
            raise TypeError(
                "lang must be a language code (str) or a BaseLang instance, "
                "got {}".format(type(lang).__name__))

    def tokenize(self, text_or_words, is_reference=False):
        """
        Tokenize a text under original Perl script manner.

        Parameters
        ----------
        text_or_words: str or str[]
            target text or tokenized words.
            Tokenized words skip the language preprocessing only when no
            limit is active: with word_limit or length_limit > 0 they are
            joined back into a text, truncated and tokenized again with
            preprocessing.
        is_reference: bool
            for reference process or not

        Returns
        -------
        words: str[]
            lower-cased, stripped tokens after the optional stop word
            removal and stemming

        See Also
        --------
        https://github.com/andersjo/pyrouge/blob/master/tools/ROUGE-1.5.5/ROUGE-1.5.5.pl#L1820
        """
        words = text_or_words

        if self.word_limit > 0:
            if isinstance(words, str):
                words = self._lang.tokenize(words)
            words = words[:self.word_limit]
            words = self._lang.join(words)
        elif self.length_limit > 0:
            text = words
            if isinstance(text, (list, tuple)):
                text = self._lang.join(words)
            words = text[:self.length_limit]

        if isinstance(words, str):
            words = self._lang.tokenize_with_preprocess(words)

        words = [w.lower().strip() for w in words if w.strip()]

        if self.stopwords:
            words = [w for w in words if not self._lang.is_stop_word(w)]

        if self.stemming and is_reference:
            # stemming is only adopted to reference
            # https://github.com/andersjo/pyrouge/blob/master/tools/ROUGE-1.5.5/ROUGE-1.5.5.pl#L1416

            # min_length ref
            # https://github.com/andersjo/pyrouge/blob/master/tools/ROUGE-1.5.5/ROUGE-1.5.5.pl#L2629
            words = [self._lang.stemming(w, min_length=3) for w in words]
        return words

    def parse_to_be(self, text, is_reference=False):
        """
        Parse ``text`` into normalized basic elements.

        Head and modifier of every element are lower-cased and stripped
        in place; they are stemmed as well when stemming is enabled and
        ``is_reference`` is true.
        """
        bes = self._lang.parse_to_be(text)
        stem = self.stemming and is_reference

        for be in bes:
            be.head = be.head.lower().strip()
            be.modifier = be.modifier.lower().strip()
            if stem:
                be.head = self._lang.stemming(be.head, min_length=3)
                be.modifier = self._lang.stemming(be.modifier, min_length=3)

        return list(bes)

    def len_ngram(self, words, n):
        """Return how many n-grams ``words`` contains (never negative)."""
        return max(len(words) - n + 1, 0)

    def ngram_iter(self, words, n):
        """Yield every n-gram of ``words`` as a tuple, left to right."""
        for i in range(self.len_ngram(words, n)):
            yield tuple(words[i:i + n])

    def count_ngrams(self, words, n):
        """Return a Counter mapping each n-gram tuple to its frequency."""
        return Counter(self.ngram_iter(words, n))

    def count_overlap(self, summary_ngrams, reference_ngrams):
        """
        Return the clipped overlap between two frequency mappings.

        Each key of ``summary_ngrams`` contributes the smaller of its two
        counts; keys missing from ``reference_ngrams`` count as 0, so
        plain dicts work as well as Counters.
        """
        return sum(min(count, reference_ngrams.get(key, 0))
                   for key, count in summary_ngrams.items())

    def rouge_1(self, summary, references, alpha=0.5):
        """Calculate ROUGE-1; shortcut for ``rouge_n`` with n=1."""
        return self.rouge_n(summary, references, 1, alpha)

    def rouge_2(self, summary, references, alpha=0.5):
        """Calculate ROUGE-2; shortcut for ``rouge_n`` with n=2."""
        return self.rouge_n(summary, references, 2, alpha)

    def rouge_n(self, summary, references, n, alpha=0.5):
        """
        Calculate ROUGE-N score.

        Parameters
        ----------
        summary: str
            summary text
        references: str or str[]
            reference or references to evaluate summary
        n: int
            ROUGE kind. n=1, calculate when ROUGE-1
        alpha: float (0~1)
            alpha -> 0: recall is more important
            alpha -> 1: precision is more important
            F = 1/(alpha * (1/P) + (1 - alpha) * (1/R))

        Returns
        -------
        f1: float
            f1 score
        """
        _summary = self.tokenize(summary)
        summary_ngrams = self.count_ngrams(_summary, n)
        _refs = [references] if isinstance(references, str) else references
        matches = 0
        count_for_recall = 0
        for r in _refs:
            _r = self.tokenize(r, True)
            r_ngrams = self.count_ngrams(_r, n)
            matches += self.count_overlap(summary_ngrams, r_ngrams)
            count_for_recall += self.len_ngram(_r, n)
        # The summary is compared once per reference, so its n-gram count
        # enters the precision denominator once per reference too.
        count_for_prec = len(_refs) * self.len_ngram(_summary, n)
        return self._calc_f1(matches, count_for_recall, count_for_prec, alpha)

    def _calc_f1(self, matches, count_for_recall, count_for_precision, alpha):
        """
        Combine match counts into the alpha-weighted F score.

        Every division by zero yields 0, so empty inputs score 0.
        """
        def safe_div(x1, x2):
            return 0 if x2 == 0 else x1 / x2

        recall = safe_div(matches, count_for_recall)
        precision = safe_div(matches, count_for_precision)
        # P * R / ((1 - alpha) * P + alpha * R) is the documented
        # 1 / (alpha / P + (1 - alpha) / R) without dividing by P or R.
        denom = (1.0 - alpha) * precision + alpha * recall
        return safe_div(precision * recall, denom)

    def lcs(self, a, b):
        """
        Return the length of the longest common subsequence of a and b.

        Only one row of the dynamic-programming table is kept, sized by
        the shorter sequence.
        """
        longer, base = (a, b) if len(a) >= len(b) else (b, a)

        if len(base) == 0:
            return 0

        row = [0] * len(base)
        for c_a in longer:
            left = 0
            upper_left = 0
            for i, c_b in enumerate(base):
                up = row[i]  # value of the previous row, same column
                if c_a == c_b:
                    value = upper_left + 1
                else:
                    value = max(left, up)
                row[i] = value
                left = value
                upper_left = up

        return row[-1]

    def rouge_l(self, summary, references, alpha=0.5):
        """
        Calculate ROUGE-L score.

        Parameters
        ----------
        summary: str
            summary text
        references: str or str[]
            reference or references to evaluate summary
        alpha: float (0~1)
            alpha -> 0: recall is more important
            alpha -> 1: precision is more important
            F = 1/(alpha * (1/P) + (1 - alpha) * (1/R))

        Returns
        -------
        f1: float
            f1 score
        """
        matches = 0
        count_for_recall = 0
        _summary = self.tokenize(summary)
        _refs = [references] if isinstance(references, str) else references
        for r in _refs:
            _r = self.tokenize(r, True)
            matches += self.lcs(_r, _summary)
            count_for_recall += len(_r)
        count_for_prec = len(_refs) * len(_summary)
        return self._calc_f1(matches, count_for_recall, count_for_prec, alpha)

    def count_be(self, text, compare_type, is_reference=False):
        """Return a Counter of basic-element keys found in ``text``."""
        bes = self.parse_to_be(text, is_reference)
        return Counter(be.as_key(compare_type) for be in bes)

    def rouge_be(self, summary, references, compare_type="HMR", alpha=0.5):
        """
        Calculate ROUGE-BE score.

        Parameters
        ----------
        summary: str
            summary text
        references: str or str[]
            reference or references to evaluate summary
        compare_type: str
            "H", "M", "R" or these combination.
            Each character means basic element component.
            H: head, M: modifier, R: relation.
            The image of these relation is following.
            {head word}-{relation}->{modifier word}
            When "HMR", use head-relation-modifier triple as basic element.
        alpha: float (0~1)
            alpha -> 0: recall is more important
            alpha -> 1: precision is more important
            F = 1/(alpha * (1/P) + (1 - alpha) * (1/R))

        Returns
        -------
        f1: float
            f1 score
        """
        matches = 0
        count_for_recall = 0
        s_bes = self.count_be(summary, compare_type)
        _refs = [references] if isinstance(references, str) else references
        for r in _refs:
            r_bes = self.count_be(r, compare_type, True)
            matches += self.count_overlap(s_bes, r_bes)
            count_for_recall += sum(r_bes.values())
        count_for_prec = len(_refs) * sum(s_bes.values())
        return self._calc_f1(matches, count_for_recall, count_for_prec, alpha)
-------------------------------------------------------------------------------- /tests/data/rouge/ROUGE-test-zh.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": { 3 | "summaries": [ 4 | "自行车 不能 停 你 怎么 看", 5 | "自行车 不能 进站" 6 | ], 7 | "references": [ 8 | "自行车 不能 带上 火车" 9 | ] 10 | }, 11 | "2": { 12 | "summaries": [ 13 | "科技 钻研 钻研", 14 | "科技 工作者 要 学习 老一辈 科学家 的 钻研 精神" 15 | ], 16 | "references": [ 17 | "刘云山 看望 著名 科技 专家" 18 | ] 19 | }, 20 | "3": { 21 | "summaries": [ 22 | "中国 居 全球 第八", 23 | "瑞士 居 榜首 中国 位列 第 28" 24 | ], 25 | "references": [ 26 | "全球 竞争力 排行榜 中国 居 28 位居 金砖 国家 首位" 27 | ] 28 | } 29 | } -------------------------------------------------------------------------------- /tests/data/rouge/ROUGE-test.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": { 3 | "summaries": [ 4 | "The/ Officers it/ The concessions/ The/ The discussion", 5 | "TRADE/ AGREEMENT/ PRESSURE/ BE IN BUSINESS CONCESSIONS/ COUNTRIES BANGLADESH IN DHAKA/ ASIAN", 6 | "WORLD COUNTRIES/ SENIOR TRADE OFFICIALS/ AGREEMENT/ COUNTRIES BANGLADESH DHAKA/ CAPITAL", 7 | "WORLD COUNTRIES/ SENIOR TRADE OFFICIALS/ AGREEMENT/ COUNTRIES BANGLADESH DHAKA/ CAPITAL/ ASIAN 49 COUNTRIES/ OFFICERS/ TWO", 8 | "world trade officials in the", 9 | "world trade officials in the nations to concessions give bangladesh", 10 | "trade, business nations Bangladesh World Bank financial United_Nations hit bank proposal", 11 | "world countries is fact is that developed countries put pressure on that they should be" 12 | ], 13 | "references": [ 14 | "Poor nations demand trade subsidies from developed nations.", 15 | "Poor nations pressurise developed countries into granting trade subsidies.", 16 | "Developed countries should be pressurized. 
Business exemptions to poor nations.", 17 | "World's poor decide to urge developed nations for business concessions" 18 | ] 19 | }, 20 | "2": { 21 | "summaries": [ 22 | "The city Bali last/ In the case/ The averted/ The/ The death", 23 | "BOMB/ ATTACK/ FOUND GUILTY HE DEATH SENTENCE", 24 | "LAST YEAR/ BOMB BLAST/ CASE IMAM ACCUSED INDIA/ SEA/ WERE", 25 | "OCTOBER LAST YEAR/ BOMB BLAST/ CASE IMAM ACCUSED INDIA/ SEA MONDAY BEGAN BE AVERTED ATTACK", 26 | "indonesia the last year in the", 27 | "indonesia the last year in the", 28 | "bomb allegation explosion Bali(the island)/earring police attacks karachi ship, plane security", 29 | "indonesian city of bali in in bomb blast accused india began to be averted" 30 | ], 31 | "references": [ 32 | "Indonesia charges Imam Samudra and Amrozi with Bali bombing.", 33 | "The suspected 'Bali bomber', Imam Samudra goes to trial.", 34 | "Description of trial on Bali bomb blast suspect Imam Samudra", 35 | "Trial of Imam samudra gets underway in Bali" 36 | ] 37 | }, 38 | "3": { 39 | "summaries": [ 40 | "Foreign Minister Colin to quit/ Attack of Pakistan/ That Kashmir freedom", 41 | "TERRORIST ATTACK/ COLIN POWELL/ INDIAN PARLIAMENT/ PAKISTAN/ KASHMIR FREEDOM", 42 | "TERRORIST ATTACK CAMPS MEASURES BE MORE EFFECTIVE POLICY AMERICAN/ INDIAN", 43 | "TERRORIST ATTACK CAMPS MEASURES BE MORE EFFECTIVE POLICY AMERICAN FOREIGN MINISTER COLIN POWELL/ INDIAN PARLIAMENT", 44 | "militants strategy american foreign minister", 45 | "militants strategy american foreign minister", 46 | "police Pakistan terrorism Osama Bin Laden\t year attacks India", 47 | "terrorist attack measures should be more effective policy american foreign minister everyone knows that indian parliament attack" 48 | ], 49 | "references": [ 50 | "Improvements in homeland security, intelligence required to eliminate terrorist threat.", 51 | "Terrorism can be solved through efficent policy than brute force.", 52 | "Finding effective strategies better than attacking terrorist camps 
or war.", 53 | "Attacking terrorists not a solution. Need a more effective strategy." 54 | ] 55 | }, 56 | "4": { 57 | "summaries": [ 58 | "The world for/ At the time of any one of the/ More", 59 | "WORLD/ EVIL STRUGGLE AGAINST HORIZON/ SENTENCE", 60 | "WORLD GOOD EVIL STRUGGLE/ HORIZON/ SENTENCE/ MATTER IS/ SHAITANS CHAPTER", 61 | "INDIA/ REST/ WORLD GOOD EVIL STRUGGLE/ HORIZON LIVING NORMAL/ ONE TIME/ HISTORY MORE SENTENCE/ MATTER", 62 | "against conflict in the most", 63 | "india and other world against conflict when horizon time in a record in more", 64 | "thousand one two India Osama Bin Laden\t earthquake Osama", 65 | "india and rest of world for good and evil struggle because living" 66 | ], 67 | "references": [ 68 | "War against terror for India and world will continue.", 69 | "India along with others continue to fight against evil elements.", 70 | "Fight against 'evil' everlasting for India and rest of world", 71 | "Struggle against evil continues in India and the rest of the world." 72 | ] 73 | }, 74 | "5": { 75 | "summaries": [ 76 | "Conference general/ Rice to the failure of", 77 | "AGRA SUMMIT/ PERVEZ MUSHARRAF/ LORD RICE/ TALKS", 78 | "AGRA/ LORD RICE/ FAILURE TALKS WAS CHAIR/ ARE MISSING LAST", 79 | "AGRA SUMMIT GENERAL PERVEZ MUSHARRAF DOMINATE/ LORD RICE/ FAILURE TALKS WAS CHAIR/ ARE MISSING LAST", 80 | "agra of remains top samelana in the news", 81 | "agra of remains top samelana in the news are last year july in the", 82 | "team India Vajpayee (India's prime minister) Pakistan government police people people gathering, function", 83 | "agra remains of agra summit in general pervez musharraf was to chair idea" 84 | ], 85 | "references": [ 86 | "Musharraf charms media, but Agra Summit fails.", 87 | "Agra Summit not fruitful but Musharraf grabs attention of media.", 88 | "Agra talks fail but Musharraf manages to gain publicity", 89 | "Agra summit: Musharraf impresses journalists, but talks failure imminent." 
90 | ] 91 | }, 92 | "6": { 93 | "summaries": [ 94 | "]/ On the day ever", 95 | "GUJARAT EARTHQUAKE TREMBLE RAISED LAND UNSAFE BUILDINGS/ EARTH/ LONG HAVOC", 96 | "GUJARAT EARTHQUAKE TREMBLE RAISED LAND UNSAFE BUILDINGS WERE EARTH RISING", 97 | "GUJARAT EARTHQUAKE TREMBLE RAISED LAND UNSAFE BUILDINGS WERE EARTH RISING RECALLS HOW MOMENTS/ DAY/ SEVEN", 98 | "sure they are the moment in the deluge", 99 | "earthquake shivering uThI dharaa buildings vulnerable were they are the moment in the deluge", 100 | "people earthquake building India space Columbia police Kashmir team", 101 | "gujarat earthquake tremble raised land unsafe buildings were earth" 102 | ], 103 | "references": [ 104 | "Uday Marankar reminisces on the devastation caused by Gujrat earthquake.", 105 | "Massive earthquake rocks the state of Gujarat on 26th January,2002.", 106 | "Reporter's earthquake experience and rapid work to submit magazine story", 107 | "Gujarat Earthquake: A first hand narrative" 108 | ] 109 | }, 110 | "7": { 111 | "summaries": [ 112 | "Years the affairs of Vajpayee/ 2001 passage gum/ The great relief", 113 | "VAJPAYEE REMAIN/ HOUSE/ RELIEF", 114 | "YEAR RUNNING AFFAIRS VAJPAYEE REMAIN CLEAR/ HOUSE/ RELIEF/ BIG/ YEAR", 115 | "YEAR RUNNING AFFAIRS VAJPAYEE REMAIN CLEAR/ WILL BE ATAL/ HOUSE/ GREAT SIGH RELIEF/ BIG/ YEAR", 116 | "past year by raajakaaja walking the people to the home in the breath", 117 | "past year by raajakaaja walking the people to the home in if this year", 118 | "Vajpayee (India's prime minister) year war government police Prime Minister India team", 119 | "in past year in running affairs of vajpayee remain clear less to people" 120 | ], 121 | "references": [ 122 | "Vajpayee showed lack of leadership in governing country in 2001.", 123 | "2001: Year full of conundrums and disappointments for Prime-Minister Vajpayee.", 124 | "Account of an unfortunate administrating year for Indian prime minister", 125 | "The year 2001: A disaster for Vajpayee" 126 | ] 127 | }, 128 
| "8": { 129 | "summaries": [ 130 | "Iraqis to work/ Of the week at the time/ Army said/ The streets", 131 | "21 IRAQI ARMY/ WEAPONS/ WEEKS TIME JUN 1 2003 UTC/ IRAQ", 132 | "HAND TWO WEEKS TIME JUN 41 1 2003 UTC 71", 133 | "21 IRAQI ARMY/ BUSINESS WEAPONS COME HAND TWO WEEKS TIME JUN 41 1 2003 UTC", 134 | "2.1 in the streets in the", 135 | "2.1 iraakiyo.n the army of work in two weeks of the streets in the", 136 | "point army rate police two weapon one Saddam Hussain", 137 | "2.1 iraqi army 1 11:28 7.1 american forces around covered with offence to control he for iraqi army" 138 | ], 139 | "references": [ 140 | "Iraq has 2 months to turn over weapons of mass destruction.", 141 | "Iraqi's given 2 weeks notice to handover unlicensed weapons: America.", 142 | "America gives two weeks for Iraqi's to surrender unauthorized weapons", 143 | "Iraqis face weapon surrender deadline" 144 | ] 145 | }, 146 | "9": { 147 | "summaries": [ 148 | "2.1 Indian companies work to/ Dollar savings/ US/ To make the proposal.", 149 | "INDIAN/ MARCH 2003 UTC", 150 | "21 INDIAN COMPANIES WORK/ US ECONOMY 10 BILLION DOLLAR SAVINGS", 151 | "21 INDIAN COMPANIES WORK/ US ECONOMY 10 BILLION DOLLAR SAVINGS JUN 31 MARCH 2003 UTC", 152 | "state in the leading firm", 153 | "state in the leading firm", 154 | "point rate nations one two earthquake Arab/Billion thousand five", 155 | "UTC 16:35 4.1 in america indian companies to work not to be given to make proposal" 156 | ], 157 | "references": [ 158 | "American Industry saved $10 billion by outsourcing to India: NASSCOM", 159 | "Outsourcing to India saves US economy 10-11 billion dollars: NASSCOM.", 160 | "America saves billions by outsourcing work to India: NASSCOM study", 161 | "Outsourcing to India saves US 10 Billion Dollars- NASSCOM" 162 | ] 163 | }, 164 | "10": { 165 | "summaries": [ 166 | "Scam in Harshad Mehta and three other 5-5 years of hard/ The a/ Of the year", 167 | "SECURITIES SCAM/ RIGOROUS IMPRISONMENT MUMBAI/ HIGH COURT/ IMPRISONMENT 
SENTENCE/ CASE", 168 | "THREE OTHERS YEARS RIGOROUS IMPRISONMENT MUMBAI MUMBAI 28 TH SEPTEMBER", 169 | "THREE OTHERS YEARS RIGOROUS IMPRISONMENT MUMBAI MUMBAI 28 TH SEPTEMBER/ HIGH COURT SPECIAL COURT/ IMPRISONMENT", 170 | "securities in the five years in firm", 171 | "securities in the five years", 172 | "court company thousand fame one Enron two program share_market", 173 | "securities scam in harshad mehta and three others to 5 @-@ 5 years" 174 | ], 175 | "references": [ 176 | "Mehta and 3 others convicted for stock market scandal.", 177 | "Harshad Mehta (\"Big Bull\") and accomplices handed 5 years imprisonment.", 178 | "High court ruling on Harshad Mehta in stock market scam", 179 | "Stock scam: 5 years imprisonment for Harshad Mehta" 180 | ] 181 | }, 182 | "11": { 183 | "summaries": [ 184 | "The legal action threatened Delhi,/ Party Mr./ Threat/ Accused", 185 | "DEFENSE MINISTER THREAT OF LEGAL ACTION NEW DELHI/ FERNANDES/ SUPREMO/ FOOD", 186 | "DEFENSE MINISTER THREAT LEGAL ACTION NEW DELHI AUGUST 31 DEFENCE", 187 | "DEFENSE MINISTER THREAT LEGAL ACTION NEW DELHI AUGUST 31 DEFENCE MINISTER SAMATA PARTY PRESIDENT MR", 188 | "legal action will they arms and drug smugglers andaman sea in", 189 | "legal action of legal action will they arms and drug smugglers andaman sea in", 190 | "election test, inspection commission test, exam, inspection missile World Bank ship, plane poll, election party", 191 | "defense minister threat" 192 | ], 193 | "references": [ 194 | "Defense minister threatens Jayalalitha with criminal investigation.", 195 | "Defense Minister George Fernandes threatens prosecution against AIADMK supremo Jayalalitha.", 196 | "Account of defense minister's threat for legal action against Jayalalita", 197 | "Defense minister threatens Jayalalitha for prosecution." 
198 | ] 199 | }, 200 | "12": { 201 | "summaries": [ 202 | "To keep in the:/ Jaswant Singh said/ To keep always/ Mouth of the", 203 | "MR SINGH NEW DELHI/ MINISTER JASWANT SINGH INDIA IS NEIGHBOUR/ SELF/ MOUTH", 204 | "MAINTAIN FRIENDLY RELATIONS FAVOUR MR SINGH NEW DELHI 31 MR", 205 | "INDIA NEIGHBOURING COUNTRIES MAINTAIN FRIENDLY RELATIONS FAVOUR MR SINGH NEW DELHI 31 AUGUST MR FOREIGN", 206 | "nations to friendly relationship in the mouth", 207 | "nations to friendly relationship in the mouth", 208 | "India atomic Pakistan North_Korea World Bank Korea north plane, aircraft", 209 | "india from neighbouring countries to maintain friendly relations in favour" 210 | ], 211 | "references": [ 212 | "India wants friendly relations with neighbors: Jaswant Singh", 213 | "Jaswant Singh signifies the importance for friendly relationship with neighbors.", 214 | "External affairs minister addresses India's interest in friendship with neighbors", 215 | "India in favor of maintaining friendly relations with neighbors." 216 | ] 217 | }, 218 | "13": { 219 | "summaries": [ 220 | "The charming not/ \"revolution-murdabad ' slogans and not/ Not a.m. 
the frenzied", 221 | "HOUSE NOT DRUMS NEW DELHI 31 AUGUST CAPITAL NEW/ ELECTION/ REVOLUTION/ RIVAL", 222 | "EXOTIC SLOGANS/ HOUSE DRUMS NEW DELHI 31 AUGUST CAPITAL NEW", 223 | "EXOTIC SLOGANS/ HOUSE DRUMS NEW DELHI 31 AUGUST CAPITAL NEW/ ELECTION MEETING THERE WAS REVOLUTION", 224 | "charming house in new delhi 31 august capital delhi in a house contenders leaders", 225 | "charming house in new delhi 31 august capital delhi in a house contenders leaders", 226 | "Singh (common Indian last name) BJP (Bhartiya Janata Party - a political party in India) victory prize, award festival party election Congress political party in India, US Congress Modi (name)", 227 | "manmohan not drums new delhi 31 august capital new delhi there was not @-@ revolution murdabad ' slogans and not" 228 | ], 229 | "references": [ 230 | "No slandering or slogan shouting at Singh's campaign party.", 231 | "Manmohan Singh's anomalous political campaign for the Lok Sabha elections.", 232 | "Description of a political gathering in Manmohan Singh's election campaign", 233 | "Manmohan Singh's assembly peaceful, hospitable and unique." 
234 | ] 235 | }, 236 | "14": { 237 | "summaries": [ 238 | "By the economic development praise/ Wolfensohn India tour./ India", 239 | "ECONOMIC/ JAMES WOLFENSOHN VISIT OF FINANCE MINISTER YASHWANT/ INDIA/ GROWTH/ ROAD", 240 | "WORLD BANK PRESIDENT JAMES INDIAS ECONOMIC DEVELOPMENT APPRECIATION WASHINGTON SEPTEMBER", 241 | "JAMES WOLFENSOHN VISIT/ FINANCE MINISTER YASHWANT SINHA ACCEPTED INVITATION INDIA PRAISE ECONOMIC GROWTH INDIA/ ROAD", 242 | "world bank september world bank", 243 | "world bank september world bank", 244 | "World Bank India bank destroyed meeting/summit financial currency minister both countries", 245 | "world bank president james by india ' s economic development appreciation visit yashwant sinha accepted invitation" 246 | ], 247 | "references": [ 248 | "World Bank president praises India's economic development.", 249 | "World Bank President praises India's progress; accepts invitation to India.", 250 | "World bank chief praises India's economic development", 251 | "James Wulfenson praises India's economic development and accepts Yashwant's invitation." 
252 | ] 253 | }, 254 | "15": { 255 | "summaries": [ 256 | "Summit conference with/ As a countries/ Centre", 257 | "SUMMIT OF SITE WITH DEMONSTRATIONS VIOLENCE JUNE/ POLICE/ CITY/ FRENCH", 258 | "ONGOING CONFERENCE OPPONENTS/ POLICE GENEVA/ CENTRE/ CITY SUNDAY NIGHT/ CONTINUED", 259 | "ONGOING CONFERENCE OPPONENTS/ POLICE GENEVA/ CENTRE/ CITY SUNDAY NIGHT/ CONTINUED/ FRENCH SWISS DIALOGUE COMMITTEE SDA", 260 | "summit conference site of the city centre in france", 261 | "summit conference site of violence two leading countries the organization city centre in france", 262 | "gathering, function nation, country violence people police army gas festival Dollar", 263 | "g @-@ 8 summit violence june 2 as leading countries 8 summit all over continued" 264 | ], 265 | "references": [ 266 | "Protests and demonstrations at site of G8 summit.", 267 | "Violence and demonstrations gripped the recent G-8 meeting in France.", 268 | "Demonstration and violence near G-8 summit invokes police response", 269 | "Demonstrations by rebels near G-8 summit site." 
270 | ] 271 | }, 272 | "16": { 273 | "summaries": [ 274 | "Professional of/ To be / Paswan said/ In the years/ ' \"development", 275 | "PROFESSIONAL LACK OF WILL NOT BE/ CENTRAL HUMAN RESOURCES/ STATE", 276 | "JUNE CENTRAL HUMAN RESOURCES MINISTER STATE SANJAY/ PACE/ INCREASE/ MEASURES", 277 | "PROFESSIONAL LACK WILL BE ALLOWED BE JUNE CENTRAL HUMAN RESOURCES MINISTER STATE SANJAY/ YEARS/ COUNTRY", 278 | "union human resources years in the information technology", 279 | "years in the information technology", 280 | "minister government Jammu & Kashmir BJP (Bhartiya Janata Party - a political party in India) India party Singh (common Indian last name) mobile World Bank", 281 | "professional lack will not be allowed to be june 2 central human resources minister said" 282 | ], 283 | "references": [ 284 | "\"No shortage of IT professionals in the country\": Pasvaan", 285 | "IT professionals to meet the growing demand for information technology.", 286 | "Human resource minister plans to increase 'IT' professionals in India", 287 | "\"There won't be a shortage of IT professionals\" - Sanjay Paswan" 288 | ] 289 | }, 290 | "17": { 291 | "summaries": [ 292 | "Sonia visit to Kashmir new/ The recent/ Three-day meeting/ The", 293 | "VAJPAYEE SONIAS VISIT TO NEW ZEAL IN KASHMIR JUNE/ VALLEY", 294 | "VAJPAYEE SONIAS VISIT NEW ZEAL KASHMIR JUNE PRIME MINISTER ATAL", 295 | "VAJPAYEE SONIAS VISIT NEW ZEAL KASHMIR JUNE PRIME MINISTER ATAL/ KASHMIR IS ATMOSPHERE EARLY/ VALLEY", 296 | "the new chief minister in the people in", 297 | "the new chief minister in the soon peace restoration of people in long valley", 298 | "Kashmir Vajpayee (India's prime minister) Pakistan Jammu & Kashmir Congress political party in India, US Congress Sayeed (name) states India", 299 | "vajpayee sonia s visit to new zeal in kashmir june is atmosphere" 300 | ], 301 | "references": [ 302 | "New hope for peace in Kashmir after Vajpayee, Sonia's visit.", 303 | "Vajpayee, Sonia tour of Kashmir infuses new zeal 
in Kashmiri's.", 304 | "Vajpayee and Sonia visits rejuvenates spirits in kashmiris' for peace.", 305 | "Vajpayee and Sonia's meeting raises hopes of peace in Kashmir." 306 | ] 307 | }, 308 | "18": { 309 | "summaries": [ 310 | "The of/ Chief Kasturirangan said/ On arrival/ In", 311 | "POLL OF INDIA JOURNEY/ JUNE INDIAN SPACE KASTURIRANGAN/ MISSION", 312 | "POLL INDIA JOURNEY/ RIGHT WAY JUNE INDIAN SPACE KASTURIRANGAN CHIEF", 313 | "INDIAN MISSION RIGHT ROAD HAS BEEN GROWING CAMPAIGN/ FIRST HIGH RESOLUTION VEHICLES WILL BE SENT", 314 | "poll of the poll on reach of the rise in sunday", 315 | "poll of the journey correct that poll on reach of the rise in sunday", 316 | "ship space satellite China India flight Columbia ban water", 317 | "poll of india journey of right way june 2 indian space kasturirangan will be sent" 318 | ], 319 | "references": [ 320 | "India's plans to send unmanned moon mission on right track.", 321 | "Indian dream of reaching the moon on right track: Kasturirangan", 322 | "India's mission to reach moon: technical competancy and project justification", 323 | "India's mission to reach moon on the right track- Kasturirangan." 
324 | ] 325 | }, 326 | "19": { 327 | "summaries": [ 328 | "Stock in probe into/ Sultana area of the army/ On Sunday began", 329 | "MILITARY IN TO TAKE STOCK OF PROBE JUNE/ BORDER/ ARMY/ TEAM", 330 | "MILITARY/ TAKE STOCK/ PROBE JUNE RAJASTHAN BORDER JAISALMER DISTRICT SULTANA", 331 | "MILITARY/ TAKE STOCK/ PROBE JUNE RAJASTHAN BORDER JAISALMER DISTRICT SULTANA AREA/ ARMY STOCK YESTERDAY SUNDAY", 332 | "store in aaga fire the check began two in store at and", 333 | "store in aaga fire the check began two june in store in check and", 334 | "army accident plane, aircraft rail fame one border share_market security", 335 | "military in to take stock june 2 rajasthan border jaisalmer district in stock yesterday began" 336 | ], 337 | "references": [ 338 | "Investigation on to determine cause of fire in army depot.", 339 | "Investigation into fire at the Jaisalmer army depot begins: Army.", 340 | "Investigation being conducted into fire at army depot in India", 341 | "Fire in army depot, Jaiselmer district: Investigations underway" 342 | ] 343 | }, 344 | "20": { 345 | "summaries": [ 346 | "Project successful/ Forest/ The report of the/ Money/ Delay", 347 | "GOVERNMENT PROJECT TIGER SUCCESSFUL/ TIGERS/ DELAY", 348 | "GOVERNMENT PROJECT TIGER SUCCESSFUL PROJECT JUNE ENVIRONMENT FORESTS/ MINISTRY/ MONEY", 349 | "GOVERNMENT PROJECT TIGER SUCCESSFUL PROJECT JUNE ENVIRONMENT FORESTS/ MINISTRY/ REPORT/ MONEY/ STATE GOVERNMENT DELAY IMPACT", 350 | "state of project in the", 351 | "state of project and forest ministry of the tiger for protection for project project", 352 | "effort report number, count Me/In, Inside, within Tamil disease information, knowledge government Kashmir", 353 | "government project ' tiger ' successful project june 2 environment and forests of ministry" 354 | ], 355 | "references": [ 356 | "Project to save tigers successful: Environment and Forest Ministry.", 357 | "Operation \"Project Tiger\" success: Report by Forest and Environmental Ministry.", 358 | 
"Glimpses of 'Project Tiger', a successful undertaking by Indian government", 359 | "Project Tiger: Indian Government's mission to save tigers successful" 360 | ] 361 | }, 362 | "21": { 363 | "summaries": [ 364 | "Crime. murder Bhel of general manager/ Wife of/ Hardwar Government company/ The/ Yet", 365 | "JUNE TALKS/ RED AND BLACK AGARWAL/ HOUSE JHOORDI/ DELAY", 366 | "RED BLACK AGARWAL WIFE SATYA AGARWAL/ HOUSE JHOORDI MURDER/ DELAY", 367 | "DEHRADUN/ RED BLACK AGARWAL WIFE SATYA AGARWAL/ PEOPLE/ HOUSE JHOORDI MURDER YET DID KNOW DELAY", 368 | "crime and his wife of the private company", 369 | "crime and his wife of the private company people by his home in murder", 370 | "police India talk use Kashmir more explosion two work", 371 | "crime" 372 | ], 373 | "references": [ 374 | "Hunt on for killers of BHEL's Vice-President, family.", 375 | "\"BHEL\" Vice-President Shyam Agarwal and wife found murdered at home.", 376 | "'BHEL' Vice President and his wife found murdered at home", 377 | "Shyam Lal Agarwal, vice president of BHEL, assasinated." 
378 | ] 379 | }, 380 | "22": { 381 | "summaries": [ 382 | "Deeds of giving capable of./ Today across the border/ To", 383 | "INDIA PAKISTAN DEEDS TO ANSWER CAPABLE/ P/ TALKS/ STATE/ CROSS/ ANSWER SWAMI", 384 | "BAHERI P N JUNE 16 TALKS CENTRAL HOME MINISTER STATE", 385 | "INDIA PAKISTAN DEEDS ANSWER CAPABLE BAHERI P N JUNE 16 TALKS CENTRAL HOME MINISTER STATE", 386 | "indian pakistani harakato.n the answer give in central home", 387 | "indian pakistani harakato.n the answer give in central home to terrorism locked in the", 388 | "Pakistan police Vajpayee (India's prime minister) India government Prime Minister Qaeda [\"Al-Qaeda\"] cup European", 389 | "india pakistan deeds to answer capable of" 390 | ], 391 | "references": [ 392 | "India can deliver fitting response to Pakistan: Chinmayananda", 393 | "India aptly capable of counter-attacking Pakistani sponsored cross-border terrorism: Chinmayananda.", 394 | "India can take action if Pakistan fails to curtail terrorism", 395 | "India adept at retaliating Pakistan's dirty tricks" 396 | ] 397 | }, 398 | "23": { 399 | "summaries": [ 400 | "To make plan/ June talks./ Kalam/ College engineering on/ Course", 401 | "SUN BY 2020 INDIA/ PLAN KANCHIPURAM 19 TALKS/ WORLD", 402 | "MAKE PLAN KANCHIPURAM/ WORLD ENGINEERING COLLEGE/ COURSE/ INAUGURATING BLOCK OPENING", 403 | "SUN 2020 INDIA ENRICH NATION MAKE PLAN KANCHIPURAM JUNE 19 TALKS PRESIDENT ABDUL KALAM/ WORLD", 404 | "college in engineering at a postgraduate course for shubhaara.nbha and a postgraduate block open", 405 | "college in engineering at a postgraduate course for shubhaara.nbha and a postgraduate block open", 406 | "Me/In, Inside, within President India ship nations space effort nineteen thousand", 407 | "sun enrich nation to make plan kanchipuram june 19" 408 | ], 409 | "references": [ 410 | "Project to make India a \"Developed Nation\" by 2020.", 411 | "President APJ.Kalam - Scheme for developed India by 2020 ready.", 412 | "Improvement in five areas will make 
India 'developed' by 2020", 413 | "Strategy ready to make India a developed nation by 2020" 414 | ] 415 | }, 416 | "24": { 417 | "summaries": [ 418 | "Bus and hit four dead./ The district of sector/ People/ Office received", 419 | "ROADWAYS BUS AND IMPACT OF FOUR DEAD MORADABAD/ TALKS/ POLICE/ CAR/ BRASS", 420 | "MARUTI CAR CLASH WOMEN FOUR PEOPLE DIED SENIOR SUPERINTENDENT POLICE", 421 | "ROADWAYS BUS IMPACT/ FOUR DEAD MORADABAD ARTICULATED JUNE 20 TALKS UTTAR PRADESH MORADABAD DISTRICT POLICE", 422 | "clash in four dead at the", 423 | "clash in four dead at the", 424 | "police accident ear poll, election nation, country Pakistan people people weapons", 425 | "roadways bus and impact" 426 | ], 427 | "references": [ 428 | "4 killed in accident between Roadway bus and Maruti.", 429 | "4 dead in collision between \"Roadways\" Bus and Maruti car.", 430 | "Four killed in accident between 'Roadways' bus and 'Maruti' car", 431 | "Roadways bus and a Maruti collide: 4 dead" 432 | ] 433 | }, 434 | "25": { 435 | "summaries": [ 436 | "India and Pakistan in the medium/ June talks of Kashmir/ The improvement in the dialogue", 437 | "INDIA AND PAKISTAN IN KASHMIR OF FRIENDSHIP/ SRINAGAR/ INDIAN", 438 | "INDIA PAKISTAN KASHMIR FRIENDSHIP CAN BECOME MEDIUM MOOFTEE SRINAGAR JUNE", 439 | "INDIA PAKISTAN KASHMIR FRIENDSHIP CAN BECOME MEDIUM MOOFTEE SRINAGAR JUNE 20 TALKS/ PEOPLE/ INDIAN SUBCONTINENT", 440 | "kashmir indian cuisine in the chief at reform of for indian subcontinent in there", 441 | "kashmir indian cuisine in the chief at reform of for indian subcontinent in there", 442 | "both countries Jammu & Kashmir population, people both Kashmir Pakistan Sayeed (name) Vajpayee (India's prime minister) India", 443 | "india and pakistan in kashmir can become medium @-@ mooftee srinagar june 20" 444 | ], 445 | "references": [ 446 | "Kashmir can forge friendship between India and Pakistan: Mufti", 447 | "Kashmir: Link for peace rather than war between India-Pakistan - Mufti.", 448 | 
"Resolving Kashmir by talks will create friendship between India-Pakistan", 449 | "Kashmir: A means of friendship between India and Pakistan?" 450 | ] 451 | } 452 | } -------------------------------------------------------------------------------- /tests/data/rouge/verify-spl.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": { 3 | "summaries": [ 4 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 5 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD 
protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 6 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 7 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF 
significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 8 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 9 | ], 10 | "references": [ 11 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC 
lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 12 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 13 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE 
fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 14 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF" 15 | ] 16 | }, 17 | "2": { 18 | "summaries": [ 19 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC 
theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 20 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 21 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE 
passportsEEE", 22 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 23 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 24 | ], 25 | "references": [ 26 | "BritishCCC authoritiesCCC 
arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 27 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 28 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE 
surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 29 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF" 30 | ] 31 | }, 32 | "3": { 33 | "summaries": [ 34 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC 
internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 35 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 36 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE 
GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 37 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 38 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG 
ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 39 | ], 40 | "references": [ 41 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 42 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC 
crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 43 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 44 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF 
involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF" 45 | ] 46 | }, 47 | "4": { 48 | "summaries": [ 49 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 50 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD 
moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 51 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 52 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF 
PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 53 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 54 | ], 55 | "references": [ 56 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC 
immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 57 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 58 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC 
arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 59 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC" 60 | ] 61 | } 62 | } -------------------------------------------------------------------------------- /tests/data/rouge/verify.json: -------------------------------------------------------------------------------- 1 | { 2 | "1": { 3 | "summaries": [ 4 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC 
peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 5 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 6 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE 
aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 7 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 8 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG 
protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 9 | ], 10 | "references": [ 11 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 12 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD 
showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 13 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 14 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF 
lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF" 15 | ] 16 | }, 17 | "2": { 18 | "summaries": [ 19 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 20 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD 
officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 21 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 22 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF 
hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 23 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 24 | ], 25 | "references": [ 26 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC 
carryingCCC diplomaticCCC passportsCCC", 27 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 28 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 29 | "BritishFFF authoritiesFFF 
arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF" 30 | ] 31 | }, 32 | "3": { 33 | "summaries": [ 34 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 35 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD 
PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 36 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 37 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF 
warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 38 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 39 | ], 40 | "references": [ 41 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC 
BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 42 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 43 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC 
ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 44 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF" 45 | ] 46 | }, 47 | "4": { 48 | "summaries": [ 49 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC 
despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 50 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD", 51 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE 
genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE", 52 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF", 53 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG 
peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG" 54 | ], 55 | "references": [ 56 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 57 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC 
thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 58 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC", 59 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC 
def read_text(path, input_format):
    """Read one summary/reference file and return its text.

    Args:
        path: Object exposing ``open(encoding=...)``; the script passes
            ``pathlib.Path`` instances built from ``root``.
        input_format: When equal to ``"SPL"`` the stripped file content is
            returned unchanged. Any other value makes the content be parsed
            as HTML, and the text of every ``<a>`` element carrying an
            ``id`` attribute is returned, one element per line.

    Returns:
        The extracted text as a single string.
    """
    with path.open(encoding="utf-8") as f:
        content = f.read().strip()

    if input_format == "SPL":
        return content

    soup = BeautifulSoup(content, "html.parser")
    anchors = soup.find_all("a", attrs={"id": True})
    texts = []
    for anchor in anchors:
        # ``Tag.string`` is None when the anchor is empty or wraps nested
        # markup; the original ``anchor.string.strip()`` crashed on that.
        # Fall back to the concatenated text so such anchors still yield
        # a line instead of an AttributeError.
        text = anchor.string
        if text is None:
            text = anchor.get_text()
        texts.append(text.strip())
    return "\n".join(texts)
class TestRougeBE(unittest.TestCase):
    """Check ``rouge_be`` against unigram ``rouge_n`` over basic-element keys."""

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test.json`` under ``DATA_DIR``."""
        fixture_path = os.path.join(self.DATA_DIR, "ROUGE-test.json")
        with open(fixture_path, encoding="utf-8") as fixture:
            return json.load(fixture)

    def _bes_to_words(self, basic_elements, compare_type):
        """Return ``be.as_key(compare_type)`` for every element, in order."""
        return [element.as_key(compare_type) for element in basic_elements]

    def test_rouge_be(self):
        """For every fixture entry and key type, both scores must agree."""
        fixture = self.load_test_data()
        rouge = RougeCalculator(stopwords=False)
        for eval_id, case in fixture.items():
            summaries = case["summaries"]
            references = case["references"]
            parsed_refs = [rouge.parse_to_be(ref) for ref in references]

            for mode in ("H", "HM", "HMR"):
                print("eval {}: test {} pattern.".format(eval_id, mode))
                ref_keys = [self._bes_to_words(bes, mode)
                            for bes in parsed_refs]

                for summary in summaries:
                    parsed = rouge.parse_to_be(summary)
                    if len(parsed) == 0:
                        # No basic elements were extracted; nothing to compare.
                        continue
                    summary_keys = self._bes_to_words(parsed, mode)
                    expected = rouge.rouge_n(summary_keys, ref_keys, n=1)
                    actual = rouge.rouge_be(summary, references, mode)
                    self.assertLess(abs(expected - actual), 1e-5)

    def test_rouge_be_hm(self):
        """Same comparison on a hand-written sentence for ``HM`` and ``HMR``."""
        rouge = RougeCalculator(stopwords=False)
        summaries = [
            "It was beautiful flower, and the other was beautiful flower also."
        ]
        references = [
            "The flower was beautiful.",
            "Two flower were beautiful"
        ]
        parsed_refs = [rouge.parse_to_be(ref) for ref in references]

        for mode in ("HM", "HMR"):
            ref_keys = [self._bes_to_words(bes, mode) for bes in parsed_refs]
            for summary in summaries:
                summary_keys = self._bes_to_words(
                    rouge.parse_to_be(summary), mode)
                expected = rouge.rouge_n(summary_keys, ref_keys, n=1)
                actual = rouge.rouge_be(summary, references, mode)
                self.assertLess(abs(expected - actual), 1e-5)
class TestBLEU(unittest.TestCase):
    """Consistency checks for ``BLEUCalculator.bleu``."""

    def test_bleu(self):
        """String input, pre-tokenized input and the ``ja`` pair must agree."""
        hypothesis = "I am waiting on the beach"
        reference = "He is walking on the beach"

        english = BLEUCalculator()
        from_text = english.bleu(hypothesis, reference)
        from_tokens = english.bleu(hypothesis.split(), [reference.split()])
        self.assertLess(abs(from_text - from_tokens), 1e-8)

        japanese = BLEUCalculator(lang="ja")
        from_japanese = japanese.bleu("私はビーチで待ってる", "彼がベンチで待ってる")

        # The test expects the Japanese pair to score the same as the
        # English pair above.
        self.assertLess(abs(from_text - from_japanese), 1e-8)
class TestCustomLang(unittest.TestCase):
    """Both calculators must accept a user-defined ``BaseLang`` subclass."""

    def test_custom_lang(self):
        """A "/"-delimited tokenizer yields positive ROUGE-1 and BLEU scores."""

        class Custom(BaseLang):
            """Language whose tokens are separated by a forward slash."""

            def __init__(self):
                super().__init__("cs")

            def tokenize(self, text):
                return text.split("/")

        slash_lang = Custom()

        rouge_calc = RougeCalculator(lang=slash_lang)
        rouge_value = rouge_calc.rouge_n(
            summary="I/went/to/the/Mars/from/my/living/town.",
            references="I/went/to/Mars",
            n=1)

        bleu_calc = BLEUCalculator(lang=slash_lang)
        bleu_value = bleu_calc.bleu("I/am/waiting/on/the/beach",
                                    "He/is/walking/on/the/beach")

        self.assertGreater(rouge_value, 0)
        self.assertGreater(bleu_value, 0)
class TestLangJA(unittest.TestCase):
    """Basic-element extraction checks for ``LangJA``."""

    def test_basic_element(self):
        """Every element has head "花"; the first modifier differs from the rest."""
        parser = LangJA()
        sentence = "とても綺麗な花を見つけた"
        for position, element in enumerate(parser.parse_to_be(sentence)):
            print(element)
            # The first element is expected to carry "奇麗", all later ones
            # "見付ける".
            expected_modifier = "奇麗" if position == 0 else "見付ける"
            self.assertEqual(element.head, "花")
            self.assertEqual(element.modifier, expected_modifier)
class TestRouge(unittest.TestCase):
    """Compare ``RougeCalculator`` with the ``pythonrouge`` and ``rougescore`` baselines.

    Every test iterates over all entries of ``ROUGE-test.json`` and requires
    the calculator's score to be within ``1e-5`` of both baselines.
    """

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test.json`` under ``DATA_DIR``."""
        test_file = os.path.join(self.DATA_DIR, "ROUGE-test.json")
        with open(test_file, encoding="utf-8") as f:
            return json.load(f)

    def _assert_rouge_n_matches_baselines(self, rouge, **pythonrouge_options):
        """Check ROUGE-1 and ROUGE-2 of *rouge* against both baselines.

        Args:
            rouge: The ``RougeCalculator`` under test.
            **pythonrouge_options: Keyword arguments that differ between
                scenarios (``length_limit``, ``length``, ``word_level``,
                ``stopwords``), forwarded unchanged to ``Pythonrouge``.
        """
        data = self.load_test_data()
        for eval_id, case in data.items():
            summaries = case["summaries"]
            references = case["references"]
            for n in (1, 2):
                for s in summaries:
                    baseline = Pythonrouge(
                        summary_file_exist=False,
                        summary=[[s]],
                        reference=[[[r] for r in references]],
                        n_gram=n, recall_only=False,
                        stemming=False,
                        **pythonrouge_options)
                    b1_v = baseline.calc_score()
                    b2_v = rouge_n(rouge.tokenize(s),
                                   [rouge.tokenize(r) for r in references],
                                   n, 0.5)
                    v = rouge.rouge_n(s, references, n)
                    # Name the failing case; the bare assertion only showed
                    # two floats.
                    where = "eval {}, ROUGE-{}".format(eval_id, n)
                    self.assertLess(abs(b2_v - v), 1e-5, where)
                    self.assertLess(
                        abs(b1_v["ROUGE-{}-F".format(n)] - v), 1e-5, where)

    def test_rouge(self):
        """ROUGE-N without stop-word removal or length limit."""
        self._assert_rouge_n_matches_baselines(
            RougeCalculator(stopwords=False),
            length_limit=False, stopwords=False)

    def test_rouge_with_stop_word(self):
        """ROUGE-N with stop-word removal enabled on both sides."""
        self._assert_rouge_n_matches_baselines(
            RougeCalculator(stopwords=True),
            length_limit=False, stopwords=True)

    def test_rouge_with_length_limit(self):
        """ROUGE-N with ``length_limit=50`` (baseline: ``word_level=False``)."""
        self._assert_rouge_n_matches_baselines(
            RougeCalculator(stopwords=True, length_limit=50),
            length_limit=True, length=50, word_level=False, stopwords=True)

    def test_rouge_with_word_limit(self):
        """ROUGE-N with ``word_limit=5`` (baseline: ``word_level=True``)."""
        self._assert_rouge_n_matches_baselines(
            RougeCalculator(stopwords=True, word_limit=5),
            length_limit=True, length=5, word_level=True, stopwords=True)

    def test_rouge_l(self):
        """ROUGE-L with stop-word removal enabled on both sides."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=True)
        for eval_id, case in data.items():
            summaries = case["summaries"]
            references = case["references"]
            for s in summaries:
                # NOTE(review): the baseline is built with length_limit=True,
                # length=50 while the calculator above has no limit; this is
                # kept as-is, confirm it is intended.
                baseline = Pythonrouge(
                    summary_file_exist=False,
                    summary=[[s]],
                    reference=[[[r] for r in references]],
                    n_gram=1, recall_only=False, ROUGE_L=True,
                    length_limit=True, length=50,
                    stemming=False, stopwords=True)
                b1_v = baseline.calc_score()
                b2_v = rouge_l(rouge.tokenize(s),
                               [rouge.tokenize(r) for r in references],
                               0.5)
                v = rouge.rouge_l(s, references)
                where = "eval {}, ROUGE-L".format(eval_id)
                self.assertLess(abs(b2_v - v), 1e-5, where)
                self.assertLess(abs(b1_v["ROUGE-L-F"] - v), 1e-5, where)
class TestRougeJA(unittest.TestCase):
    """ROUGE-N checks for ``lang="zh"`` against ``rougescore.rouge_n``.

    NOTE(review): this class lives in ``test_rouge_zh.py`` and only uses the
    ``zh`` language and ``ROUGE-test-zh.json``; the ``JA`` in its name looks
    like a copy-paste leftover. Confirm before renaming.
    """

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test-zh.json`` under ``DATA_DIR``."""
        fixture_path = os.path.join(self.DATA_DIR, "ROUGE-test-zh.json")
        with open(fixture_path, encoding="utf-8") as fixture:
            return json.load(fixture)

    def _split(self, text):
        """Split space-delimited *text* into words, treating "。" and "、" as spaces."""
        normalized = text.replace("。", " ").replace("、", " ").strip()
        pieces = (piece.strip() for piece in normalized.split(" "))
        return [piece for piece in pieces if piece]

    def _compress(self, text_or_texts):
        """Remove every space from a string, or from each string of a list/tuple."""
        if not isinstance(text_or_texts, (tuple, list)):
            return text_or_texts.replace(" ", "")
        return [text.replace(" ", "") for text in text_or_texts]

    def test_rouge(self):
        """Scores on space-free text must match the baseline on pre-split words."""
        fixture = self.load_test_data()
        rouge = RougeCalculator(stopwords=False, lang="zh")
        for case in fixture.values():
            summaries = case["summaries"]
            references = case["references"]
            for n in (1, 2):
                for summary in summaries:
                    actual = rouge.rouge_n(self._compress(summary),
                                           self._compress(references), n)
                    expected = rouge_n(
                        self._split(summary),
                        [self._split(ref) for ref in references],
                        n, 0.5)
                    self.assertLess(abs(expected - actual), 1e-5)

    def test_rouge_with_stop_words(self):
        """Same comparison with stop words dropped on both sides."""
        fixture = self.load_test_data()
        rouge = RougeCalculator(stopwords=True, lang="zh")

        def content_words(text):
            # The baseline drops stop words using the calculator's own
            # language object.
            return [word for word in self._split(text)
                    if not rouge._lang.is_stop_word(word)]

        for case in fixture.values():
            summaries = case["summaries"]
            references = case["references"]
            for n in (1, 2):
                for summary in summaries:
                    actual = rouge.rouge_n(summary, references, n)
                    expected = rouge_n(
                        content_words(summary),
                        [content_words(ref) for ref in references],
                        n, 0.5)
                    self.assertLess(abs(expected - actual), 1e-5)
awesome", 21 | ) 22 | --------------------------------------------------------------------------------