├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── demo.py
├── doc
└── top.png
├── requirements-test.txt
├── requirements.txt
├── setup.py
├── sumeval
├── __init__.py
├── cli
│ ├── __init__.py
│ └── sum_eval.py
└── metrics
│ ├── __init__.py
│ ├── bleu.py
│ ├── lang
│ ├── __init__.py
│ ├── base_lang.py
│ ├── data
│ │ ├── en
│ │ │ ├── stemming.txt
│ │ │ └── stop_words.txt
│ │ ├── ja
│ │ │ └── stop_words.txt
│ │ └── zh
│ │ │ └── stop_words.txt
│ ├── lang_en.py
│ ├── lang_ja.py
│ └── lang_zh.py
│ └── rouge.py
└── tests
├── __init__.py
├── data
├── .gitkeep
└── rouge
│ ├── ROUGE-test-ja.json
│ ├── ROUGE-test-zh.json
│ ├── ROUGE-test.json
│ ├── verify-spl.json
│ └── verify.json
├── rouge_test_to_json.py
├── test_be_rouge.py
├── test_be_rouge_ja.py
├── test_bleu.py
├── test_custom_lang.py
├── test_lang_en.py
├── test_lang_ja.py
├── test_lang_zh.py
├── test_rouge.py
├── test_rouge_ja.py
├── test_rouge_zh.py
└── test_sum_eval.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 | .DS_Store
103 | .vscode
104 | tmp*/
105 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.6"
4 | - "3.7"
5 | before_install:
6 | - python --version
7 | - sudo apt-get install -y libxml-parser-perl
8 | install:
9 | - pip install -r requirements-test.txt
10 | - python -m spacy download en
11 | - pip install "https://github.com/megagonlabs/ginza/releases/download/latest/ginza-latest.tar.gz"
12 | script:
13 | - pytest --cov=sumeval tests/test_*
14 | after_success:
15 | codecov
16 | deploy:
17 | provider: pypi
18 | user: "icoxfog417"
19 | password:
20 | secure: O3zmKefgHI+UNvgtZ/Fe/htgnP+yLXbMFfLIOF3VN9ofCyTXhjueBG+t6l9cetzICPfOMf2LjYCeamgr8DiV9xbtT5IWPOsU5qJYq8guOpYZ3Qpy0jIE7jE8uURCLBM1vdfpf6Gc+FI+NKj01K/xrX2Fh5kT90TMXN2lPFrd7SkYBWTdyZsOheUFSE8YinSx+TSaiWBohc+IaqGuUvmIA12i2a1narvaB86WWhAIY+BqJYCPdZg+++xqsMixSUDhwZgi30k1LDxzUKvz+fFUCyvTkyr8hO+CeE+d+jyY+GuD0XMQMM0OcWK0gKYgO89kvYv7h8bIKc03jPESECEGQTXYqrt486Notkm+v4DOrT52Owgx2ZsuNicb+5v6u5Mb7aD36kMrgO9BvTvtPlgiGAB+UUY1kzcgFRYgsSN5mvz9EAnq6Efxq2/1aQV3MwJE1FD6EUuzzJaWSR5+8pfe9NL8vjle7qGp/aSxEzbJPuzVMe6n/1+z26RNozkHysZRnaKGvh0WypvkjoU2lJB8Lx8buOBURy0K778/PLljEDxVwB8HriW92EwyjcfaCitekCGTjGicYWj0lmyJJo05CaNAMGqGXx03Q1zZth1ilnXXpoBez37Dx4Q4oxR1UpsIp0POKfX1cg8MCUiN3xYVSGD+znH4uNazs9vHkrNEiII=
21 | on:
22 | tags: true
23 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2018 chakki
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Well tested & Multi-language
6 | evaluation framework for Text Summarization.
7 |
8 |
9 |
10 | [](https://badge.fury.io/py/sumeval)
11 | [](https://travis-ci.org/chakki-works/sumeval)
12 | [](https://codecov.io/gh/chakki-works/sumeval)
13 |
14 |
15 | * Well tested
16 | * The ROUGE-X scores are tested against the [original Perl script (ROUGE-1.5.5.pl)](https://github.com/summanlp/evaluation).
17 | * The BLEU score is calculated by [SacréBLEU](https://github.com/mjpost/sacrebleu), that produces the same values as official script (`mteval-v13a.pl`) used by WMT.
18 | * Multi-language
19 | * Besides English, Japanese and Chinese are also supported. Other languages can be added [easily](https://github.com/chakki-works/sumeval#welcome-contribution-tada).
20 |
21 | Of course, implementation is Pure Python!
22 |
23 | ## How to use
24 |
25 | ```py
26 | from sumeval.metrics.rouge import RougeCalculator
27 |
28 |
29 | rouge = RougeCalculator(stopwords=True, lang="en")
30 |
31 | rouge_1 = rouge.rouge_n(
32 | summary="I went to the Mars from my living town.",
33 | references="I went to Mars",
34 | n=1)
35 |
36 | rouge_2 = rouge.rouge_n(
37 | summary="I went to the Mars from my living town.",
38 | references=["I went to Mars", "It's my living town"],
39 | n=2)
40 |
41 | rouge_l = rouge.rouge_l(
42 | summary="I went to the Mars from my living town.",
43 | references=["I went to Mars", "It's my living town"])
44 |
45 | # You need spaCy to calculate ROUGE-BE
46 |
47 | rouge_be = rouge.rouge_be(
48 | summary="I went to the Mars from my living town.",
49 | references=["I went to Mars", "It's my living town"])
50 |
51 | print("ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}".format(
52 | rouge_1, rouge_2, rouge_l, rouge_be
53 | ).replace(", ", "\n"))
54 | ```
55 |
56 | ```py
57 | from sumeval.metrics.bleu import BLEUCalculator
58 |
59 |
60 | bleu = BLEUCalculator()
61 | score = bleu.bleu("I am waiting on the beach",
62 | "He is walking on the beach")
63 |
64 | bleu_ja = BLEUCalculator(lang="ja")
65 | score_ja = bleu_ja.bleu("私はビーチで待ってる", "彼がベンチで待ってる")
66 | ```
67 |
68 | ### From the command line
69 |
70 | ```
71 | sumeval r-nlb "I'm living New York its my home town so awesome" "My home town is awesome"
72 | ```
73 |
74 | output.
75 |
76 | ```
77 | {
78 | "options": {
79 | "stopwords": true,
80 | "stemming": false,
81 | "word_limit": -1,
82 | "length_limit": -1,
83 | "alpha": 0.5,
84 | "input-summary": "I'm living New York its my home town so awesome",
85 | "input-references": [
86 | "My home town is awesome"
87 | ]
88 | },
89 | "averages": {
90 | "ROUGE-1": 0.7499999999999999,
91 | "ROUGE-2": 0.6666666666666666,
92 | "ROUGE-L": 0.7499999999999999,
93 | "ROUGE-BE": 0
94 | },
95 | "scores": [
96 | {
97 | "ROUGE-1": 0.7499999999999999,
98 | "ROUGE-2": 0.6666666666666666,
99 | "ROUGE-L": 0.7499999999999999,
100 | "ROUGE-BE": 0
101 | }
102 | ]
103 | }
104 | ```
105 |
106 | You can also read the summary and references from files. See `sumeval -h` for details.
107 |
108 | ## Install
109 |
110 | ```
111 | pip install sumeval
112 | ```
113 |
114 | ## Dependencies
115 |
116 | * BLEU depends on [SacréBLEU](https://github.com/mjpost/sacrebleu).
117 | * To calculate `ROUGE-BE`, [`spaCy`](https://github.com/explosion/spaCy) is required.
118 | * To use lang `ja`, [`janome`](https://github.com/mocobeta/janome) or [`MeCab`](https://github.com/taku910/mecab) is required.
119 | * To calculate `ROUGE-BE`, [`GiNZA`](https://github.com/megagonlabs/ginza) is additionally required.
120 | * To use lang `zh`, [`jieba`](https://github.com/fxsjy/jieba) is required.
121 | * To calculate `ROUGE-BE`, [`pyhanlp`](https://github.com/hankcs/pyhanlp) is additionally required.
122 |
123 | ## Test
124 |
125 | `sumeval` uses two packages to test the score.
126 |
127 | * [pythonrouge](https://github.com/tagucci/pythonrouge)
128 | * It calls the original Perl script
129 | * `pip install git+https://github.com/tagucci/pythonrouge.git`
130 | * [rougescore](https://github.com/bdusell/rougescore)
131 | * It's a simple Python implementation of the ROUGE score
132 | * `pip install git+https://github.com/bdusell/rougescore.git`
133 |
134 | ## Welcome Contribution :tada:
135 |
136 | ### Add supported language
137 |
138 | The tokenization and dependency parsing for each language are located in `sumeval/metrics/lang`.
139 |
140 | You can make language class by inheriting [`BaseLang`](https://github.com/chakki-works/sumeval/blob/master/sumeval/metrics/lang/base_lang.py).
141 |
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | from sumeval.metrics.rouge import RougeCalculator
2 |
3 |
4 | rouge = RougeCalculator(stopwords=True, lang="en")
5 |
6 | rouge_1 = rouge.rouge_n(
7 | summary="I went to the Mars from my living town.",
8 | references="I went to Mars",
9 | n=1)
10 |
11 | rouge_2 = rouge.rouge_n(
12 | summary="I went to the Mars from my living town.",
13 | references=["I went to Mars", "It's my living town"],
14 | n=2)
15 |
16 | rouge_l = rouge.rouge_l(
17 | summary="I went to the Mars from my living town.",
18 | references=["I went to Mars", "It's my living town"])
19 |
20 | # You need spaCy to calculate ROUGE-BE
21 |
22 | rouge_be = rouge.rouge_be(
23 | summary="I went to the Mars from my living town.",
24 | references=["I went to Mars", "It's my living town"])
25 |
26 | print("ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}".format(
27 | rouge_1, rouge_2, rouge_l, rouge_be
28 | ).replace(", ", "\n"))
29 |
--------------------------------------------------------------------------------
/doc/top.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/doc/top.png
--------------------------------------------------------------------------------
/requirements-test.txt:
--------------------------------------------------------------------------------
1 | plac>=0.9.6
2 | sacrebleu>=1.3.2
3 | # For Test
4 | pytest==4.4.2
5 | codecov==2.0.15
6 | pytest-cov==2.7.1
7 | beautifulsoup4>=4.7.1
8 | -e git+https://github.com/tagucci/pythonrouge.git#egg=pythonrouge
9 | -e git+https://github.com/bdusell/rougescore.git#egg=rougescore
10 | # Basic Element
11 | spacy>=2.0.0,<3.0.0
12 | # For Chinese
13 | jieba>=0.39
14 | pyhanlp>=0.1.45
15 | # For Japanese
16 | janome>=0.3.9
17 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | plac>=0.9.6
2 | sacrebleu>=1.3.2,<2.0.0
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup
3 |
4 |
# Runtime dependencies. The sacrebleu upper bound mirrors requirements.txt;
# sumeval/metrics/bleu.py imports TOKENIZERS and DEFAULT_TOKENIZER from
# sacrebleu, so an uncapped install may pull a release without those names.
requires = ["plac>=0.9.6", "sacrebleu>=1.3.2,<2.0.0"]
6 |
7 |
def get_lang_data():
    """Collect the per-language data files to ship as package data.

    Scans every sub-directory of ``sumeval/metrics/lang/data/`` (relative to
    the current working directory) and returns the regular, non-hidden files
    found directly inside each one.

    Returns
    -------
    list of str
        Paths of the form ``data/<lang>/<file>``, i.e. relative to the
        ``sumeval.metrics.lang`` package directory.
    """
    data_root = "sumeval/metrics/lang/data/"
    collected = []
    for lang_name in os.listdir(data_root):
        lang_path = os.path.join(data_root, lang_name)
        if os.path.isdir(lang_path):
            # Keep plain files only; dot-files and nested directories are
            # left out.
            collected.extend(
                os.path.join("data/" + lang_name, entry)
                for entry in os.listdir(lang_path)
                if os.path.isfile(os.path.join(lang_path, entry))
                and not entry.startswith(".")
            )
    return collected
20 |
21 |
# Package definition for sumeval.
setup(
    name="sumeval",
    description="Well tested evaluation framework for Text summarization",
    url="https://github.com/chakki-works/sumeval",
    author="icoxfog417",
    author_email="icoxfog417@yahoo.co.jp",
    license="Apache License 2.0",
    keywords="text summarization machine learning",
    # The version is not hard-coded; it is supplied by setuptools_scm
    # (presumably derived from the repository's tags).
    use_scm_version=True,
    setup_requires=["setuptools_scm"],
    # Packages are listed explicitly rather than discovered.
    packages=[
        "sumeval",
        "sumeval.cli",
        "sumeval.metrics",
        "sumeval.metrics.lang",
    ],
    # Ship the stop-word / stemming data files found by get_lang_data().
    package_data={
        "sumeval.metrics.lang": get_lang_data()
    },
    # Installs the `sumeval` command, which runs sumeval.cli.sum_eval.entry_point.
    entry_points={
        "console_scripts": ["sumeval=sumeval.cli.sum_eval:entry_point"],
    },
    install_requires=requires,
    classifiers=[
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.6"
    ],
)
50 |
--------------------------------------------------------------------------------
/sumeval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/sumeval/__init__.py
--------------------------------------------------------------------------------
/sumeval/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/sumeval/cli/__init__.py
--------------------------------------------------------------------------------
/sumeval/cli/sum_eval.py:
--------------------------------------------------------------------------------
1 | import json
2 | import plac
3 | from itertools import groupby
4 | from statistics import mean
5 | from sumeval.metrics.rouge import RougeCalculator
6 | from sumeval.metrics.bleu import BLEUCalculator
7 |
8 |
def main(
        score_desc: ("score kind. ROUGE: r (-nlb means ROUGE-N, L, BE), BLEU: b."),
        use_file: ("read data from file", "flag", "f"),
        include_stopwords: ("don't ignore stop words", "flag", "in"),
        stemming: ("use stemming", "flag", "st"),
        word_limit: ("word limit count", "option", "wl") = -1,
        length_limit: ("sentence limit length", "option", "ll") = -1,
        alpha: ("alpha for f1-score", "option") = 0.5,
        language: ("language of the texts (en, ja or zh)", "option", "la") = "en",
        *params):
    """
    Calculate ROUGE/BLEU score.
    summary: Your generated summary.
    references: A Reference or references to evaluate.

    Ex: summary: "my summary is awesome"
        reference: "summaries are awesome"
        score kind: ROUGE-N

    Then:
        sumeval r-n "my summary is awesome" "summaries are awesome"
    """
    # NOTE: the docstring above doubles as the command-line help text, so it
    # is kept short; implementation notes live in comments instead.

    # "r-nlb" -> ("r", "nlb"); "b" -> ("b", ""). partition() also tolerates
    # descriptors with more than one "-", where unpacking split("-") into two
    # names would raise ValueError.
    score_type, _, score_kinds = score_desc.lower().partition("-")

    if len(params) < 2:
        print("You have to specify at least one summary and reference.")
        return

    # No type is declared in the option annotations, so values supplied on the
    # command line are expected to arrive as strings; normalise them once here
    # (the int/float defaults pass through unchanged).
    word_limit = int(word_limit)
    length_limit = int(length_limit)
    alpha = float(alpha)

    # First positional is the summary (text or file path), the rest are
    # references (texts or file paths).
    summary = params[0]
    references = list(params[1:])
    stopwords = not include_stopwords

    if use_file:
        generator = file_generator(summary, references)
    else:
        generator = sentence_to_generator(summary, references)

    scores = []
    if score_type == "r":
        scorer = RougeCalculator(
            stopwords=stopwords, stemming=stemming,
            word_limit=word_limit, length_limit=length_limit,
            lang=language)

        for s, rs in generator:
            score = {}
            # Each character of the suffix selects one ROUGE family;
            # unrecognised characters are ignored.
            for k in score_kinds:
                if k == "n":
                    score["ROUGE-1"] = scorer.rouge_1(s, rs, alpha)
                    score["ROUGE-2"] = scorer.rouge_2(s, rs, alpha)
                elif k == "l":
                    score["ROUGE-L"] = scorer.rouge_l(s, rs, alpha)
                elif k == "b":
                    score["ROUGE-BE"] = scorer.rouge_be(s, rs, "HMR", alpha)
            scores.append(score)

    elif score_type == "b":
        scorer = BLEUCalculator(lang=language)
        for s, rs in generator:
            # Nothing but the final JSON document is written to stdout, so the
            # output stays machine-readable.
            scores.append({"BLEU": scorer.bleu(s, rs)})

    # Every score dict has the same keys (they depend only on the requested
    # kinds), so the first one defines the columns to average.
    keys = list(scores[0]) if scores else []
    avgs = {k: mean([s[k] for s in scores]) for k in keys}

    result = {
        "options": {
            "stopwords": stopwords,
            "stemming": stemming,
            "word_limit": word_limit,
            "length_limit": length_limit,
            "alpha": alpha,
            "input-summary": summary,
            "input-references": references
        },
        "averages": avgs,
        "scores": scores
    }

    output = json.dumps(result, indent=2, ensure_ascii=False)
    print(output)
107 |
108 |
def file_generator(s_file_path, r_file_paths):
    """Yield aligned (summary, references) pairs read line by line from files.

    Parameters
    ----------
    s_file_path: str
        path of the UTF-8 file holding one summary per line
    r_file_paths: str[]
        paths of the UTF-8 reference files; line *i* of each file belongs to
        line *i* of the summary file

    Yields
    ------
    (str, str[])
        the stripped summary line and the list of stripped reference lines.
        Iteration stops at the end of the shortest file.
    """
    from contextlib import ExitStack

    # ExitStack closes every file that was opened, also when a later open()
    # fails, a read raises, or the consumer abandons the generator early.
    with ExitStack() as stack:
        s_file = stack.enter_context(open(s_file_path, encoding="utf-8"))
        r_files = [stack.enter_context(open(r, encoding="utf-8"))
                   for r in r_file_paths]
        for lines in zip(s_file, *r_files):
            stripped = [ln.strip() for ln in lines]
            yield stripped[0], stripped[1:]
119 |
120 |
def sentence_to_generator(summary, references):
    """Yield exactly one (summary, references) pair.

    Gives inline command-line texts the same iteration shape as
    ``file_generator``.
    """
    pair = (summary, references)
    yield pair
123 |
124 |
def entry_point():
    """Console entry point: hand ``main`` to plac for argument parsing."""
    plac.call(main)
127 |
128 |
# Run the command-line interface when this module is executed directly.
if __name__ == "__main__":
    entry_point()
131 |
--------------------------------------------------------------------------------
/sumeval/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/sumeval/metrics/__init__.py
--------------------------------------------------------------------------------
/sumeval/metrics/bleu.py:
--------------------------------------------------------------------------------
1 | from sacrebleu import corpus_bleu, TOKENIZERS, DEFAULT_TOKENIZER
2 | from sumeval.metrics.lang.base_lang import BaseLang
3 | from sumeval.metrics.lang import get_lang
4 |
5 |
class BLEUCalculator():
    """Compute BLEU for one summary against one or more references.

    The score itself is computed by sacrebleu's ``corpus_bleu``; this class
    only selects the tokenizer for the language and shapes the inputs.

    Parameters
    ----------
    smooth_method: str
        smoothing method forwarded to sacrebleu
    smooth_value: float
        smoothing value forwarded to sacrebleu
    lowercase: bool
        forwarded to sacrebleu
    use_effective_order: bool
        forwarded to sacrebleu
    lang: str or BaseLang
        language code ("en", "ja", "zh", ...) or a language instance

    Raises
    ------
    TypeError
        when ``lang`` is neither a str nor a BaseLang instance
    """

    def __init__(self,
                 smooth_method="floor", smooth_value=0.01,
                 lowercase=False, use_effective_order=True,
                 lang="en"):
        self.smooth_method = smooth_method
        self.smooth_value = smooth_value
        self.lowercase = lowercase
        self.use_effective_order = use_effective_order
        if isinstance(lang, str):
            self.lang = lang
            self._lang = get_lang(lang)
        elif isinstance(lang, BaseLang):
            self.lang = lang.lang
            self._lang = lang
        else:
            # Fail here with a clear message instead of an AttributeError on
            # self.lang a few lines below.
            raise TypeError(
                "lang must be a language code (str) or a BaseLang instance, "
                "got {}".format(type(lang).__name__))

        self._tokenizer = DEFAULT_TOKENIZER
        if self.lang == "ja":
            def tokenizer_ja(text):
                words = self._lang.tokenize_with_preprocess(text)
                return " ".join(words)

            # NOTE: this registers the tokenizer in sacrebleu's module-level
            # table, so the most recently created "ja" calculator wins.
            TOKENIZERS["ja"] = tokenizer_ja
            self._tokenizer = "ja"
        elif self.lang == "zh":
            self._tokenizer = "zh"

    def bleu(self, summary, references, score_only=True):
        """
        Calculate BLEU score by sacrebleu.

        Parameters
        ----------
        summary: str or str[]
            summary text, or an already tokenized summary (list of tokens)
        references: str or str[] (or list of token lists)
            reference or references to evaluate summary. When the summary
            is tokenized, each reference must be a list of tokens as well.
        score_only: bool
            when True, return only score

        Returns
        -------
        float when ``score_only`` is True, otherwise the object returned by
        ``corpus_bleu``.

        See Also
        --------
        https://github.com/mjpost/sacreBLEU
        """
        if isinstance(summary, str):
            hypothesis = summary
            if isinstance(references, str):
                references = [references]
            refs = list(references)
            tokenize = self._tokenizer
            force = False
        else:
            # already tokenized summary and references
            hypothesis = " ".join(summary)
            refs = [" ".join(r) for r in references]
            tokenize = "none"
            force = True

        # sacrebleu expects a list of reference *streams*, each aligned
        # segment-by-segment with the hypothesis stream. There is a single
        # hypothesis here, so every reference is its own one-segment stream.
        ref_streams = [[r] for r in refs]

        bleu = corpus_bleu(
            [hypothesis], ref_streams,
            smooth_method=self.smooth_method,
            smooth_value=self.smooth_value,
            force=force, lowercase=self.lowercase,
            tokenize=tokenize,
            use_effective_order=self.use_effective_order)

        if score_only:
            return bleu.score
        else:
            return bleu
80 |
--------------------------------------------------------------------------------
/sumeval/metrics/lang/__init__.py:
--------------------------------------------------------------------------------
def get_lang(lang=""):
    """Return a new language helper for the given language code.

    "ja" and "zh" select the Japanese and Chinese implementations; every
    other value (including the default empty string) selects English. The
    language module is imported only when it is requested.
    """
    if lang == "ja":
        from .lang_ja import LangJA as lang_class
    elif lang == "zh":
        from .lang_zh import LangZH as lang_class
    else:
        from .lang_en import LangEN as lang_class
    return lang_class()
11 |
--------------------------------------------------------------------------------
/sumeval/metrics/lang/base_lang.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 |
class BaseLang():
    """
    Base class for language specific text processing.

    Subclasses implement ``tokenize``. This class provides stop word
    lookup, dictionary based stemming and the extraction of basic
    elements from a spaCy dependency parse.
    """

    # Initial value of the parser cache. load_parser assigns through
    # ``self``, so a loaded parser is stored on the instance.
    _PARSER = None

    def __init__(self, lang):
        """
        Parameters
        ----------
        lang: str
            language code. Used as the spaCy model name and as the name
            of the data directory that holds the word lists
        """
        self.lang = lang
        self._stopwords = []
        self._stemming = {}

    def load_parser(self):
        """Load the spaCy parser on first use and return it."""
        if self._PARSER is None:
            import spacy
            self._PARSER = spacy.load(self.lang)
        return self._PARSER

    def tokenize(self, text):
        """Split text into words. Has to be implemented in subclass."""
        raise NotImplementedError("Have to implement tokenize in subclass")

    def tokenize_with_preprocess(self, text):
        """Tokenize text; the base implementation does no preprocessing."""
        return self.tokenize(text)

    def join(self, words):
        """Join words with a single space."""
        return " ".join(words)

    def parse_to_be(self, text):
        """
        Extract basic elements from the dependency parse of text.

        Parameters
        ----------
        text: str
            text to parse

        Returns
        -------
        list of BasicElement, one for each noun attached to a verb or
        adjective and for each verb or adjective attached to a noun.
        """
        from spacy.symbols import VERB, ADJ, NOUN
        doc = self.load_parser()(text)
        bes = []

        for token in doc:
            # chunk level dependencies
            if token.pos == NOUN and token.head.pos in (VERB, ADJ):
                # noun depends on a verb/adjective: noun is the head
                bes.append(BasicElement(token.text, token.head.lemma_,
                                        token.dep_))
            elif token.pos in (VERB, ADJ) and token.head.pos == NOUN:
                # verb/adjective depends on a noun: noun is the head
                bes.append(BasicElement(token.head.text, token.lemma_,
                                        token.dep_))

        return bes

    def is_stop_word(self, word):
        """Return True when word is in the stop word list."""
        if not self._stopwords:
            # loaded on first use; stays empty when no data file exists
            self.load_stopwords()
        return word in self._stopwords

    def stemming(self, word, min_length=-1):
        """
        Look up the stem of word in the stemming dictionary.

        Parameters
        ----------
        word: str
            word to stem
        min_length: int
            when positive, words shorter than this are returned as is

        Returns
        -------
        The stem from the dictionary, or word itself when it has no entry.
        """
        if not self._stemming:
            self.load_stemming_dict()

        if min_length > 0 and len(word) < min_length:
            return word
        return self._stemming.get(word, word)

    def _data_path(self, file_name):
        """Return the path of a data file of this language."""
        base = Path(os.path.dirname(__file__))
        return base.joinpath("data", self.lang, file_name)

    def load_stopwords(self):
        """Read stop_words.txt (one word per line) if the file exists."""
        p = self._data_path("stop_words.txt")
        if p.is_file():
            with p.open(encoding="utf-8") as f:
                words = [ln.strip() for ln in f]
            self._stopwords = [w for w in words if w]

    def load_stemming_dict(self):
        """
        Read stemming.txt if the file exists.

        Each line is "word stem", split at the first space. Blank lines
        and lines without a space are skipped.
        """
        p = self._data_path("stemming.txt")
        if p.is_file():
            stems = {}
            with p.open(encoding="utf-8") as f:
                for ln in f:
                    word, sep, stem = ln.strip().partition(" ")
                    if sep:
                        stems[word] = stem
            self._stemming = stems
86 |
87 |
class BasicElement():
    """
    A head word, a modifier word and the relation between them.

    The ``option`` argument of ``equals`` and ``as_key`` is a string of
    the letters H (head), M (modifier) and R (relation), in any case,
    selecting which parts take part. Other letters are ignored.
    """

    def __init__(self, head, modifier, relation):
        self.head = head
        self.modifier = modifier
        self.relation = relation

    def equals(self, other, option="HMR"):
        """
        Compare with another basic element.

        Parameters
        ----------
        other: BasicElement
            element to compare with
        option: str
            parts to compare (H, M, R)

        Returns
        -------
        True when all selected parts are equal.
        """
        for c in option.upper():
            if c == "H" and self.head != other.head:
                return False
            if c == "M" and self.modifier != other.modifier:
                return False
            if c == "R" and self.relation != other.relation:
                return False
        return True

    def as_key(self, option="HMR"):
        """
        Build a string key from the selected parts.

        Parameters
        ----------
        option: str
            parts to include (H, M, R); they are joined by "|" in the
            order given in option
        """
        parts = {"H": self.head, "M": self.modifier, "R": self.relation}
        return "|".join(parts[c] for c in option.upper() if c in parts)

    def __repr__(self):
        # show all three parts; the format string used to have a single
        # placeholder, so relation and modifier were dropped
        return "<BasicElement: {}-[{}]->{}>".format(
            self.head, self.relation, self.modifier)
122 |
--------------------------------------------------------------------------------
/sumeval/metrics/lang/data/en/stop_words.txt:
--------------------------------------------------------------------------------
1 | reuters
2 | ap
3 | jan
4 | feb
5 | mar
6 | apr
7 | may
8 | jun
9 | jul
10 | aug
11 | sep
12 | oct
13 | nov
14 | dec
15 | tech
16 | news
17 | index
18 | mon
19 | tue
20 | wed
21 | thu
22 | fri
23 | sat
24 | 's
25 | a
26 | a's
27 | able
28 | about
29 | above
30 | according
31 | accordingly
32 | across
33 | actually
34 | after
35 | afterwards
36 | again
37 | against
38 | ain't
39 | all
40 | allow
41 | allows
42 | almost
43 | alone
44 | along
45 | already
46 | also
47 | although
48 | always
49 | am
50 | amid
51 | among
52 | amongst
53 | an
54 | and
55 | another
56 | any
57 | anybody
58 | anyhow
59 | anyone
60 | anything
61 | anyway
62 | anyways
63 | anywhere
64 | apart
65 | appear
66 | appreciate
67 | appropriate
68 | are
69 | aren't
70 | around
71 | as
72 | aside
73 | ask
74 | asking
75 | associated
76 | at
77 | available
78 | away
79 | awfully
80 | b
81 | be
82 | became
83 | because
84 | become
85 | becomes
86 | becoming
87 | been
88 | before
89 | beforehand
90 | behind
91 | being
92 | believe
93 | below
94 | beside
95 | besides
96 | best
97 | better
98 | between
99 | beyond
100 | both
101 | brief
102 | but
103 | by
104 | c
105 | c'mon
106 | c's
107 | came
108 | can
109 | can't
110 | cannot
111 | cant
112 | cause
113 | causes
114 | certain
115 | certainly
116 | changes
117 | clearly
118 | co
119 | com
120 | come
121 | comes
122 | concerning
123 | consequently
124 | consider
125 | considering
126 | contain
127 | containing
128 | contains
129 | corresponding
130 | could
131 | couldn't
132 | course
133 | currently
134 | d
135 | definitely
136 | described
137 | despite
138 | did
139 | didn't
140 | different
141 | do
142 | does
143 | doesn't
144 | doing
145 | don't
146 | done
147 | down
148 | downwards
149 | during
150 | e
151 | each
152 | edu
153 | eg
154 | e.g.
155 | eight
156 | either
157 | else
158 | elsewhere
159 | enough
160 | entirely
161 | especially
162 | et
163 | etc
164 | etc.
165 | even
166 | ever
167 | every
168 | everybody
169 | everyone
170 | everything
171 | everywhere
172 | ex
173 | exactly
174 | example
175 | except
176 | f
177 | far
178 | few
179 | fifth
180 | five
181 | followed
182 | following
183 | follows
184 | for
185 | former
186 | formerly
187 | forth
188 | four
189 | from
190 | further
191 | furthermore
192 | g
193 | get
194 | gets
195 | getting
196 | given
197 | gives
198 | go
199 | goes
200 | going
201 | gone
202 | got
203 | gotten
204 | greetings
205 | h
206 | had
207 | hadn't
208 | happens
209 | hardly
210 | has
211 | hasn't
212 | have
213 | haven't
214 | having
215 | he
216 | he's
217 | hello
218 | help
219 | hence
220 | her
221 | here
222 | here's
223 | hereafter
224 | hereby
225 | herein
226 | hereupon
227 | hers
228 | herself
229 | hi
230 | him
231 | himself
232 | his
233 | hither
234 | hopefully
235 | how
236 | howbeit
237 | however
238 | i
239 | i'd
240 | i'll
241 | i'm
242 | i've
243 | ie
244 | i.e.
245 | if
246 | ignored
247 | immediate
248 | in
249 | inasmuch
250 | inc
251 | indeed
252 | indicate
253 | indicated
254 | indicates
255 | inner
256 | insofar
257 | instead
258 | into
259 | inward
260 | is
261 | isn't
262 | it
263 | it'd
264 | it'll
265 | it's
266 | its
267 | itself
268 | j
269 | just
270 | k
271 | keep
272 | keeps
273 | kept
274 | know
275 | knows
276 | known
277 | l
278 | lately
279 | later
280 | latter
281 | latterly
282 | least
283 | less
284 | lest
285 | let
286 | let's
287 | like
288 | liked
289 | likely
290 | little
291 | look
292 | looking
293 | looks
294 | ltd
295 | m
296 | mainly
297 | many
298 | may
299 | maybe
300 | me
301 | mean
302 | meanwhile
303 | merely
304 | might
305 | more
306 | moreover
307 | most
308 | mostly
309 | mr.
310 | ms.
311 | much
312 | must
313 | my
314 | myself
315 | n
316 | namely
317 | nd
318 | near
319 | nearly
320 | necessary
321 | need
322 | needs
323 | neither
324 | never
325 | nevertheless
326 | new
327 | next
328 | nine
329 | no
330 | nobody
331 | non
332 | none
333 | noone
334 | nor
335 | normally
336 | not
337 | nothing
338 | novel
339 | now
340 | nowhere
341 | o
342 | obviously
343 | of
344 | off
345 | often
346 | oh
347 | ok
348 | okay
349 | old
350 | on
351 | once
352 | one
353 | ones
354 | only
355 | onto
356 | or
357 | other
358 | others
359 | otherwise
360 | ought
361 | our
362 | ours
363 | ourselves
364 | out
365 | outside
366 | over
367 | overall
368 | own
369 | p
370 | particular
371 | particularly
372 | per
373 | perhaps
374 | placed
375 | please
376 | plus
377 | possible
378 | presumably
379 | probably
380 | provides
381 | q
382 | que
383 | quite
384 | qv
385 | r
386 | rather
387 | rd
388 | re
389 | really
390 | reasonably
391 | regarding
392 | regardless
393 | regards
394 | relatively
395 | respectively
396 | right
397 | s
398 | said
399 | same
400 | saw
401 | say
402 | saying
403 | says
404 | second
405 | secondly
406 | see
407 | seeing
408 | seem
409 | seemed
410 | seeming
411 | seems
412 | seen
413 | self
414 | selves
415 | sensible
416 | sent
417 | serious
418 | seriously
419 | seven
420 | several
421 | shall
422 | she
423 | should
424 | shouldn't
425 | since
426 | six
427 | so
428 | some
429 | somebody
430 | somehow
431 | someone
432 | something
433 | sometime
434 | sometimes
435 | somewhat
436 | somewhere
437 | soon
438 | sorry
439 | specified
440 | specify
441 | specifying
442 | still
443 | sub
444 | such
445 | sup
446 | sure
447 | t
448 | t's
449 | take
450 | taken
451 | tell
452 | tends
453 | th
454 | than
455 | thank
456 | thanks
457 | thanx
458 | that
459 | that's
460 | thats
461 | the
462 | their
463 | theirs
464 | them
465 | themselves
466 | then
467 | thence
468 | there
469 | there's
470 | thereafter
471 | thereby
472 | therefore
473 | therein
474 | theres
475 | thereupon
476 | these
477 | they
478 | they'd
479 | they'll
480 | they're
481 | they've
482 | think
483 | third
484 | this
485 | thorough
486 | thoroughly
487 | those
488 | though
489 | three
490 | through
491 | throughout
492 | thru
493 | thus
494 | to
495 | together
496 | too
497 | took
498 | toward
499 | towards
500 | tried
501 | tries
502 | truly
503 | try
504 | trying
505 | twice
506 | two
507 | u
508 | un
509 | under
510 | unfortunately
511 | unless
512 | unlikely
513 | until
514 | unto
515 | up
516 | upon
517 | us
518 | use
519 | used
520 | useful
521 | uses
522 | using
523 | usually
524 | uucp
525 | v
526 | value
527 | various
528 | very
529 | via
530 | viz
531 | vs
532 | w
533 | want
534 | wants
535 | was
536 | wasn't
537 | way
538 | we
539 | we'd
540 | we'll
541 | we're
542 | we've
543 | welcome
544 | well
545 | went
546 | were
547 | weren't
548 | what
549 | what's
550 | whatever
551 | when
552 | whence
553 | whenever
554 | where
555 | where's
556 | whereafter
557 | whereas
558 | whereby
559 | wherein
560 | whereupon
561 | wherever
562 | whether
563 | which
564 | while
565 | whither
566 | who
567 | who's
568 | whoever
569 | whole
570 | whom
571 | whose
572 | why
573 | will
574 | willing
575 | wish
576 | with
577 | within
578 | without
579 | won't
580 | wonder
581 | would
582 | would
583 | wouldn't
584 | x
585 | y
586 | yes
587 | yet
588 | you
589 | you'd
590 | you'll
591 | you're
592 | you've
593 | your
594 | yours
595 | yourself
596 | yourselves
597 | z
598 | zero
599 |
--------------------------------------------------------------------------------
/sumeval/metrics/lang/data/ja/stop_words.txt:
--------------------------------------------------------------------------------
1 | あそこ
2 | あたり
3 | あちら
4 | あっち
5 | あと
6 | あな
7 | あなた
8 | あれ
9 | いくつ
10 | いつ
11 | いま
12 | いや
13 | いろいろ
14 | うち
15 | おおまか
16 | おまえ
17 | おれ
18 | がい
19 | かく
20 | かたち
21 | かやの
22 | から
23 | がら
24 | きた
25 | くせ
26 | ここ
27 | こっち
28 | こと
29 | ごと
30 | こちら
31 | ごっちゃ
32 | これ
33 | これら
34 | ごろ
35 | さまざま
36 | さらい
37 | さん
38 | しかた
39 | しよう
40 | すか
41 | ずつ
42 | すね
43 | すべて
44 | ぜんぶ
45 | そう
46 | そこ
47 | そちら
48 | そっち
49 | そで
50 | それ
51 | それぞれ
52 | それなり
53 | たくさん
54 | たち
55 | たび
56 | ため
57 | だめ
58 | ちゃ
59 | ちゃん
60 | てん
61 | とおり
62 | とき
63 | どこ
64 | どこか
65 | ところ
66 | どちら
67 | どっか
68 | どっち
69 | どれ
70 | なか
71 | なかば
72 | なに
73 | など
74 | なん
75 | はじめ
76 | はず
77 | はるか
78 | ひと
79 | ひとつ
80 | ふく
81 | ぶり
82 | べつ
83 | へん
84 | ぺん
85 | ほう
86 | ほか
87 | まさ
88 | まし
89 | まとも
90 | まま
91 | みたい
92 | みつ
93 | みなさん
94 | みんな
95 | もと
96 | もの
97 | もん
98 | やつ
99 | よう
100 | よそ
101 | わけ
102 | わたし
103 | ハイ
104 | 上
105 | 中
106 | 下
107 | 字
108 | 年
109 | 月
110 | 日
111 | 時
112 | 分
113 | 秒
114 | 週
115 | 火
116 | 水
117 | 木
118 | 金
119 | 土
120 | 国
121 | 都
122 | 道
123 | 府
124 | 県
125 | 市
126 | 区
127 | 町
128 | 村
129 | 各
130 | 第
131 | 方
132 | 何
133 | 的
134 | 度
135 | 文
136 | 者
137 | 性
138 | 体
139 | 人
140 | 他
141 | 今
142 | 部
143 | 課
144 | 係
145 | 外
146 | 類
147 | 達
148 | 気
149 | 室
150 | 口
151 | 誰
152 | 用
153 | 界
154 | 会
155 | 首
156 | 男
157 | 女
158 | 別
159 | 話
160 | 私
161 | 屋
162 | 店
163 | 家
164 | 場
165 | 等
166 | 見
167 | 際
168 | 観
169 | 段
170 | 略
171 | 例
172 | 系
173 | 論
174 | 形
175 | 間
176 | 地
177 | 員
178 | 線
179 | 点
180 | 書
181 | 品
182 | 力
183 | 法
184 | 感
185 | 作
186 | 元
187 | 手
188 | 数
189 | 彼
190 | 彼女
191 | 子
192 | 内
193 | 楽
194 | 喜
195 | 怒
196 | 哀
197 | 輪
198 | 頃
199 | 化
200 | 境
201 | 俺
202 | 奴
203 | 高
204 | 校
205 | 婦
206 | 伸
207 | 紀
208 | 誌
209 | レ
210 | 行
211 | 列
212 | 事
213 | 士
214 | 台
215 | 集
216 | 様
217 | 所
218 | 歴
219 | 器
220 | 名
221 | 情
222 | 連
223 | 毎
224 | 式
225 | 簿
226 | 回
227 | 匹
228 | 個
229 | 席
230 | 束
231 | 歳
232 | 目
233 | 通
234 | 面
235 | 円
236 | 玉
237 | 枚
238 | 前
239 | 後
240 | 左
241 | 右
242 | 次
243 | 先
244 | 春
245 | 夏
246 | 秋
247 | 冬
248 | 一
249 | 二
250 | 三
251 | 四
252 | 五
253 | 六
254 | 七
255 | 八
256 | 九
257 | 十
258 | 百
259 | 千
260 | 万
261 | 億
262 | 兆
263 | 下記
264 | 上記
265 | 時間
266 | 今回
267 | 前回
268 | 場合
269 | 一つ
270 | 年生
271 | 自分
272 | ヶ所
273 | ヵ所
274 | カ所
275 | 箇所
276 | ヶ月
277 | ヵ月
278 | カ月
279 | 箇月
280 | 名前
281 | 本当
282 | 確か
283 | 時点
284 | 全部
285 | 関係
286 | 近く
287 | 方法
288 | 我々
289 | 違い
290 | 多く
291 | 扱い
292 | 新た
293 | その後
294 | 半ば
295 | 結局
296 | 様々
297 | 以前
298 | 以後
299 | 以降
300 | 未満
301 | 以上
302 | 以下
303 | 幾つ
304 | 毎日
305 | 自体
306 | 向こう
307 | 何人
308 | 手段
309 | 同じ
310 | 感じ
--------------------------------------------------------------------------------
/sumeval/metrics/lang/data/zh/stop_words.txt:
--------------------------------------------------------------------------------
1 | !
2 | "
3 | #
4 | $
5 | %
6 | &
7 | '
8 | (
9 | )
10 | *
11 | +
12 | ,
13 | -
14 | --
15 | .
16 | ..
17 | ...
18 | ......
19 | ...................
20 | ./
21 | .一
22 | .数
23 | .日
24 | /
25 | //
26 | 0
27 | 1
28 | 2
29 | 3
30 | 4
31 | 5
32 | 6
33 | 7
34 | 8
35 | 9
36 | :
37 | ://
38 | ::
39 | ;
40 | <
41 | =
42 | >
43 | >>
44 | ?
45 | @
46 | A
47 | Lex
48 | [
49 | \
50 | ]
51 | ^
52 | _
53 | `
54 | exp
55 | sub
56 | sup
57 | |
58 | }
59 | ~
60 | ~~~~
61 | ·
62 | ×
63 | ×××
64 | Δ
65 | Ψ
66 | γ
67 | μ
68 | φ
69 | φ.
70 | В
71 | —
72 | ——
73 | ———
74 | ‘
75 | ’
76 | ’‘
77 | “
78 | ”
79 | ”,
80 | …
81 | ……
82 | …………………………………………………③
83 | ′∈
84 | ′|
85 | ℃
86 | Ⅲ
87 | ↑
88 | →
89 | ∈[
90 | ∪φ∈
91 | ≈
92 | ①
93 | ②
94 | ②c
95 | ③
96 | ③]
97 | ④
98 | ⑤
99 | ⑥
100 | ⑦
101 | ⑧
102 | ⑨
103 | ⑩
104 | ──
105 | ■
106 | ▲
107 |
108 | 、
109 | 。
110 | 〈
111 | 〉
112 | 《
113 | 》
114 | 》),
115 | 」
116 | 『
117 | 』
118 | 【
119 | 】
120 | 〔
121 | 〕
122 | 〕〔
123 | ㈧
124 | 一
125 | 一.
126 | 一一
127 | 一下
128 | 一个
129 | 一些
130 | 一何
131 | 一切
132 | 一则
133 | 一则通过
134 | 一天
135 | 一定
136 | 一方面
137 | 一旦
138 | 一时
139 | 一来
140 | 一样
141 | 一次
142 | 一片
143 | 一番
144 | 一直
145 | 一致
146 | 一般
147 | 一起
148 | 一转眼
149 | 一边
150 | 一面
151 | 七
152 | 万一
153 | 三
154 | 三天两头
155 | 三番两次
156 | 三番五次
157 | 上
158 | 上下
159 | 上升
160 | 上去
161 | 上来
162 | 上述
163 | 上面
164 | 下
165 | 下列
166 | 下去
167 | 下来
168 | 下面
169 | 不
170 | 不一
171 | 不下
172 | 不久
173 | 不了
174 | 不亦乐乎
175 | 不仅
176 | 不仅...而且
177 | 不仅仅
178 | 不仅仅是
179 | 不会
180 | 不但
181 | 不但...而且
182 | 不光
183 | 不免
184 | 不再
185 | 不力
186 | 不单
187 | 不变
188 | 不只
189 | 不可
190 | 不可开交
191 | 不可抗拒
192 | 不同
193 | 不外
194 | 不外乎
195 | 不够
196 | 不大
197 | 不如
198 | 不妨
199 | 不定
200 | 不对
201 | 不少
202 | 不尽
203 | 不尽然
204 | 不巧
205 | 不已
206 | 不常
207 | 不得
208 | 不得不
209 | 不得了
210 | 不得已
211 | 不必
212 | 不怎么
213 | 不怕
214 | 不惟
215 | 不成
216 | 不拘
217 | 不择手段
218 | 不敢
219 | 不料
220 | 不断
221 | 不日
222 | 不时
223 | 不是
224 | 不曾
225 | 不止
226 | 不止一次
227 | 不比
228 | 不消
229 | 不满
230 | 不然
231 | 不然的话
232 | 不特
233 | 不独
234 | 不由得
235 | 不知不觉
236 | 不管
237 | 不管怎样
238 | 不经意
239 | 不胜
240 | 不能
241 | 不能不
242 | 不至于
243 | 不若
244 | 不要
245 | 不论
246 | 不起
247 | 不足
248 | 不过
249 | 不迭
250 | 不问
251 | 不限
252 | 与
253 | 与其
254 | 与其说
255 | 与否
256 | 与此同时
257 | 专门
258 | 且
259 | 且不说
260 | 且说
261 | 两者
262 | 严格
263 | 严重
264 | 个
265 | 个人
266 | 个别
267 | 中小
268 | 中间
269 | 丰富
270 | 串行
271 | 临
272 | 临到
273 | 为
274 | 为主
275 | 为了
276 | 为什么
277 | 为什麽
278 | 为何
279 | 为止
280 | 为此
281 | 为着
282 | 主张
283 | 主要
284 | 举凡
285 | 举行
286 | 乃
287 | 乃至
288 | 乃至于
289 | 么
290 | 之
291 | 之一
292 | 之前
293 | 之后
294 | 之後
295 | 之所以
296 | 之类
297 | 乌乎
298 | 乎
299 | 乒
300 | 乘
301 | 乘势
302 | 乘机
303 | 乘胜
304 | 乘虚
305 | 乘隙
306 | 九
307 | 也
308 | 也好
309 | 也就是说
310 | 也是
311 | 也罢
312 | 了
313 | 了解
314 | 争取
315 | 二
316 | 二来
317 | 二话不说
318 | 二话没说
319 | 于
320 | 于是
321 | 于是乎
322 | 云云
323 | 云尔
324 | 互
325 | 互相
326 | 五
327 | 些
328 | 交口
329 | 亦
330 | 产生
331 | 亲口
332 | 亲手
333 | 亲眼
334 | 亲自
335 | 亲身
336 | 人
337 | 人人
338 | 人们
339 | 人家
340 | 人民
341 | 什么
342 | 什么样
343 | 什麽
344 | 仅
345 | 仅仅
346 | 今
347 | 今后
348 | 今天
349 | 今年
350 | 今後
351 | 介于
352 | 仍
353 | 仍旧
354 | 仍然
355 | 从
356 | 从不
357 | 从严
358 | 从中
359 | 从事
360 | 从今以后
361 | 从优
362 | 从古到今
363 | 从古至今
364 | 从头
365 | 从宽
366 | 从小
367 | 从新
368 | 从无到有
369 | 从早到晚
370 | 从未
371 | 从来
372 | 从此
373 | 从此以后
374 | 从而
375 | 从轻
376 | 从速
377 | 从重
378 | 他
379 | 他人
380 | 他们
381 | 他是
382 | 他的
383 | 代替
384 | 以
385 | 以上
386 | 以下
387 | 以为
388 | 以便
389 | 以免
390 | 以前
391 | 以及
392 | 以后
393 | 以外
394 | 以後
395 | 以故
396 | 以期
397 | 以来
398 | 以至
399 | 以至于
400 | 以致
401 | 们
402 | 任
403 | 任何
404 | 任凭
405 | 任务
406 | 企图
407 | 伙同
408 | 会
409 | 伟大
410 | 传
411 | 传说
412 | 传闻
413 | 似乎
414 | 似的
415 | 但
416 | 但凡
417 | 但愿
418 | 但是
419 | 何
420 | 何乐而不为
421 | 何以
422 | 何况
423 | 何处
424 | 何妨
425 | 何尝
426 | 何必
427 | 何时
428 | 何止
429 | 何苦
430 | 何须
431 | 余外
432 | 作为
433 | 你
434 | 你们
435 | 你是
436 | 你的
437 | 使
438 | 使得
439 | 使用
440 | 例如
441 | 依
442 | 依据
443 | 依照
444 | 依靠
445 | 便
446 | 便于
447 | 促进
448 | 保持
449 | 保管
450 | 保险
451 | 俺
452 | 俺们
453 | 倍加
454 | 倍感
455 | 倒不如
456 | 倒不如说
457 | 倒是
458 | 倘
459 | 倘使
460 | 倘或
461 | 倘然
462 | 倘若
463 | 借
464 | 借以
465 | 借此
466 | 假使
467 | 假如
468 | 假若
469 | 偏偏
470 | 做到
471 | 偶尔
472 | 偶而
473 | 傥然
474 | 像
475 | 儿
476 | 允许
477 | 元/吨
478 | 充其极
479 | 充其量
480 | 充分
481 | 先不先
482 | 先后
483 | 先後
484 | 先生
485 | 光
486 | 光是
487 | 全体
488 | 全力
489 | 全年
490 | 全然
491 | 全身心
492 | 全部
493 | 全都
494 | 全面
495 | 八
496 | 八成
497 | 公然
498 | 六
499 | 兮
500 | 共
501 | 共同
502 | 共总
503 | 关于
504 | 其
505 | 其一
506 | 其中
507 | 其二
508 | 其他
509 | 其余
510 | 其后
511 | 其它
512 | 其实
513 | 其次
514 | 具体
515 | 具体地说
516 | 具体来说
517 | 具体说来
518 | 具有
519 | 兼之
520 | 内
521 | 再
522 | 再其次
523 | 再则
524 | 再有
525 | 再次
526 | 再者
527 | 再者说
528 | 再说
529 | 冒
530 | 冲
531 | 决不
532 | 决定
533 | 决非
534 | 况且
535 | 准备
536 | 凑巧
537 | 凝神
538 | 几
539 | 几乎
540 | 几度
541 | 几时
542 | 几番
543 | 几经
544 | 凡
545 | 凡是
546 | 凭
547 | 凭借
548 | 出
549 | 出于
550 | 出去
551 | 出来
552 | 出现
553 | 分别
554 | 分头
555 | 分期
556 | 分期分批
557 | 切
558 | 切不可
559 | 切切
560 | 切勿
561 | 切莫
562 | 则
563 | 则甚
564 | 刚
565 | 刚好
566 | 刚巧
567 | 刚才
568 | 初
569 | 别
570 | 别人
571 | 别处
572 | 别是
573 | 别的
574 | 别管
575 | 别说
576 | 到
577 | 到了儿
578 | 到处
579 | 到头
580 | 到头来
581 | 到底
582 | 到目前为止
583 | 前后
584 | 前此
585 | 前者
586 | 前进
587 | 前面
588 | 加上
589 | 加之
590 | 加以
591 | 加入
592 | 加强
593 | 动不动
594 | 动辄
595 | 勃然
596 | 匆匆
597 | 十分
598 | 千
599 | 千万
600 | 千万千万
601 | 半
602 | 单
603 | 单单
604 | 单纯
605 | 即
606 | 即令
607 | 即使
608 | 即便
609 | 即刻
610 | 即如
611 | 即将
612 | 即或
613 | 即是说
614 | 即若
615 | 却
616 | 却不
617 | 历
618 | 原来
619 | 去
620 | 又
621 | 又及
622 | 及
623 | 及其
624 | 及时
625 | 及至
626 | 双方
627 | 反之
628 | 反之亦然
629 | 反之则
630 | 反倒
631 | 反倒是
632 | 反应
633 | 反手
634 | 反映
635 | 反而
636 | 反过来
637 | 反过来说
638 | 取得
639 | 取道
640 | 受到
641 | 变成
642 | 古来
643 | 另
644 | 另一个
645 | 另一方面
646 | 另外
647 | 另悉
648 | 另方面
649 | 另行
650 | 只
651 | 只当
652 | 只怕
653 | 只是
654 | 只有
655 | 只消
656 | 只要
657 | 只限
658 | 叫
659 | 叫做
660 | 召开
661 | 叮咚
662 | 叮当
663 | 可
664 | 可以
665 | 可好
666 | 可是
667 | 可能
668 | 可见
669 | 各
670 | 各个
671 | 各人
672 | 各位
673 | 各地
674 | 各式
675 | 各种
676 | 各级
677 | 各自
678 | 合理
679 | 同
680 | 同一
681 | 同时
682 | 同样
683 | 后
684 | 后来
685 | 后者
686 | 后面
687 | 向
688 | 向使
689 | 向着
690 | 吓
691 | 吗
692 | 否则
693 | 吧
694 | 吧哒
695 | 吱
696 | 呀
697 | 呃
698 | 呆呆地
699 | 呐
700 | 呕
701 | 呗
702 | 呜
703 | 呜呼
704 | 呢
705 | 周围
706 | 呵
707 | 呵呵
708 | 呸
709 | 呼哧
710 | 呼啦
711 | 咋
712 | 和
713 | 咚
714 | 咦
715 | 咧
716 | 咱
717 | 咱们
718 | 咳
719 | 哇
720 | 哈
721 | 哈哈
722 | 哉
723 | 哎
724 | 哎呀
725 | 哎哟
726 | 哗
727 | 哗啦
728 | 哟
729 | 哦
730 | 哩
731 | 哪
732 | 哪个
733 | 哪些
734 | 哪儿
735 | 哪天
736 | 哪年
737 | 哪怕
738 | 哪样
739 | 哪边
740 | 哪里
741 | 哼
742 | 哼唷
743 | 唉
744 | 唯有
745 | 啊
746 | 啊呀
747 | 啊哈
748 | 啊哟
749 | 啐
750 | 啥
751 | 啦
752 | 啪达
753 | 啷当
754 | 喀
755 | 喂
756 | 喏
757 | 喔唷
758 | 喽
759 | 嗡
760 | 嗡嗡
761 | 嗬
762 | 嗯
763 | 嗳
764 | 嘎
765 | 嘎嘎
766 | 嘎登
767 | 嘘
768 | 嘛
769 | 嘻
770 | 嘿
771 | 嘿嘿
772 | 四
773 | 因
774 | 因为
775 | 因了
776 | 因此
777 | 因着
778 | 因而
779 | 固
780 | 固然
781 | 在
782 | 在下
783 | 在于
784 | 地
785 | 均
786 | 坚决
787 | 坚持
788 | 基于
789 | 基本
790 | 基本上
791 | 处在
792 | 处处
793 | 处理
794 | 复杂
795 | 多
796 | 多么
797 | 多亏
798 | 多多
799 | 多多少少
800 | 多多益善
801 | 多少
802 | 多年前
803 | 多年来
804 | 多数
805 | 多次
806 | 够瞧的
807 | 大
808 | 大不了
809 | 大举
810 | 大事
811 | 大体
812 | 大体上
813 | 大凡
814 | 大力
815 | 大多
816 | 大多数
817 | 大大
818 | 大家
819 | 大张旗鼓
820 | 大批
821 | 大抵
822 | 大概
823 | 大略
824 | 大约
825 | 大致
826 | 大都
827 | 大量
828 | 大面儿上
829 | 失去
830 | 奇
831 | 奈
832 | 奋勇
833 | 她
834 | 她们
835 | 她是
836 | 她的
837 | 好
838 | 好在
839 | 好的
840 | 好象
841 | 如
842 | 如上
843 | 如上所述
844 | 如下
845 | 如今
846 | 如何
847 | 如其
848 | 如前所述
849 | 如同
850 | 如常
851 | 如是
852 | 如期
853 | 如果
854 | 如次
855 | 如此
856 | 如此等等
857 | 如若
858 | 始而
859 | 姑且
860 | 存在
861 | 存心
862 | 孰料
863 | 孰知
864 | 宁
865 | 宁可
866 | 宁愿
867 | 宁肯
868 | 它
869 | 它们
870 | 它们的
871 | 它是
872 | 它的
873 | 安全
874 | 完全
875 | 完成
876 | 定
877 | 实现
878 | 实际
879 | 宣布
880 | 容易
881 | 密切
882 | 对
883 | 对于
884 | 对应
885 | 对待
886 | 对方
887 | 对比
888 | 将
889 | 将才
890 | 将要
891 | 将近
892 | 小
893 | 少数
894 | 尔
895 | 尔后
896 | 尔尔
897 | 尔等
898 | 尚且
899 | 尤其
900 | 就
901 | 就地
902 | 就是
903 | 就是了
904 | 就是说
905 | 就此
906 | 就算
907 | 就要
908 | 尽
909 | 尽可能
910 | 尽如人意
911 | 尽心尽力
912 | 尽心竭力
913 | 尽快
914 | 尽早
915 | 尽然
916 | 尽管
917 | 尽管如此
918 | 尽量
919 | 局外
920 | 居然
921 | 届时
922 | 属于
923 | 屡
924 | 屡屡
925 | 屡次
926 | 屡次三番
927 | 岂
928 | 岂但
929 | 岂止
930 | 岂非
931 | 川流不息
932 | 左右
933 | 巨大
934 | 巩固
935 | 差一点
936 | 差不多
937 | 己
938 | 已
939 | 已矣
940 | 已经
941 | 巴
942 | 巴巴
943 | 带
944 | 帮助
945 | 常
946 | 常常
947 | 常言说
948 | 常言说得好
949 | 常言道
950 | 平素
951 | 年复一年
952 | 并
953 | 并不
954 | 并不是
955 | 并且
956 | 并排
957 | 并无
958 | 并没
959 | 并没有
960 | 并肩
961 | 并非
962 | 广大
963 | 广泛
964 | 应当
965 | 应用
966 | 应该
967 | 庶乎
968 | 庶几
969 | 开外
970 | 开始
971 | 开展
972 | 引起
973 | 弗
974 | 弹指之间
975 | 强烈
976 | 强调
977 | 归
978 | 归根到底
979 | 归根结底
980 | 归齐
981 | 当
982 | 当下
983 | 当中
984 | 当儿
985 | 当前
986 | 当即
987 | 当口儿
988 | 当地
989 | 当场
990 | 当头
991 | 当庭
992 | 当时
993 | 当然
994 | 当真
995 | 当着
996 | 形成
997 | 彻夜
998 | 彻底
999 | 彼
1000 | 彼时
1001 | 彼此
1002 | 往
1003 | 往往
1004 | 待
1005 | 待到
1006 | 很
1007 | 很多
1008 | 很少
1009 | 後来
1010 | 後面
1011 | 得
1012 | 得了
1013 | 得出
1014 | 得到
1015 | 得天独厚
1016 | 得起
1017 | 心里
1018 | 必
1019 | 必定
1020 | 必将
1021 | 必然
1022 | 必要
1023 | 必须
1024 | 快
1025 | 快要
1026 | 忽地
1027 | 忽然
1028 | 怎
1029 | 怎么
1030 | 怎么办
1031 | 怎么样
1032 | 怎奈
1033 | 怎样
1034 | 怎麽
1035 | 怕
1036 | 急匆匆
1037 | 怪
1038 | 怪不得
1039 | 总之
1040 | 总是
1041 | 总的来看
1042 | 总的来说
1043 | 总的说来
1044 | 总结
1045 | 总而言之
1046 | 恍然
1047 | 恐怕
1048 | 恰似
1049 | 恰好
1050 | 恰如
1051 | 恰巧
1052 | 恰恰
1053 | 恰恰相反
1054 | 恰逢
1055 | 您
1056 | 您们
1057 | 您是
1058 | 惟其
1059 | 惯常
1060 | 意思
1061 | 愤然
1062 | 愿意
1063 | 慢说
1064 | 成为
1065 | 成年
1066 | 成年累月
1067 | 成心
1068 | 我
1069 | 我们
1070 | 我是
1071 | 我的
1072 | 或
1073 | 或则
1074 | 或多或少
1075 | 或是
1076 | 或曰
1077 | 或者
1078 | 或许
1079 | 战斗
1080 | 截然
1081 | 截至
1082 | 所
1083 | 所以
1084 | 所在
1085 | 所幸
1086 | 所有
1087 | 所谓
1088 | 才
1089 | 才能
1090 | 扑通
1091 | 打
1092 | 打从
1093 | 打开天窗说亮话
1094 | 扩大
1095 | 把
1096 | 抑或
1097 | 抽冷子
1098 | 拦腰
1099 | 拿
1100 | 按
1101 | 按时
1102 | 按期
1103 | 按照
1104 | 按理
1105 | 按说
1106 | 挨个
1107 | 挨家挨户
1108 | 挨次
1109 | 挨着
1110 | 挨门挨户
1111 | 挨门逐户
1112 | 换句话说
1113 | 换言之
1114 | 据
1115 | 据实
1116 | 据悉
1117 | 据我所知
1118 | 据此
1119 | 据称
1120 | 据说
1121 | 掌握
1122 | 接下来
1123 | 接着
1124 | 接著
1125 | 接连不断
1126 | 放量
1127 | 故
1128 | 故意
1129 | 故此
1130 | 故而
1131 | 敞开儿
1132 | 敢
1133 | 敢于
1134 | 敢情
1135 | 数/
1136 | 整个
1137 | 断然
1138 | 方
1139 | 方便
1140 | 方才
1141 | 方能
1142 | 方面
1143 | 旁人
1144 | 无
1145 | 无宁
1146 | 无法
1147 | 无论
1148 | 既
1149 | 既...又
1150 | 既往
1151 | 既是
1152 | 既然
1153 | 日复一日
1154 | 日渐
1155 | 日益
1156 | 日臻
1157 | 日见
1158 | 时候
1159 | 昂然
1160 | 明显
1161 | 明确
1162 | 是
1163 | 是不是
1164 | 是以
1165 | 是否
1166 | 是的
1167 | 显然
1168 | 显著
1169 | 普通
1170 | 普遍
1171 | 暗中
1172 | 暗地里
1173 | 暗自
1174 | 更
1175 | 更为
1176 | 更加
1177 | 更进一步
1178 | 曾
1179 | 曾经
1180 | 替
1181 | 替代
1182 | 最
1183 | 最后
1184 | 最大
1185 | 最好
1186 | 最後
1187 | 最近
1188 | 最高
1189 | 有
1190 | 有些
1191 | 有关
1192 | 有利
1193 | 有力
1194 | 有及
1195 | 有所
1196 | 有效
1197 | 有时
1198 | 有点
1199 | 有的
1200 | 有的是
1201 | 有着
1202 | 有著
1203 | 望
1204 | 朝
1205 | 朝着
1206 | 末##末
1207 | 本
1208 | 本人
1209 | 本地
1210 | 本着
1211 | 本身
1212 | 权时
1213 | 来
1214 | 来不及
1215 | 来得及
1216 | 来看
1217 | 来着
1218 | 来自
1219 | 来讲
1220 | 来说
1221 | 极
1222 | 极为
1223 | 极了
1224 | 极其
1225 | 极力
1226 | 极大
1227 | 极度
1228 | 极端
1229 | 构成
1230 | 果然
1231 | 果真
1232 | 某
1233 | 某个
1234 | 某些
1235 | 某某
1236 | 根据
1237 | 根本
1238 | 格外
1239 | 梆
1240 | 概
1241 | 次第
1242 | 欢迎
1243 | 欤
1244 | 正值
1245 | 正在
1246 | 正如
1247 | 正巧
1248 | 正常
1249 | 正是
1250 | 此
1251 | 此中
1252 | 此后
1253 | 此地
1254 | 此处
1255 | 此外
1256 | 此时
1257 | 此次
1258 | 此间
1259 | 殆
1260 | 毋宁
1261 | 每
1262 | 每个
1263 | 每天
1264 | 每年
1265 | 每当
1266 | 每时每刻
1267 | 每每
1268 | 每逢
1269 | 比
1270 | 比及
1271 | 比如
1272 | 比如说
1273 | 比方
1274 | 比照
1275 | 比起
1276 | 比较
1277 | 毕竟
1278 | 毫不
1279 | 毫无
1280 | 毫无例外
1281 | 毫无保留地
1282 | 汝
1283 | 沙沙
1284 | 没
1285 | 没奈何
1286 | 没有
1287 | 沿
1288 | 沿着
1289 | 注意
1290 | 活
1291 | 深入
1292 | 清楚
1293 | 满
1294 | 满足
1295 | 漫说
1296 | 焉
1297 | 然
1298 | 然则
1299 | 然后
1300 | 然後
1301 | 然而
1302 | 照
1303 | 照着
1304 | 牢牢
1305 | 特别是
1306 | 特殊
1307 | 特点
1308 | 犹且
1309 | 犹自
1310 | 独
1311 | 独自
1312 | 猛然
1313 | 猛然间
1314 | 率尔
1315 | 率然
1316 | 现代
1317 | 现在
1318 | 理应
1319 | 理当
1320 | 理该
1321 | 瑟瑟
1322 | 甚且
1323 | 甚么
1324 | 甚或
1325 | 甚而
1326 | 甚至
1327 | 甚至于
1328 | 用
1329 | 用来
1330 | 甫
1331 | 甭
1332 | 由
1333 | 由于
1334 | 由是
1335 | 由此
1336 | 由此可见
1337 | 略
1338 | 略为
1339 | 略加
1340 | 略微
1341 | 白
1342 | 白白
1343 | 的
1344 | 的确
1345 | 的话
1346 | 皆可
1347 | 目前
1348 | 直到
1349 | 直接
1350 | 相似
1351 | 相信
1352 | 相反
1353 | 相同
1354 | 相对
1355 | 相对而言
1356 | 相应
1357 | 相当
1358 | 相等
1359 | 省得
1360 | 看
1361 | 看上去
1362 | 看出
1363 | 看到
1364 | 看来
1365 | 看样子
1366 | 看看
1367 | 看见
1368 | 看起来
1369 | 真是
1370 | 真正
1371 | 眨眼
1372 | 着
1373 | 着呢
1374 | 矣
1375 | 矣乎
1376 | 矣哉
1377 | 知道
1378 | 砰
1379 | 确定
1380 | 碰巧
1381 | 社会主义
1382 | 离
1383 | 种
1384 | 积极
1385 | 移动
1386 | 究竟
1387 | 穷年累月
1388 | 突出
1389 | 突然
1390 | 窃
1391 | 立
1392 | 立刻
1393 | 立即
1394 | 立地
1395 | 立时
1396 | 立马
1397 | 竟
1398 | 竟然
1399 | 竟而
1400 | 第
1401 | 第二
1402 | 等
1403 | 等到
1404 | 等等
1405 | 策略地
1406 | 简直
1407 | 简而言之
1408 | 简言之
1409 | 管
1410 | 类如
1411 | 粗
1412 | 精光
1413 | 紧接着
1414 | 累年
1415 | 累次
1416 | 纯
1417 | 纯粹
1418 | 纵
1419 | 纵令
1420 | 纵使
1421 | 纵然
1422 | 练习
1423 | 组成
1424 | 经
1425 | 经常
1426 | 经过
1427 | 结合
1428 | 结果
1429 | 给
1430 | 绝
1431 | 绝不
1432 | 绝对
1433 | 绝非
1434 | 绝顶
1435 | 继之
1436 | 继后
1437 | 继续
1438 | 继而
1439 | 维持
1440 | 综上所述
1441 | 缕缕
1442 | 罢了
1443 | 老
1444 | 老大
1445 | 老是
1446 | 老老实实
1447 | 考虑
1448 | 者
1449 | 而
1450 | 而且
1451 | 而况
1452 | 而又
1453 | 而后
1454 | 而外
1455 | 而已
1456 | 而是
1457 | 而言
1458 | 而论
1459 | 联系
1460 | 联袂
1461 | 背地里
1462 | 背靠背
1463 | 能
1464 | 能否
1465 | 能够
1466 | 腾
1467 | 自
1468 | 自个儿
1469 | 自从
1470 | 自各儿
1471 | 自后
1472 | 自家
1473 | 自己
1474 | 自打
1475 | 自身
1476 | 臭
1477 | 至
1478 | 至于
1479 | 至今
1480 | 至若
1481 | 致
1482 | 般的
1483 | 良好
1484 | 若
1485 | 若夫
1486 | 若是
1487 | 若果
1488 | 若非
1489 | 范围
1490 | 莫
1491 | 莫不
1492 | 莫不然
1493 | 莫如
1494 | 莫若
1495 | 莫非
1496 | 获得
1497 | 藉以
1498 | 虽
1499 | 虽则
1500 | 虽然
1501 | 虽说
1502 | 蛮
1503 | 行为
1504 | 行动
1505 | 表明
1506 | 表示
1507 | 被
1508 | 要
1509 | 要不
1510 | 要不是
1511 | 要不然
1512 | 要么
1513 | 要是
1514 | 要求
1515 | 见
1516 | 规定
1517 | 觉得
1518 | 譬喻
1519 | 譬如
1520 | 认为
1521 | 认真
1522 | 认识
1523 | 让
1524 | 许多
1525 | 论
1526 | 论说
1527 | 设使
1528 | 设或
1529 | 设若
1530 | 诚如
1531 | 诚然
1532 | 话说
1533 | 该
1534 | 该当
1535 | 说明
1536 | 说来
1537 | 说说
1538 | 请勿
1539 | 诸
1540 | 诸位
1541 | 诸如
1542 | 谁
1543 | 谁人
1544 | 谁料
1545 | 谁知
1546 | 谨
1547 | 豁然
1548 | 贼死
1549 | 赖以
1550 | 赶
1551 | 赶快
1552 | 赶早不赶晚
1553 | 起
1554 | 起先
1555 | 起初
1556 | 起头
1557 | 起来
1558 | 起见
1559 | 起首
1560 | 趁
1561 | 趁便
1562 | 趁势
1563 | 趁早
1564 | 趁机
1565 | 趁热
1566 | 趁着
1567 | 越是
1568 | 距
1569 | 跟
1570 | 路经
1571 | 转动
1572 | 转变
1573 | 转贴
1574 | 轰然
1575 | 较
1576 | 较为
1577 | 较之
1578 | 较比
1579 | 边
1580 | 达到
1581 | 达旦
1582 | 迄
1583 | 迅速
1584 | 过
1585 | 过于
1586 | 过去
1587 | 过来
1588 | 运用
1589 | 近
1590 | 近几年来
1591 | 近年来
1592 | 近来
1593 | 还
1594 | 还是
1595 | 还有
1596 | 还要
1597 | 这
1598 | 这一来
1599 | 这个
1600 | 这么
1601 | 这么些
1602 | 这么样
1603 | 这么点儿
1604 | 这些
1605 | 这会儿
1606 | 这儿
1607 | 这就是说
1608 | 这时
1609 | 这样
1610 | 这次
1611 | 这点
1612 | 这种
1613 | 这般
1614 | 这边
1615 | 这里
1616 | 这麽
1617 | 进入
1618 | 进去
1619 | 进来
1620 | 进步
1621 | 进而
1622 | 进行
1623 | 连
1624 | 连同
1625 | 连声
1626 | 连日
1627 | 连日来
1628 | 连袂
1629 | 连连
1630 | 迟早
1631 | 迫于
1632 | 适应
1633 | 适当
1634 | 适用
1635 | 逐步
1636 | 逐渐
1637 | 通常
1638 | 通过
1639 | 造成
1640 | 逢
1641 | 遇到
1642 | 遭到
1643 | 遵循
1644 | 遵照
1645 | 避免
1646 | 那
1647 | 那个
1648 | 那么
1649 | 那么些
1650 | 那么样
1651 | 那些
1652 | 那会儿
1653 | 那儿
1654 | 那时
1655 | 那末
1656 | 那样
1657 | 那般
1658 | 那边
1659 | 那里
1660 | 那麽
1661 | 部分
1662 | 都
1663 | 鄙人
1664 | 采取
1665 | 里面
1666 | 重大
1667 | 重新
1668 | 重要
1669 | 鉴于
1670 | 针对
1671 | 长期以来
1672 | 长此下去
1673 | 长线
1674 | 长话短说
1675 | 问题
1676 | 间或
1677 | 防止
1678 | 阿
1679 | 附近
1680 | 陈年
1681 | 限制
1682 | 陡然
1683 | 除
1684 | 除了
1685 | 除却
1686 | 除去
1687 | 除外
1688 | 除开
1689 | 除此
1690 | 除此之外
1691 | 除此以外
1692 | 除此而外
1693 | 除非
1694 | 随
1695 | 随后
1696 | 随时
1697 | 随着
1698 | 随著
1699 | 隔夜
1700 | 隔日
1701 | 难得
1702 | 难怪
1703 | 难说
1704 | 难道
1705 | 难道说
1706 | 集中
1707 | 零
1708 | 需要
1709 | 非但
1710 | 非常
1711 | 非徒
1712 | 非得
1713 | 非特
1714 | 非独
1715 | 靠
1716 | 顶多
1717 | 顷
1718 | 顷刻
1719 | 顷刻之间
1720 | 顷刻间
1721 | 顺
1722 | 顺着
1723 | 顿时
1724 | 颇
1725 | 风雨无阻
1726 | 饱
1727 | 首先
1728 | 马上
1729 | 高低
1730 | 高兴
1731 | 默然
1732 | 默默地
1733 | 齐
1734 | ︿
1735 | !
1736 | #
1737 | $
1738 | %
1739 | &
1740 | '
1741 | (
1742 | )
1743 | )÷(1-
1744 | )、
1745 | *
1746 | +
1747 | +ξ
1748 | ++
1749 | ,
1750 | ,也
1751 | -
1752 | -β
1753 | --
1754 | -[*]-
1755 | .
1756 | /
1757 | 0
1758 | 0:2
1759 | 1
1760 | 1.
1761 | 12%
1762 | 2
1763 | 2.3%
1764 | 3
1765 | 4
1766 | 5
1767 | 5:0
1768 | 6
1769 | 7
1770 | 8
1771 | 9
1772 | :
1773 | ;
1774 | <
1775 | <±
1776 | <Δ
1777 | <λ
1778 | <φ
1779 | <<
1780 | =
1781 | =″
1782 | =☆
1783 | =(
1784 | =-
1785 | =[
1786 | ={
1787 | >
1788 | >λ
1789 | ?
1790 | @
1791 | A
1792 | LI
1793 | R.L.
1794 | ZXFITL
1795 | [
1796 | [①①]
1797 | [①②]
1798 | [①③]
1799 | [①④]
1800 | [①⑤]
1801 | [①⑥]
1802 | [①⑦]
1803 | [①⑧]
1804 | [①⑨]
1805 | [①A]
1806 | [①B]
1807 | [①C]
1808 | [①D]
1809 | [①E]
1810 | [①]
1811 | [①a]
1812 | [①c]
1813 | [①d]
1814 | [①e]
1815 | [①f]
1816 | [①g]
1817 | [①h]
1818 | [①i]
1819 | [①o]
1820 | [②
1821 | [②①]
1822 | [②②]
1823 | [②③]
1824 | [②④
1825 | [②⑤]
1826 | [②⑥]
1827 | [②⑦]
1828 | [②⑧]
1829 | [②⑩]
1830 | [②B]
1831 | [②G]
1832 | [②]
1833 | [②a]
1834 | [②b]
1835 | [②c]
1836 | [②d]
1837 | [②e]
1838 | [②f]
1839 | [②g]
1840 | [②h]
1841 | [②i]
1842 | [②j]
1843 | [③①]
1844 | [③⑩]
1845 | [③F]
1846 | [③]
1847 | [③a]
1848 | [③b]
1849 | [③c]
1850 | [③d]
1851 | [③e]
1852 | [③g]
1853 | [③h]
1854 | [④]
1855 | [④a]
1856 | [④b]
1857 | [④c]
1858 | [④d]
1859 | [④e]
1860 | [⑤]
1861 | [⑤]]
1862 | [⑤a]
1863 | [⑤b]
1864 | [⑤d]
1865 | [⑤e]
1866 | [⑤f]
1867 | [⑥]
1868 | [⑦]
1869 | [⑧]
1870 | [⑨]
1871 | [⑩]
1872 | [*]
1873 | [-
1874 | []
1875 | ]
1876 | ]∧′=[
1877 | ][
1878 | _
1879 | a]
1880 | b]
1881 | c]
1882 | e]
1883 | f]
1884 | ng昉
1885 | {
1886 | {-
1887 | |
1888 | }
1889 | }>
1890 | ~
1891 | ~±
1892 | ~+
1893 | ¥
--------------------------------------------------------------------------------
/sumeval/metrics/lang/lang_en.py:
--------------------------------------------------------------------------------
1 | import re
2 | from sumeval.metrics.lang.base_lang import BaseLang
3 |
4 |
class LangEN(BaseLang):
    """English text processing: whitespace tokenizer with symbol cleanup."""

    def __init__(self):
        super(LangEN, self).__init__("en")
        # everything except ASCII letters, digits and "-" becomes a space
        self._symbol_replace = re.compile(r"[^A-Za-z0-9-]")
        # a word is kept when it starts with a letter, a digit or "$"
        self._valid_word = re.compile(r"^[A-Za-z0-9$]")

    def tokenize(self, text):
        """Split text at single spaces."""
        return text.split(" ")

    def tokenize_with_preprocess(self, text):
        """Clean up text, tokenize it and keep only the valid words."""
        cleaned = self._preprocess(text)
        stripped = (w.strip() for w in self.tokenize(cleaned))
        return [w for w in stripped if w and self._valid_word.match(w)]

    def _preprocess(self, text):
        """Isolate hyphens, replace symbols by spaces and trim the text."""
        spaced = text.replace("-", " - ")
        return self._symbol_replace.sub(" ", spaced).strip()

    def parse_to_be(self, text):
        """
        Parse the cleaned up text to basic elements.

        Only elements whose head and modifier are both valid words
        are returned.
        """
        candidates = super().parse_to_be(self._preprocess(text))
        is_word = self._valid_word.match
        return [be for be in candidates
                if is_word(be.head) and is_word(be.modifier)]
41 |
--------------------------------------------------------------------------------
/sumeval/metrics/lang/lang_ja.py:
--------------------------------------------------------------------------------
1 | import re
2 | from sumeval.metrics.lang.base_lang import BaseLang
3 |
4 |
class LangJA(BaseLang):
    """
    Japanese language support.

    Tokenization uses MeCab when it can be imported and initialized,
    otherwise janome. Dependency parsing loads the "ja_ginza" spaCy model
    on first use.
    """

    def __init__(self):
        super(LangJA, self).__init__("ja")
        self._set_tokenizer()
        # Keep hiragana, katakana, kanji, the long-vowel mark and
        # alphanumerics; everything else becomes a space.
        # The full-width alphanumeric ranges (U+FF41-FF5A, U+FF21-FF3A,
        # U+FF10-FF19) are written as escapes so they cannot be collapsed
        # into their ASCII look-alikes by text normalization.
        self._symbol_replace = re.compile(
            r"[^ぁ-んァ-ン一-龥ーa-zA-Z0-9"
            r"\uFF41-\uFF5A\uFF21-\uFF3A\uFF10-\uFF19]")

    def load_parser(self):
        """Return the dependency parser, loading the spaCy model lazily."""
        if self._PARSER is None:
            import spacy
            self._PARSER = spacy.load("ja_ginza")
        return self._PARSER

    def _set_tokenizer(self):
        """
        Set `self.tokenizer` to an object exposing `tokenize(text)`.

        MeCab is tried first; if importing or initializing it raises,
        the janome tokenizer is used instead.
        """
        try:
            import MeCab

            class Tokenizer():
                """Thin wrapper giving MeCab a `tokenize(text)` method."""

                def __init__(self):
                    self.tagger = MeCab.Tagger("-Ochasen")

                def tokenize(self, text):
                    # NOTE(review): parsing an empty string first is
                    # presumably the usual mecab-python3 workaround for
                    # broken node surfaces - confirm before removing.
                    self.tagger.parse("")
                    node = self.tagger.parseToNode(text)
                    tokens = []
                    while node:
                        # Nodes with an empty surface are skipped.
                        if node.surface:
                            tokens.append(node)
                        node = node.next
                    return tokens

            self.tokenizer = Tokenizer()

        except Exception:
            # Deliberate best-effort fallback: any failure while setting
            # up MeCab switches to janome.
            from janome.tokenizer import Tokenizer
            self.tokenizer = Tokenizer()

    def tokenize(self, text):
        """Return the surface form of every token in the text."""
        words = [t.surface for t in self.tokenizer.tokenize(text)]
        return words

    def tokenize_with_preprocess(self, text):
        """
        Replace symbols with spaces, tokenize, and drop blank tokens.

        Parameters
        ----------
        text: str
            raw text

        Returns
        -------
        words: str[]
            stripped, non-empty token surfaces
        """
        _text = self._symbol_replace.sub(" ", text)
        words = self.tokenize(_text)
        words = [w.strip() for w in words if w.strip()]
        return words

    def join(self, words):
        """Concatenate words without any separator."""
        return "".join(words)

    def parse_to_be(self, text):
        """Replace symbols with spaces, then parse to basic elements."""
        _text = self._symbol_replace.sub(" ", text)
        bes = super().parse_to_be(_text)
        return bes
60 |
--------------------------------------------------------------------------------
/sumeval/metrics/lang/lang_zh.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from sumeval.metrics.lang.base_lang import BaseLang, BasicElement
4 |
5 |
class LangZH(BaseLang):
    """
    Chinese language support.

    Tokenization uses jieba; dependency parsing uses HanLP (via pyhanlp),
    which is imported on first use.
    """

    def __init__(self):
        super(LangZH, self).__init__("zh")
        # Punctuation and symbols that are replaced by a space.
        # The second line adds the full-width forms of ! , : ? ( ) and the
        # yen sign (U+FF01, U+FF0C, U+FF1A, U+FF1F, U+FF08, U+FF09, U+FFE5)
        # as escapes, so that full-width Chinese punctuation is stripped
        # as well as its ASCII counterpart.
        self._symbol_replace = re.compile(
            r"[\.\!/_,$%\^\*\(\)\+\“\’\—\!。:?、,::~@#¥&()【】「」《》·"
            r"\uFF01\uFF0C\uFF1A\uFF1F\uFF08\uFF09\uFFE5]")
        import jieba
        self.tokenizer = jieba

    def load_parser(self):
        """Return the dependency parser, importing pyhanlp lazily."""
        if self._PARSER is None:
            from pyhanlp import HanLP
            self._PARSER = HanLP.parseDependency
        return self._PARSER

    def tokenize(self, text):
        """
        Replace symbols with spaces and cut the text with jieba.

        Parameters
        ----------
        text: str
            raw text

        Returns
        -------
        words: str[]
            tokens produced by `cut(..., cut_all=False)`
        """
        _text = self._preprocess(text)
        return list(self.tokenizer.cut(_text, cut_all=False))

    def _preprocess(self, text):
        """Replace every character of the symbol class with a space."""
        return self._symbol_replace.sub(" ", text)

    def parse_to_be(self, text):
        """
        Extract basic elements from the dependency parse of the text.

        A basic element is created for every token tagged "n" whose head
        is tagged "v" or "a", and for every token tagged "v" or "a" whose
        head is tagged "n" (presumably noun / verb / adjective tags -
        confirm against the HanLP tag set).

        Returns
        -------
        bes: BasicElement[]
            elements built from the "n" token's NAME, the "v"/"a" token's
            LEMMA and the dependent token's DEPREL
        """
        _text = self._preprocess(text)
        parsed = self.load_parser()(_text)
        bes = []
        for token in parsed.iterator():
            if token.POSTAG == "n" and token.HEAD.POSTAG in ["v", "a"]:
                # "n" token depending on a "v"/"a" head.
                be = BasicElement(token.NAME, token.HEAD.LEMMA,
                                  token.DEPREL)
                bes.append(be)
            elif token.POSTAG in ["v", "a"] and token.HEAD.POSTAG == "n":
                # "v"/"a" token depending on an "n" head.
                be = BasicElement(token.HEAD.NAME, token.LEMMA,
                                  token.DEPREL)
                bes.append(be)

        return bes
44 |
--------------------------------------------------------------------------------
/sumeval/metrics/rouge.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 | from sumeval.metrics.lang.base_lang import BaseLang
3 | from sumeval.metrics.lang import get_lang
4 |
5 |
class RougeCalculator():
    """
    Calculate ROUGE-N, ROUGE-L and ROUGE-BE scores.

    Parameters
    ----------
    stopwords: bool
        remove stop words (as judged by the language object) when True
    stemming: bool
        stem words when True; applied to references only
    word_limit: int
        when > 0, only the first `word_limit` words of a text are used
    length_limit: int
        when > 0 (and `word_limit` is not), only the first
        `length_limit` characters of a text are used
    lang: str or BaseLang
        language code resolved through `get_lang`, or a language object

    Raises
    ------
    TypeError
        when `lang` is neither a str nor a BaseLang instance
    """

    def __init__(self,
                 stopwords=True, stemming=False,
                 word_limit=-1, length_limit=-1, lang="en"):
        self.stemming = stemming
        self.stopwords = stopwords
        self.word_limit = word_limit
        self.length_limit = length_limit
        if isinstance(lang, str):
            self.lang = lang
            self._lang = get_lang(lang)
        elif isinstance(lang, BaseLang):
            self.lang = lang.lang
            self._lang = lang
        else:
            # Fail here rather than with an AttributeError on first use.
            raise TypeError(
                "lang must be a language code (str) or a BaseLang "
                "instance, got {}".format(type(lang).__name__))

    def tokenize(self, text_or_words, is_reference=False):
        """
        Tokenize a text under original Perl script manner.

        Parameters
        ----------
        text_or_words: str or str[]
            target text or tokenized words.
            Tokenized words skip language preprocessing as long as no
            limit is active, which allows you to calculate ROUGE under
            your customized tokens (you then have to take care of
            preprocessing yourself).
            When `word_limit` or `length_limit` is set, the words are
            truncated, joined back into a text and tokenized again with
            preprocessing.
        is_reference: bool
            for reference process or not

        Returns
        -------
        words: str[]
            lower-cased, stripped, non-empty words; stop words removed
            when enabled; stemmed when enabled and `is_reference` is True

        See Also
        --------
        https://github.com/andersjo/pyrouge/blob/master/tools/ROUGE-1.5.5/ROUGE-1.5.5.pl#L1820
        """
        words = text_or_words

        if self.word_limit > 0:
            # Word limit takes precedence over the length limit.
            if isinstance(words, str):
                words = self._lang.tokenize(words)
            words = words[:self.word_limit]
            words = self._lang.join(words)
        elif self.length_limit > 0:
            text = words
            if isinstance(text, (list, tuple)):
                text = self._lang.join(words)
            words = text[:self.length_limit]

        if isinstance(words, str):
            words = self._lang.tokenize_with_preprocess(words)

        words = [w.lower().strip() for w in words if w.strip()]

        if self.stopwords:
            words = [w for w in words if not self._lang.is_stop_word(w)]

        if self.stemming and is_reference:
            # stemming is only adopted to reference
            # https://github.com/andersjo/pyrouge/blob/master/tools/ROUGE-1.5.5/ROUGE-1.5.5.pl#L1416

            # min_length ref
            # https://github.com/andersjo/pyrouge/blob/master/tools/ROUGE-1.5.5/ROUGE-1.5.5.pl#L2629
            words = [self._lang.stemming(w, min_length=3) for w in words]
        return words

    def parse_to_be(self, text, is_reference=False):
        """
        Parse a text to basic elements and normalize them.

        Head and modifier of every element are lower-cased and stripped
        in place; they are also stemmed when stemming is enabled and
        `is_reference` is True.

        Parameters
        ----------
        text: str
            target text
        is_reference: bool
            for reference process or not

        Returns
        -------
        bes: list
            the normalized basic elements
        """
        bes = self._lang.parse_to_be(text)
        stem = self.stemming and is_reference

        for be in bes:
            be.head = be.head.lower().strip()
            be.modifier = be.modifier.lower().strip()
            if stem:
                be.head = self._lang.stemming(be.head, min_length=3)
                be.modifier = self._lang.stemming(be.modifier, min_length=3)

        return list(bes)

    def len_ngram(self, words, n):
        """Return the number of n-grams in `words` (never negative)."""
        return max(len(words) - n + 1, 0)

    def ngram_iter(self, words, n):
        """Yield every consecutive n-gram of `words` as a tuple."""
        for i in range(self.len_ngram(words, n)):
            yield tuple(words[i:i + n])

    def count_ngrams(self, words, n):
        """Return a Counter mapping each n-gram tuple to its frequency."""
        return Counter(self.ngram_iter(words, n))

    def count_overlap(self, summary_ngrams, reference_ngrams):
        """
        Sum the clipped matches between two frequency mappings.

        For every key of `summary_ngrams` the smaller of the two counts
        is added; keys missing from `reference_ngrams` count as 0, so
        plain dicts are accepted as well as Counters.
        """
        return sum(min(count, reference_ngrams.get(key, 0))
                   for key, count in summary_ngrams.items())

    def rouge_1(self, summary, references, alpha=0.5):
        """Calculate ROUGE-1; see `rouge_n`."""
        return self.rouge_n(summary, references, 1, alpha)

    def rouge_2(self, summary, references, alpha=0.5):
        """Calculate ROUGE-2; see `rouge_n`."""
        return self.rouge_n(summary, references, 2, alpha)

    def rouge_n(self, summary, references, n, alpha=0.5):
        """
        Calculate ROUGE-N score.

        Parameters
        ----------
        summary: str
            summary text
        references: str or str[]
            reference or references to evaluate summary
        n: int
            ROUGE kind. n=1, calculate when ROUGE-1
        alpha: float (0~1)
            alpha -> 0: recall is more important
            alpha -> 1: precision is more important
            F = 1/(alpha * (1/P) + (1 - alpha) * (1/R))

        Returns
        -------
        f1: float
            f1 score
        """
        _summary = self.tokenize(summary)
        summary_ngrams = self.count_ngrams(_summary, n)
        _refs = [references] if isinstance(references, str) else references
        matches = 0
        count_for_recall = 0
        for r in _refs:
            _r = self.tokenize(r, True)
            r_ngrams = self.count_ngrams(_r, n)
            matches += self.count_overlap(summary_ngrams, r_ngrams)
            count_for_recall += self.len_ngram(_r, n)
        # The summary is compared against every reference, so its n-gram
        # count enters the precision denominator once per reference.
        count_for_prec = len(_refs) * self.len_ngram(_summary, n)
        f1 = self._calc_f1(matches, count_for_recall, count_for_prec, alpha)
        return f1

    def _calc_f1(self, matches, count_for_recall, count_for_precision, alpha):
        """
        Combine match counts into the alpha-weighted F score.

        Every division by zero yields 0, so the result is 0 when there
        are no candidates or no matches.
        """
        def safe_div(x1, x2):
            return 0 if x2 == 0 else x1 / x2
        recall = safe_div(matches, count_for_recall)
        precision = safe_div(matches, count_for_precision)
        # P*R / ((1-alpha)*P + alpha*R) == 1 / (alpha/P + (1-alpha)/R)
        denom = (1.0 - alpha) * precision + alpha * recall
        return safe_div(precision * recall, denom)

    def lcs(self, a, b):
        """
        Return the length of the longest common subsequence of a and b.

        Only one row of the dynamic-programming table is kept, sized by
        the shorter of the two sequences.
        """
        longer = a
        base = b
        if len(longer) < len(base):
            longer, base = base, longer

        if len(base) == 0:
            return 0

        row = [0] * len(base)
        for c_a in longer:
            left = 0
            upper_left = 0
            for i, c_b in enumerate(base):
                # row[i] still holds the previous row's value here.
                up = row[i]
                if c_a == c_b:
                    value = upper_left + 1
                else:
                    value = max(left, up)
                row[i] = value
                left = value
                upper_left = up

        # `left` is the last cell of the final row.
        return left

    def rouge_l(self, summary, references, alpha=0.5):
        """
        Calculate ROUGE-L score.

        Parameters
        ----------
        summary: str
            summary text
        references: str or str[]
            reference or references to evaluate summary
        alpha: float (0~1)
            alpha -> 0: recall is more important
            alpha -> 1: precision is more important
            F = 1/(alpha * (1/P) + (1 - alpha) * (1/R))

        Returns
        -------
        f1: float
            f1 score
        """
        matches = 0
        count_for_recall = 0
        _summary = self.tokenize(summary)
        _refs = [references] if isinstance(references, str) else references
        for r in _refs:
            _r = self.tokenize(r, True)
            matches += self.lcs(_r, _summary)
            count_for_recall += len(_r)
        count_for_prec = len(_refs) * len(_summary)
        f1 = self._calc_f1(matches, count_for_recall, count_for_prec, alpha)
        return f1

    def count_be(self, text, compare_type, is_reference=False):
        """
        Return a Counter of the basic-element keys found in the text.

        Keys are produced by each element's `as_key(compare_type)`.
        """
        bes = self.parse_to_be(text, is_reference)
        return Counter(be.as_key(compare_type) for be in bes)

    def rouge_be(self, summary, references, compare_type="HMR", alpha=0.5):
        """
        Calculate ROUGE-BE score.

        Parameters
        ----------
        summary: str
            summary text
        references: str or str[]
            reference or references to evaluate summary
        compare_type: str
            "H", "M", "R" or these combination.
            Each character means basic element component.
            H: head, M: modifier, R: relation.
            The image of these relation is following.
            {head word}-{relation}->{modifier word}
            When "HMR", use head-relation-modifier triple as basic element.
        alpha: float (0~1)
            alpha -> 0: recall is more important
            alpha -> 1: precision is more important
            F = 1/(alpha * (1/P) + (1 - alpha) * (1/R))

        Returns
        -------
        f1: float
            f1 score
        """
        matches = 0
        count_for_recall = 0
        s_bes = self.count_be(summary, compare_type)
        _refs = [references] if isinstance(references, str) else references
        for r in _refs:
            r_bes = self.count_be(r, compare_type, True)
            matches += self.count_overlap(s_bes, r_bes)
            count_for_recall += sum(r_bes.values())
        count_for_prec = len(_refs) * sum(s_bes.values())
        f1 = self._calc_f1(matches, count_for_recall, count_for_prec, alpha)
        return f1
258 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/tests/__init__.py
--------------------------------------------------------------------------------
/tests/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chakki-works/sumeval/9a6eedc9634a8bcf145c45ad4516809ee1f28c7c/tests/data/.gitkeep
--------------------------------------------------------------------------------
/tests/data/rouge/ROUGE-test-ja.json:
--------------------------------------------------------------------------------
1 | {
2 | "1": {
3 | "summaries": [
4 | "会議 之 実行 也",
5 | "エーペック は 12 月 アジア で 重要 な 場 だ",
6 | "山田 首相 が 行っ た 交渉 は 、 今後 の アジア 貿易 交渉 において 重要 な ポイント に なる 。",
7 | "apec における 貿易 交渉 は 、 今後 の アジア 貿易 交渉 に 重要 に なる",
8 | "山田 と 李 の 会談 は ひょっと する と あと 1 日 だ"
9 | ],
10 | "references": [
11 | "山田 首相 は 、 2 日 に 台湾 の 首相 と 会議 し た 。",
12 | "山田 首相 は 、 12 月 に 台湾 にて 貿易 の 件 を 話し た 。",
13 | "山田 首相 は 台湾 を 訪れ た 。 貿易 について 会談 する。",
14 | "APEC で 山田 首相 は 貿易 交渉 について 発言 し た"
15 | ]
16 | }
17 | }
--------------------------------------------------------------------------------
/tests/data/rouge/ROUGE-test-zh.json:
--------------------------------------------------------------------------------
1 | {
2 | "1": {
3 | "summaries": [
4 | "自行车 不能 停 你 怎么 看",
5 | "自行车 不能 进站"
6 | ],
7 | "references": [
8 | "自行车 不能 带上 火车"
9 | ]
10 | },
11 | "2": {
12 | "summaries": [
13 | "科技 钻研 钻研",
14 | "科技 工作者 要 学习 老一辈 科学家 的 钻研 精神"
15 | ],
16 | "references": [
17 | "刘云山 看望 著名 科技 专家"
18 | ]
19 | },
20 | "3": {
21 | "summaries": [
22 | "中国 居 全球 第八",
23 | "瑞士 居 榜首 中国 位列 第 28"
24 | ],
25 | "references": [
26 | "全球 竞争力 排行榜 中国 居 28 位居 金砖 国家 首位"
27 | ]
28 | }
29 | }
--------------------------------------------------------------------------------
/tests/data/rouge/ROUGE-test.json:
--------------------------------------------------------------------------------
1 | {
2 | "1": {
3 | "summaries": [
4 | "The/ Officers it/ The concessions/ The/ The discussion",
5 | "TRADE/ AGREEMENT/ PRESSURE/ BE IN BUSINESS CONCESSIONS/ COUNTRIES BANGLADESH IN DHAKA/ ASIAN",
6 | "WORLD COUNTRIES/ SENIOR TRADE OFFICIALS/ AGREEMENT/ COUNTRIES BANGLADESH DHAKA/ CAPITAL",
7 | "WORLD COUNTRIES/ SENIOR TRADE OFFICIALS/ AGREEMENT/ COUNTRIES BANGLADESH DHAKA/ CAPITAL/ ASIAN 49 COUNTRIES/ OFFICERS/ TWO",
8 | "world trade officials in the",
9 | "world trade officials in the nations to concessions give bangladesh",
10 | "trade, business nations Bangladesh World Bank financial United_Nations hit bank proposal",
11 | "world countries is fact is that developed countries put pressure on that they should be"
12 | ],
13 | "references": [
14 | "Poor nations demand trade subsidies from developed nations.",
15 | "Poor nations pressurise developed countries into granting trade subsidies.",
16 | "Developed countries should be pressurized. Business exemptions to poor nations.",
17 | "World's poor decide to urge developed nations for business concessions"
18 | ]
19 | },
20 | "2": {
21 | "summaries": [
22 | "The city Bali last/ In the case/ The averted/ The/ The death",
23 | "BOMB/ ATTACK/ FOUND GUILTY HE DEATH SENTENCE",
24 | "LAST YEAR/ BOMB BLAST/ CASE IMAM ACCUSED INDIA/ SEA/ WERE",
25 | "OCTOBER LAST YEAR/ BOMB BLAST/ CASE IMAM ACCUSED INDIA/ SEA MONDAY BEGAN BE AVERTED ATTACK",
26 | "indonesia the last year in the",
27 | "indonesia the last year in the",
28 | "bomb allegation explosion Bali(the island)/earring police attacks karachi ship, plane security",
29 | "indonesian city of bali in in bomb blast accused india began to be averted"
30 | ],
31 | "references": [
32 | "Indonesia charges Imam Samudra and Amrozi with Bali bombing.",
33 | "The suspected 'Bali bomber', Imam Samudra goes to trial.",
34 | "Description of trial on Bali bomb blast suspect Imam Samudra",
35 | "Trial of Imam samudra gets underway in Bali"
36 | ]
37 | },
38 | "3": {
39 | "summaries": [
40 | "Foreign Minister Colin to quit/ Attack of Pakistan/ That Kashmir freedom",
41 | "TERRORIST ATTACK/ COLIN POWELL/ INDIAN PARLIAMENT/ PAKISTAN/ KASHMIR FREEDOM",
42 | "TERRORIST ATTACK CAMPS MEASURES BE MORE EFFECTIVE POLICY AMERICAN/ INDIAN",
43 | "TERRORIST ATTACK CAMPS MEASURES BE MORE EFFECTIVE POLICY AMERICAN FOREIGN MINISTER COLIN POWELL/ INDIAN PARLIAMENT",
44 | "militants strategy american foreign minister",
45 | "militants strategy american foreign minister",
46 | "police Pakistan terrorism Osama Bin Laden\t year attacks India",
47 | "terrorist attack measures should be more effective policy american foreign minister everyone knows that indian parliament attack"
48 | ],
49 | "references": [
50 | "Improvements in homeland security, intelligence required to eliminate terrorist threat.",
51 | "Terrorism can be solved through efficent policy than brute force.",
52 | "Finding effective strategies better than attacking terrorist camps or war.",
53 | "Attacking terrorists not a solution. Need a more effective strategy."
54 | ]
55 | },
56 | "4": {
57 | "summaries": [
58 | "The world for/ At the time of any one of the/ More",
59 | "WORLD/ EVIL STRUGGLE AGAINST HORIZON/ SENTENCE",
60 | "WORLD GOOD EVIL STRUGGLE/ HORIZON/ SENTENCE/ MATTER IS/ SHAITANS CHAPTER",
61 | "INDIA/ REST/ WORLD GOOD EVIL STRUGGLE/ HORIZON LIVING NORMAL/ ONE TIME/ HISTORY MORE SENTENCE/ MATTER",
62 | "against conflict in the most",
63 | "india and other world against conflict when horizon time in a record in more",
64 | "thousand one two India Osama Bin Laden\t earthquake Osama",
65 | "india and rest of world for good and evil struggle because living"
66 | ],
67 | "references": [
68 | "War against terror for India and world will continue.",
69 | "India along with others continue to fight against evil elements.",
70 | "Fight against 'evil' everlasting for India and rest of world",
71 | "Struggle against evil continues in India and the rest of the world."
72 | ]
73 | },
74 | "5": {
75 | "summaries": [
76 | "Conference general/ Rice to the failure of",
77 | "AGRA SUMMIT/ PERVEZ MUSHARRAF/ LORD RICE/ TALKS",
78 | "AGRA/ LORD RICE/ FAILURE TALKS WAS CHAIR/ ARE MISSING LAST",
79 | "AGRA SUMMIT GENERAL PERVEZ MUSHARRAF DOMINATE/ LORD RICE/ FAILURE TALKS WAS CHAIR/ ARE MISSING LAST",
80 | "agra of remains top samelana in the news",
81 | "agra of remains top samelana in the news are last year july in the",
82 | "team India Vajpayee (India's prime minister) Pakistan government police people people gathering, function",
83 | "agra remains of agra summit in general pervez musharraf was to chair idea"
84 | ],
85 | "references": [
86 | "Musharraf charms media, but Agra Summit fails.",
87 | "Agra Summit not fruitful but Musharraf grabs attention of media.",
88 | "Agra talks fail but Musharraf manages to gain publicity",
89 | "Agra summit: Musharraf impresses journalists, but talks failure imminent."
90 | ]
91 | },
92 | "6": {
93 | "summaries": [
94 | "]/ On the day ever",
95 | "GUJARAT EARTHQUAKE TREMBLE RAISED LAND UNSAFE BUILDINGS/ EARTH/ LONG HAVOC",
96 | "GUJARAT EARTHQUAKE TREMBLE RAISED LAND UNSAFE BUILDINGS WERE EARTH RISING",
97 | "GUJARAT EARTHQUAKE TREMBLE RAISED LAND UNSAFE BUILDINGS WERE EARTH RISING RECALLS HOW MOMENTS/ DAY/ SEVEN",
98 | "sure they are the moment in the deluge",
99 | "earthquake shivering uThI dharaa buildings vulnerable were they are the moment in the deluge",
100 | "people earthquake building India space Columbia police Kashmir team",
101 | "gujarat earthquake tremble raised land unsafe buildings were earth"
102 | ],
103 | "references": [
104 | "Uday Marankar reminisces on the devastation caused by Gujrat earthquake.",
105 | "Massive earthquake rocks the state of Gujarat on 26th January,2002.",
106 | "Reporter's earthquake experience and rapid work to submit magazine story",
107 | "Gujarat Earthquake: A first hand narrative"
108 | ]
109 | },
110 | "7": {
111 | "summaries": [
112 | "Years the affairs of Vajpayee/ 2001 passage gum/ The great relief",
113 | "VAJPAYEE REMAIN/ HOUSE/ RELIEF",
114 | "YEAR RUNNING AFFAIRS VAJPAYEE REMAIN CLEAR/ HOUSE/ RELIEF/ BIG/ YEAR",
115 | "YEAR RUNNING AFFAIRS VAJPAYEE REMAIN CLEAR/ WILL BE ATAL/ HOUSE/ GREAT SIGH RELIEF/ BIG/ YEAR",
116 | "past year by raajakaaja walking the people to the home in the breath",
117 | "past year by raajakaaja walking the people to the home in if this year",
118 | "Vajpayee (India's prime minister) year war government police Prime Minister India team",
119 | "in past year in running affairs of vajpayee remain clear less to people"
120 | ],
121 | "references": [
122 | "Vajpayee showed lack of leadership in governing country in 2001.",
123 | "2001: Year full of conundrums and disappointments for Prime-Minister Vajpayee.",
124 | "Account of an unfortunate administrating year for Indian prime minister",
125 | "The year 2001: A disaster for Vajpayee"
126 | ]
127 | },
128 | "8": {
129 | "summaries": [
130 | "Iraqis to work/ Of the week at the time/ Army said/ The streets",
131 | "21 IRAQI ARMY/ WEAPONS/ WEEKS TIME JUN 1 2003 UTC/ IRAQ",
132 | "HAND TWO WEEKS TIME JUN 41 1 2003 UTC 71",
133 | "21 IRAQI ARMY/ BUSINESS WEAPONS COME HAND TWO WEEKS TIME JUN 41 1 2003 UTC",
134 | "2.1 in the streets in the",
135 | "2.1 iraakiyo.n the army of work in two weeks of the streets in the",
136 | "point army rate police two weapon one Saddam Hussain",
137 | "2.1 iraqi army 1 11:28 7.1 american forces around covered with offence to control he for iraqi army"
138 | ],
139 | "references": [
140 | "Iraq has 2 months to turn over weapons of mass destruction.",
141 | "Iraqi's given 2 weeks notice to handover unlicensed weapons: America.",
142 | "America gives two weeks for Iraqi's to surrender unauthorized weapons",
143 | "Iraqis face weapon surrender deadline"
144 | ]
145 | },
146 | "9": {
147 | "summaries": [
148 | "2.1 Indian companies work to/ Dollar savings/ US/ To make the proposal.",
149 | "INDIAN/ MARCH 2003 UTC",
150 | "21 INDIAN COMPANIES WORK/ US ECONOMY 10 BILLION DOLLAR SAVINGS",
151 | "21 INDIAN COMPANIES WORK/ US ECONOMY 10 BILLION DOLLAR SAVINGS JUN 31 MARCH 2003 UTC",
152 | "state in the leading firm",
153 | "state in the leading firm",
154 | "point rate nations one two earthquake Arab/Billion thousand five",
155 | "UTC 16:35 4.1 in america indian companies to work not to be given to make proposal"
156 | ],
157 | "references": [
158 | "American Industry saved $10 billion by outsourcing to India: NASSCOM",
159 | "Outsourcing to India saves US economy 10-11 billion dollars: NASSCOM.",
160 | "America saves billions by outsourcing work to India: NASSCOM study",
161 | "Outsourcing to India saves US 10 Billion Dollars- NASSCOM"
162 | ]
163 | },
164 | "10": {
165 | "summaries": [
166 | "Scam in Harshad Mehta and three other 5-5 years of hard/ The a/ Of the year",
167 | "SECURITIES SCAM/ RIGOROUS IMPRISONMENT MUMBAI/ HIGH COURT/ IMPRISONMENT SENTENCE/ CASE",
168 | "THREE OTHERS YEARS RIGOROUS IMPRISONMENT MUMBAI MUMBAI 28 TH SEPTEMBER",
169 | "THREE OTHERS YEARS RIGOROUS IMPRISONMENT MUMBAI MUMBAI 28 TH SEPTEMBER/ HIGH COURT SPECIAL COURT/ IMPRISONMENT",
170 | "securities in the five years in firm",
171 | "securities in the five years",
172 | "court company thousand fame one Enron two program share_market",
173 | "securities scam in harshad mehta and three others to 5 @-@ 5 years"
174 | ],
175 | "references": [
176 | "Mehta and 3 others convicted for stock market scandal.",
177 | "Harshad Mehta (\"Big Bull\") and accomplices handed 5 years imprisonment.",
178 | "High court ruling on Harshad Mehta in stock market scam",
179 | "Stock scam: 5 years imprisonment for Harshad Mehta"
180 | ]
181 | },
182 | "11": {
183 | "summaries": [
184 | "The legal action threatened Delhi,/ Party Mr./ Threat/ Accused",
185 | "DEFENSE MINISTER THREAT OF LEGAL ACTION NEW DELHI/ FERNANDES/ SUPREMO/ FOOD",
186 | "DEFENSE MINISTER THREAT LEGAL ACTION NEW DELHI AUGUST 31 DEFENCE",
187 | "DEFENSE MINISTER THREAT LEGAL ACTION NEW DELHI AUGUST 31 DEFENCE MINISTER SAMATA PARTY PRESIDENT MR",
188 | "legal action will they arms and drug smugglers andaman sea in",
189 | "legal action of legal action will they arms and drug smugglers andaman sea in",
190 | "election test, inspection commission test, exam, inspection missile World Bank ship, plane poll, election party",
191 | "defense minister threat"
192 | ],
193 | "references": [
194 | "Defense minister threatens Jayalalitha with criminal investigation.",
195 | "Defense Minister George Fernandes threatens prosecution against AIADMK supremo Jayalalitha.",
196 | "Account of defense minister's threat for legal action against Jayalalita",
197 | "Defense minister threatens Jayalalitha for prosecution."
198 | ]
199 | },
200 | "12": {
201 | "summaries": [
202 | "To keep in the:/ Jaswant Singh said/ To keep always/ Mouth of the",
203 | "MR SINGH NEW DELHI/ MINISTER JASWANT SINGH INDIA IS NEIGHBOUR/ SELF/ MOUTH",
204 | "MAINTAIN FRIENDLY RELATIONS FAVOUR MR SINGH NEW DELHI 31 MR",
205 | "INDIA NEIGHBOURING COUNTRIES MAINTAIN FRIENDLY RELATIONS FAVOUR MR SINGH NEW DELHI 31 AUGUST MR FOREIGN",
206 | "nations to friendly relationship in the mouth",
207 | "nations to friendly relationship in the mouth",
208 | "India atomic Pakistan North_Korea World Bank Korea north plane, aircraft",
209 | "india from neighbouring countries to maintain friendly relations in favour"
210 | ],
211 | "references": [
212 | "India wants friendly relations with neighbors: Jaswant Singh",
213 | "Jaswant Singh signifies the importance for friendly relationship with neighbors.",
214 | "External affairs minister addresses India's interest in friendship with neighbors",
215 | "India in favor of maintaining friendly relations with neighbors."
216 | ]
217 | },
218 | "13": {
219 | "summaries": [
220 | "The charming not/ \"revolution-murdabad ' slogans and not/ Not a.m. the frenzied",
221 | "HOUSE NOT DRUMS NEW DELHI 31 AUGUST CAPITAL NEW/ ELECTION/ REVOLUTION/ RIVAL",
222 | "EXOTIC SLOGANS/ HOUSE DRUMS NEW DELHI 31 AUGUST CAPITAL NEW",
223 | "EXOTIC SLOGANS/ HOUSE DRUMS NEW DELHI 31 AUGUST CAPITAL NEW/ ELECTION MEETING THERE WAS REVOLUTION",
224 | "charming house in new delhi 31 august capital delhi in a house contenders leaders",
225 | "charming house in new delhi 31 august capital delhi in a house contenders leaders",
226 | "Singh (common Indian last name) BJP (Bhartiya Janata Party - a political party in India) victory prize, award festival party election Congress political party in India, US Congress Modi (name)",
227 | "manmohan not drums new delhi 31 august capital new delhi there was not @-@ revolution murdabad ' slogans and not"
228 | ],
229 | "references": [
230 | "No slandering or slogan shouting at Singh's campaign party.",
231 | "Manmohan Singh's anomalous political campaign for the Lok Sabha elections.",
232 | "Description of a political gathering in Manmohan Singh's election campaign",
233 | "Manmohan Singh's assembly peaceful, hospitable and unique."
234 | ]
235 | },
236 | "14": {
237 | "summaries": [
238 | "By the economic development praise/ Wolfensohn India tour./ India",
239 | "ECONOMIC/ JAMES WOLFENSOHN VISIT OF FINANCE MINISTER YASHWANT/ INDIA/ GROWTH/ ROAD",
240 | "WORLD BANK PRESIDENT JAMES INDIAS ECONOMIC DEVELOPMENT APPRECIATION WASHINGTON SEPTEMBER",
241 | "JAMES WOLFENSOHN VISIT/ FINANCE MINISTER YASHWANT SINHA ACCEPTED INVITATION INDIA PRAISE ECONOMIC GROWTH INDIA/ ROAD",
242 | "world bank september world bank",
243 | "world bank september world bank",
244 | "World Bank India bank destroyed meeting/summit financial currency minister both countries",
245 | "world bank president james by india ' s economic development appreciation visit yashwant sinha accepted invitation"
246 | ],
247 | "references": [
248 | "World Bank president praises India's economic development.",
249 | "World Bank President praises India's progress; accepts invitation to India.",
250 | "World bank chief praises India's economic development",
251 | "James Wulfenson praises India's economic development and accepts Yashwant's invitation."
252 | ]
253 | },
254 | "15": {
255 | "summaries": [
256 | "Summit conference with/ As a countries/ Centre",
257 | "SUMMIT OF SITE WITH DEMONSTRATIONS VIOLENCE JUNE/ POLICE/ CITY/ FRENCH",
258 | "ONGOING CONFERENCE OPPONENTS/ POLICE GENEVA/ CENTRE/ CITY SUNDAY NIGHT/ CONTINUED",
259 | "ONGOING CONFERENCE OPPONENTS/ POLICE GENEVA/ CENTRE/ CITY SUNDAY NIGHT/ CONTINUED/ FRENCH SWISS DIALOGUE COMMITTEE SDA",
260 | "summit conference site of the city centre in france",
261 | "summit conference site of violence two leading countries the organization city centre in france",
262 | "gathering, function nation, country violence people police army gas festival Dollar",
263 | "g @-@ 8 summit violence june 2 as leading countries 8 summit all over continued"
264 | ],
265 | "references": [
266 | "Protests and demonstrations at site of G8 summit.",
267 | "Violence and demonstrations gripped the recent G-8 meeting in France.",
268 | "Demonstration and violence near G-8 summit invokes police response",
269 | "Demonstrations by rebels near G-8 summit site."
270 | ]
271 | },
272 | "16": {
273 | "summaries": [
274 | "Professional of/ To be / Paswan said/ In the years/ ' \"development",
275 | "PROFESSIONAL LACK OF WILL NOT BE/ CENTRAL HUMAN RESOURCES/ STATE",
276 | "JUNE CENTRAL HUMAN RESOURCES MINISTER STATE SANJAY/ PACE/ INCREASE/ MEASURES",
277 | "PROFESSIONAL LACK WILL BE ALLOWED BE JUNE CENTRAL HUMAN RESOURCES MINISTER STATE SANJAY/ YEARS/ COUNTRY",
278 | "union human resources years in the information technology",
279 | "years in the information technology",
280 | "minister government Jammu & Kashmir BJP (Bhartiya Janata Party - a political party in India) India party Singh (common Indian last name) mobile World Bank",
281 | "professional lack will not be allowed to be june 2 central human resources minister said"
282 | ],
283 | "references": [
284 | "\"No shortage of IT professionals in the country\": Pasvaan",
285 | "IT professionals to meet the growing demand for information technology.",
286 | "Human resource minister plans to increase 'IT' professionals in India",
287 | "\"There won't be a shortage of IT professionals\" - Sanjay Paswan"
288 | ]
289 | },
290 | "17": {
291 | "summaries": [
292 | "Sonia visit to Kashmir new/ The recent/ Three-day meeting/ The",
293 | "VAJPAYEE SONIAS VISIT TO NEW ZEAL IN KASHMIR JUNE/ VALLEY",
294 | "VAJPAYEE SONIAS VISIT NEW ZEAL KASHMIR JUNE PRIME MINISTER ATAL",
295 | "VAJPAYEE SONIAS VISIT NEW ZEAL KASHMIR JUNE PRIME MINISTER ATAL/ KASHMIR IS ATMOSPHERE EARLY/ VALLEY",
296 | "the new chief minister in the people in",
297 | "the new chief minister in the soon peace restoration of people in long valley",
298 | "Kashmir Vajpayee (India's prime minister) Pakistan Jammu & Kashmir Congress political party in India, US Congress Sayeed (name) states India",
299 | "vajpayee sonia s visit to new zeal in kashmir june is atmosphere"
300 | ],
301 | "references": [
302 | "New hope for peace in Kashmir after Vajpayee, Sonia's visit.",
303 | "Vajpayee, Sonia tour of Kashmir infuses new zeal in Kashmiri's.",
304 | "Vajpayee and Sonia visits rejuvenates spirits in kashmiris' for peace.",
305 | "Vajpayee and Sonia's meeting raises hopes of peace in Kashmir."
306 | ]
307 | },
308 | "18": {
309 | "summaries": [
310 | "The of/ Chief Kasturirangan said/ On arrival/ In",
311 | "POLL OF INDIA JOURNEY/ JUNE INDIAN SPACE KASTURIRANGAN/ MISSION",
312 | "POLL INDIA JOURNEY/ RIGHT WAY JUNE INDIAN SPACE KASTURIRANGAN CHIEF",
313 | "INDIAN MISSION RIGHT ROAD HAS BEEN GROWING CAMPAIGN/ FIRST HIGH RESOLUTION VEHICLES WILL BE SENT",
314 | "poll of the poll on reach of the rise in sunday",
315 | "poll of the journey correct that poll on reach of the rise in sunday",
316 | "ship space satellite China India flight Columbia ban water",
317 | "poll of india journey of right way june 2 indian space kasturirangan will be sent"
318 | ],
319 | "references": [
320 | "India's plans to send unmanned moon mission on right track.",
321 | "Indian dream of reaching the moon on right track: Kasturirangan",
322 | "India's mission to reach moon: technical competancy and project justification",
323 | "India's mission to reach moon on the right track- Kasturirangan."
324 | ]
325 | },
326 | "19": {
327 | "summaries": [
328 | "Stock in probe into/ Sultana area of the army/ On Sunday began",
329 | "MILITARY IN TO TAKE STOCK OF PROBE JUNE/ BORDER/ ARMY/ TEAM",
330 | "MILITARY/ TAKE STOCK/ PROBE JUNE RAJASTHAN BORDER JAISALMER DISTRICT SULTANA",
331 | "MILITARY/ TAKE STOCK/ PROBE JUNE RAJASTHAN BORDER JAISALMER DISTRICT SULTANA AREA/ ARMY STOCK YESTERDAY SUNDAY",
332 | "store in aaga fire the check began two in store at and",
333 | "store in aaga fire the check began two june in store in check and",
334 | "army accident plane, aircraft rail fame one border share_market security",
335 | "military in to take stock june 2 rajasthan border jaisalmer district in stock yesterday began"
336 | ],
337 | "references": [
338 | "Investigation on to determine cause of fire in army depot.",
339 | "Investigation into fire at the Jaisalmer army depot begins: Army.",
340 | "Investigation being conducted into fire at army depot in India",
341 | "Fire in army depot, Jaiselmer district: Investigations underway"
342 | ]
343 | },
344 | "20": {
345 | "summaries": [
346 | "Project successful/ Forest/ The report of the/ Money/ Delay",
347 | "GOVERNMENT PROJECT TIGER SUCCESSFUL/ TIGERS/ DELAY",
348 | "GOVERNMENT PROJECT TIGER SUCCESSFUL PROJECT JUNE ENVIRONMENT FORESTS/ MINISTRY/ MONEY",
349 | "GOVERNMENT PROJECT TIGER SUCCESSFUL PROJECT JUNE ENVIRONMENT FORESTS/ MINISTRY/ REPORT/ MONEY/ STATE GOVERNMENT DELAY IMPACT",
350 | "state of project in the",
351 | "state of project and forest ministry of the tiger for protection for project project",
352 | "effort report number, count Me/In, Inside, within Tamil disease information, knowledge government Kashmir",
353 | "government project ' tiger ' successful project june 2 environment and forests of ministry"
354 | ],
355 | "references": [
356 | "Project to save tigers successful: Environment and Forest Ministry.",
357 | "Operation \"Project Tiger\" success: Report by Forest and Environmental Ministry.",
358 | "Glimpses of 'Project Tiger', a successful undertaking by Indian government",
359 | "Project Tiger: Indian Government's mission to save tigers successful"
360 | ]
361 | },
362 | "21": {
363 | "summaries": [
364 | "Crime. murder Bhel of general manager/ Wife of/ Hardwar Government company/ The/ Yet",
365 | "JUNE TALKS/ RED AND BLACK AGARWAL/ HOUSE JHOORDI/ DELAY",
366 | "RED BLACK AGARWAL WIFE SATYA AGARWAL/ HOUSE JHOORDI MURDER/ DELAY",
367 | "DEHRADUN/ RED BLACK AGARWAL WIFE SATYA AGARWAL/ PEOPLE/ HOUSE JHOORDI MURDER YET DID KNOW DELAY",
368 | "crime and his wife of the private company",
369 | "crime and his wife of the private company people by his home in murder",
370 | "police India talk use Kashmir more explosion two work",
371 | "crime"
372 | ],
373 | "references": [
374 | "Hunt on for killers of BHEL's Vice-President, family.",
375 | "\"BHEL\" Vice-President Shyam Agarwal and wife found murdered at home.",
376 | "'BHEL' Vice President and his wife found murdered at home",
377 | "Shyam Lal Agarwal, vice president of BHEL, assasinated."
378 | ]
379 | },
380 | "22": {
381 | "summaries": [
382 | "Deeds of giving capable of./ Today across the border/ To",
383 | "INDIA PAKISTAN DEEDS TO ANSWER CAPABLE/ P/ TALKS/ STATE/ CROSS/ ANSWER SWAMI",
384 | "BAHERI P N JUNE 16 TALKS CENTRAL HOME MINISTER STATE",
385 | "INDIA PAKISTAN DEEDS ANSWER CAPABLE BAHERI P N JUNE 16 TALKS CENTRAL HOME MINISTER STATE",
386 | "indian pakistani harakato.n the answer give in central home",
387 | "indian pakistani harakato.n the answer give in central home to terrorism locked in the",
388 | "Pakistan police Vajpayee (India's prime minister) India government Prime Minister Qaeda [\"Al-Qaeda\"] cup European",
389 | "india pakistan deeds to answer capable of"
390 | ],
391 | "references": [
392 | "India can deliver fitting response to Pakistan: Chinmayananda",
393 | "India aptly capable of counter-attacking Pakistani sponsored cross-border terrorism: Chinmayananda.",
394 | "India can take action if Pakistan fails to curtail terrorism",
395 | "India adept at retaliating Pakistan's dirty tricks"
396 | ]
397 | },
398 | "23": {
399 | "summaries": [
400 | "To make plan/ June talks./ Kalam/ College engineering on/ Course",
401 | "SUN BY 2020 INDIA/ PLAN KANCHIPURAM 19 TALKS/ WORLD",
402 | "MAKE PLAN KANCHIPURAM/ WORLD ENGINEERING COLLEGE/ COURSE/ INAUGURATING BLOCK OPENING",
403 | "SUN 2020 INDIA ENRICH NATION MAKE PLAN KANCHIPURAM JUNE 19 TALKS PRESIDENT ABDUL KALAM/ WORLD",
404 | "college in engineering at a postgraduate course for shubhaara.nbha and a postgraduate block open",
405 | "college in engineering at a postgraduate course for shubhaara.nbha and a postgraduate block open",
406 | "Me/In, Inside, within President India ship nations space effort nineteen thousand",
407 | "sun enrich nation to make plan kanchipuram june 19"
408 | ],
409 | "references": [
410 | "Project to make India a \"Developed Nation\" by 2020.",
411 | "President APJ.Kalam - Scheme for developed India by 2020 ready.",
412 | "Improvement in five areas will make India 'developed' by 2020",
413 | "Strategy ready to make India a developed nation by 2020"
414 | ]
415 | },
416 | "24": {
417 | "summaries": [
418 | "Bus and hit four dead./ The district of sector/ People/ Office received",
419 | "ROADWAYS BUS AND IMPACT OF FOUR DEAD MORADABAD/ TALKS/ POLICE/ CAR/ BRASS",
420 | "MARUTI CAR CLASH WOMEN FOUR PEOPLE DIED SENIOR SUPERINTENDENT POLICE",
421 | "ROADWAYS BUS IMPACT/ FOUR DEAD MORADABAD ARTICULATED JUNE 20 TALKS UTTAR PRADESH MORADABAD DISTRICT POLICE",
422 | "clash in four dead at the",
423 | "clash in four dead at the",
424 | "police accident ear poll, election nation, country Pakistan people people weapons",
425 | "roadways bus and impact"
426 | ],
427 | "references": [
428 | "4 killed in accident between Roadway bus and Maruti.",
429 | "4 dead in collision between \"Roadways\" Bus and Maruti car.",
430 | "Four killed in accident between 'Roadways' bus and 'Maruti' car",
431 | "Roadways bus and a Maruti collide: 4 dead"
432 | ]
433 | },
434 | "25": {
435 | "summaries": [
436 | "India and Pakistan in the medium/ June talks of Kashmir/ The improvement in the dialogue",
437 | "INDIA AND PAKISTAN IN KASHMIR OF FRIENDSHIP/ SRINAGAR/ INDIAN",
438 | "INDIA PAKISTAN KASHMIR FRIENDSHIP CAN BECOME MEDIUM MOOFTEE SRINAGAR JUNE",
439 | "INDIA PAKISTAN KASHMIR FRIENDSHIP CAN BECOME MEDIUM MOOFTEE SRINAGAR JUNE 20 TALKS/ PEOPLE/ INDIAN SUBCONTINENT",
440 | "kashmir indian cuisine in the chief at reform of for indian subcontinent in there",
441 | "kashmir indian cuisine in the chief at reform of for indian subcontinent in there",
442 | "both countries Jammu & Kashmir population, people both Kashmir Pakistan Sayeed (name) Vajpayee (India's prime minister) India",
443 | "india and pakistan in kashmir can become medium @-@ mooftee srinagar june 20"
444 | ],
445 | "references": [
446 | "Kashmir can forge friendship between India and Pakistan: Mufti",
447 | "Kashmir: Link for peace rather than war between India-Pakistan - Mufti.",
448 | "Resolving Kashmir by talks will create friendship between India-Pakistan",
449 | "Kashmir: A means of friendship between India and Pakistan?"
450 | ]
451 | }
452 | }
--------------------------------------------------------------------------------
/tests/data/rouge/verify-spl.json:
--------------------------------------------------------------------------------
1 | {
2 | "1": {
3 | "summaries": [
4 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
5 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
6 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
7 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
8 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
9 | ],
10 | "references": [
11 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
12 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
13 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
14 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF"
15 | ]
16 | },
17 | "2": {
18 | "summaries": [
19 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
20 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
21 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
22 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
23 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
24 | ],
25 | "references": [
26 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
27 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
28 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
29 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF"
30 | ]
31 | },
32 | "3": {
33 | "summaries": [
34 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
35 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
36 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
37 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
38 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
39 | ],
40 | "references": [
41 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
42 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
43 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
44 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF"
45 | ]
46 | },
47 | "4": {
48 | "summaries": [
49 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
50 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
51 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
52 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
53 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
54 | ],
55 | "references": [
56 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
57 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
58 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
59 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC"
60 | ]
61 | }
62 | }
--------------------------------------------------------------------------------
/tests/data/rouge/verify.json:
--------------------------------------------------------------------------------
1 | {
2 | "1": {
3 | "summaries": [
4 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
5 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
6 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
7 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
8 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
9 | ],
10 | "references": [
11 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
12 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
13 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
14 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF"
15 | ]
16 | },
17 | "2": {
18 | "summaries": [
19 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
20 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
21 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
22 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
23 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
24 | ],
25 | "references": [
26 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
27 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
28 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
29 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF"
30 | ]
31 | },
32 | "3": {
33 | "summaries": [
34 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
35 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
36 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
37 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
38 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
39 | ],
40 | "references": [
41 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
42 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
43 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
44 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF"
45 | ]
46 | },
47 | "4": {
48 | "summaries": [
49 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
50 | "BritishDDD authoritiesDDD arrestedDDD GeneralDDD AugustoDDD PinochetDDD inDDD LondonDDD forDDD backDDD surgeryDDD onDDD anDDD internationalDDD warrantDDD\nissuedDDD byDDD SpanishDDD magistrateDDD BaltasarDDD GarzonDDD\nTheDDD MadridDDD courtDDD chargedDDD PinochetDDD\nwhoDDD ruledDDD ChileDDD asDDD aDDD despotDDD forDDD yearsDDD\nwithDDD crimesDDD againstDDD humanityDDD\nincludingDDD genocideDDD andDDD terrorismDDD involvingDDD theDDD deathsDDD ofDDD moreDDD thanDDD peopleDDD\nTheDDD ChileanDDD governmentDDD protestedDDD thatDDD PinochetDDD nowDDD aDDD hasDDD legalDDD immunityDDD\nbutDDD fewDDD inDDD ChileanDDD societyDDD protestedDDD theDDD arrestDDD\nPinochetDDD arrestDDD showsDDD theDDD growingDDD significanceDDD ofDDD internationalDDD lawDDD\nsuggestingDDD thatDDD officialsDDD accusedDDD ofDDD atrocitiesDDD haveDDD fewerDDD placesDDD toDDD hideDDD theseDDD daysDDD\nevenDDD ifDDD theyDDD areDDD carryingDDD diplomaticDDD passportsDDD",
51 | "BritishEEE authoritiesEEE arrestedEEE GeneralEEE AugustoEEE PinochetEEE inEEE LondonEEE forEEE backEEE surgeryEEE onEEE anEEE internationalEEE warrantEEE\nissuedEEE byEEE SpanishEEE magistrateEEE BaltasarEEE GarzonEEE\nTheEEE MadridEEE courtEEE chargedEEE PinochetEEE\nwhoEEE ruledEEE ChileEEE asEEE aEEE despotEEE forEEE yearsEEE\nwithEEE crimesEEE againstEEE humanityEEE\nincludingEEE genocideEEE andEEE terrorismEEE involvingEEE theEEE deathsEEE ofEEE moreEEE thanEEE peopleEEE\nTheEEE ChileanEEE governmentEEE protestedEEE thatEEE PinochetEEE nowEEE aEEE hasEEE legalEEE immunityEEE\nbutEEE fewEEE inEEE ChileanEEE societyEEE protestedEEE theEEE arrestEEE\nPinochetEEE arrestEEE showsEEE theEEE growingEEE significanceEEE ofEEE internationalEEE lawEEE\nsuggestingEEE thatEEE officialsEEE accusedEEE ofEEE atrocitiesEEE haveEEE fewerEEE placesEEE toEEE hideEEE theseEEE daysEEE\nevenEEE ifEEE theyEEE areEEE carryingEEE diplomaticEEE passportsEEE",
52 | "BritishFFF authoritiesFFF arrestedFFF GeneralFFF AugustoFFF PinochetFFF inFFF LondonFFF forFFF backFFF surgeryFFF onFFF anFFF internationalFFF warrantFFF\nissuedFFF byFFF SpanishFFF magistrateFFF BaltasarFFF GarzonFFF\nTheFFF MadridFFF courtFFF chargedFFF PinochetFFF\nwhoFFF ruledFFF ChileFFF asFFF aFFF despotFFF forFFF yearsFFF\nwithFFF crimesFFF againstFFF humanityFFF\nincludingFFF genocideFFF andFFF terrorismFFF involvingFFF theFFF deathsFFF ofFFF moreFFF thanFFF peopleFFF\nTheFFF ChileanFFF governmentFFF protestedFFF thatFFF PinochetFFF nowFFF aFFF hasFFF legalFFF immunityFFF\nbutFFF fewFFF inFFF ChileanFFF societyFFF protestedFFF theFFF arrestFFF\nPinochetFFF arrestFFF showsFFF theFFF growingFFF significanceFFF ofFFF internationalFFF lawFFF\nsuggestingFFF thatFFF officialsFFF accusedFFF ofFFF atrocitiesFFF haveFFF fewerFFF placesFFF toFFF hideFFF theseFFF daysFFF\nevenFFF ifFFF theyFFF areFFF carryingFFF diplomaticFFF passportsFFF",
53 | "BritishGGG authoritiesGGG arrestedGGG GeneralGGG AugustoGGG PinochetGGG inGGG LondonGGG forGGG backGGG surgeryGGG onGGG anGGG internationalGGG warrantGGG\nissuedGGG byGGG SpanishGGG magistrateGGG BaltasarGGG GarzonGGG\nTheGGG MadridGGG courtGGG chargedGGG PinochetGGG\nwhoGGG ruledGGG ChileGGG asGGG aGGG despotGGG forGGG yearsGGG\nwithGGG crimesGGG againstGGG humanityGGG\nincludingGGG genocideGGG andGGG terrorismGGG involvingGGG theGGG deathsGGG ofGGG moreGGG thanGGG peopleGGG\nTheGGG ChileanGGG governmentGGG protestedGGG thatGGG PinochetGGG nowGGG aGGG hasGGG legalGGG immunityGGG\nbutGGG fewGGG inGGG ChileanGGG societyGGG protestedGGG theGGG arrestGGG\nPinochetGGG arrestGGG showsGGG theGGG growingGGG significanceGGG ofGGG internationalGGG lawGGG\nsuggestingGGG thatGGG officialsGGG accusedGGG ofGGG atrocitiesGGG haveGGG fewerGGG placesGGG toGGG hideGGG theseGGG daysGGG\nevenGGG ifGGG theyGGG areGGG carryingGGG diplomaticGGG passportsGGG"
54 | ],
55 | "references": [
56 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
57 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
58 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC",
59 | "BritishCCC authoritiesCCC arrestedCCC GeneralCCC AugustoCCC PinochetCCC inCCC LondonCCC forCCC backCCC surgeryCCC onCCC anCCC internationalCCC warrantCCC\nissuedCCC byCCC SpanishCCC magistrateCCC BaltasarCCC GarzonCCC\nTheCCC MadridCCC courtCCC chargedCCC PinochetCCC\nwhoCCC ruledCCC ChileCCC asCCC aCCC despotCCC forCCC yearsCCC\nwithCCC crimesCCC againstCCC humanityCCC\nincludingCCC genocideCCC andCCC terrorismCCC involvingCCC theCCC deathsCCC ofCCC moreCCC thanCCC peopleCCC\nTheCCC ChileanCCC governmentCCC protestedCCC thatCCC PinochetCCC nowCCC aCCC hasCCC legalCCC immunityCCC\nbutCCC fewCCC inCCC ChileanCCC societyCCC protestedCCC theCCC arrestCCC\nPinochetCCC arrestCCC showsCCC theCCC growingCCC significanceCCC ofCCC internationalCCC lawCCC\nsuggestingCCC thatCCC officialsCCC accusedCCC ofCCC atrocitiesCCC haveCCC fewerCCC placesCCC toCCC hideCCC theseCCC daysCCC\nevenCCC ifCCC theyCCC areCCC carryingCCC diplomaticCCC passportsCCC"
60 | ]
61 | }
62 | }
--------------------------------------------------------------------------------
/tests/rouge_test_to_json.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from pathlib import Path
4 | from bs4 import BeautifulSoup
5 |
6 |
# Directory holding the sample ROUGE test inputs, located next to this script.
root = Path(os.path.dirname(__file__)) / "sample-test"
8 |
9 |
def read_text(path, input_format):
    """Read one summary/reference file and return its text.

    Args:
        path: ``pathlib.Path``-like object; opened as UTF-8 text.
        input_format: when equal to ``"SPL"`` the stripped file content is
            returned unchanged. Any other value is treated as markup: the
            text of every ``<a>`` tag that carries an ``id`` attribute is
            stripped and the pieces are joined with newlines.

    Returns:
        The extracted text as a single string.
    """
    with path.open(encoding="utf-8") as f:
        raw = f.read().strip()

    if input_format == "SPL":
        return raw

    anchors = BeautifulSoup(raw, "html.parser").find_all(
        "a", attrs={"id": True})
    return "\n".join(anchor.string.strip() for anchor in anchors)
20 |
21 |
# Convert each ROUGE settings XML into a JSON file mapping the EVAL id to the
# texts of its peer summaries and model references. The output file is opened
# by bare name, so it is written relative to the current working directory.
for testf in ["ROUGE-test.xml", "verify-spl.xml", "verify.xml"]:
    file_path = root.joinpath(testf)
    with file_path.open(encoding="utf-8") as f:
        soup = BeautifulSoup(f.read().strip(), "xml")

    data = {}
    for e in soup.find_all("EVAL"):
        summary_root = e.find_next("PEER-ROOT").string.strip()
        ref_root = e.find_next("MODEL-ROOT").string.strip()
        input_format = e.find_next("INPUT-FORMAT")["TYPE"]

        # Texts are gathered per tag letter: "P" (peers) and "M" (models).
        collected = {"P": [], "M": []}
        for kind in ["PEERS", "MODELS"]:
            node = e.find_next(kind)
            node_type = kind[0]
            node_root = summary_root if node_type == "P" else ref_root
            for n in node.find_all(node_type):
                p = root.joinpath(*node_root.split("/"), n.string.strip())
                collected[node_type].append(read_text(p, input_format))

        data[e["ID"]] = {
            "summaries": collected["P"],
            "references": collected["M"]
        }

    name, _ = os.path.splitext(testf)
    with open(name + ".json", "wb") as f:
        f.write(json.dumps(data, indent=4).encode("utf-8"))
58 |
--------------------------------------------------------------------------------
/tests/test_be_rouge.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import unittest
4 | from sumeval.metrics.rouge import RougeCalculator
5 |
6 |
class TestRougeBE(unittest.TestCase):
    """Check that ``rouge_be`` matches unigram ``rouge_n`` over BE keys."""

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test.json`` in DATA_DIR."""
        path = os.path.join(self.DATA_DIR, "ROUGE-test.json")
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def _bes_to_words(self, basic_elements, compare_type):
        """Map every basic element to its ``as_key(compare_type)`` value."""
        return [be.as_key(compare_type) for be in basic_elements]

    def _assert_be_matches_unigram(self, rouge, summary, references,
                                   summary_keys, reference_keys,
                                   compare_type):
        """Assert ROUGE-1 over the keys equals ROUGE-BE within 1e-5."""
        base = rouge.rouge_n(summary_keys, reference_keys, n=1)
        score = rouge.rouge_be(summary, references, compare_type)
        self.assertLess(abs(base - score), 1e-5)

    def test_rouge_be(self):
        """Compare both scores for every summary in the fixture file."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=False)
        for eval_id, entry in data.items():
            summaries = entry["summaries"]
            references = entry["references"]
            parsed_refs = [rouge.parse_to_be(r) for r in references]

            for _type in ["H", "HM", "HMR"]:
                print("eval {}: test {} pattern.".format(eval_id, _type))
                ref_keys = [self._bes_to_words(r, _type)
                            for r in parsed_refs]

                for s in summaries:
                    parsed = rouge.parse_to_be(s)
                    # Summaries that yield no basic elements are skipped.
                    if len(parsed) == 0:
                        continue
                    self._assert_be_matches_unigram(
                        rouge, s, references,
                        self._bes_to_words(parsed, _type), ref_keys, _type)

    def test_rouge_be_hm(self):
        """Compare both scores on a small hand-written example."""
        rouge = RougeCalculator(stopwords=False)
        summaries = [
            "It was beautiful flower, and the other was beautiful flower also."
        ]
        references = [
            "The flower was beautiful.",
            "Two flower were beautiful"
        ]
        parsed_refs = [rouge.parse_to_be(r) for r in references]

        for _type in ["HM", "HMR"]:
            ref_keys = [self._bes_to_words(r, _type) for r in parsed_refs]
            for s in summaries:
                summary_keys = self._bes_to_words(rouge.parse_to_be(s), _type)
                self._assert_be_matches_unigram(
                    rouge, s, references, summary_keys, ref_keys, _type)
62 |
63 |
# Run the tests when this module is executed directly; "ignore" is passed as
# the warnings filter to unittest.
if __name__ == "__main__":
    unittest.main(warnings="ignore")
66 |
--------------------------------------------------------------------------------
/tests/test_be_rouge_ja.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import sys
4 | import unittest
5 | from sumeval.metrics.rouge import RougeCalculator
6 |
7 |
class TestRougeBEJA(unittest.TestCase):
    """Check ``rouge_be`` against unigram ``rouge_n`` with ``lang="ja"``."""

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test-ja.json`` in DATA_DIR."""
        path = os.path.join(self.DATA_DIR, "ROUGE-test-ja.json")
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def _bes_to_words(self, basic_elements, compare_type):
        """Map every basic element to its ``as_key(compare_type)`` value."""
        return [be.as_key(compare_type) for be in basic_elements]

    def _assert_be_matches_unigram(self, rouge, summary, references,
                                   summary_keys, reference_keys,
                                   compare_type):
        """Assert ROUGE-1 over the keys equals ROUGE-BE within 1e-5."""
        base = rouge.rouge_n(summary_keys, reference_keys, n=1)
        score = rouge.rouge_be(summary, references, compare_type)
        self.assertLess(abs(base - score), 1e-5)

    def test_rouge_be(self):
        """Compare both scores for every summary in the fixture file."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=False, lang="ja")
        for entry in data.values():
            summaries = entry["summaries"]
            references = entry["references"]
            parsed_refs = [rouge.parse_to_be(r) for r in references]
            for _type in ["H", "HM", "HMR"]:
                ref_keys = [self._bes_to_words(r, _type)
                            for r in parsed_refs]

                for s in summaries:
                    parsed = rouge.parse_to_be(s)
                    # Summaries that yield no basic elements are skipped.
                    if len(parsed) == 0:
                        continue
                    self._assert_be_matches_unigram(
                        rouge, s, references,
                        self._bes_to_words(parsed, _type), ref_keys, _type)

    def test_rouge_be_hm(self):
        """Compare both scores on a small hand-written example."""
        rouge = RougeCalculator(stopwords=False, lang="ja")
        summaries = [
            "私はきれいな花が好きで、きれいな花には目がない。"
        ]
        references = [
            "きれいな花が好きだ",
            "私はきれいな花に目がない"
        ]
        parsed_refs = [rouge.parse_to_be(r) for r in references]

        for _type in ["HM", "HMR"]:
            ref_keys = [self._bes_to_words(r, _type) for r in parsed_refs]
            for s in summaries:
                summary_keys = self._bes_to_words(rouge.parse_to_be(s), _type)
                self._assert_be_matches_unigram(
                    rouge, s, references, summary_keys, ref_keys, _type)
61 |
62 |
# Run the tests when this module is executed directly; "ignore" is passed as
# the warnings filter to unittest.
if __name__ == "__main__":
    unittest.main(warnings="ignore")
65 |
--------------------------------------------------------------------------------
/tests/test_bleu.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from sumeval.metrics.bleu import BLEUCalculator
3 |
4 |
class TestBLEU(unittest.TestCase):
    """Consistency checks for ``BLEUCalculator.bleu``."""

    def test_bleu(self):
        """String and pre-tokenized inputs score alike; so does a JA pair."""
        summary = "I am waiting on the beach"
        reference = "He is walking on the beach"

        en_bleu = BLEUCalculator()
        score = en_bleu.bleu(summary, reference)
        score_from_list = en_bleu.bleu(summary.split(), [reference.split()])
        self.assertLess(abs(score - score_from_list), 1e-8)

        # The Japanese pair is expected to reach the same score as the
        # English pair above.
        ja_bleu = BLEUCalculator(lang="ja")
        score_ja = ja_bleu.bleu("私はビーチで待ってる", "彼がベンチで待ってる")
        self.assertLess(abs(score - score_ja), 1e-8)
19 |
20 |
# Run the tests when this module is executed directly; "ignore" is passed as
# the warnings filter to unittest.
if __name__ == "__main__":
    unittest.main(warnings="ignore")
23 |
--------------------------------------------------------------------------------
/tests/test_custom_lang.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from sumeval.metrics.lang.base_lang import BaseLang
3 | from sumeval.metrics.rouge import RougeCalculator
4 | from sumeval.metrics.bleu import BLEUCalculator
5 |
6 |
class TestCustomLang(unittest.TestCase):
    """Check that a user-defined ``BaseLang`` subclass can drive the metrics."""

    def test_custom_lang(self):
        """ROUGE-N and BLEU are positive with a '/'-splitting tokenizer."""

        class Custom(BaseLang):
            """Language whose tokens are separated by '/'."""

            def __init__(self):
                super().__init__("cs")

            def tokenize(self, text):
                return text.split("/")

        lang = Custom()

        rouge_score = RougeCalculator(lang=lang).rouge_n(
            summary="I/went/to/the/Mars/from/my/living/town.",
            references="I/went/to/Mars",
            n=1)

        bleu_score = BLEUCalculator(lang=lang).bleu(
            "I/am/waiting/on/the/beach",
            "He/is/walking/on/the/beach")

        self.assertGreater(rouge_score, 0)
        self.assertGreater(bleu_score, 0)
32 |
33 |
# Run the tests when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
36 |
--------------------------------------------------------------------------------
/tests/test_lang_en.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from collections import Counter
3 | from sumeval.metrics.lang.lang_en import LangEN
4 |
5 |
class TestLangEN(unittest.TestCase):
    """Tests for the English language helper ``LangEN``."""

    def test_basic_element(self):
        """The parsed basic elements have the expected head and modifier."""
        lang = LangEN()
        text = "The very beautiful toy is bought by Tom."
        bes = list(lang.parse_to_be(text))
        # Every assertion below sits inside the loop, so without this guard
        # an empty parse result would let the test pass vacuously.
        self.assertGreater(len(bes), 0, "no basic elements were parsed")
        for i, be in enumerate(bes):
            print(be)
            # The first element pairs the head with its adjective; every
            # later one is expected to pair it with the verb lemma.
            expected_modifier = "beautiful" if i == 0 else "buy"
            self.assertEqual(be.head, "toy")
            self.assertEqual(be.modifier, expected_modifier)

    def test_stemming(self):
        """Both 'dippier' and 'dippy' stem to 'dippy'."""
        lang = LangEN()
        text = "dippier dippy"
        stems = [lang.stemming(w)
                 for w in lang.tokenize_with_preprocess(text)]
        counts = Counter(stems)
        self.assertEqual(("dippy", 2), counts.most_common()[0])
26 |
--------------------------------------------------------------------------------
/tests/test_lang_ja.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from sumeval.metrics.lang.lang_ja import LangJA
3 |
4 |
class TestLangJA(unittest.TestCase):
    """Tests for the Japanese language helper ``LangJA``."""

    def test_basic_element(self):
        """The parsed basic elements have the expected head and modifier."""
        lang = LangJA()
        text = "とても綺麗な花を見つけた"
        bes = list(lang.parse_to_be(text))
        # Every assertion below sits inside the loop, so without this guard
        # an empty parse result would let the test pass vacuously.
        self.assertGreater(len(bes), 0, "no basic elements were parsed")
        for i, be in enumerate(bes):
            print(be)
            # The first element pairs the head with its adjective; every
            # later one is expected to pair it with the verb.
            expected_modifier = "奇麗" if i == 0 else "見付ける"
            self.assertEqual(be.head, "花")
            self.assertEqual(be.modifier, expected_modifier)
19 |
20 |
# Run the tests when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
23 |
--------------------------------------------------------------------------------
/tests/test_lang_zh.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pytest
3 | from sumeval.metrics.lang.lang_zh import LangZH
4 |
5 |
class TestLangZH(unittest.TestCase):
    """Tests for the Chinese language helper ``LangZH``."""

    def test_tokenize(self):
        """The sample sentence is tokenized into eight tokens."""
        tokens = LangZH().tokenize("我发现了一朵非常漂亮的花")
        self.assertEqual(len(tokens), 8)

    @pytest.mark.skip(reason="Download the parse model is terrible slow.")
    def test_basic_element(self):
        """The parsed basic elements have the expected head and modifier."""
        lang = LangZH()
        text = "我发现了一朵非常漂亮的花"
        for i, be in enumerate(lang.parse_to_be(text)):
            # First element: head with its adjective; later ones: the verb.
            expected_modifier = "漂亮" if i == 0 else "发现"
            self.assertEqual(be.head, "花")
            self.assertEqual(be.modifier, expected_modifier)
26 |
--------------------------------------------------------------------------------
/tests/test_rouge.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import sys
4 | import unittest
5 | from rougescore import rouge_n, rouge_l
6 | from pythonrouge.pythonrouge import Pythonrouge
7 | from sumeval.metrics.rouge import RougeCalculator
8 |
9 |
class TestRouge(unittest.TestCase):
    """Compare ``RougeCalculator`` with the pythonrouge and rougescore baselines."""

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test.json`` in DATA_DIR."""
        path = os.path.join(self.DATA_DIR, "ROUGE-test.json")
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def _assert_rouge_n_matches(self, data, rouge, **baseline_options):
        """Assert ROUGE-1/2 from ``rouge`` agree with both baselines.

        ``baseline_options`` are forwarded to ``Pythonrouge`` on top of the
        settings shared by all ROUGE-N tests. Each score must be within 1e-5
        of the rougescore value and of the pythonrouge F-score.
        """
        for entry in data.values():
            summaries = entry["summaries"]
            references = entry["references"]
            for n in [1, 2]:
                f_key = "ROUGE-{}-F".format(n)
                for s in summaries:
                    baseline = Pythonrouge(
                        summary_file_exist=False,
                        summary=[[s]],
                        reference=[[[r] for r in references]],
                        n_gram=n, recall_only=False,
                        stemming=False,
                        **baseline_options)
                    b1_v = baseline.calc_score()
                    b2_v = rouge_n(rouge.tokenize(s),
                                   [rouge.tokenize(r) for r in references],
                                   n, 0.5)
                    v = rouge.rouge_n(s, references, n)
                    self.assertLess(abs(b2_v - v), 1e-5)
                    self.assertLess(abs(b1_v[f_key] - v), 1e-5)  # noqa

    def test_rouge(self):
        """ROUGE-N without stop-word removal."""
        self._assert_rouge_n_matches(
            self.load_test_data(),
            RougeCalculator(stopwords=False),
            length_limit=False, stopwords=False)

    def test_rouge_with_stop_word(self):
        """ROUGE-N with stop-word removal."""
        self._assert_rouge_n_matches(
            self.load_test_data(),
            RougeCalculator(stopwords=True),
            length_limit=False, stopwords=True)

    def test_rouge_with_length_limit(self):
        """ROUGE-N with ``length_limit=50`` (baseline: non-word-level)."""
        self._assert_rouge_n_matches(
            self.load_test_data(),
            RougeCalculator(stopwords=True, length_limit=50),
            length_limit=True, length=50,
            word_level=False, stopwords=True)

    def test_rouge_with_word_limit(self):
        """ROUGE-N with ``word_limit=5`` (baseline: word-level)."""
        self._assert_rouge_n_matches(
            self.load_test_data(),
            RougeCalculator(stopwords=True, word_limit=5),
            length_limit=True, length=5,
            word_level=True, stopwords=True)

    def test_rouge_l(self):
        """ROUGE-L agrees with both baselines."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=True)
        for entry in data.values():
            summaries = entry["summaries"]
            references = entry["references"]
            for s in summaries:
                # NOTE(review): the baseline is given length_limit=True,
                # length=50 while the RougeCalculator above has no limit
                # configured -- confirm this is intended.
                baseline = Pythonrouge(
                    summary_file_exist=False,
                    summary=[[s]],
                    reference=[[[r] for r in references]],
                    n_gram=1, recall_only=False, ROUGE_L=True,
                    length_limit=True, length=50,
                    stemming=False, stopwords=True)
                b1_v = baseline.calc_score()
                b2_v = rouge_l(rouge.tokenize(s),
                               [rouge.tokenize(r) for r in references],
                               0.5)
                v = rouge.rouge_l(s, references)
                self.assertLess(abs(b2_v - v), 1e-5)
                self.assertLess(abs(b1_v["ROUGE-L-F"] - v), 1e-5)
134 |
135 |
# Run the tests when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
138 |
--------------------------------------------------------------------------------
/tests/test_rouge_ja.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import unittest
4 | from rougescore import rouge_n
5 | from sumeval.metrics.rouge import RougeCalculator
6 |
7 |
class TestRougeJA(unittest.TestCase):
    """Compare ``RougeCalculator(lang="ja")`` with the rougescore baseline."""

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test-ja.json`` in DATA_DIR."""
        path = os.path.join(self.DATA_DIR, "ROUGE-test-ja.json")
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def _split(self, text):
        """Split on single spaces after replacing "。" and "、" with spaces.

        Empty or whitespace-only pieces are dropped.
        """
        normalized = text.replace("。", " ").replace("、", " ").strip()
        stripped = (piece.strip() for piece in normalized.split(" "))
        return [word for word in stripped if word]

    def _compress(self, text_or_texts):
        """Remove every space from a string, or from each string in a list/tuple."""
        if isinstance(text_or_texts, (tuple, list)):
            return [s.replace(" ", "") for s in text_or_texts]
        return text_or_texts.replace(" ", "")

    def test_rouge(self):
        """Scores on space-free text match the baseline on pre-split words."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=False, lang="ja")
        for entry in data.values():
            summaries = entry["summaries"]
            references = entry["references"]
            for n in [1, 2]:
                for s in summaries:
                    v = rouge.rouge_n(self._compress(s),
                                      self._compress(references), n)
                    b_v = rouge_n(self._split(s),
                                  [self._split(r) for r in references],
                                  n, 0.5)
                    self.assertLess(abs(b_v - v), 1e-5)

    def test_rouge_with_stop_words(self):
        """Scores with stop-word removal match a stop-word-filtered baseline."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=True, lang="ja")

        def split(text):
            # Same splitting as _split, minus the calculator's stop words.
            return [w for w in self._split(text)
                    if not rouge._lang.is_stop_word(w)]

        for entry in data.values():
            summaries = entry["summaries"]
            references = entry["references"]
            for n in [1, 2]:
                for s in summaries:
                    v = rouge.rouge_n(s, references, n)
                    b_v = rouge_n(split(s),
                                  [split(r) for r in references],
                                  n, 0.5)
                    self.assertLess(abs(b_v - v), 1e-5)
64 |
65 |
# Run the tests when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
68 |
--------------------------------------------------------------------------------
/tests/test_rouge_zh.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import unittest
4 | from rougescore import rouge_n
5 | from sumeval.metrics.rouge import RougeCalculator
6 |
7 |
# NOTE(review): the class is named TestRougeJA but every test here uses
# lang="zh" and ROUGE-test-zh.json -- looks like a copy-paste leftover;
# confirm before renaming.
class TestRougeJA(unittest.TestCase):
    """Compare ``RougeCalculator(lang="zh")`` with the rougescore baseline."""

    DATA_DIR = os.path.join(os.path.dirname(__file__), "data/rouge")

    def load_test_data(self):
        """Return the parsed content of ``ROUGE-test-zh.json`` in DATA_DIR."""
        path = os.path.join(self.DATA_DIR, "ROUGE-test-zh.json")
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def _split(self, text):
        """Split on single spaces after replacing "。" and "、" with spaces.

        Empty or whitespace-only pieces are dropped.
        """
        normalized = text.replace("。", " ").replace("、", " ").strip()
        stripped = (piece.strip() for piece in normalized.split(" "))
        return [word for word in stripped if word]

    def _compress(self, text_or_texts):
        """Remove every space from a string, or from each string in a list/tuple."""
        if isinstance(text_or_texts, (tuple, list)):
            return [s.replace(" ", "") for s in text_or_texts]
        return text_or_texts.replace(" ", "")

    def test_rouge(self):
        """Scores on space-free text match the baseline on pre-split words."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=False, lang="zh")
        for entry in data.values():
            summaries = entry["summaries"]
            references = entry["references"]
            for n in [1, 2]:
                for s in summaries:
                    v = rouge.rouge_n(self._compress(s),
                                      self._compress(references), n)
                    b_v = rouge_n(self._split(s),
                                  [self._split(r) for r in references],
                                  n, 0.5)
                    self.assertLess(abs(b_v - v), 1e-5)

    def test_rouge_with_stop_words(self):
        """Scores with stop-word removal match a stop-word-filtered baseline."""
        data = self.load_test_data()
        rouge = RougeCalculator(stopwords=True, lang="zh")

        def split(text):
            # Same splitting as _split, minus the calculator's stop words.
            return [w for w in self._split(text)
                    if not rouge._lang.is_stop_word(w)]

        for entry in data.values():
            summaries = entry["summaries"]
            references = entry["references"]
            for n in [1, 2]:
                for s in summaries:
                    v = rouge.rouge_n(s, references, n)
                    b_v = rouge_n(split(s),
                                  [split(r) for r in references],
                                  n, 0.5)
                    self.assertLess(abs(b_v - v), 1e-5)
64 |
65 |
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
68 |
--------------------------------------------------------------------------------
/tests/test_sum_eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import unittest
4 | from sumeval.cli.sum_eval import main
5 |
6 |
class TestSumEval(unittest.TestCase):
    """Smoke test for the ``main`` function of ``sumeval.cli.sum_eval``."""

    def test_sum_eval(self):
        """Call ``main`` once with fixed positional arguments.

        The test makes no assertion on the outcome; it only verifies that
        the call completes without raising.
        """
        # Positional arguments, in the exact order expected by ``main``.
        # NOTE(review): the meaning of each position is defined by the
        # signature of ``main``, which is not visible here - confirm there.
        cli_args = (
            "r-nlb",
            False,
            False,
            False,
            -1,
            -1,
            0.5,
            "en",
            "I'm living New York its my home town so awesome",
            "My home town is awesome",
        )
        # The return value is captured but intentionally not checked.
        result = main(*cli_args)
22 |
--------------------------------------------------------------------------------