├── .github
│   └── workflows
│       ├── ci.yml
│       └── publish-to-pypi.yml
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── compare_mt
│   ├── __init__.py
│   ├── align_utils.py
│   ├── arg_utils.py
│   ├── bucketers.py
│   ├── cache_utils.py
│   ├── compare_ll_main.py
│   ├── compare_mt_main.py
│   ├── corpus_utils.py
│   ├── formatting.py
│   ├── ngram_utils.py
│   ├── print_utils.py
│   ├── reporters.py
│   ├── rouge
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── io.py
│   │   ├── requirements.txt
│   │   ├── rouge.py
│   │   ├── rouge_scorer.py
│   │   ├── run.sh
│   │   ├── scoring.py
│   │   └── tokenize.py
│   ├── scorers.py
│   ├── sign_utils.py
│   ├── stat_utils.py
│   └── version_info.py
├── example
│   ├── ll_test.sys1.likelihood
│   ├── ll_test.sys2.likelihood
│   ├── ll_test.tag
│   ├── ll_test.txt
│   ├── multited.ref.jpn
│   ├── multited.ref.jpn.tag
│   ├── multited.sys1.jpn
│   ├── multited.sys1.jpn.tag
│   ├── multited.sys2.jpn
│   ├── multited.sys2.jpn.tag
│   ├── sum.ref.eng
│   ├── sum.sys1.eng
│   ├── sum.sys2.eng
│   ├── ted.orig.slk
│   ├── ted.ref.align
│   ├── ted.ref.detok.eng
│   ├── ted.ref.eng
│   ├── ted.ref.eng.rptag
│   ├── ted.ref.eng.tag
│   ├── ted.sys1.align
│   ├── ted.sys1.detok.eng
│   ├── ted.sys1.eng
│   ├── ted.sys1.eng.rptag
│   ├── ted.sys1.eng.senttag
│   ├── ted.sys1.eng.tag
│   ├── ted.sys2.align
│   ├── ted.sys2.detok.eng
│   ├── ted.sys2.eng
│   ├── ted.sys2.eng.rptag
│   ├── ted.sys2.eng.senttag
│   ├── ted.sys2.eng.tag
│   ├── ted.train.counts
│   └── ted.train.eng
├── pytest.ini
├── requirements.txt
├── scripts
│   ├── count.py
│   ├── interleave.py
│   ├── postag.py
│   └── relativepositiontag.py
├── setup.py
└── tests
    ├── __init__.py
    ├── test_cache.py
    └── test_scorers.py

/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on: [push]
3 | 
4 | jobs:
5 |   build:
6 |     runs-on: ubuntu-latest
7 |     steps:
8 |     - uses: actions/checkout@v2
9 |     - name: Install Python 3
10 |       uses: actions/setup-python@v1
11 |       with:
12 |         python-version: 3.9
13 |     - name: Install dependencies
14 |       run: |
15 |         python -m pip install --upgrade pip
16 |         pip install .
17 |     - name: Run tests with unittest
18 |       run: python -m unittest
19 | 
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI
2 | 
3 | on: push
4 | 
5 | jobs:
6 |   build-n-publish:
7 |     name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
8 |     runs-on: ubuntu-18.04
9 |     steps:
10 |     - uses: actions/checkout@master
11 |     - name: Set up Python 3.9
12 |       uses: actions/setup-python@v1
13 |       with:
14 |         python-version: 3.9
15 |     - name: Install pypa/build
16 |       run: >-
17 |         python -m
18 |         pip install
19 |         build
20 |         --user
21 |     - name: Build a binary wheel and a source tarball
22 |       run: >-
23 |         python -m
24 |         build
25 |         --sdist
26 |         --wheel
27 |         --outdir dist/
28 |         .
29 | - name: Publish distribution 📦 to Test PyPI 30 | uses: pypa/gh-action-pypi-publish@master 31 | with: 32 | skip_existing: true 33 | password: ${{ secrets.TEST_PYPI_API_KEY }} 34 | repository_url: https://test.pypi.org/legacy/ 35 | - name: Publish distribution 📦 to PyPI 36 | if: startsWith(github.ref, 'refs/tags') 37 | uses: pypa/gh-action-pypi-publish@master 38 | with: 39 | password: ${{ secrets.PYPI_API_KEY }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCharm 2 | .idea 3 | __pycache__ 4 | # vim 5 | *.swp 6 | # VS code 7 | .vscode/ 8 | # Mac 9 | .DS_Store 10 | # setup.py build artifacts 11 | *.egg-info 12 | dist/ 13 | build/ 14 | # Virtualenv for developing 15 | env/ 16 | # Outputs 17 | output/ 18 | outputs/ 19 | .pytest_cache 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '3.6' 4 | - 3.7-dev 5 | install: 6 | - pip install -r requirements.txt 7 | - pip install -U setuptools 8 | - python setup.py install 9 | script: 10 | - pytest 11 | - compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --decimals 2 --output_directory output 12 | - compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --compare_scores score_type=bleu,bootstrap=10,prob_thresh=0.05 --output_directory output 13 | - compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --compare_word_accuracies bucket_type=freq,freq_corpus_file=example/ted.train.eng,bucket_cutoffs=1:2:3:5:10 bucket_type=freq,freq_count_file=example/ted.train.counts,bucket_cutoffs=1:2:3:5:10 --output_directory output 14 | - compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --compare_word_accuracies bucket_type=case bucket_type=label,ref_labels=example/ted.ref.eng.tag,out_labels="example/ted.sys1.eng.tag;example/ted.sys2.eng.tag",label_set=CC+DT+IN+JJ+NN+NNP+NNS+PRP+RB+TO+VB+VBP+VBZ bucket_type=numlabel,ref_labels=example/ted.ref.eng.rptag,out_labels="example/ted.sys1.eng.rptag;example/ted.sys2.eng.rptag" --compare_ngrams compare_type=match,ref_labels=example/ted.ref.eng.tag,out_labels="example/ted.sys1.eng.tag;example/ted.sys2.eng.tag" --output_directory output 15 | - compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --src_file example/ted.orig.slk --compare_src_word_accuracies ref_align_file=example/ted.ref.align --output_directory output 16 | - compare-ll --ref example/ll_test.txt --ll-files example/ll_test.sys1.likelihood example/ll_test.sys2.likelihood --compare-word-likelihoods bucket_type=freq,freq_corpus_file=example/ll_test.txt --decimals 2 17 | - compare-ll --ref example/ll_test.txt --ll-files example/ll_test.sys1.likelihood example/ll_test.sys2.likelihood --compare-word-likelihoods bucket_type=label,label_corpus=example/ll_test.tag,label_set=CC+DT+IN+JJ+NN+NNP+NNS+PRP+RB+TO+VB+VBP+VBZ 18 | - compare-mt example/sum.ref.eng example/sum.sys1.eng example/sum.sys2.eng --compare_scores 'score_type=rouge1' 'score_type=rouge2' 'score_type=rougeL' --output_directory output 19 | - python compare_mt/compare_mt_main.py example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --output_directory output 20 | deploy: 21 | provider: pypi 22 | user: pmichel31415 23 | skip_existing: true 24 | password: 25 | secure: 
fGKIZDGfu5L2WGiGlIidPI5uBi2P2TIytEIDerK8sJWKdIM6CSLnzVVXHst5VIujIhF2/TP7YMniLvMEflW5HY7Bu5fb2dBMQnyQJiE8SE9ih/Oq35W3fHJCEiAYnWo3CKLYlwUyJC9VZn8w0JrU2MBWfLCIli3Fuh9sbRyVNvjRq4kc2IGIjcxwQvM0Hml9G/89UwWYKUbxi53tFfUr5qu9WyuPdy/i2bcHaYMB6FgXbTn47MmOgVDvLjLjePpMsF+fNQDkkN035ngPRLDfHfBM74ag2ycVUhjT8nsMOfKGMpmbk/CeyKOYT9TW6Fp/MALQ5nJ9qF4q49mOpz7lh0JfogTCxweU76cpPsi9j99BvYULTYy1SnjOP9ZqglobosWq2fUtw8Pf6KE57Y0ultfh+CAgXWhX7rBFGj9PrYW6+P8Y2p5+MQuXRZp+6TOXgpELh0SiXUAFQA5B77Kw8+tPw5DJL1b5oGXBTp94sttHxNXeV9bm9AwKB18rUcKKA0AHFP5FgjvdtfZKnjydSg/hFn82UA/0g0ubcSuqdoSRgk49NT4RasODiqnfqXseJ/q1vWm5eiW60QzXuHZrK6EN8vzKxFH7DYjAZTOQsAdoCgAQvSXABOKum/Pm3HWU+BfD0xZH9cJEn9YvSKD5qMNikmMK1LR2cgRHmbhjUXQ= 26 | on: 27 | branch: master 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Graham Neubig 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compare-mt 2 | by [NeuLab](http://www.cs.cmu.edu/~neulab/) @ [CMU LTI](https://lti.cs.cmu.edu), and other contributors 3 | 4 | [![Integration Tests](https://github.com/neulab/compare-mt/actions/workflows/ci.yml/badge.svg?event=push)](.github/workflows/ci.yml) 5 | 6 | `compare-mt` (for "compare my text") is a program to compare the output of multiple systems for language generation, 7 | including machine translation, summarization, dialog response generation, etc. 8 | To use it you need to have, in text format, a "correct" reference, and the output of two different systems. 9 | Based on this, `compare-mt` will run a number of analyses that attempt to pick out salient differences between 10 | the systems, which will make it easier for you to figure out what things one system is doing better than another. 
11 | 
12 | ## Basic Usage
13 | 
14 | First, you need to install the package:
15 | 
16 | ```bash
17 | # Requirements
18 | pip install -r requirements.txt
19 | # Install the package
20 | python setup.py install
21 | ```
22 | 
23 | Then, as an example, you can run this over two included system outputs.
24 | 
25 | ```bash
26 | compare-mt --output_directory output/ example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng
27 | ```
28 | 
29 | This will output some statistics to the command line, and also write a formatted HTML report to `output/`.
30 | Here, system 1 and system 2 are the baseline phrase-based and neural Slovak-English systems from our
31 | [EMNLP 2018 paper](http://aclweb.org/anthology/D18-1103). This will print out a number of statistics including:
32 | 
33 | * **Aggregate Scores:** A report on overall BLEU scores and length ratios
34 | * **Word Accuracy Analysis:** A report on the F-measure of words by frequency bucket
35 | * **Sentence Bucket Analysis:** Bucket sentences by various statistics (e.g. sentence BLEU, length difference with the
36 |   reference, overall length), and calculate statistics by bucket (e.g. number of sentences, BLEU score per bucket)
37 | * **N-gram Difference Analysis:** Calculate which n-grams one system is consistently translating better
38 | * **Sentence Examples:** Find sentences where one system is doing better than the other according to sentence BLEU
39 | 
40 | You can see an example of running this analysis (as well as the more advanced analysis below) either through a
41 | [generated HTML report here](http://phontron.com/compare-mt/output/), or in the following narrated video:
42 | 
43 | [![compare-mt narrated demo video](https://img.youtube.com/vi/K-MNPOGKnDQ/0.jpg)](https://www.youtube.com/watch?v=K-MNPOGKnDQ)
44 | 
45 | To summarize the results that immediately stick out from the basic analysis:
46 | 
47 | * From the *aggregate scores* we can see that the BLEU of neural MT is higher, but its sentences are slightly shorter.
48 | * From the *word accuracy analysis* we can see that phrase-based MT is better at low-frequency words.
49 | * From the *sentence bucket analysis* we can see that neural seems to be better at translating shorter sentences.
50 | * From the *n-gram difference analysis* we can see that there are a few words that neural MT is not good at
51 |   but phrase-based MT gets right (e.g. "phantom"), while there are a few long phrases that neural MT does better with
52 |   (e.g. "going to show you").
53 | 
54 | If you run on your own data, you might be able to find more interesting things about your own systems. Try comparing
55 | your modified system with your baseline and seeing what you find!
56 | 
57 | ## Other Options
58 | 
59 | There are many options that can be used to do different types of analysis.
60 | If you want to find all the different types of analysis supported, the most comprehensive way to do so is by
61 | running `compare-mt --help`; the options are documented relatively well and should give examples.
62 | We do highlight a few particularly useful and common types of analysis below:
63 | 
64 | ### Significance Tests
65 | 
66 | The script allows you to perform statistical significance tests for scores based on [bootstrap resampling](https://aclanthology.org/W04-3250.pdf). You can set
67 | the number of samples manually.
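Conceptually, the paired bootstrap works as in the following sketch. This is an illustration, not the exact implementation in `compare_mt/sign_utils.py`; the function name and the `score_fn` argument (a stand-in for any corpus-level metric such as BLEU) are our own:

```python
import random

def paired_bootstrap(ref, out1, out2, score_fn, num_samples=1000):
  # Repeatedly resample sentence indices with replacement, score both
  # systems on each resampled set, and count how often each system wins.
  n = len(ref)
  wins1 = wins2 = ties = 0
  for _ in range(num_samples):
    ids = [random.randrange(n) for _ in range(n)]
    s1 = score_fn([ref[i] for i in ids], [out1[i] for i in ids])
    s2 = score_fn([ref[i] for i in ids], [out2[i] for i in ids])
    if s1 > s2: wins1 += 1
    elif s2 > s1: wins2 += 1
    else: ties += 1
  # If system 1 wins on at least 95% of the samples, the difference is
  # significant at p < 0.05 under this test (cf. prob_thresh=0.05).
  return wins1 / num_samples, wins2 / num_samples, ties / num_samples
```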
Here is an example using the example data:
68 | 
69 | 
70 | ```bash
71 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --compare_scores score_type=bleu,bootstrap=1000,prob_thresh=0.05
72 | ```
73 | 
74 | One important thing to note is that bootstrap resampling as implemented in compare-mt only tests for variance due to data sampling, approximately answering the question "if I ran the same system on a different, similarly sampled dataset, would I be likely to get the same result?".
75 | It does not say anything about whether a system will perform better on another dataset in a different domain, and it [does not control for training-time factors](https://aclanthology.org/P11-2031/) such as selection of the random seed, so it cannot say if another training run of the same model would yield the same result.
76 | 
77 | ### Using Training Set Frequency
78 | 
79 | One useful piece of analysis is the "word accuracy by frequency" analysis. By default this frequency is the frequency
80 | in the *test set*, but arguably it is more informative to know accuracy by frequency in the *training set*, as this
81 | demonstrates the models' robustness to words they haven't seen much, or at all, in the training data. To change the
82 | corpus used to calculate word frequency and use the training set (or some other set), you can set the `freq_corpus_file`
83 | option to the appropriate corpus.
84 | 
85 | 
86 | ```bash
87 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng \
88 |   --compare_word_accuracies bucket_type=freq,freq_corpus_file=example/ted.train.eng
89 | ```
90 | 
91 | In addition, because training sets may be very big, you can also calculate the counts on the file beforehand,
92 | 
93 | ```bash
94 | python scripts/count.py < example/ted.train.eng > example/ted.train.counts
95 | ```
96 | 
97 | and then use these counts directly to improve efficiency.
98 | 
99 | ```bash
100 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng \
101 |   --compare_word_accuracies bucket_type=freq,freq_count_file=example/ted.train.counts
102 | ```
103 | 
104 | 
105 | ### Incorporating Word/Sentence Labels
106 | 
107 | If you're interested in performing aggregate analysis over labels for each word/sentence instead of the words/sentences themselves, it
108 | is possible to do so. As an example, we've included POS tags for each of the example outputs. You can use these in
109 | aggregate analysis, or n-gram-based analysis. The following gives an example:
110 | 
111 | 
112 | ```bash
113 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng \
114 |   --compare_word_accuracies bucket_type=label,ref_labels=example/ted.ref.eng.tag,out_labels="example/ted.sys1.eng.tag;example/ted.sys2.eng.tag",label_set=CC+DT+IN+JJ+NN+NNP+NNS+PRP+RB+TO+VB+VBP+VBZ \
115 |   --compare_ngrams compare_type=match,ref_labels=example/ted.ref.eng.tag,out_labels="example/ted.sys1.eng.tag;example/ted.sys2.eng.tag"
116 | ```
117 | 
118 | This will calculate word accuracies and n-gram matches by POS bucket, and allows you to see things like the fact
119 | that the phrase-based MT system is better at translating content words such as nouns and verbs, while neural MT
120 | is doing better at translating function words.
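The idea behind these label buckets can be illustrated with a small sketch. It is a simplification of what `bucketers.LabelWordBucketer` does; the function below is hypothetical and only computes recall per label, while compare-mt also reports precision and F-measure:

```python
from collections import Counter

def recall_by_label(ref_sents, out_sents, ref_label_sents):
  # For each label, what fraction of reference words carrying that label
  # also appear in the system output?
  correct, total = Counter(), Counter()
  for ref, out, labels in zip(ref_sents, out_sents, ref_label_sents):
    out_counts = Counter(out)
    for word, label in zip(ref, labels):
      total[label] += 1
      if out_counts[word] > 0:
        out_counts[word] -= 1  # consume the match so duplicates aren't double-counted
        correct[label] += 1
  return {label: correct[label] / total[label] for label in total}
```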
121 | 
122 | We also give an example of performing aggregate analysis when multiple labels per word/sentence are allowed, where each group of labels is a string separated by '+'s:
123 | 
124 | ```bash
125 | compare-mt example/multited.ref.jpn example/multited.sys1.jpn example/multited.sys2.jpn \
126 |   --compare_word_accuracies bucket_type=multilabel,ref_labels=example/multited.ref.jpn.tag,out_labels="example/multited.sys1.jpn.tag;example/multited.sys2.jpn.tag",label_set=lexical+formality+pronouns+ellipsis
127 | ```
128 | 
129 | It is also possible to create labels that represent numerical values. For example, `scripts/relativepositiontag.py` calculates the relative position of each word in the sentence, where 0 is the first word in the sentence, 0.5 is a word in the middle, and 1.0 is the last word. These numerical values can then be bucketed. Here is an example:
130 | 
131 | ```bash
132 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng \
133 |   --compare_word_accuracies bucket_type=numlabel,ref_labels=example/ted.ref.eng.rptag,out_labels="example/ted.sys1.eng.rptag;example/ted.sys2.eng.rptag"
134 | ```
135 | 
136 | From this particular analysis we can discover that NMT does worse than PBMT at the end of the sentence, and of course other varieties of numerical labels could be used to measure different properties of words.
137 | 
138 | You can also perform analysis over labels for sentences. Here is an example:
139 | 
140 | ```bash
141 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng \
142 |   --compare_sentence_buckets 'bucket_type=label,out_labels=example/ted.sys1.eng.senttag;example/ted.sys2.eng.senttag,label_set=0+10+20+30+40+50+60+70+80+90+100,statistic_type=score,score_measure=bleu'
143 | ```
144 | 
145 | 
146 | ### Analyzing Source Words
147 | 
148 | If you have a source corpus that is aligned to the target, you can also analyze accuracies according to features of the
149 | source language words, which would allow you to examine whether, for example, infrequent words on the source side are
150 | hard to output properly. Here is an example using the example data:
151 | 
152 | ```bash
153 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --src_file example/ted.orig.slk --compare_src_word_accuracies ref_align_file=example/ted.ref.align
154 | ```
155 | 
156 | ### Analyzing Word Likelihoods
157 | 
158 | If you wish to analyze the word log likelihoods produced by two systems on the target corpus, you can use the following:
159 | 
160 | ```bash
161 | compare-ll --ref example/ll_test.txt --ll-files example/ll_test.sys1.likelihood example/ll_test.sys2.likelihood --compare-word-likelihoods bucket_type=freq,freq_corpus_file=example/ll_test.txt
162 | ```
163 | 
164 | You can analyze the word log likelihoods over labels for each word instead of the words themselves:
165 | 
166 | ```bash
167 | compare-ll --ref example/ll_test.txt --ll-files example/ll_test.sys1.likelihood example/ll_test.sys2.likelihood --compare-word-likelihoods bucket_type=label,label_corpus=example/ll_test.tag,label_set=CC+DT+IN+JJ+NN+NNP+NNS+PRP+RB+TO+VB+VBP+VBZ
168 | ```
169 | 
170 | NOTE: You can also use the above to analyze the word likelihoods produced by two language models.
171 | 
172 | ### Analyzing Other Language Generation Systems
173 | 
174 | You can also analyze other language generation systems using the script. Here is an example of comparing two text summarization systems.
175 | 
176 | ```bash
177 | compare-mt example/sum.ref.eng example/sum.sys1.eng example/sum.sys2.eng --compare_scores 'score_type=rouge1' 'score_type=rouge2' 'score_type=rougeL'
178 | ```
179 | 
180 | ### Evaluating on COMET
181 | 
182 | It is possible to use [COMET](https://unbabel.github.io/COMET/html/index.html) as a metric.
183 | To do so, you need to install it first by running
184 | 
185 | ```bash
186 | pip install unbabel-comet
187 | ```
188 | 
189 | To run it, pass in the source file and select the appropriate score type. Here is an example:
190 | ```bash
191 | compare-mt example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng --src_file example/ted.orig.slk \
192 |   --compare_scores score_type=comet \
193 |   --compare_sentence_buckets bucket_type=score,score_measure=sentcomet
194 | ```
195 | 
196 | Note that COMET runs on top of XLM-R, so it is highly recommended that you use a GPU with it.
197 | 
198 | ## Citation/References
199 | 
200 | If you use compare-mt, we'd appreciate it if you cite the [paper](http://arxiv.org/abs/1903.07926) about it!
201 | 
202 |     @article{DBLP:journals/corr/abs-1903-07926,
203 |       author = {Graham Neubig and Zi{-}Yi Dou and Junjie Hu and Paul Michel and Danish Pruthi and Xinyi Wang and John Wieting},
204 |       title = {compare-mt: {A} Tool for Holistic Comparison of Language Generation Systems},
205 |       journal = {CoRR},
206 |       volume = {abs/1903.07926},
207 |       year = {2019},
208 |       url = {http://arxiv.org/abs/1903.07926},
209 |     }
210 | 
211 | There is an extensive literature review included in the paper above, but some key papers that it borrows ideas from are below:
212 | 
213 | * **Automatic Error Analysis:**
214 |   Popovic and Ney "[Towards Automatic Error Analysis of Machine Translation Output](https://www.mitpressjournals.org/doi/pdf/10.1162/COLI_a_00072)" Computational Linguistics 2011.
215 | * **POS-based Analysis:**
216 |   Chiang et al. "[The Hiero Machine Translation System](http://aclweb.org/anthology/H05-1098)" EMNLP 2005.
217 | * **n-gram Difference Analysis:**
218 |   Akabe et al. "[Discriminative Language Models as a Tool for Machine Translation Error Analysis](http://www.phontron.com/paper/akabe14coling.pdf)" COLING 2014.
219 | 
220 | There is also other good software for automatic comparison or error analysis of MT systems:
221 | 
222 | * **[MT-ComparEval](https://github.com/choko/MT-ComparEval):** Very nice for visualization of individual examples, but
223 |   not as focused on aggregate analysis as `compare-mt`. Also has more software dependencies and requires using a web
224 |   browser, while `compare-mt` can be used as a command-line tool.
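`compare-mt` can also be imported as a Python library. The following sketch uses the internal modules as they appear in this repository; the exact API is not documented as stable, so treat it as an illustration rather than a supported interface:

```python
from compare_mt import corpus_utils, scorers

# Load whitespace-tokenized corpora, one sentence per line
ref = corpus_utils.load_tokens('example/ted.ref.eng')
out = corpus_utils.load_tokens('example/ted.sys1.eng')

# Create a scorer from a profile string, as the sentence bucketers do internally;
# score_sentence returns a tuple whose first element is the score
scorer = scorers.create_scorer_from_profile('bleu')
sent_scores = [scorer.score_sentence(r, o)[0] for r, o in zip(ref, out)]
print(sum(sent_scores) / len(sent_scores))
```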
225 | -------------------------------------------------------------------------------- /compare_mt/__init__.py: -------------------------------------------------------------------------------- 1 | import compare_mt.ngram_utils 2 | import compare_mt.stat_utils 3 | import compare_mt.corpus_utils 4 | import compare_mt.sign_utils 5 | import compare_mt.scorers 6 | import compare_mt.bucketers 7 | import compare_mt.reporters 8 | import compare_mt.arg_utils 9 | import compare_mt.print_utils 10 | import compare_mt.version_info 11 | 12 | __version__ = compare_mt.version_info.__version__ 13 | -------------------------------------------------------------------------------- /compare_mt/align_utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from compare_mt import corpus_utils 3 | 4 | def _count_ngram(sent, order): 5 | gram_pos = dict() 6 | for i in range(order): 7 | gram_pos[i+1] = defaultdict(lambda: []) 8 | for i, word in enumerate(sent): 9 | for j in range(min(i+1, order)): 10 | gram_pos[j+1][word].append(i-j) 11 | word = sent[i-j-1] + ' ' + word 12 | return gram_pos 13 | 14 | def ngram_context_align(ref, out, order=-1, case_insensitive=False): 15 | """ 16 | Calculate the word alignment between a reference sentence and an output sentence. 17 | Proposed in the following paper: 18 | 19 | Automatic Evaluation of Translation Quality for Distant Language Pairs 20 | Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh, Hajime Tsukada 21 | http://www.anthology.aclweb.org/D/D10/D10-1092.pdf 22 | 23 | Args: 24 | ref: A reference sentence 25 | out: An output sentence 26 | order: The highest order of grams we want to consider (-1=inf) 27 | case_insensitive: A boolean specifying whether to turn on the case insensitive option 28 | 29 | Returns: 30 | The word alignment, represented as a list of integers. 31 | """ 32 | 33 | if case_insensitive: 34 | ref = corpus_utils.lower(ref) 35 | out = corpus_utils.lower(out) 36 | 37 | order = len(ref) if order == -1 else order 38 | 39 | ref_gram_pos = _count_ngram(ref, order) 40 | out_gram_pos = _count_ngram(out, order) 41 | 42 | worder = [] 43 | for i, word in enumerate(out): 44 | if len(ref_gram_pos[1][word]) == 0: 45 | continue 46 | if len(ref_gram_pos[1][word]) == len(out_gram_pos[1][word]) == 1: 47 | worder.append(ref_gram_pos[1][word][0]) 48 | else: 49 | word_forward = word 50 | word_backward = word 51 | for j in range(1, order): 52 | if i - j >= 0: 53 | word_backward = out[i-j] + ' ' + word_backward 54 | if len(ref_gram_pos[j+1][word_backward]) == len(out_gram_pos[j+1][word_backward]) == 1: 55 | worder.append(ref_gram_pos[j+1][word_backward][0]+j) 56 | break 57 | 58 | if i + j < len(out): 59 | word_forward = word_forward + ' ' + out[i+j] 60 | if len(ref_gram_pos[j+1][word_forward]) == len(out_gram_pos[j+1][word_forward]) == 1: 61 | worder.append(ref_gram_pos[j+1][word_forward][0]) 62 | break 63 | 64 | return worder 65 | -------------------------------------------------------------------------------- /compare_mt/arg_utils.py: -------------------------------------------------------------------------------- 1 | def parse_profile(profile): 2 | kargs = {} 3 | try: 4 | for kv in profile.split(','): 5 | k, v = kv.split('=') 6 | kargs[k] = v 7 | except ValueError: 8 | # more informative error message 9 | raise ValueError( 10 | f"Failed to parse profile: {profile}. 
The expected format is:" 11 | " \"key1=value1,key2=value2,[...]\"" 12 | ) 13 | return kargs 14 | 15 | def parse_compare_directions(compare_directions): 16 | direcs = [] 17 | try: 18 | for direc in compare_directions.split(';'): 19 | left, right = direc.split('-') 20 | left, right = int(left), int(right) 21 | direcs.append((left, right)) 22 | except ValueError: 23 | # more informative error message 24 | raise ValueError( 25 | f"Failed to parse directions: {compare_directions}." 26 | " The expected format is: \"left1-right1;left2-right2;[...]\"" 27 | ) 28 | return direcs 29 | 30 | def parse_files(filenames): 31 | files = [] 32 | for f in filenames.split(';'): 33 | files.append(f) 34 | return files 35 | 36 | def parse_intfloat(s): 37 | try: 38 | return int(s) 39 | except ValueError: 40 | return float(s) -------------------------------------------------------------------------------- /compare_mt/bucketers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | import numpy as np 4 | from collections import defaultdict 5 | 6 | from compare_mt import corpus_utils 7 | from compare_mt import scorers 8 | from compare_mt import arg_utils 9 | 10 | class Bucketer: 11 | 12 | def set_bucket_cutoffs(self, bucket_cutoffs, num_type='int'): 13 | self.bucket_cutoffs = bucket_cutoffs 14 | self.bucket_strs = [] 15 | for i, x in enumerate(bucket_cutoffs): 16 | if i == 0: 17 | self.bucket_strs.append(f'<{x}') 18 | elif num_type == 'int' and x-1 == bucket_cutoffs[i-1]: 19 | self.bucket_strs.append(f'{x-1}') 20 | else: 21 | self.bucket_strs.append(f'[{bucket_cutoffs[i-1]},{x})') 22 | self.bucket_strs.append(f'>={x}') 23 | 24 | def cutoff_into_bucket(self, value): 25 | for i, v in enumerate(self.bucket_cutoffs): 26 | if value < v: 27 | return i 28 | return len(self.bucket_cutoffs) 29 | 30 | class WordBucketer(Bucketer): 31 | 32 | def calc_bucket(self, val, label=None): 33 | """ 34 | Calculate the bucket for a particular word 35 | 36 | Args: 37 | val: The word to calculate the bucket for 38 | label: If there's a label on the target word, add it 39 | 40 | Returns: 41 | An integer ID of the bucket 42 | """ 43 | raise NotImplementedError('calc_bucket must be implemented in subclasses of WordBucketer') 44 | 45 | def _calc_trg_matches(self, ref_sent, out_sents): 46 | ref_pos = defaultdict(lambda: []) 47 | out_matches = [[-1 for _ in s] for s in out_sents] 48 | ref_matches = [[-1 for _ in ref_sent] for _ in out_sents] 49 | for ri, ref_word in enumerate(ref_sent): 50 | ref_pos[ref_word].append(ri) 51 | for oai, out_sent in enumerate(out_sents): 52 | out_word_cnts = {} 53 | for oi, out_word in enumerate(out_sent): 54 | ref_poss = ref_pos.get(out_word, None) 55 | if ref_poss: 56 | out_word_cnt = out_word_cnts.get(out_word, 0) 57 | if out_word_cnt < len(ref_poss): 58 | out_matches[oai][oi] = ref_poss[out_word_cnt] 59 | ref_matches[oai][ref_poss[out_word_cnt]] = oi 60 | out_word_cnts[out_word] = out_word_cnt + 1 61 | return out_matches, ref_matches 62 | 63 | def _calc_trg_buckets_and_matches(self, ref_sent, ref_label, out_sents, out_labels): 64 | # Initial setup for special cases 65 | if self.case_insensitive: 66 | ref_sent = [corpus_utils.lower(w) for w in ref_sent] 67 | out_sents = [[corpus_utils.lower(w) for w in out_sent] for out_sent in out_sents] 68 | if not ref_label: 69 | ref_label = [] 70 | out_labels = [[] for _ in out_sents] 71 | # Get matches 72 | out_matches, _ = self._calc_trg_matches(ref_sent, out_sents) 73 | # Process the reference, getting 
the bucket 74 | ref_buckets = [self.calc_bucket(w, label=l) for (w,l) in itertools.zip_longest(ref_sent, ref_label)] 75 | # Process each of the outputs, finding matches 76 | out_buckets = [[] for _ in out_sents] 77 | for oai, (out_sent, out_label, match, out_buck) in \ 78 | enumerate(itertools.zip_longest(out_sents, out_labels, out_matches, out_buckets)): 79 | for oi, (w, l, m) in enumerate(itertools.zip_longest(out_sent, out_label, match)): 80 | out_buck.append(self.calc_bucket(w, label=l) if m < 0 else ref_buckets[m]) 81 | # Calculate totals for each sentence 82 | num_buckets = len(self.bucket_strs) 83 | num_outs = len(out_sents) 84 | my_ref_total = np.zeros(num_buckets ,dtype=int) 85 | my_out_totals = np.zeros( (num_outs, num_buckets) ,dtype=int) 86 | my_out_matches = np.zeros( (num_outs, num_buckets) ,dtype=int) 87 | for b in ref_buckets: 88 | if isinstance(b, list): 89 | for bi in b: 90 | my_ref_total[bi] += 1 91 | else: 92 | my_ref_total[b] += 1 93 | for oi, (obs, ms) in enumerate(zip(out_buckets, out_matches)): 94 | for b, m in zip(obs, ms): 95 | if isinstance(b, list): 96 | for bi in b: 97 | my_out_totals[oi,bi] += 1 98 | if m >= 0: 99 | my_out_matches[oi,bi] += 1 100 | else: 101 | my_out_totals[oi,b] += 1 102 | if m >= 0: 103 | my_out_matches[oi,b] += 1 104 | return my_ref_total, my_out_totals, my_out_matches, ref_buckets, out_buckets, out_matches 105 | 106 | def _calc_src_buckets_and_matches(self, src_sent, src_label, ref_sent, ref_aligns, out_sents): 107 | # Initial setup for special cases 108 | if self.case_insensitive: 109 | src_sent = [corpus_utils.lower(w) for w in src_sent] 110 | ref_sent = [corpus_utils.lower(w) for w in ref_sent] 111 | out_sents = [[corpus_utils.lower(w) for w in out_sent] for out_sent in out_sents] 112 | if not src_label: 113 | src_label = [] 114 | # Get matches 115 | _, ref_matches = self._calc_trg_matches(ref_sent, out_sents) 116 | # Process the source, getting the bucket 117 | src_buckets = [self.calc_bucket(w, label=l) for (w,l) in itertools.zip_longest(src_sent, src_label)] 118 | # For each source word, find the reference words that need to be correct 119 | src_aligns = [[] for _ in src_sent] 120 | for src, trg in ref_aligns: 121 | src_aligns[src].append(trg) 122 | # Calculate totals for each sentence 123 | num_buckets = len(self.bucket_strs) 124 | num_outs = len(out_sents) 125 | my_ref_total = np.zeros(num_buckets ,dtype=int) 126 | my_out_matches = np.zeros( (num_outs, num_buckets) ,dtype=int) 127 | for src_bucket in src_buckets: 128 | my_ref_total[src_bucket] += 1 129 | my_out_totals = np.broadcast_to(np.reshape(my_ref_total, (1, num_buckets)), (num_outs, num_buckets)) 130 | for oai, (out_sent, ref_match) in enumerate(zip(out_sents, ref_matches)): 131 | for src_bucket, src_align in zip(src_buckets, src_aligns): 132 | if len(src_align) != 0: 133 | if all([ref_match[x] >= 0 for x in src_align]): 134 | my_out_matches[oai,src_bucket] += 1 135 | return my_ref_total, my_out_totals, my_out_matches, src_buckets, src_aligns, ref_matches 136 | 137 | def calc_statistics(self, ref, outs, 138 | src=None, 139 | ref_labels=None, out_labels=None, 140 | ref_aligns=None, src_labels=None): 141 | """ 142 | Calculate match statistics, bucketed by the type of word we have, and IDs of example sentences to show. 143 | This must be used with a subclass that has self.bucket_strs defined, and self.calc_bucket(word) implemented. 144 | 145 | Args: 146 | ref: The reference corpus 147 | outs: A list of output corpora 148 | src: Source sentences. 
149 | If src is set, it will use ref_aligns, out_aligns, and src_labels. 150 | Otherwise, it will use ref_labels and out_labels. 151 | ref_labels: Labels of the reference corpus (optional) 152 | out_labels: Labels of the output corpora (should be specified iff ref_labels is) 153 | 154 | Returns: 155 | statistics: containing a list of equal length to out, containing for each system 156 | both_tot: the frequency of a particular bucket appearing in both output and reference 157 | ref_tot: the frequency of a particular bucket appearing in just reference 158 | out_tot: the frequency of a particular bucket appearing in just output 159 | rec: recall of the bucket 160 | prec: precision of the bucket 161 | fmeas: f1-measure of the bucket 162 | my_ref_total_list: containing a list of statistics of the reference 163 | my_out_matches_list: containing a list of statistics of the outputs 164 | """ 165 | if not hasattr(self, 'case_insensitive'): 166 | self.case_insensitive = False 167 | 168 | # Dimensions 169 | num_buckets = len(self.bucket_strs) 170 | num_outs = len(outs) 171 | 172 | # Initialize the sufficient statistics for prec/rec/fmeas 173 | ref_total = np.zeros(num_buckets, dtype=int) 174 | out_totals = np.zeros( (num_outs, num_buckets) ,dtype=int) 175 | out_matches = np.zeros( ( num_outs, num_buckets) ,dtype=int) 176 | 177 | my_ref_total_list = [] 178 | my_out_totals_list = [] 179 | my_out_matches_list = [] 180 | 181 | # Step through the sentences 182 | for rsi, (ref_sent, ref_label) in enumerate(itertools.zip_longest(ref, ref_labels if ref_labels else [])): 183 | if src: 184 | my_ref_total, my_out_totals, my_out_matches, _, _, _ = \ 185 | self._calc_src_buckets_and_matches(src[rsi], 186 | src_labels[rsi] if src_labels else None, 187 | ref_sent, 188 | ref_aligns[rsi], 189 | [x[rsi] for x in outs]) 190 | else: 191 | my_ref_total, my_out_totals, my_out_matches, _, _, _ = \ 192 | self._calc_trg_buckets_and_matches(ref_sent, 193 | ref_label, 194 | [x[rsi] for x in outs], 195 | [x[rsi] for x in out_labels] if out_labels else None) 196 | ref_total += my_ref_total 197 | out_totals += my_out_totals 198 | out_matches += my_out_matches 199 | 200 | my_ref_total_list.append(my_ref_total) 201 | my_out_totals_list.append(my_out_totals) 202 | my_out_matches_list.append(my_out_matches) 203 | 204 | # Calculate statistics 205 | statistics = [[] for _ in range(num_outs)] 206 | for oi, ostatistics in enumerate(statistics): 207 | for bi in range(num_buckets): 208 | mcnt, ocnt, rcnt = out_matches[oi,bi], out_totals[oi,bi], ref_total[bi] 209 | if mcnt == 0: 210 | rec, prec, fmeas = 0.0, 0.0, 0.0 211 | else: 212 | rec = mcnt / float(rcnt) 213 | prec = mcnt / float(ocnt) 214 | fmeas = 2 * prec * rec / (prec + rec) 215 | ostatistics.append( (mcnt, rcnt, ocnt, rec, prec, fmeas) ) 216 | 217 | return statistics, my_ref_total_list, my_out_totals_list, my_out_matches_list 218 | 219 | def calc_bucket_details(self, my_ref_total_list, my_out_totals_list, my_out_matches_list, num_samples=1000, sample_ratio=0.5): 220 | 221 | ref_total = np.array(my_ref_total_list).sum(0) 222 | 223 | num_outs, num_buckets = my_out_totals_list[0].shape 224 | n = len(my_ref_total_list) 225 | ids = list(range(n)) 226 | sample_size = int(np.ceil(n*sample_ratio)) 227 | rt_arr = np.array(my_ref_total_list) 228 | ot_arr = np.array(my_out_totals_list) 229 | om_arr = np.array(my_out_matches_list) 230 | statistics = [[ [] for __ in range(num_buckets) ] for _ in range(num_outs)] 231 | for _ in range(num_samples): 232 | reduced_ids = np.random.choice(ids, 
                                     size=sample_size, replace=True)
233 |       reduced_ref_total, reduced_out_totals, reduced_out_matches = rt_arr[reduced_ids].sum(0), ot_arr[reduced_ids].sum(0), om_arr[reduced_ids].sum(0)
234 |       # Calculate accuracy on the reduced sample and save stats
235 |       for oi in range(num_outs):
236 |         for bi in range(num_buckets):
237 |           mcnt, ocnt, rcnt = reduced_out_matches[oi,bi], reduced_out_totals[oi,bi], reduced_ref_total[bi]
238 |           if mcnt == 0:
239 |             rec, prec, fmeas = 0.0, 0.0, 0.0
240 |           else:
241 |             rec = mcnt / float(rcnt)
242 |             prec = mcnt / float(ocnt)
243 |             fmeas = 2 * prec * rec / (prec + rec)
244 |           statistics[oi][bi].append( (mcnt, rcnt, ocnt, rec, prec, fmeas) )
245 | 
246 |     intervals = [[] for _ in range(num_outs)]
247 |     for oi in range(num_outs):
248 |       for bi in range(num_buckets):
249 |         if len(statistics[oi][bi]) > 0:
250 |           _, _, _, recs, precs, fmeas = zip(*statistics[oi][bi])
251 |         else:
252 |           recs, precs, fmeas = [0.0], [0.0], [0.0]
253 |         # The first three elements (intervals of mcnt, ocnt and rcnt) are None
254 |         bounds = [None, None, None]
255 |         for x in [recs, precs, fmeas]:
256 |           x = list(x)
257 |           x.sort()
258 |           lower_bound = x[int(num_samples * 0.025)]
259 |           upper_bound = x[int(num_samples * 0.975)]
260 |           bounds.append( (lower_bound, upper_bound) )
261 |         intervals[oi].append(bounds)
262 | 
263 |     return ref_total, intervals
264 | 
265 |   def calc_examples(self, num_sents, num_outs,
266 |                     statistics,
267 |                     my_ref_total_list, my_out_matches_list,
268 |                     num_examples=5):
269 |     """
270 |     Calculate examples based on the computed statistics.
271 | 
272 |     Args:
273 |       num_sents: number of sentences
274 |       num_outs: number of outputs
275 |       statistics: containing a list of equal length to out, containing for each system
276 |         both_tot: the frequency of a particular bucket appearing in both output and reference
277 |         ref_tot: the frequency of a particular bucket appearing in just reference
278 |         out_tot: the frequency of a particular bucket appearing in just output
279 |         rec: recall of the bucket
280 |         prec: precision of the bucket
281 |         fmeas: f1-measure of the bucket
282 |       my_ref_total_list: containing a list of statistics of the reference
283 |       my_out_matches_list: containing a list of statistics of the outputs
284 |       num_examples: number of examples to print
285 | 
286 |     Returns:
287 |       examples: containing a list of examples to print
288 |     """
289 |     num_buckets = len(self.bucket_strs)
290 |     num_examp_feats = 3
291 |     example_scores = np.zeros( (num_sents, num_examp_feats, num_buckets) )
292 | 
293 |     # Step through the sentences
294 |     for rsi, (my_ref_total, my_out_matches) in enumerate(zip(my_ref_total_list, my_out_matches_list)):
295 | 
296 |       # Scoring of examples across different dimensions:
297 |       # 0: overall variance of matches
298 |       example_scores[rsi,0] = (my_out_matches / (my_ref_total+1e-10).reshape( (1, num_buckets) )).std(axis=0)
299 |       # 1: overall percentage of matches
300 |       example_scores[rsi,1] = my_out_matches.sum(axis=0) / (my_ref_total*num_outs+1e-10)
301 |       # 2: overall percentage of misses
302 |       example_scores[rsi,2] = (my_ref_total*num_outs-my_out_matches.sum(axis=0)) / (my_ref_total*num_outs+1e-10)
303 | 
304 |     # Calculate statistics
305 |     # Find top-5 examples of each class
306 |     examples = [[('Examples where some systems were good, some were bad', []),
307 |                  ('Examples where all systems were good', []),
308 |                  ('Examples where all systems were bad', [])] for _ in range(num_buckets)]
309 |     # NOTE: This could be made faster with argpartition, but the complexity is probably not worth it
310 |     topn = np.argsort(-example_scores, axis=0)
311 |     for bi, bexamples in enumerate(examples):
312 |       for fi, (_, fexamples) in enumerate(bexamples):
313 |         for si in topn[:num_examples,fi,bi]:
314 |           if example_scores[si,fi,bi] > 0:
315 |             fexamples.append(si)
316 | 
317 |     return examples
318 | 
319 |   def calc_source_bucketed_matches(self, src, ref, out, ref_aligns, out_aligns, src_labels=None):
320 |     """
321 |     Calculate the number of matches, bucketed by the type of word we have.
322 |     This must be used with a subclass that has self.bucket_strs defined, and self.calc_bucket(word) implemented.
323 | 
324 |     Args:
325 |       src: The source corpus
326 |       ref: The reference corpus
327 |       out: The output corpus
328 |       ref_aligns: Alignments of the reference corpus
329 |       out_aligns: Alignments of the output corpus
330 |       src_labels: Labels of the source corpus (optional)
331 | 
332 |     Returns:
333 |       A tuple containing:
334 |         both_tot: the frequency of a particular bucket appearing in both output and reference
335 |         ref_tot: the frequency of a particular bucket appearing in just reference
336 |         out_tot: the frequency of a particular bucket appearing in just output
337 |         rec: recall of the bucket
338 |         prec: precision of the bucket
339 |         fmeas: f1-measure of the bucket
340 |     """
341 |     if not hasattr(self, 'case_insensitive'):
342 |       self.case_insensitive = False
343 | 
344 |     src_labels = src_labels if src_labels else []
345 |     matches = [[0, 0, 0] for x in self.bucket_strs]
346 |     for src_sent, ref_sent, out_sent, ref_align, out_align, src_lab in itertools.zip_longest(src, ref, out, ref_aligns, out_aligns, src_labels):
347 |       ref_cnt = defaultdict(lambda: 0)
348 |       for i, word in enumerate(ref_sent):
349 |         if self.case_insensitive:
350 |           word = corpus_utils.lower(word)
351 |         ref_cnt[word] += 1
352 |       for i, (src_index, trg_index) in enumerate(out_align):
353 |         src_word = src_sent[src_index]
354 |         word = out_sent[trg_index]
355 |         if self.case_insensitive:
356 |           word = corpus_utils.lower(word)
357 |         bucket = self.calc_bucket(src_word,
358 |                                   label=src_lab[src_index] if src_lab else None)
359 |         if ref_cnt[word] > 0:
360 |           ref_cnt[word] -= 1
361 |           matches[bucket][0] += 1
362 |         matches[bucket][2] += 1
363 |       for i, (src_index, trg_index) in enumerate(ref_align):
364 |         src_word = src_sent[src_index]
365 |         bucket = self.calc_bucket(src_word,
366 |                                   label=src_lab[src_index] if src_lab else None)
367 |         matches[bucket][1] += 1
368 | 
369 |     for both_tot, ref_tot, out_tot in matches:
370 |       if both_tot == 0:
371 |         rec, prec, fmeas = 0.0, 0.0, 0.0
372 |       else:
373 |         rec = both_tot / float(ref_tot)
374 |         prec = both_tot / float(out_tot)
375 |         fmeas = 2 * prec * rec / (prec + rec)
376 |       yield both_tot, ref_tot, out_tot, rec, prec, fmeas
377 | 
378 |   def calc_bucketed_likelihoods(self, corpus, likelihoods):
379 |     """
380 |     Calculate the average of log likelihoods, bucketed by the type of word/label we have.
381 |     This must be used with a subclass that has self.bucket_strs defined, and self.calc_bucket(word) implemented.
382 | 
383 |     Args:
384 |       corpus: The text/label corpus over which we compute the likelihoods
385 |       likelihoods: The log-likelihoods corresponding to each word/label in the corpus
386 | 
387 |     Returns:
388 |       the average log-likelihood bucketed by the type of word/label we have
389 |     """
390 |     if not hasattr(self, 'case_insensitive'):
391 |       self.case_insensitive = False
392 | 
393 |     if type(corpus) == str:
394 |       corpus = corpus_utils.load_tokens(corpus)
395 |     bucketed_likelihoods = [[0.0, 0] for _ in self.bucket_strs]
396 |     if len(corpus) != len(likelihoods):
397 |       raise ValueError("Corpus and likelihoods should have the same size.")
398 |     for sent, list_of_likelihoods in zip(corpus, likelihoods):
399 |       if len(sent) != len(list_of_likelihoods):
400 |         raise ValueError("Each sentence of the corpus should have a likelihood value for each word")
401 | 
402 |       for word, ll in zip(sent, list_of_likelihoods):
403 |         if self.case_insensitive:
404 |           word = corpus_utils.lower(word)
405 |         bucket = self.calc_bucket(word, label=word)
406 |         bucketed_likelihoods[bucket][0] += ll
407 |         bucketed_likelihoods[bucket][1] += 1
408 | 
409 |     for ll, count in bucketed_likelihoods:
410 |       if count != 0:
411 |         yield ll/float(count)
412 |       else:
413 |         yield "NA" # not applicable
414 | 
415 | 
416 | class FreqWordBucketer(WordBucketer):
417 | 
418 |   def __init__(self,
419 |                freq_counts=None, freq_count_file=None, freq_corpus_file=None, freq_data=None,
420 |                bucket_cutoffs=None,
421 |                case_insensitive=False):
422 |     """
423 |     A bucketer that buckets words by their frequency.
424 | 
425 |     Args:
426 |       freq_counts: A dictionary containing word/count data.
427 |       freq_count_file: A file containing counts for each word in tab-separated word, count format.
428 |         Ignored if freq_counts exists.
429 |       freq_corpus_file: A file with a corpus used for collecting counts. Ignored if freq_count_file exists.
430 |       freq_data: A tokenized corpus from which counts can be calculated. Ignored if freq_corpus_file exists.
431 |       bucket_cutoffs: Cutoffs for each bucket.
432 |         The first bucket will be range(0,bucket_cutoffs[0]).
433 |         Middle buckets will be range(bucket_cutoffs[i-1],bucket_cutoffs[i]).
434 |         Final bucket will be everything greater than or equal to bucket_cutoffs[-1].
435 |       case_insensitive: A boolean specifying whether to turn on the case insensitive option.
436 |     """
437 |     self.case_insensitive = case_insensitive
438 |     if not freq_counts:
439 |       freq_counts = defaultdict(lambda: 0)
440 |       if freq_count_file is not None:
441 |         print(f'Reading frequency from "{freq_count_file}"')
442 |         with open(freq_count_file, "r") as f:
443 |           for line in f:
444 |             cols = line.strip().split('\t')
445 |             if len(cols) != 2:
446 |               print(f'Bad line in counts file {freq_count_file}, ignoring:\n{line}')
447 |             else:
448 |               word, freq = cols
449 |               if self.case_insensitive:
450 |                 word = corpus_utils.lower(word)
451 |               freq_counts[word] = int(freq)
452 |       elif freq_corpus_file:
453 |         print(f'Reading frequency from "{freq_corpus_file}"')
454 |         for words in corpus_utils.iterate_tokens(freq_corpus_file):
455 |           for word in words:
456 |             if self.case_insensitive:
457 |               word = corpus_utils.lower(word)
458 |             freq_counts[word] += 1
459 |       elif freq_data:
460 |         print('Reading frequency from the reference')
461 |         for words in freq_data:
462 |           for word in words:
463 |             if self.case_insensitive:
464 |               word = corpus_utils.lower(word)
465 |             freq_counts[word] += 1
466 |       else:
467 |         raise ValueError('Must have at least one source of frequency counts for FreqWordBucketer')
468 |     self.freq_counts = freq_counts
469 | 
470 |     if bucket_cutoffs is None:
471 |       bucket_cutoffs = [1, 2, 3, 4, 5, 10, 100, 1000]
472 |     self.set_bucket_cutoffs(bucket_cutoffs)
473 | 
474 |   def calc_bucket(self, word, label=None):
475 |     if self.case_insensitive:
476 |       word = corpus_utils.lower(word)
477 |     return self.cutoff_into_bucket(self.freq_counts.get(word, 0))
478 | 
479 |   def name(self):
480 |     return "frequency"
481 | 
482 |   def idstr(self):
483 |     return "freq"
484 | 
485 | class CaseWordBucketer(WordBucketer):
486 | 
487 |   def __init__(self):
488 |     """
489 |     A bucketer that buckets words by whether they're all lower-case (lower), all upper-case (upper),
490 |     title case (title), or other.
491 |     """
492 |     self.bucket_strs = ['lower', 'upper', 'title', 'other']
493 | 
494 |   def calc_bucket(self, word, label=None):
495 |     if word.islower():
496 |       return 0
497 |     elif word.isupper():
498 |       return 1
499 |     elif word.istitle():
500 |       return 2
501 |     else:
502 |       return 3
503 | 
504 |   def name(self):
505 |     return "case"
506 | 
507 |   def idstr(self):
508 |     return "case"
509 | 
510 | class LabelWordBucketer(WordBucketer):
511 | 
512 |   def __init__(self,
513 |                label_set=None):
514 |     """
515 |     A bucketer that buckets words by their labels.
516 | 
517 |     Args:
518 |       label_set: The set of labels to use as buckets. This can be a list, or a string separated by '+'s.
519 |     """
520 |     if type(label_set) == str:
521 |       label_set = label_set.split('+')
522 |     self.bucket_strs = label_set + ['other']
523 |     label_set_len = len(label_set)
524 |     self.bucket_map = defaultdict(lambda: label_set_len)
525 |     for i, l in enumerate(label_set):
526 |       self.bucket_map[l] = i
527 | 
528 |   def calc_bucket(self, word, label=None):
529 |     if not label:
530 |       raise ValueError('When calculating buckets by label, label must be non-zero')
531 |     return self.bucket_map[label]
532 | 
533 |   def name(self):
534 |     return "labels"
535 | 
536 |   def idstr(self):
537 |     return "labels"
538 | 
539 | class MultiLabelWordBucketer(WordBucketer):
540 | 
541 |   def __init__(self,
542 |                label_set=None):
543 |     """
544 |     A bucketer that buckets words by one or multiple labels.
545 | 
546 |     Args:
547 |       label_set: The set of labels to use as buckets. This can be a list, or a string separated by '+'s.
548 |     """
549 |     if type(label_set) == str:
550 |       label_set = label_set.split('+')
551 |     self.bucket_strs = label_set + ['other']
552 |     label_set_len = len(label_set)
553 |     self.bucket_map = defaultdict(lambda: label_set_len)
554 |     for i, l in enumerate(label_set):
555 |       self.bucket_map[l] = i
556 | 
557 |   def calc_bucket(self, word, label=None):
558 |     if not label:
559 |       raise ValueError('When calculating buckets by label, label must be non-zero')
560 |     label = label.split('+')
561 |     return [self.bucket_map[l] for l in label]
562 | 
563 |   def name(self):
564 |     return "multilabels"
565 | 
566 |   def idstr(self):
567 |     return "multilabels"
568 | 
569 | class NumericalLabelWordBucketer(WordBucketer):
570 | 
571 |   def __init__(self,
572 |                bucket_cutoffs=None):
573 |     """
574 |     A bucketer that buckets words by labels that are numerical values.
575 | 
576 |     Args:
577 |       bucket_cutoffs: Cutoffs for each bucket.
578 |         The first bucket will be range(0,bucket_cutoffs[0]).
579 |         Middle buckets will be range(bucket_cutoffs[i-1],bucket_cutoffs[i]).
580 |         Final bucket will be everything greater than or equal to bucket_cutoffs[-1].
581 |     """
582 |     if bucket_cutoffs is None:
583 |       bucket_cutoffs = [0.25, 0.5, 0.75]
584 |     self.set_bucket_cutoffs(bucket_cutoffs)
585 | 
586 |   def calc_bucket(self, word, label=None):
587 |     if label:
588 |       return self.cutoff_into_bucket(float(label))
589 |     else:
590 |       raise ValueError('When calculating buckets by label, label must be non-zero')
591 | 
592 |   def name(self):
593 |     return "numerical labels"
594 | 
595 |   def idstr(self):
596 |     return "numlabels"
597 | 
598 | class SentenceBucketer(Bucketer):
599 | 
600 |   def calc_bucket(self, val, ref=None, src=None, out_label=None, ref_label=None):
601 |     """
602 |     Calculate the bucket for a particular sentence
603 | 
604 |     Args:
605 |       val: The sentence to calculate the bucket for
606 |       ref: The reference sentence, if it exists
607 |       src: The source sentence, if it exists
608 |       ref_label: The label of the reference sentence, if it exists
609 |       out_label: The label of the output sentence, if it exists
610 | 
611 |     Returns:
612 |       An integer ID of the bucket
613 |     """
614 |     raise NotImplementedError('calc_bucket must be implemented in subclasses of SentenceBucketer')
615 | 
616 |   def create_bucketed_corpus(self, out, ref=None, src=None, ref_labels=None, out_labels=None):
617 |     bucketed_corpus = [([],[] if ref else None, []) for _ in self.bucket_strs]
618 |     if ref is None:
619 |       ref = out
620 | 
621 |     if ref_labels is None:
622 |       ref_labels = out_labels
623 | 
624 |     src = [None for _ in out] if src is None else src
625 | 
626 |     for i, (out_words, ref_words, src_words) in enumerate(zip(out, ref, src)):
627 |       bucket = self.calc_bucket(out_words, ref_words, src_words, label=(ref_labels[i][0] if ref_labels else None))
628 | 
629 |       bucketed_corpus[bucket][0].append(out_words)
630 |       bucketed_corpus[bucket][1].append(ref_words)
631 |       bucketed_corpus[bucket][2].append(src_words)
632 | 
633 |     return bucketed_corpus
634 | 
635 | 
636 | class ScoreSentenceBucketer(SentenceBucketer):
637 |   """
638 |   Bucket sentences by some score (e.g. BLEU)
639 |   """
640 | 
641 |   def __init__(self, score_type, bucket_cutoffs=None, case_insensitive=False):
642 |     self.score_type = score_type
643 |     self.scorer = scorers.create_scorer_from_profile(score_type)
644 |     if bucket_cutoffs is None:
645 |       bucket_cutoffs = [x * self.scorer.scale / 10.0 for x in range(1,10)]
646 |     self.set_bucket_cutoffs(bucket_cutoffs, num_type='float')
647 |     self.case_insensitive = case_insensitive
648 | 
649 |   def calc_bucket(self, val, ref=None, src=None, label=None):
650 |     if self.case_insensitive:
651 |       return self.cutoff_into_bucket(self.scorer.score_sentence(corpus_utils.lower(ref), corpus_utils.lower(val), src)[0])
652 |     else:
653 |       return self.cutoff_into_bucket(self.scorer.score_sentence(ref, val, src)[0])
654 | 
655 |   def name(self):
656 |     return self.scorer.name()
657 | 
658 |   def idstr(self):
659 |     return self.scorer.idstr()
660 | 
661 | class LengthSentenceBucketer(SentenceBucketer):
662 |   """
663 |   Bucket sentences by the length of the reference
664 |   """
665 | 
666 |   def __init__(self, bucket_cutoffs=None):
667 |     if bucket_cutoffs is None:
668 |       bucket_cutoffs = [10, 20, 30, 40, 50, 60]
669 |     self.set_bucket_cutoffs(bucket_cutoffs, num_type='int')
670 | 
671 |   def calc_bucket(self, val, ref=None, src=None, label=None):
672 |     return self.cutoff_into_bucket(len(ref))
673 | 
674 |   def name(self):
675 |     return "length"
676 | 
677 |   def idstr(self):
678 |     return "length"
679 | 
680 | class LengthDiffSentenceBucketer(SentenceBucketer):
681 |   """
682 |   Bucket sentences by the length difference between the output and the reference
683 |   """
684 | 
685 |   def __init__(self, bucket_cutoffs=None):
686 |     if bucket_cutoffs is None:
687 |       bucket_cutoffs = [-20, -10, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 11, 21]
688 |     self.set_bucket_cutoffs(bucket_cutoffs, num_type='int')
689 | 
690 |   def calc_bucket(self, val, ref=None, src=None, label=None):
691 |     return self.cutoff_into_bucket(len(val) - len(ref))
692 | 
693 |   def name(self):
694 |     return "len(output)-len(reference)"
695 | 
696 |   def idstr(self):
697 |     return "lengthdiff"
698 | 
699 | class LabelSentenceBucketer(SentenceBucketer):
700 | 
701 |   def __init__(self, label_set=None):
702 |     """
703 |     A bucketer that buckets sentences by their labels.
704 | 
705 |     Args:
706 |       label_set: The set of labels to use as buckets. This can be a list, or a string separated by '+'s.
707 |     """
708 |     if type(label_set) == str:
709 |       label_set = label_set.split('+')
710 |     self.bucket_strs = label_set + ['other']
711 |     label_set_len = len(label_set)
712 |     self.bucket_map = defaultdict(lambda: label_set_len)
713 |     for i, l in enumerate(label_set):
714 |       self.bucket_map[l] = i
715 | 
716 |   def calc_bucket(self, val, ref=None, src=None, label=None):
717 |     return self.bucket_map[label]
718 | 
719 |   def name(self):
720 |     return "labels"
721 | 
722 |   def idstr(self):
723 |     return "labels"
724 | 
725 | class MultiLabelSentenceBucketer(SentenceBucketer):
726 | 
727 |   def __init__(self, label_set=None):
728 |     """
729 |     A bucketer that buckets sentences by one or multiple labels.
730 | 
731 |     Args:
732 |       label_set: The set of labels to use as buckets. This can be a list, or a string separated by '+'s.
733 |     """
734 |     if type(label_set) == str:
735 |       label_set = label_set.split('+')
736 |     self.bucket_strs = label_set + ['other']
737 |     label_set_len = len(label_set)
738 |     self.bucket_map = defaultdict(lambda: label_set_len)
739 |     for i, l in enumerate(label_set):
740 |       self.bucket_map[l] = i
741 | 
742 |   def calc_bucket(self, val, ref=None, src=None, label=None):
743 |     label = label.split('+')
744 |     return [self.bucket_map[l] for l in label]
745 | 
746 |   def name(self):
747 |     return "multilabels"
748 | 
749 |   def idstr(self):
750 |     return "multilabels"
751 | 
752 | class NumericalLabelSentenceBucketer(SentenceBucketer):
753 | 
754 |   def __init__(self, bucket_cutoffs=None):
755 |     """
756 |     A bucketer that buckets sentences by labels that are numerical values.
757 | 
758 |     Args:
759 |       bucket_cutoffs: Cutoffs for each bucket.
760 |         The first bucket will be range(0,bucket_cutoffs[0]).
761 |         Middle buckets will be range(bucket_cutoffs[i-1],bucket_cutoffs[i]).
762 |         Final bucket will be everything greater than or equal to bucket_cutoffs[-1].
763 |     """
764 |     if bucket_cutoffs is None:
765 |       bucket_cutoffs = [0.25, 0.5, 0.75]
766 |     self.set_bucket_cutoffs(bucket_cutoffs)
767 | 
768 |   def calc_bucket(self, val, ref=None, src=None, label=None):
769 |     return self.cutoff_into_bucket(float(label))
770 | 
771 |   def name(self):
772 |     return "numerical labels"
773 | 
774 |   def idstr(self):
775 |     return "numlabels"
776 | 
777 | def create_word_bucketer_from_profile(bucket_type,
778 |                                       freq_counts=None, freq_count_file=None, freq_corpus_file=None, freq_data=None,
779 |                                       label_set=None,
780 |                                       bucket_cutoffs=None,
781 |                                       case_insensitive=False):
782 |   if type(bucket_cutoffs) == str:
783 |     bucket_cutoffs = [arg_utils.parse_intfloat(x) for x in bucket_cutoffs.split(':')]
784 |   if bucket_type == 'freq':
785 |     return FreqWordBucketer(
786 |       freq_counts=freq_counts,
787 |       freq_count_file=freq_count_file,
788 |       freq_corpus_file=freq_corpus_file,
789 |       freq_data=freq_data,
790 |       bucket_cutoffs=bucket_cutoffs,
791 |       case_insensitive=case_insensitive)
792 |   elif bucket_type == 'case':
793 |     return CaseWordBucketer()
794 |   elif bucket_type == 'label':
795 |     return LabelWordBucketer(
796 |       label_set=label_set)
797 |   elif bucket_type == 'multilabel':
798 |     return MultiLabelWordBucketer(
799 |       label_set=label_set)
800 |   elif bucket_type == 'numlabel':
801 |     return NumericalLabelWordBucketer(
802 |       bucket_cutoffs=bucket_cutoffs)
803 |   else:
804 |     raise ValueError(f'Illegal bucket type {bucket_type}')
805 | 
806 | def create_sentence_bucketer_from_profile(bucket_type,
807 |                                           score_type=None,
808 |                                           bucket_cutoffs=None,
809 |                                           label_set=None,
810 |                                           case_insensitive=False):
811 |   if type(bucket_cutoffs) == str:
812 |     bucket_cutoffs = [arg_utils.parse_intfloat(x) for x in bucket_cutoffs.split(':')]
813 |   if bucket_type == 'score':
814 |     return ScoreSentenceBucketer(score_type, bucket_cutoffs=bucket_cutoffs, case_insensitive=case_insensitive)
815 |   elif bucket_type == 'length':
816 |     return LengthSentenceBucketer(bucket_cutoffs=bucket_cutoffs)
817 |   elif bucket_type == 'lengthdiff':
818 |     return LengthDiffSentenceBucketer(bucket_cutoffs=bucket_cutoffs)
819 |   elif bucket_type == 'label':
820 |     return LabelSentenceBucketer(label_set=label_set)
821 |   elif bucket_type == 'multilabel':
822 |     return MultiLabelSentenceBucketer(
823 |       label_set=label_set)
824 |   elif bucket_type == 'numlabel':
825 |     return NumericalLabelSentenceBucketer(bucket_cutoffs=bucket_cutoffs)
826 |   else:
827 |     raise ValueError(f'Illegal bucket type {bucket_type}')
828 | 
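A quick usage sketch for the factory functions above (this is not code from the repository; it only relies on the signatures of `create_word_bucketer_from_profile` and `calc_statistics` as defined in this file, and uses hypothetical toy data):

```python
from compare_mt import bucketers

# Toy tokenized corpora: one reference and one system output
ref = [['the', 'cat', 'sat'], ['a', 'dog', 'ran']]
out = [['the', 'cat', 'sat'], ['a', 'cat', 'ran']]

# Bucket words by frequency, counting frequencies from the reference itself
bucketer = bucketers.create_word_bucketer_from_profile(
  bucket_type='freq', freq_data=ref, bucket_cutoffs=[1, 2])

# statistics[system][bucket] = (match, ref_total, out_total, recall, precision, f-measure)
statistics, _, _, _ = bucketer.calc_statistics(ref, [out])
for bucket_str, stats in zip(bucketer.bucket_strs, statistics[0]):
  print(bucket_str, stats)
```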
-------------------------------------------------------------------------------- /compare_mt/cache_utils.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from nltk.stem.porter import PorterStemmer 3 | 4 | def extract_cache_dicts(cache_dicts, key_list, num_out): 5 | if cache_dicts is not None: 6 | if len(cache_dicts) != num_out: 7 | raise ValueError(f'Length of cache_dicts should be equal to the number of output files!') 8 | if len(key_list) == 1: 9 | return [c[key_list[0]] for c in cache_dicts] 10 | return zip(*[[c[k] for k in key_list] for c in cache_dicts]) 11 | 12 | return [None]*len(key_list) 13 | 14 | def return_cache_dict(key_list, value_list): 15 | for v in value_list: 16 | if len(v) != 1: 17 | raise ValueError(f'Only support caching for one system at a time!') 18 | cache_dict = {k:v[0] for (k, v) in zip(key_list, value_list)} 19 | return cache_dict 20 | 21 | class CachedPorterStemmer(PorterStemmer): 22 | """A wrapper class for PorterStemmer that uses LRU cache to reduce latency""" 23 | def __init__(self, mode=PorterStemmer.NLTK_EXTENSIONS): 24 | super().__init__(mode) 25 | 26 | @lru_cache(maxsize=50000) 27 | def stem(self, word, to_lowercase=True): 28 | return super().stem(word, to_lowercase) -------------------------------------------------------------------------------- /compare_mt/compare_ll_main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | # In-package imports 4 | from compare_mt import corpus_utils 5 | from compare_mt import bucketers 6 | from compare_mt import arg_utils 7 | from compare_mt import print_utils 8 | from compare_mt import formatting 9 | 10 | def print_word_likelihood_report(ref, lls, bucket_type='freq', bucket_cutoffs=None, 11 | freq_count_file=None, freq_corpus_file=None, 12 | label_corpus=None, label_set=None, 13 | case_insensitive=False): 14 | """ 15 | Print a report comparing the word log likelihood. 16 | 17 | Args: 18 | ref: the ref of words over which the likelihoods are computed 19 | lls: likelihoods corresponding to each word in ref from the systems 20 | bucket_type: A string specifying the way to bucket words together to calculate average likelihood 21 | bucket_cutoffs: The boundaries between buckets, specified as a colon-separated string. 22 | freq_corpus_file: When using "freq" as a bucketer, which corpus to use to calculate frequency. 23 | freq_count_file: An alternative to freq_corpus that uses a count file in "word\tfreq" format. 
24 |     label_corpus: When using "label" as bucket type, the corpus containing the labels
25 |       corresponding to each word in the corpus
26 |     label_set: the permissible set of labels when using "label" as a bucket type
27 |     case_insensitive: A boolean specifying whether to turn on the case insensitive option
28 |   """
29 |   case_insensitive = (case_insensitive == 'True') if isinstance(case_insensitive, str) else case_insensitive
30 | 
31 |   bucketer = bucketers.create_word_bucketer_from_profile(bucket_type=bucket_type,
32 |                                                          bucket_cutoffs=bucket_cutoffs,
33 |                                                          freq_count_file=freq_count_file,
34 |                                                          freq_corpus_file=freq_corpus_file,
35 |                                                          label_set=label_set,
36 |                                                          case_insensitive=case_insensitive)
37 | 
38 |   if type(label_corpus) == str:
39 |     label_corpus = corpus_utils.load_tokens(label_corpus)
40 | 
41 |   if label_corpus is not None:
42 |     ref = label_corpus
43 | 
44 |   lls_out = [list(bucketer.calc_bucketed_likelihoods(ref, ll)) for ll in lls]
45 | 
46 |   print(f'--- average word log likelihood by {bucketer.name()} bucket')
47 |   for i, bucket_str in enumerate(bucketer.bucket_strs):
48 |     print(bucket_str + "\t", end='')
49 |     for ll_out in lls_out:
50 |       print(f"{formatting.fmt(ll_out[i])}\t", end="")
51 |     print()
52 | 
53 | def main():
54 |   parser = argparse.ArgumentParser(
55 |     description='Program to compare the per-word log likelihoods assigned by multiple systems',
56 |   )
57 |   parser.add_argument('--ref-file', type=str, dest='ref_file',
58 |                       help='A path to a reference file over which the likelihoods are being computed/compared')
59 |   parser.add_argument('--ll-files', type=str, nargs='+', dest='ll_files',
60 |                       help='Paths to files containing log likelihoods for ref-file, one file per system')
61 |   parser.add_argument('--compare-word-likelihoods', type=str, dest='compare_word_likelihoods', nargs='*',
62 |                       default=['bucket_type=freq'],
63 |                       help="""
64 |                       Compare word log likelihoods by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
65 |                       See documentation for 'print_word_likelihood_report' to see which arguments are available.
66 |                       """)
67 |   parser.add_argument('--decimals', type=int, default=4,
68 |                       help="Number of decimals to print for floating point numbers")
69 | 
70 |   args = parser.parse_args()
71 | 
72 |   # Set formatting
73 |   formatting.fmt.set_decimals(args.decimals)
74 | 
75 |   ref = corpus_utils.load_tokens(args.ref_file)
76 |   lls = [corpus_utils.load_nums(x) for x in args.ll_files]
77 | 
78 |   # Word likelihood analysis
79 |   if args.compare_word_likelihoods:
80 |     print_utils.print_header('Word Likelihood Analysis')
81 |     for profile in args.compare_word_likelihoods:
82 |       kwargs = arg_utils.parse_profile(profile)
83 |       print_word_likelihood_report(ref, lls, **kwargs)
84 |       print()
85 | 
86 | 
87 | if __name__ == '__main__':
88 |   main()
89 | 
-------------------------------------------------------------------------------- /compare_mt/corpus_utils.py: --------------------------------------------------------------------------------
1 | def iterate_tokens(filename):
2 |   with open(filename, "r", encoding="utf-8") as f:
3 |     for line in f:
4 |       yield line.strip().split(' ')
5 | 
6 | def load_tokens(filename):
7 |   return list(iterate_tokens(filename))
8 | 
9 | def iterate_nums(filename):
10 |   with open(filename, "r", encoding="utf-8") as f:
11 |     for line in f:
12 |       yield [float(i) for i in line.strip().split(' ')]
13 | 
14 | def load_nums(filename):
15 |   return list(iterate_nums(filename))
16 | 
17 | def iterate_alignments(filename):
18 |   with open(filename, "r", encoding="utf-8") as f:
19 |     for line in f:
20 |       try:
21 |         yield [(int(src), int(trg)) for (src, trg) in [x.split('-') for x in line.strip().split(' ')]]
22 |       except ValueError:
23 |         raise ValueError(f'Poorly formed alignment line in {filename}:\n{line}')
24 | 
25 | def load_alignments(filename):
26 |   return list(iterate_alignments(filename))
27 | 
28 | def lower(inp):
29 |   return inp.lower() if type(inp) == str else [lower(x) for x in inp]
30 | 
31 | def list2str(l):
32 |   return ' '.join(str(s) for s in l)
33 | 
34 | def write_tokens(filename, ls):
35 |   with open(filename, 'w', encoding='utf-8') as f:
36 |     f.write('\n'.join(list2str(l) for l in ls))
37 | 
-------------------------------------------------------------------------------- /compare_mt/formatting.py: --------------------------------------------------------------------------------
1 | import re
2 | 
3 | class Formatter(object):
4 | 
5 |   latex_substitutions = {
6 |     re.compile(r"\["): "{[}",
7 |     re.compile(r"\]"): "{]}",
8 |     re.compile(r"<"): r"\\textless",
9 |     re.compile(r">"): r"\\textgreater"
10 |   }
11 | 
12 |   def __init__(self, decimals=4):
13 |     self.set_decimals(decimals)
14 | 
15 |   def set_decimals(self, decimals):
16 |     self.decimals = decimals
17 | 
18 |   def escape_latex(self, x):
19 |     """Adds escape sequences wherever needed to make the output
20 |     LaTeX compatible"""
21 |     for pat, replace_with in self.latex_substitutions.items():
22 |       x = pat.sub(replace_with, x)
23 |     return x
24 | 
25 |   def __call__(self, x, latex=True):
26 |     """Convert object to string with controlled decimals"""
27 |     if isinstance(x, str):
28 |       return self.escape_latex(x) if latex else x
29 |     elif isinstance(x, int):
30 |       return f"{x:d}"
31 |     elif isinstance(x, float):
32 |       return f"{x:.{self.decimals}f}"
33 |     else:
34 |       return str(x)
35 | 
36 | fmt = Formatter(decimals=4)
37 | 
--------------------------------------------------------------------------------
/compare_mt/ngram_utils.py: --------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | import itertools
3 | 
4 | def sent_ngrams_list(words, n):
5 |   """
6 |   Create a list with all the n-grams in a sentence
7 | 
8 |   Arguments:
9 |     words: A list of strings representing a sentence
10 |     n: The ngram length to consider
11 | 
12 |   Returns:
13 |     A list of n-grams in the sentence
14 |   """
15 |   word_ngram = []
16 |   for i in range(len(words) - n + 1):
17 |     ngram = tuple(words[i:i + n])
18 |     word_ngram.append(ngram)
19 |   return word_ngram
20 | 
21 | def iterate_sent_ngrams(words, labels=None, min_length=1, max_length=4):
22 |   """
23 |   Iterate over all n-grams in a sentence, optionally paired with labels
24 | 
25 |   Arguments:
26 |     words: A list of strings representing a sentence
27 |     labels: A list of labels on each word in the sentence, optional (will use `words` if not specified)
28 |     min_length: The minimum ngram length to consider
29 |     max_length: The maximum ngram length to consider
30 | 
31 |   Returns:
32 |     An iterator over n-grams in the sentence with both words and labels
33 |   """
34 |   if labels is not None and len(labels) != len(words):
35 |     raise ValueError(f'length of labels and sentence must be the same but got'
36 |                      f' {len(words)} != {len(labels)} at\n{words}\n{labels}')
37 |   for n in range(min_length-1, max_length):
38 |     for i in range(len(words) - n):
39 |       word_ngram = tuple(words[i:i + n + 1])
40 |       label_ngram = tuple(labels[i:i + n + 1]) if (labels is not None) else word_ngram
41 |       yield word_ngram, label_ngram
42 | 
43 | def compare_ngrams(ref, out, ref_labels=None, out_labels=None, min_length=1, max_length=4):
44 |   """
45 |   Compare n-grams appearing in the reference sentences and output
46 | 
47 |   Args:
48 |     ref: A list of reference sentences
49 |     out: A list of output sentences
50 |     ref_labels: Alternative labels for reference words (e.g. POS tags) to use when aggregating counts
51 |     out_labels: Alternative labels for output words (e.g. POS tags) to use when aggregating counts
52 |     min_length: The minimum length of n-grams to consider
53 |     max_length: The maximum length of n-grams to consider
54 | 
55 |   Returns:
56 |     A tuple of dictionaries including
57 |       total: the total number of n-grams in the output
58 |       match: the total number of matched n-grams appearing in both output and reference
59 |       over: the total number of over-generated n-grams appearing in the output but not the reference
60 |       under: the total number of under-generated n-grams appearing in the reference but not the output
61 |   """
62 |   if (ref_labels is None) != (out_labels is None):
63 |     raise ValueError('ref_labels and out_labels must both be either None or not None')
64 |   total, match, over, under = [defaultdict(lambda: 0) for _ in range(4)]
65 |   if ref_labels is None: ref_labels = []
66 |   if out_labels is None: out_labels = []
67 |   for ref_sent, out_sent, ref_lab, out_lab in itertools.zip_longest(ref, out, ref_labels, out_labels):
68 |     # Find the number of reference n-grams (on a word level)
69 |     ref_ngrams = list(iterate_sent_ngrams(ref_sent, labels=ref_lab, min_length=min_length, max_length=max_length))
70 |     ref_word_counts = defaultdict(lambda: 0)
71 |     for ref_w, ref_l in ref_ngrams:
72 |       ref_word_counts[ref_w] += 1
73 |     # Step through the output ngrams and find matched and overproduced ones
74 |     for out_w, out_l in iterate_sent_ngrams(out_sent, labels=out_lab, min_length=min_length, max_length=max_length):
75 |       total[out_l] += 1
76 |       if ref_word_counts[out_w] > 0:
77 |         match[out_l] += 1
78 |         ref_word_counts[out_w] -= 1
79 |       else:
80 |         over[out_l] += 1
81 |     # Remaining ones are underproduced
82 |     # (do reverse order just to make ordering consistent for over and under, shouldn't matter much)
83 |     for ref_w, ref_l in reversed(ref_ngrams):
84 |       if ref_word_counts[ref_w] > 0:
85 |         under[ref_l] += 1
86 |         ref_word_counts[ref_w] -= 1
87 |   return total, match, over, under
88 | 
--------------------------------------------------------------------------------
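To make the over/under bookkeeping concrete, here is a small sketch of `compare_ngrams` on a single sentence pair; the toy sentences are invented for illustration and not part of the module:

```python
# Hypothetical sketch: count matched / over-generated n-grams (toy data).
from compare_mt import ngram_utils

ref = [['the', 'cat', 'sat']]
out = [['the', 'cat', 'sat', 'sat']]
total, match, over, under = ngram_utils.compare_ngrams(ref, out, min_length=1, max_length=2)
# The unigram ('sat',) occurs twice in the output but only once in the reference,
# so one occurrence is matched and the other is counted as over-generated.
print(total[('sat',)], match[('sat',)], over[('sat',)])  # 2 1 1
```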
/compare_mt/print_utils.py: --------------------------------------------------------------------------------
1 | def print_header(header):
2 |   print(f'********************** {header} ************************')
-------------------------------------------------------------------------------- /compare_mt/reporters.py: --------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('agg')
3 | from matplotlib import pyplot as plt
4 | from cycler import cycler
5 | plt.rcParams['font.family'] = 'sans-serif'
6 | plt.rcParams['axes.prop_cycle'] = cycler(color=["#7293CB", "#E1974C", "#84BA5B", "#D35E60", "#808585", "#9067A7", "#AB6857", "#CCC210"])
7 | import numpy as np
8 | import os
9 | import itertools
10 | from compare_mt.formatting import fmt
11 | 
12 | from functools import partial
13 | from http.server import SimpleHTTPRequestHandler, HTTPServer
14 | import socket
15 | from pathlib import Path
16 | import logging as log
17 | 
18 | log.basicConfig(level=log.INFO)
19 | 
20 | # Global variables used by all reporters. These are set by compare_mt_main.py
21 | sys_names = None
22 | fig_size = None
23 | 
24 | # The CSS style file to use
25 | css_style = """
26 | html {
27 |   font-family: sans-serif;
28 | }
29 | 
30 | table, th, td {
31 |   border: 1px solid black;
32 | }
33 | 
34 | th, td {
35 |   padding: 2px;
36 | }
37 | 
38 | tr:hover {background-color: #f5f5f5;}
39 | 
40 | tr:nth-child(even) {background-color: #f2f2f2;}
41 | 
42 | th {
43 |   background-color: #396AB1;
44 |   color: white;
45 | }
46 | 
47 | em {
48 |   font-weight: bold;
49 | }
50 | 
51 | caption {
52 |   font-size: 14pt;
53 |   font-weight: bold;
54 | }
55 | 
56 | table {
57 |   border-collapse: collapse;
58 | }
59 | """
60 | 
61 | # The Javascript header to use
62 | javascript_style = """
63 | function showhide(elem) {
64 |   var x = document.getElementById(elem);
65 |   if (x.style.display === "none") {
66 |     x.style.display = "block";
67 |   } else {
68 |     x.style.display = "none";
69 |   }
70 | }
71 | """
72 | 
73 | fig_counter, tab_counter = 0, 0
74 | def next_fig_id():
75 |   global fig_counter
76 |   fig_counter += 1
77 |   return f'{fig_counter:03d}'
78 | def next_tab_id():
79 |   global tab_counter
80 |   tab_counter += 1
81 |   return f'{tab_counter:03d}'
82 | 
83 | def make_bar_chart(datas,
84 |                    output_directory, output_fig_file, output_fig_format='png',
85 |                    errs=None, title=None, xlabel=None, xticklabels=None, ylabel=None):
86 |   fig, ax = plt.subplots(figsize=fig_size)
87 |   ind = np.arange(len(datas[0]))
88 |   width = 0.7/len(datas)
89 |   bars = []
90 |   for i, data in enumerate(datas):
91 |     err = errs[i] if errs is not None else None
92 |     bars.append(ax.bar(ind+i*width, data, width, bottom=0, yerr=err))
93 |   # Set axis/title labels
94 |   if title is not None:
95 |     ax.set_title(title)
96 |   if xlabel is not None:
97 |     ax.set_xlabel(xlabel)
98 |   if ylabel is not None:
99 |     ax.set_ylabel(ylabel)
100 |   if xticklabels is not None:
101 |     ax.set_xticks(ind + width / 2)
102 |     ax.set_xticklabels(xticklabels)
103 |     plt.xticks(rotation=70)
104 |   else:
105 |     ax.xaxis.set_visible(False)
106 | 
107 |   ax.legend(bars, sys_names)
108 |   ax.autoscale_view()
109 | 
110 |   if not os.path.exists(output_directory):
111 |     os.makedirs(output_directory)
112 |   out_file = os.path.join(output_directory, f'{output_fig_file}.{output_fig_format}')
113 |   plt.savefig(out_file, format=output_fig_format, bbox_inches='tight')
114 | 
115 | def html_img_reference(fig_file, title):
116 |   latex_code_pieces = [r"\begin{figure}[h]",
117 |                        r"  \centering",
118 |                        r"  \includegraphics{" + fig_file + ".pdf}",
119 |                        r"  \caption{" + title + "}",
120 |                        r"  \label{fig:" + fig_file + "}",
121 |                        r"\end{figure}"]
122 |   latex_code = "\n".join(latex_code_pieces)
123 |   return (f'{title}<br/> <img src="{fig_file}.png" alt="{title}"> <br/>' +
124 |           f'<button onclick="showhide(\'{fig_file}_latex\')">Show/Hide LaTeX</button> <br/>' +
125 |           f'<div id="{fig_file}_latex" style="display:none"><pre>{latex_code}</pre></div>')
126 | 
127 | class Report:
128 |   # def __init__(self, iterable=(), **kwargs):
129 |   #   # Initialize a report by a dictionary which contains all the statistics
130 |   #   self.__dict__.update(iterable, **kwargs)
131 | 
132 |   def print(self):
133 |     raise NotImplementedError('print must be implemented in subclasses of Report')
134 | 
135 |   def plot(self, output_directory, output_fig_file, output_fig_type):
136 |     raise NotImplementedError('plot must be implemented in subclasses of Report')
137 | 
138 |   def print_header(self, header):
139 |     print(f'********************** {header} ************************')
140 | 
141 |   def print_tabbed_table(self, tab):
142 |     for x in tab:
143 |       print('\t'.join([fmt(y, latex=False) if y else '' for y in x]))
144 |     print()
145 | 
146 |   def generate_report(self, output_fig_file=None, output_fig_format=None, output_directory=None):
147 |     self.print()
148 | 
149 | class ScoreReport(Report):
150 |   def __init__(self, scorer, scores, strs,
151 |                wins=None, sys_stats=None, prob_thresh=0.05,
152 |                title=None):
153 |     self.scorer = scorer
154 |     self.scores = scores
155 |     self.strs = [f'{fmt(x)} ({y})' if y else fmt(x) for (x, y) in zip(scores, strs)]
156 |     self.wins = wins
157 |     self.sys_stats = sys_stats
158 |     self.output_fig_file = f'{next_fig_id()}-score-{scorer.idstr()}'
159 |     self.prob_thresh = prob_thresh
160 |     self.title = scorer.name() if not title else title
161 | 
162 |   def winstr_pval(self, my_wins):
163 |     if 1-my_wins[0] < self.prob_thresh:
164 |       winstr = 's1>s2'
165 |     elif 1-my_wins[1] < self.prob_thresh:
166 |       winstr = 's2>s1'
167 |     else:
168 |       winstr = '-'
169 |     pval = 1-(my_wins[0] if my_wins[0] > my_wins[1] else my_wins[1])
170 |     return winstr, pval
171 | 
172 |   def scores_to_tables(self):
173 |     if self.wins is None:
174 |       # Single table with just scores
175 |       return [[""]+sys_names, [self.scorer.name()]+self.strs], None
176 |     elif len(self.scores) == 1:
177 |       # Single table with scores for one system
178 |       return [
179 |         [""]+sys_names,
180 |         [self.scorer.name()]+self.strs,
181 |         [""]+[f'[{fmt(x["lower_bound"])},{fmt(x["upper_bound"])}]' for x in self.sys_stats]
182 |       ], None
183 |     elif len(self.scores) == 2:
184 |       # Single table with scores and wins for two systems
185 |       winstr, pval = self.winstr_pval(self.wins[0][1])
186 |       return [
187 |         [""]+sys_names+["Win?"],
188 |         [self.scorer.name()]+self.strs+[winstr],
189 |         [""]+[f'[{fmt(x["lower_bound"])},{fmt(x["upper_bound"])}]' for x in self.sys_stats]+[f'p={fmt(pval)}']
190 |       ], None
191 |     else:
192 |       # Table with scores, and separate one with wins for multiple systems
193 |       wptable = [['v s1 / s2 ->'] + [sys_names[i] for i in range(1, len(self.scores))]]
194 |       for i in range(0, len(self.scores)-1):
195 |         wptable.append([sys_names[i]] + [""] * (len(self.scores)-1))
196 |       for (left, right), my_wins in self.wins:
197 |         winstr, pval = self.winstr_pval(my_wins)
198 |         wptable[left+1][right] = f'{winstr} (p={fmt(pval)})'
199 |       return [[""]+sys_names, [self.scorer.name()]+self.strs], wptable
200 | 
201 |   def print(self):
202 |     aggregate_table, win_table = self.scores_to_tables()
203 |     self.print_header('Aggregate Scores')
204 |     print(f'{self.title}:')
205 |     self.print_tabbed_table(aggregate_table)
206 |     if win_table:
207 |       self.print_tabbed_table(win_table)
208 | 
209 |   def plot(self, output_directory, output_fig_file, output_fig_format='pdf'):
210 |     sys = [[score] for score in self.scores]
211 |     if self.wins:
212 |       sys_errs = [np.array([ [score-stat['lower_bound']], [stat['upper_bound']-score] ]) for (score,stat) in
zip(self.scores, self.sys_stats)] 213 | else: 214 | sys_errs = None 215 | xticklabels = None 216 | 217 | make_bar_chart(sys, 218 | output_directory, output_fig_file, 219 | output_fig_format=output_fig_format, 220 | errs=sys_errs, ylabel=self.scorer.name(), 221 | xticklabels=xticklabels) 222 | 223 | def html_content(self, output_directory): 224 | aggregate_table, win_table = self.scores_to_tables() 225 | html = html_table(aggregate_table, title=self.title) 226 | if win_table: 227 | html += html_table(win_table, title=f'{self.scorer.name()} Wins') 228 | for ext in ('png', 'pdf'): 229 | self.plot(output_directory, self.output_fig_file, ext) 230 | html += html_img_reference(self.output_fig_file, 'Score Comparison') 231 | return html 232 | 233 | class WordReport(Report): 234 | def __init__(self, bucketer, statistics, 235 | acc_type, header, 236 | examples=None, 237 | bucket_cnts=None, 238 | bucket_intervals=None, 239 | src_sents=None, 240 | ref_sents=None, ref_labels=None, 241 | out_sents=None, out_labels=None, 242 | src_labels=None, ref_aligns=None, 243 | title=None): 244 | self.bucketer = bucketer 245 | self.statistics = [[s for s in stat] for stat in statistics] 246 | self.examples = examples 247 | self.bucket_cnts = bucket_cnts 248 | self.bucket_intervals = bucket_intervals 249 | self.src_sents = src_sents 250 | self.ref_sents = ref_sents 251 | self.ref_labels = ref_labels 252 | self.out_sents = out_sents 253 | self.out_labels = out_labels 254 | self.src_labels = src_labels 255 | self.ref_aligns = ref_aligns 256 | self.acc_type = acc_type 257 | self.header = header 258 | self.acc_type_map = {'prec': 3, 'rec': 4, 'fmeas': 5} 259 | self.output_fig_file = f'{next_fig_id()}-wordacc-{bucketer.name()}' 260 | self.title = title if title else f'word {acc_type} by {bucketer.name()} bucket' 261 | 262 | def print(self): 263 | acc_type_map = self.acc_type_map 264 | bucketer, statistics, acc_type, header = self.bucketer, self.statistics, self.acc_type, self.header 265 | self.print_header(header) 266 | acc_types = acc_type.split('+') 267 | for at in acc_types: 268 | if at not in acc_type_map: 269 | raise ValueError(f'Unknown accuracy type {at}') 270 | aid = acc_type_map[at] 271 | print(f'--- {self.title}') 272 | # first line 273 | print(f'{bucketer.name()}', end='') 274 | if self.bucket_cnts is not None: 275 | print(f'\t# words', end='') 276 | for sn in sys_names: 277 | print(f'\t{sn}', end='') 278 | print() 279 | # stats 280 | for i, bucket_str in enumerate(bucketer.bucket_strs): 281 | print(f'{bucket_str}', end='') 282 | if self.bucket_cnts is not None: 283 | print(f'\t{self.bucket_cnts[i]}', end='') 284 | for j, match in enumerate(statistics): 285 | print(f'\t{fmt(match[i][aid])}', end='') 286 | if self.bucket_intervals is not None: 287 | low, up = self.bucket_intervals[j][i][aid] 288 | print(f' [{fmt(low)}, {fmt(up)}]', end='') 289 | print() 290 | print() 291 | 292 | def plot(self, output_directory, output_fig_file, output_fig_format='pdf'): 293 | acc_types = self.acc_type.split('+') 294 | for at in acc_types: 295 | if at not in self.acc_type_map: 296 | raise ValueError(f'Unknown accuracy type {at}') 297 | aid = self.acc_type_map[at] 298 | sys = [[m[aid] for m in match] for match in self.statistics] 299 | xticklabels = [s for s in self.bucketer.bucket_strs] 300 | 301 | if self.bucket_intervals: 302 | errs = [] 303 | for i, match in enumerate(sys): 304 | lows, ups = [], [] 305 | for j, score in enumerate(match): 306 | low, up = self.bucket_intervals[i][j][aid] 307 | lows.append(score-low) 308 | 
ups.append(up-score)
309 |         errs.append(np.array([lows, ups]))
310 |     else:
311 |       errs = None
312 | 
313 |     make_bar_chart(sys,
314 |                    output_directory, output_fig_file,
315 |                    output_fig_format=output_fig_format,
316 |                    errs=errs,
317 |                    xlabel=self.bucketer.name(), ylabel=at,
318 |                    xticklabels=xticklabels)
319 | 
320 |   def highlight_words(self, sent, hls=None):
321 |     if not hls:
322 |       return ' '.join(sent)
323 |     return ' '.join([f'<em>{w}</em>' if hl else w for (w, hl) in zip(sent, hls)])
324 | 
325 |   def write_examples(self, title, output_directory):
326 |     # Create separate examples HTML file
327 |     html = ''
328 |     for bi, bucket_examples in enumerate(self.examples):
329 |       html += f'<a name="bucket{bi}"></a>'
330 |       html += tag_str('h3', f'Examples for Bucket {self.bucketer.bucket_strs[bi]}')
331 |       for tag, examp_ids in bucket_examples:
332 |         # Skip ones with no examples
333 |         if len(examp_ids) == 0:
334 |           continue
335 |         html += tag_str('h4', tag)
336 |         for eid in examp_ids:
337 |           table = [['', 'Output']]
338 |           # Find buckets for the examples if it's on the source side (will have alignments in this case)
339 |           if self.ref_aligns:
340 |             _, _, _, src_buckets, ref_aligns, ref_matches = \
341 |               self.bucketer._calc_src_buckets_and_matches(self.src_sents[eid],
342 |                                                           self.src_labels[eid] if self.src_labels else None,
343 |                                                           self.ref_sents[eid],
344 |                                                           self.ref_aligns[eid],
345 |                                                           [x[eid] for x in self.out_sents])
346 |             src_hls = [x == bi for x in src_buckets]
347 |             table.append(['Src', self.highlight_words(self.src_sents[eid], src_hls)])
348 |             ref_hls = [False for _ in self.ref_sents[eid]]
349 |             out_hls = [[False for _ in x[eid]] for x in self.out_sents]
350 |             for sid, tid in self.ref_aligns[eid]:
351 |               if src_hls[sid]:
352 |                 ref_hls[tid] = True
353 |                 for rm, ohls in zip(ref_matches, out_hls):
354 |                   if rm[tid] >= 0:
355 |                     ohls[rm[tid]] = True
356 |           # Find buckets for the examples if it's on the target side
357 |           else:
358 |             _, _, _, ref_buckets, out_buckets, out_matches = \
359 |               self.bucketer._calc_trg_buckets_and_matches(self.ref_sents[eid],
360 |                                                           self.ref_labels[eid] if self.ref_labels else None,
361 |                                                           [x[eid] for x in self.out_sents],
362 |                                                           [x[eid] for x in self.out_labels] if self.out_labels else None)
363 |             ref_hls = [x == bi for x in ref_buckets]
364 |             out_hls = [[(b == bi and m >= 0) for (b, m) in zip(ob, om)] for (ob, om) in zip(out_buckets, out_matches)]
365 |           table.append(['Ref', self.highlight_words(self.ref_sents[eid], ref_hls)])
366 |           for sn, oss, ohl in itertools.zip_longest(sys_names, self.out_sents, out_hls):
367 |             table.append([sn, self.highlight_words(oss[eid], ohl)])
368 |           html += html_table(table, None)
369 |     with open(f'{output_directory}/{self.output_fig_file}.html', 'w') as example_stream:
370 |       example_stream.write(styled_html_message(title, html))
371 | 
372 |   def html_content(self, output_directory):
373 |     acc_type_map = self.acc_type_map
374 |     bucketer, matches, acc_type, header = self.bucketer, self.statistics, self.acc_type, self.header
375 |     acc_types = acc_type.split('+')
376 | 
377 |     title = f'Word {acc_type} by {bucketer.name()} bucket' if not self.title else self.title
378 | 
379 |     if self.examples:
380 |       self.write_examples(title, output_directory)
381 | 
382 |     # Create main HTML content
383 |     html = ''
384 |     for at in acc_types:
385 |       if at not in acc_type_map:
386 |         raise ValueError(f'Unknown accuracy type {at}')
387 |       aid = acc_type_map[at]
388 |       line = [bucketer.name()]
389 |       if self.bucket_cnts is not None:
390 |         line.append('# words')
391 |       line += sys_names
392 |       table = [line]
393 |       if self.examples:
394 |         table[0].append('Examples')
395 |       for i, bs in enumerate(bucketer.bucket_strs):
396 |         line = [bs]
397 |         if self.bucket_cnts is not None:
398 |           line.append(f'{self.bucket_cnts[i]}')
399 |         for j, match in enumerate(matches):
400 |           line.append(f'{fmt(match[i][aid])}')
401 |           if self.bucket_intervals is not None:
402 |             low, up = self.bucket_intervals[j][i][aid]
403 |             line[-1] += f' [{fmt(low)}, {fmt(up)}]'
404 |         if self.examples:
405 |           line.append(f'<a href="{self.output_fig_file}.html#bucket{i}">Examples</a>')
406 |         table += [line]
407 |       html += html_table(table, title, latex_ignore_cols={3})
408 |       img_name = f'{self.output_fig_file}-{at}'
409 |       for ext in ('png', 'pdf'):
410 |         self.plot(output_directory, img_name, ext)
411 |       html += html_img_reference(img_name, self.header)
412 |     return html
413 | 
414 | class NgramReport(Report):
415 |   def __init__(self, scorelist, report_length, min_ngram_length, max_ngram_length,
416 |                matches, compare_type, alpha, compare_directions=[(0, 1)], label_files=None, title=None):
417 |     self.scorelist = scorelist
418 |     self.report_length = report_length
419 |     self.min_ngram_length = min_ngram_length
420 |     self.max_ngram_length = max_ngram_length
421 |     self.matches = matches
422 |     self.compare_type = compare_type
423 |     self.label_files = label_files
424 |     self.alpha = alpha
425 |     self.compare_directions = compare_directions
426 |     self.title = title
427 | 
428 |   def print(self):
429 |     report_length = self.report_length
430 |     self.print_header('N-gram Difference Analysis')
431 |     if self.title:
432 |       print(f'--- {self.title}')
433 |     else:
434 |       print(f'--- min_ngram_length={self.min_ngram_length}, max_ngram_length={self.max_ngram_length}')
435 |       print(f'    report_length={report_length}, alpha={self.alpha}, compare_type={self.compare_type}')
436 | 
437 |     if self.label_files is not None:
438 |       print(self.label_files)
439 | 
440 |     for i, (left, right) in enumerate(self.compare_directions):
441 |       print(f'--- {report_length} n-grams where {sys_names[left]}>{sys_names[right]} in {self.compare_type}')
442 |       for k, v in self.scorelist[i][:report_length]:
443 |         print(f"{' '.join(k)}\t{fmt(v)} (sys{left+1}={self.matches[left][k]}, sys{right+1}={self.matches[right][k]})")
444 |       print()
445 |       print(f'--- {report_length} n-grams where {sys_names[right]}>{sys_names[left]} in {self.compare_type}')
446 |       for k, v in reversed(self.scorelist[i][-report_length:]):
447 |         print(f"{' '.join(k)}\t{fmt(v)} (sys{left+1}={self.matches[left][k]}, sys{right+1}={self.matches[right][k]})")
448 |       print()
449 | 
450 |   def plot(self, output_directory, output_fig_file, output_fig_format='pdf'):
451 |     raise NotImplementedError('Plotting is not implemented for n-gram reports')
452 | 
453 |   def html_content(self, output_directory=None):
454 |     report_length = self.report_length
455 |     if self.title:
456 |       html = tag_str('p', self.title)
457 |     else:
458 |       html = tag_str('p', f'min_ngram_length={self.min_ngram_length}, max_ngram_length={self.max_ngram_length}')
459 |       html += tag_str('p', f'report_length={report_length}, alpha={self.alpha}, compare_type={self.compare_type}')
460 |     if self.label_files is not None:
461 |       html += tag_str('p', self.label_files)
462 | 
463 |     for i, (left, right) in enumerate(self.compare_directions):
464 |       title = f'{report_length} n-grams where {sys_names[left]}>{sys_names[right]} in {self.compare_type}'
465 |       table = [['n-gram', self.compare_type, f'{sys_names[left]}', f'{sys_names[right]}']]
466 |       table.extend([[' '.join(k), fmt(v), self.matches[left][k], self.matches[right][k]] for k, v in self.scorelist[i][:report_length]])
467
| html += html_table(table, title) 468 | 469 | title = f'{report_length} n-grams where {sys_names[right]}>{sys_names[left]} in {self.compare_type}' 470 | table = [['n-gram', self.compare_type, f'{sys_names[left]}', f'{sys_names[right]}']] 471 | table.extend([[' '.join(k), fmt(v), self.matches[left][k], self.matches[right][k]] for k, v in reversed(self.scorelist[i][-report_length:])]) 472 | html += html_table(table, title) 473 | return html 474 | 475 | class SentenceReport(Report): 476 | 477 | def __init__(self, bucketer=None, sys_stats=None, statistic_type=None, scorer=None, bucket_cnts=None, bucket_intervals=None, title=None): 478 | self.bucketer = bucketer 479 | self.sys_stats = [[s for s in stat] for stat in sys_stats] 480 | self.statistic_type = statistic_type 481 | self.scorer = scorer 482 | self.bucket_cnts = bucket_cnts 483 | self.bucket_intervals = bucket_intervals 484 | self.yname = scorer.name() if statistic_type == 'score' else statistic_type 485 | self.yidstr = scorer.idstr() if statistic_type == 'score' else statistic_type 486 | self.output_fig_file = f'{next_fig_id()}-sent-{bucketer.idstr()}-{self.yidstr}' 487 | if title: 488 | self.title = title 489 | elif scorer: 490 | self.title = f'bucket type: {bucketer.name()}, statistic type: {scorer.name()}' 491 | else: 492 | self.title = f'bucket type: {bucketer.name()}, statistic type: {statistic_type}' 493 | 494 | def print(self): 495 | self.print_header('Sentence Bucket Analysis') 496 | print(f'--- {self.title}') 497 | # first line 498 | print(f'{self.bucketer.idstr()}', end='') 499 | if self.bucket_cnts is not None: 500 | print(f'\t# sents', end='') 501 | for sn in sys_names: 502 | print(f'\t{sn}', end='') 503 | print() 504 | for i, bs in enumerate(self.bucketer.bucket_strs): 505 | print(f'{bs}', end='') 506 | if self.bucket_cnts is not None: 507 | print(f'\t{self.bucket_cnts[i]}', end='') 508 | for j, stat in enumerate(self.sys_stats): 509 | print(f'\t{fmt(stat[i])}', end='') 510 | if self.bucket_intervals is not None: 511 | interval = self.bucket_intervals[j][i] 512 | low, up = interval['lower_bound'], interval['upper_bound'] 513 | print(f' [{fmt(low)}, {fmt(up)}]', end='') 514 | print() 515 | print() 516 | 517 | def plot(self, output_directory='outputs', output_fig_file='word-acc', output_fig_format='pdf'): 518 | sys = self.sys_stats 519 | xticklabels = [s for s in self.bucketer.bucket_strs] 520 | 521 | if self.bucket_intervals: 522 | errs = [] 523 | for i, stat in enumerate(sys): 524 | lows, ups = [], [] 525 | for j, score in enumerate(stat): 526 | interval = self.bucket_intervals[i][j] 527 | low, up = interval['lower_bound'], interval['upper_bound'] 528 | lows.append(score-low) 529 | ups.append(up-score) 530 | errs.append(np.array([lows, ups]) ) 531 | else: 532 | errs = None 533 | 534 | make_bar_chart(sys, 535 | output_directory, output_fig_file, 536 | output_fig_format=output_fig_format, 537 | errs=errs, 538 | xlabel=self.bucketer.name(), ylabel=self.yname, 539 | xticklabels=xticklabels) 540 | 541 | def html_content(self, output_directory=None): 542 | line = [self.bucketer.idstr()] 543 | if self.bucket_cnts is not None: 544 | line.append('# sents') 545 | line += sys_names 546 | table = [ line ] 547 | for i, bs in enumerate(self.bucketer.bucket_strs): 548 | line = [bs] 549 | if self.bucket_cnts is not None: 550 | line.append(f'\t{self.bucket_cnts[i]}') 551 | for j, stat in enumerate(self.sys_stats): 552 | line.append(fmt(stat[i])) 553 | if self.bucket_intervals is not None: 554 | interval = self.bucket_intervals[j][i] 555 | 
low, up = interval['lower_bound'], interval['upper_bound'] 556 | line[-1] += f' [{fmt(low)}, {fmt(up)}]' 557 | table.extend([line]) 558 | html = html_table(table, self.title) 559 | for ext in ('png', 'pdf'): 560 | self.plot(output_directory, self.output_fig_file, ext) 561 | html += html_img_reference(self.output_fig_file, 'Sentence Bucket Analysis') 562 | return html 563 | 564 | class SentenceExampleReport(Report): 565 | 566 | def __init__(self, report_length=None, scorediff_lists=None, scorer=None, ref=None, outs=None, src=None, compare_directions=[(0, 1)], title=None): 567 | self.report_length = report_length 568 | self.scorediff_lists = scorediff_lists 569 | self.scorer = scorer 570 | self.ref = ref 571 | self.outs = outs 572 | self.src = src 573 | self.compare_directions = compare_directions 574 | self.title = title 575 | 576 | def print(self): 577 | self.print_header('Sentence Examples Analysis') 578 | report_length = self.report_length 579 | for cnt, (left, right) in enumerate(self.compare_directions): 580 | ref, out1, out2 = self.ref, self.outs[left], self.outs[right] 581 | sleft, sright = sys_names[left], sys_names[right] 582 | print(f'--- {report_length} sentences where {sleft}>{sright} at {self.scorer.name()}') 583 | for bdiff, s1, s2, str1, str2, i in self.scorediff_lists[cnt][:report_length]: 584 | print(f"{sleft}-{sright}={fmt(-bdiff)}, {sleft}={fmt(s1)}, {sright}={fmt(s2)}") 585 | if self.src and self.src[i]: 586 | print(f"Src: {' '.join(self.src[i])}") 587 | print ( 588 | f"Ref: {' '.join(ref[i])}\n" 589 | f"{sleft}: {' '.join(out1[i])}\n" 590 | f"{sright}: {' '.join(out2[i])}\n" 591 | ) 592 | 593 | print(f'--- {report_length} sentences where {sright}>{sleft} at {self.scorer.name()}') 594 | for bdiff, s1, s2, str1, str2, i in self.scorediff_lists[cnt][-report_length:]: 595 | print(f"{sleft}-{sright}={fmt(-bdiff)}, {sleft}={fmt(s1)}, {sright}={fmt(s2)}") 596 | if self.src and self.src[i]: 597 | print(f"Src: {' '.join(self.src[i])}") 598 | print ( 599 | f"Ref: {' '.join(ref[i])}\n" 600 | f"{sleft}: {' '.join(out1[i])}\n" 601 | f"{sright}: {' '.join(out2[i])}\n" 602 | ) 603 | 604 | def plot(self, output_directory, output_fig_file, output_fig_format='pdf'): 605 | pass 606 | 607 | def html_content(self, output_directory=None): 608 | report_length = self.report_length 609 | for cnt, (left, right) in enumerate(self.compare_directions): 610 | sleft, sright = sys_names[left], sys_names[right] 611 | ref, out1, out2 = self.ref, self.outs[left], self.outs[right] 612 | html = tag_str('h4', f'{report_length} sentences where {sleft}>{sright} at {self.scorer.name()}') 613 | for bdiff, s1, s2, str1, str2, i in self.scorediff_lists[cnt][:report_length]: 614 | table = [['', 'Output', f'{self.scorer.idstr()}']] 615 | if self.src and self.src[i]: 616 | table.append(['Src', ' '.join(self.src[i]), '']) 617 | table += [ 618 | ['Ref', ' '.join(ref[i]), ''], 619 | [f'{sleft}', ' '.join(out1[i]), fmt(s1)], 620 | [f'{sright}', ' '.join(out2[i]), fmt(s2)] 621 | ] 622 | 623 | html += html_table(table, None) 624 | 625 | html += tag_str('h4', f'{report_length} sentences where {sright}>{sleft} at {self.scorer.name()}') 626 | for bdiff, s1, s2, str1, str2, i in self.scorediff_lists[cnt][-report_length:]: 627 | table = [['', 'Output', f'{self.scorer.idstr()}']] 628 | if self.src and self.src[i]: 629 | table.append(['Src', ' '.join(self.src[i]), '']) 630 | table += [ 631 | ['Ref', ' '.join(ref[i]), ''], 632 | [f'{sleft}', ' '.join(out1[i]), fmt(s1)], 633 | [f'{sright}', ' '.join(out2[i]), fmt(s2)] 634 | ] 635 
| 636 |         html += html_table(table, None)
637 | 
638 |     return html
639 | 
640 | 
641 | def tag_str(tag, text, new_line=''):
642 |   return f'<{tag}>{new_line} {text} {new_line}</{tag}>'
643 | 
644 | def html_table(table, title=None, bold_rows=1, bold_cols=1, latex_ignore_cols={}):
645 |   html = '<table border="1">\n'
646 |   if title is not None:
647 |     html += tag_str('caption', title)
648 |   for i, row in enumerate(table):
649 |     tag_type = 'th' if (i < bold_rows) else 'td'
650 |     table_row = '\n  '.join(tag_str('th' if j < bold_cols else tag_type, rdata) for (j, rdata) in enumerate(row))
651 |     html += tag_str('tr', table_row)
652 |   html += '\n</table>\n<br/>'
653 | 
654 |   tab_id = next_tab_id()
655 |   latex_code = "\\begin{table}[t]\n \\centering\n"
656 |   cs = ['c'] * len(table[0])
657 |   if bold_cols != 0:
658 |     cs[bold_cols-1] = 'c||'
659 |   latex_code += " \\begin{tabular}{"+''.join(cs)+"}\n"
660 |   for i, row in enumerate(table):
661 |     latex_code += ' & '.join([fmt(x) for c_i, x in enumerate(row) if c_i not in latex_ignore_cols]) + (' \\\\\n' if i != bold_rows-1 else ' \\\\ \\hline \\hline\n')
662 |   latex_code += " \\end{tabular}\n \\caption{Caption}\n \\label{tab:table"+tab_id+"}\n\\end{table}"
663 | 
664 |   html += (f'<button onclick="showhide(\'{tab_id}_latex\')">Show/Hide LaTeX</button> <br/>' +
665 |            f'<div id="{tab_id}_latex" style="display:none"><pre>{latex_code}</pre></div>')
666 |   return html
667 | 
668 | def styled_html_message(report_title, content):
669 |   content = content.encode("ascii","xmlcharrefreplace").decode()
670 |   return (f'<html>\n<head>\n<link rel="stylesheet" href="compare_mt.css">\n</head>\n'+
671 |           f'<script>\n{javascript_style}\n</script>\n'+
672 |           f'<body>\n<h1> {report_title} </h1>\n {content} \n</body>\n</html>')
673 | 
674 | def generate_html_report(reports, output_directory, report_title):
675 |   content = []
676 |   for name, rep in reports:
677 |     content.append(f'<h2> {name} </h2>')
678 |     for r in rep:
679 |       content.append(r.html_content(output_directory))
680 |   content = "\n".join(content)
681 | 
682 |   if not os.path.exists(output_directory):
683 |     os.makedirs(output_directory)
684 |   html_file = os.path.join(output_directory, 'index.html')
685 |   with open(html_file, 'w') as f:
686 |     f.write(styled_html_message(report_title, content))
687 |   css_file = os.path.join(output_directory, 'compare_mt.css')
688 |   with open(css_file, 'w') as f:
689 |     f.write(css_style)
690 | 
691 | def launch_http_server(output_directory: str, bind_address: str = '0.0.0.0', bind_port: int = 8000):
692 |   assert Path(output_directory).is_dir()
693 |   hostname = bind_address if bind_address != '0.0.0.0' else socket.gethostname()
694 |   log.info(f'Directory = {output_directory}')
695 |   log.info(f'Launching a web server: http://{hostname}:{bind_port}/')
696 |   Handler = partial(SimpleHTTPRequestHandler, directory=output_directory)
697 |   server = HTTPServer(server_address=(bind_address, bind_port),
698 |                       RequestHandlerClass=Handler)
699 |   try:
700 |     server.serve_forever()
701 |   except KeyboardInterrupt:
702 |     pass  # all good! Exiting without printing stacktrace
703 | 
704 | 
-------------------------------------------------------------------------------- /compare_mt/rouge/README.md: --------------------------------------------------------------------------------
1 | # Python ROUGE Implementation
2 | 
3 | ## Overview
4 | 
5 | This is a native python implementation of ROUGE, designed to replicate results
6 | from the original perl package.
7 | 
8 | ROUGE was originally introduced in the paper:
9 | 
10 | Lin, Chin-Yew. ROUGE: a Package for Automatic Evaluation of Summaries. In
11 | Proceedings of the Workshop on Text Summarization Branches Out (WAS 2004),
12 | Barcelona, Spain, July 25 - 26, 2004.
13 | 
14 | ## ROUGE for Python
15 | 
16 | There are ROUGE implementations available for Python; however, some are not
17 | native python due to their dependency on the perl script, and others provide
18 | differing results when compared with the original implementation. This makes it
19 | difficult to directly compare with known results.
20 | 
21 | This package is designed to replicate perl results. It implements:
22 | 
23 | *   ROUGE-N (N-gram) scoring
24 | *   ROUGE-L (Longest Common Subsequence) scoring
25 | *   Text normalization
26 | *   Bootstrap resampling for confidence interval calculation
27 | *   Optional Porter stemming to remove plurals and word suffixes such as (ing,
28 |     ion, ment).
29 | 
30 | Note that not all options provided by the original perl ROUGE script are
31 | supported, but the subset of options that are implemented should replicate the
32 | original functionality.
33 | 
34 | ## Stopword removal
35 | 
36 | The original ROUGE perl script implemented optional stopword removal (using the
37 | -s parameter). However, there were ~600 stopwords used by ROUGE, borrowed from
38 | another now defunct package. This word list contained many words that may not be
39 | suited to some tasks, such as day and month names and numbers. It also has no
40 | clear license for redistribution. Since we are unable to replicate this
41 | functionality precisely we do not include stopword removal.
42 | 
43 | ## How to run
44 | 
45 | This package compares target files (containing one example per line) with
46 | prediction files in the same format.
It can be launched as follows (from 47 | google-research/): 48 | 49 | ```shell 50 | python -m rouge.rouge \ 51 | --target_filepattern=*.targets \ 52 | --prediction_filepattern=*.decodes \ 53 | --output_filename=scores.csv \ 54 | --use_stemmer=true 55 | ``` 56 | 57 | ## License 58 | 59 | Licensed under the 60 | [Apache 2.0](https://github.com/google-research/google-research/blob/master/LICENSE) 61 | License. 62 | 63 | ## Disclaimer 64 | 65 | This is not an official Google product. 66 | -------------------------------------------------------------------------------- /compare_mt/rouge/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | -------------------------------------------------------------------------------- /compare_mt/rouge/io.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Library for reading/writing input and score files.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import glob 23 | import itertools 24 | 25 | from absl import logging 26 | 27 | 28 | def compute_scores_and_write_to_csv(target_filepattern, 29 | prediction_filepattern, 30 | output_filename, 31 | scorer, 32 | aggregator, 33 | delimiter="\n"): 34 | """Runs aggregate score calculations and outputs results to a CSV file. 35 | 36 | Args: 37 | target_filepattern: Pattern for files containing target text. 38 | prediction_filepattern: Pattern for files containing prediction text. 39 | output_filename: Name of file to write results to. 40 | scorer: A BaseScorer object to compute scores. 41 | aggregator: An aggregator to aggregate scores. If None, outputs are 42 | per-example scores. 43 | delimiter: Record delimiter. 
44 | """ 45 | 46 | target_filenames = _glob(target_filepattern) 47 | prediction_filenames = _glob(prediction_filepattern) 48 | scores = _compute_scores(target_filenames, prediction_filenames, scorer, 49 | delimiter) 50 | if aggregator: 51 | for score in scores: 52 | aggregator.add_scores(score) 53 | _write_aggregates_to_csv(output_filename, aggregator.aggregate()) 54 | else: 55 | _write_scores_to_csv(output_filename, scores) 56 | 57 | 58 | def _glob(filepattern): 59 | return glob.glob(filepattern) # pylint: disable=unreachable 60 | 61 | 62 | def _open(filepattern, mode="r"): 63 | return open(filepattern, mode) # pylint: disable=unreachable 64 | 65 | 66 | def _record_gen(filename, delimiter): 67 | """Opens file and yields records separated by delimiter.""" 68 | with _open(filename) as f: 69 | records = f.read().split(delimiter) 70 | if records[-1]: 71 | # Need a final delimiter at end of file to be able to detect an empty last 72 | # record. 73 | logging.warn("Expected delimiter at end of file") 74 | else: 75 | records = records[:-1] 76 | for record in records: 77 | yield record 78 | 79 | 80 | def _compute_scores(target_filenames, prediction_filenames, scorer, delimiter): 81 | """Computes aggregates scores across the given target and prediction files. 82 | 83 | Args: 84 | target_filenames: List of filenames from which to read target lines. 85 | prediction_filenames: List of filenames from which to read prediction lines. 86 | scorer: A BaseScorer object to compute scores. 87 | delimiter: string delimiter between each record in input files 88 | Returns: 89 | A list of dicts mapping score_type to Score objects. 90 | Raises: 91 | ValueError: If invalid targets or predictions are provided. 92 | """ 93 | 94 | if (len(target_filenames) < 1 or 95 | len(target_filenames) != len(prediction_filenames)): 96 | raise ValueError("Must have equal and positive number of target and " 97 | "prediction files. Found: %d target files, %d prediction " 98 | "files." % (len(target_filenames), 99 | len(prediction_filenames))) 100 | 101 | scores = [] 102 | for target_filename, prediction_filename in zip( 103 | sorted(target_filenames), sorted(prediction_filenames)): 104 | logging.info("Reading targets from %s.", target_filename) 105 | logging.info("Reading predictions from %s.", prediction_filename) 106 | targets = _record_gen(target_filename, delimiter) 107 | preds = _record_gen(prediction_filename, delimiter) 108 | for target_rec, prediction_rec in itertools.zip_longest(targets, preds): 109 | if target_rec is None or prediction_rec is None: 110 | raise ValueError("Must have equal number of lines across target and " 111 | "prediction files. Mismatch between files: %s, %s." % 112 | (target_filename, prediction_filename)) 113 | scores.append(scorer.score(target_rec, prediction_rec)) 114 | 115 | return scores 116 | 117 | 118 | def _write_aggregates_to_csv(output_filename, aggregates): 119 | """Writes aggregate scores to an output CSV file. 120 | 121 | Output file is a comma separated where each line has the format: 122 | score_type-(P|R|F),low_ci,mean,high_ci 123 | 124 | P/R/F indicates whether the score is a precision, recall or f-measure. 125 | 126 | Args: 127 | output_filename: Name of file to write results to. 128 | aggregates: A dict mapping each score_type to a AggregateScore object. 
129 | """ 130 | 131 | logging.info("Writing results to %s.", output_filename) 132 | with _open(output_filename, "w") as output_file: 133 | output_file.write("score_type,low,mid,high\n") 134 | for score_type, aggregate in sorted(aggregates.items()): 135 | output_file.write("%s-R,%f,%f,%f\n" % 136 | (score_type, aggregate.low.recall, aggregate.mid.recall, 137 | aggregate.high.recall)) 138 | output_file.write("%s-P,%f,%f,%f\n" % 139 | (score_type, aggregate.low.precision, 140 | aggregate.mid.precision, aggregate.high.precision)) 141 | output_file.write("%s-F,%f,%f,%f\n" % 142 | (score_type, aggregate.low.fmeasure, 143 | aggregate.mid.fmeasure, aggregate.high.fmeasure)) 144 | logging.info("Finished writing results.") 145 | 146 | 147 | def _write_scores_to_csv(output_filename, scores): 148 | """Writes scores for each individual example to an output CSV file. 149 | 150 | Output file is a comma separated where each line has the format: 151 | id,score1,score2,score3,... 152 | 153 | The header row indicates the type of each score column. 154 | 155 | Args: 156 | output_filename: Name of file to write results to. 157 | scores: A list of dicts mapping each score_type to a Score object. 158 | """ 159 | 160 | if len(scores) < 1: 161 | logging.warn("No scores to write") 162 | return 163 | rouge_types = sorted(scores[0].keys()) 164 | 165 | logging.info("Writing results to %s.", output_filename) 166 | with _open(output_filename, "w") as out_file: 167 | out_file.write("id") 168 | for rouge_type in rouge_types: 169 | out_file.write(",{t}-P,{t}-R,{t}-F".format(t=rouge_type)) 170 | out_file.write("\n") 171 | for i, result in enumerate(scores): 172 | out_file.write("%d" % i) 173 | for rouge_type in rouge_types: 174 | out_file.write(",%f,%f,%f" % 175 | (result[rouge_type].precision, result[rouge_type].recall, 176 | result[rouge_type].fmeasure)) 177 | out_file.write("\n") 178 | logging.info("Finished writing results.") 179 | -------------------------------------------------------------------------------- /compare_mt/rouge/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py 2 | nltk 3 | numpy 4 | six 5 | -------------------------------------------------------------------------------- /compare_mt/rouge/rouge.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | r"""Main routine to calculate ROUGE scores across text files. 17 | 18 | Designed to replicate scores computed by the ROUGE perl implementation as 19 | closely as possible. 20 | 21 | Output is a text file in CSV format. 
22 | 
23 | Sample usage:
24 | 
25 |     rouge --rouge_types=rouge1,rouge2,rougeL \
26 |         --target_filepattern=*.targets \
27 |         --prediction_filepattern=*.decodes \
28 |         --output_filename=scores.csv \
29 |         --use_stemmer
30 | 
31 | Which is equivalent to calling the perl ROUGE script as:
32 | 
33 |     ROUGE-1.5.5.pl -m -e ./data -n 2 -a /tmp/rouge/settings.xml
34 | 
35 | Where settings.xml provides target and decode text.
36 | """
37 | 
38 | from __future__ import absolute_import
39 | from __future__ import division
40 | from __future__ import print_function
41 | 
42 | from absl import app
43 | from absl import flags
44 | from compare_mt.rouge import io
45 | from compare_mt.rouge import rouge_scorer
46 | from compare_mt.rouge import scoring
47 | 
48 | flags.DEFINE_string("target_filepattern", None,
49 |                     "Files containing target text.")
50 | flags.DEFINE_string("prediction_filepattern", None,
51 |                     "Files containing prediction text.")
52 | flags.DEFINE_string("output_filename", None,
53 |                     "File in which to write calculated ROUGE scores as a CSV.")
54 | flags.DEFINE_string("delimiter", "\n",
55 |                     "Record delimiter in files.")
56 | flags.DEFINE_list("rouge_types", ["rouge1", "rouge2", "rougeL"],
57 |                   "List of ROUGE types to calculate.")
58 | flags.DEFINE_boolean("use_stemmer", False,
59 |                      "Whether to use Porter stemmer to remove common suffixes.")
60 | flags.DEFINE_boolean("aggregate", True,
61 |                      "Write aggregates if this is set to True")
62 | 
63 | FLAGS = flags.FLAGS
64 | 
65 | 
66 | def main(argv):
67 |   if len(argv) > 1:
68 |     raise app.UsageError("Too many command-line arguments.")
69 |   scorer = rouge_scorer.RougeScorer(FLAGS.rouge_types, FLAGS.use_stemmer)
70 |   aggregator = scoring.BootstrapAggregator() if FLAGS.aggregate else None
71 |   io.compute_scores_and_write_to_csv(
72 |       FLAGS.target_filepattern,
73 |       FLAGS.prediction_filepattern,
74 |       FLAGS.output_filename,
75 |       scorer,
76 |       aggregator,
77 |       delimiter=FLAGS.delimiter)
78 | 
79 | 
80 | if __name__ == "__main__":
81 |   flags.mark_flag_as_required("target_filepattern")
82 |   flags.mark_flag_as_required("prediction_filepattern")
83 |   flags.mark_flag_as_required("output_filename")
84 |   app.run(main)
85 | 
-------------------------------------------------------------------------------- /compare_mt/rouge/rouge_scorer.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2019 The Google Research Authors.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # Lint as: python2, python3
17 | """Computes rouge scores between two text blobs.
18 | Implementation replicates the functionality in the original ROUGE package. See:
19 | Lin, Chin-Yew. ROUGE: a Package for Automatic Evaluation of Summaries. In
20 | Proceedings of the Workshop on Text Summarization Branches Out (WAS 2004),
21 | Barcelona, Spain, July 25 - 26, 2004.
22 | Default options are equivalent to running: 23 | ROUGE-1.5.5.pl -e data -n 2 -a settings.xml 24 | Or with use_stemmer=True: 25 | ROUGE-1.5.5.pl -m -e data -n 2 -a settings.xml 26 | In these examples settings.xml lists input files and formats. 27 | """ 28 | 29 | from __future__ import absolute_import 30 | from __future__ import division 31 | from __future__ import print_function 32 | 33 | import collections 34 | import re 35 | 36 | from compare_mt.cache_utils import CachedPorterStemmer 37 | import six 38 | from six.moves import map 39 | from six.moves import range 40 | from compare_mt.rouge import scoring 41 | from compare_mt.rouge import tokenize 42 | 43 | 44 | class RougeScorer(scoring.BaseScorer): 45 | """Calculate rouges scores between two blobs of text. 46 | Sample usage: 47 | scorer = RougeScorer(['rouge1', 'rougeL'], use_stemmer=True) 48 | scores = scorer.score('The quick brown fox jumps over the lazy dog', 49 | 'The quick brown dog jumps on the log.') 50 | """ 51 | 52 | def __init__(self, rouge_types, use_stemmer=False): 53 | """Initializes a new RougeScorer. 54 | Valid rouge types that can be computed are: 55 | rougen (e.g. rouge1, rouge2): n-gram based scoring. 56 | rougeL: Longest common subsequence based scoring. 57 | Args: 58 | rouge_types: A list of rouge types to calculate. 59 | use_stemmer: Bool indicating whether Porter stemmer should be used to 60 | strip word suffixes to improve matching. 61 | Returns: 62 | A dict mapping rouge types to Score tuples. 63 | """ 64 | 65 | self.rouge_types = rouge_types 66 | self._stemmer = CachedPorterStemmer() if use_stemmer else None 67 | 68 | def score(self, target, prediction): 69 | """Calculates rouge scores between the target and prediction. 70 | Args: 71 | target: Text containing the target (ground truth) text. 72 | prediction: Text containing the predicted text. 73 | Returns: 74 | A dict mapping each rouge type to a Score object. 75 | Raises: 76 | ValueError: If an invalid rouge type is encountered. 77 | """ 78 | 79 | target_tokens = tokenize.tokenize(target, self._stemmer) 80 | prediction_tokens = tokenize.tokenize(prediction, self._stemmer) 81 | result = {} 82 | 83 | for rouge_type in self.rouge_types: 84 | if rouge_type == "rougeL": 85 | # Rouge from longest common subsequences. 86 | scores = _score_lcs(target_tokens, prediction_tokens) 87 | elif rouge_type == "rougeLsum": 88 | # Note: Does not support multi-line text. 89 | def get_sents(text): 90 | # Assume sentences are separated by newline. 91 | sents = six.ensure_str(text).split("\n") 92 | sents = [x for x in sents if len(x)] 93 | return sents 94 | 95 | target_tokens_list = [ 96 | tokenize.tokenize(s, self._stemmer) for s in get_sents(target)] 97 | prediction_tokens_list = [ 98 | tokenize.tokenize(s, self._stemmer) for s in get_sents(prediction)] 99 | scores = _summary_level_lcs(target_tokens_list, 100 | prediction_tokens_list) 101 | elif re.match(r"rouge[0-9]$", six.ensure_str(rouge_type)): 102 | # Rouge from n-grams. 103 | n = int(rouge_type[5:]) 104 | if n <= 0: 105 | raise ValueError("rougen requires positive n: %s" % rouge_type) 106 | target_ngrams = _create_ngrams(target_tokens, n) 107 | prediction_ngrams = _create_ngrams(prediction_tokens, n) 108 | scores = _score_ngrams(target_ngrams, prediction_ngrams) 109 | else: 110 | raise ValueError("Invalid rouge type: %s" % rouge_type) 111 | result[rouge_type] = scores 112 | 113 | return result 114 | 115 | 116 | def _create_ngrams(tokens, n): 117 | """Creates ngrams from the given list of tokens. 
118 | Args: 119 | tokens: A list of tokens from which ngrams are created. 120 | n: Number of tokens to use, e.g. 2 for bigrams. 121 | Returns: 122 | A dictionary mapping each ngram to the number of occurrences. 123 | """ 124 | 125 | ngrams = collections.Counter() 126 | for ngram in (tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)): 127 | ngrams[ngram] += 1 128 | return ngrams 129 | 130 | 131 | def _score_lcs(target_tokens, prediction_tokens): 132 | """Computes LCS (Longest Common Subsequence) rouge scores. 133 | Args: 134 | target_tokens: Tokens from the target text. 135 | prediction_tokens: Tokens from the predicted text. 136 | Returns: 137 | A Score object containing computed scores. 138 | """ 139 | 140 | if not target_tokens or not prediction_tokens: 141 | return scoring.Score(precision=0, recall=0, fmeasure=0) 142 | 143 | # Compute length of LCS from the bottom up in a table (DP approach). 144 | lcs_table = _lcs_table(target_tokens, prediction_tokens) 145 | lcs_length = lcs_table[-1][-1] 146 | 147 | precision = lcs_length / len(prediction_tokens) 148 | recall = lcs_length / len(target_tokens) 149 | fmeasure = scoring.fmeasure(precision, recall) 150 | 151 | return scoring.Score(precision=precision, recall=recall, fmeasure=fmeasure) 152 | 153 | 154 | def _lcs_table(ref, can): 155 | """Create 2-d LCS score table.""" 156 | rows = len(ref) 157 | cols = len(can) 158 | lcs_table = [[0] * (cols + 1) for _ in range(rows + 1)] 159 | for i in range(1, rows + 1): 160 | for j in range(1, cols + 1): 161 | if ref[i - 1] == can[j - 1]: 162 | lcs_table[i][j] = lcs_table[i - 1][j - 1] + 1 163 | else: 164 | lcs_table[i][j] = max(lcs_table[i - 1][j], lcs_table[i][j - 1]) 165 | return lcs_table 166 | 167 | 168 | def _backtrack_norec(t, ref, can): 169 | """Read out LCS.""" 170 | i = len(ref) 171 | j = len(can) 172 | lcs = [] 173 | while i > 0 and j > 0: 174 | if ref[i - 1] == can[j - 1]: 175 | lcs.insert(0, i-1) 176 | i -= 1 177 | j -= 1 178 | elif t[i][j - 1] > t[i - 1][j]: 179 | j -= 1 180 | else: 181 | i -= 1 182 | return lcs 183 | 184 | 185 | def _summary_level_lcs(ref_sent, can_sent): 186 | """ROUGE: Summary-level LCS, section 3.2 in ROUGE paper. 187 | Args: 188 | ref_sent: list of tokenized reference sentences 189 | can_sent: list of tokenized candidate sentences 190 | Returns: 191 | summary level ROUGE score 192 | """ 193 | if not ref_sent or not can_sent: 194 | return scoring.Score(precision=0, recall=0, fmeasure=0) 195 | 196 | m = sum(map(len, ref_sent)) 197 | n = sum(map(len, can_sent)) 198 | if not n or not m: 199 | return scoring.Score(precision=0, recall=0, fmeasure=0) 200 | 201 | # get token counts to prevent double counting 202 | token_cnts_r = collections.Counter() 203 | token_cnts_c = collections.Counter() 204 | for s in ref_sent: 205 | # s is a list of tokens 206 | token_cnts_r.update(s) 207 | for s in can_sent: 208 | token_cnts_c.update(s) 209 | 210 | hits = 0 211 | for r in ref_sent: 212 | lcs = _union_lcs(r, can_sent) 213 | # Prevent double-counting: 214 | # The paper describes just computing hits += len(_union_lcs()), 215 | # but the implementation prevents double counting. We also 216 | # implement this as in version 1.5.5.
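    # Illustrative note (added for clarity): if a token occurs once in the
    # reference but shows up in the union LCS of several candidate sentences,
    # its reference count reaches zero after the first hit, so later
    # occurrences are not credited again.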
217 | for t in lcs: 218 | if token_cnts_c[t] > 0 and token_cnts_r[t] > 0: 219 | hits += 1 220 | token_cnts_c[t] -= 1 221 | token_cnts_r[t] -= 1 222 | 223 | recall = hits / m 224 | precision = hits / n 225 | fmeasure = scoring.fmeasure(precision, recall) 226 | return scoring.Score(precision=precision, recall=recall, fmeasure=fmeasure) 227 | 228 | 229 | def _union_lcs(ref, c_list): 230 | """Find union LCS between a ref sentence and list of candidate sentences. 231 | Args: 232 | ref: list of tokens 233 | c_list: list of tokenized candidate sentences 234 | Returns: 235 | List of tokens in ref representing union LCS. 236 | """ 237 | lcs_list = [lcs_ind(ref, c) for c in c_list] 238 | return [ref[i] for i in _find_union(lcs_list)] 239 | 240 | 241 | def _find_union(lcs_list): 242 | """Finds union LCS given a list of LCS.""" 243 | return sorted(list(set().union(*lcs_list))) 244 | 245 | 246 | def lcs_ind(ref, can): 247 | """Returns the indices (into ref) of one longest common subsequence.""" 248 | t = _lcs_table(ref, can) 249 | return _backtrack_norec(t, ref, can) 250 | 251 | 252 | def _score_ngrams(target_ngrams, prediction_ngrams): 253 | """Compute n-gram based rouge scores. 254 | Args: 255 | target_ngrams: A Counter object mapping each ngram to number of 256 | occurrences for the target text. 257 | prediction_ngrams: A Counter object mapping each ngram to number of 258 | occurrences for the prediction text. 259 | Returns: 260 | A Score object containing computed scores. 261 | """ 262 | 263 | intersection_ngrams_count = 0 264 | for ngram in six.iterkeys(target_ngrams): 265 | intersection_ngrams_count += min(target_ngrams[ngram], 266 | prediction_ngrams[ngram]) 267 | target_ngrams_count = sum(target_ngrams.values()) 268 | prediction_ngrams_count = sum(prediction_ngrams.values()) 269 | 270 | precision = intersection_ngrams_count / max(prediction_ngrams_count, 1) 271 | recall = intersection_ngrams_count / max(target_ngrams_count, 1) 272 | fmeasure = scoring.fmeasure(precision, recall) 273 | 274 | return scoring.Score(precision=precision, recall=recall, fmeasure=fmeasure) 275 | -------------------------------------------------------------------------------- /compare_mt/rouge/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2018 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | set -x 18 | 19 | virtualenv -p python3 . 20 | source ./bin/activate 21 | 22 | pip install -r rouge/requirements.txt 23 | python -m rouge.io_test 24 | python -m rouge.rouge_scorer_test 25 | python -m rouge.scoring_test 26 | -------------------------------------------------------------------------------- /compare_mt/rouge/scoring.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google Research Authors.
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Library for scoring and evaluation of text samples. 17 | 18 | Aggregation functions use bootstrap resampling to compute confidence intervals 19 | as per the original ROUGE Perl implementation. 20 | """ 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import abc 27 | import collections 28 | 29 | import numpy as np 30 | import six 31 | from six.moves import xrange  # pylint: disable=redefined-builtin 32 | 33 | 34 | class Score( 35 | collections.namedtuple("Score", ["precision", "recall", "fmeasure"])): 36 | """Tuple containing precision, recall, and f-measure values.""" 37 | 38 | 39 | class BaseScorer(object): 40 | """Base class for Scorer objects.""" 41 | 42 | @abc.abstractmethod 43 | def score(self, target, prediction): 44 | """Calculates score between the target and prediction. 45 | 46 | Args: 47 | target: Text containing the target (ground truth) text. 48 | prediction: Text containing the predicted text. 49 | Returns: 50 | A dict mapping each score_type (string) to Score object. 51 | """ 52 | 53 | 54 | class AggregateScore( 55 | collections.namedtuple("AggregateScore", ["low", "mid", "high"])): 56 | """Tuple containing confidence intervals for scores.""" 57 | 58 | 59 | class BootstrapAggregator(object): 60 | """Aggregates scores to provide confidence intervals. 61 | 62 | Sample usage: 63 | scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL']) 64 | aggregator = BootstrapAggregator() 65 | aggregator.add_scores(scorer.score("one two three", "one two")) 66 | aggregator.add_scores(scorer.score("one two five six", "seven eight")) 67 | result = aggregator.aggregate() 68 | print(result) 69 | {'rougeL': AggregateScore( 70 | low=Score(precision=0.0, recall=0.0, fmeasure=0.0), 71 | mid=Score(precision=0.5, recall=0.33, fmeasure=0.40), 72 | high=Score(precision=1.0, recall=0.66, fmeasure=0.80)), 73 | 'rouge1': AggregateScore( 74 | low=Score(precision=0.0, recall=0.0, fmeasure=0.0), 75 | mid=Score(precision=0.5, recall=0.33, fmeasure=0.40), 76 | high=Score(precision=1.0, recall=0.66, fmeasure=0.80))} 77 | """ 78 | 79 | def __init__(self, 80 | confidence_interval=0.95, 81 | n_samples=1000): 82 | """Initializes a BootstrapAggregator object. 83 | 84 | Args: 85 | confidence_interval: Confidence interval to compute on the mean as a 86 | decimal. 87 | n_samples: Number of samples to use for bootstrap resampling. 88 | Raises: 89 | ValueError: If invalid argument is given. 90 | """ 91 | 92 | if confidence_interval < 0 or confidence_interval > 1: 93 | raise ValueError("confidence_interval must be in range [0, 1]") 94 | if n_samples <= 0: 95 | raise ValueError("n_samples must be positive") 96 | 97 | self._n_samples = n_samples 98 | self._confidence_interval = confidence_interval 99 | self._scores = collections.defaultdict(list) 100 | 101 | def add_scores(self, scores): 102 | """Adds a sample for future aggregation.
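    Typically called once per (target, prediction) pair, mirroring the class
    docstring above, e.g.: aggregator.add_scores(scorer.score(target_text,
    prediction_text)), where target_text and prediction_text are illustrative
    names.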
103 | 104 | Args: 105 | scores: Dict mapping score_type strings to Score object. 106 | """ 107 | 108 | for score_type, score in six.iteritems(scores): 109 | self._scores[score_type].append((score.precision, score.recall, 110 | score.fmeasure)) 111 | 112 | def aggregate(self): 113 | """Aggregates scores previously added using add_scores. 114 | 115 | Returns: 116 | A dict mapping score_type to AggregateScore objects. 117 | """ 118 | 119 | result = {} 120 | for score_type, scores in six.iteritems(self._scores): 121 | # Stack scores into a 2-d matrix of (sample, measure). 122 | score_matrix = np.vstack(scores) 123 | # Percentiles are returned as (interval, measure). 124 | percentiles = self._bootstrap_resample(score_matrix) 125 | # Extract the three intervals (low, mid, high). 126 | intervals = tuple((Score( 127 | precision=percentiles[j, 0], 128 | recall=percentiles[j, 1], 129 | fmeasure=percentiles[j, 2]) for j in xrange(3))) 130 | result[score_type] = AggregateScore( 131 | low=intervals[0], mid=intervals[1], high=intervals[2]) 132 | return result 133 | 134 | def _bootstrap_resample(self, matrix): 135 | """Performs bootstrap resampling on a matrix of scores. 136 | 137 | Args: 138 | matrix: A 2-d matrix of (sample, measure). 139 | Returns: 140 | A 2-d matrix of (bounds, measure). There are three bounds: low (row 0), 141 | mid (row 1) and high (row 2). Mid is always the mean, while low and high 142 | bounds are specified by self._confidence_interval (which defaults to 0.95 143 | meaning it will return the 2.5th and 97.5th percentiles for a 95% 144 | confidence interval on the mean). 145 | """ 146 | 147 | # Matrix of (bootstrap sample, measure). 148 | sample_mean = np.zeros((self._n_samples, matrix.shape[1])) 149 | for i in xrange(self._n_samples): 150 | sample_idx = np.random.choice( 151 | np.arange(matrix.shape[0]), size=matrix.shape[0]) 152 | sample = matrix[sample_idx, :] 153 | sample_mean[i, :] = np.mean(sample, axis=0) 154 | 155 | # Take percentiles on the estimate of the mean using bootstrap samples. 156 | # Final result is a (bounds, measure) matrix. 157 | percentile_delta = (1 - self._confidence_interval) / 2 158 | q = 100 * np.array([percentile_delta, 0.5, 1 - percentile_delta]) 159 | return np.percentile(sample_mean, q, axis=0) 160 | 161 | 162 | def fmeasure(precision, recall): 163 | """Computes f-measure given precision and recall values.""" 164 | 165 | if precision + recall > 0: 166 | return 2 * precision * recall / (precision + recall) 167 | else: 168 | return 0.0 169 | -------------------------------------------------------------------------------- /compare_mt/rouge/tokenize.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2019 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
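# Illustrative sketch of the tokenizer defined below (added for clarity, not
# in the original source): lowercasing, replacing non-alphanumerics with
# spaces, and filtering then give
#   tokenize("The quick-brown Fox!", None) -> ["the", "quick", "brown", "fox"]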
15 | 16 | # Lint as: python2, python3 17 | """A library for tokenizing text.""" 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | import re 24 | import six 25 | 26 | VALID_TOKEN_RE = re.compile(r"^[a-z0-9]+$")  # Keeps only lowercase alphanumeric tokens. 27 | 28 | def tokenize(text, stemmer): 29 | """Tokenize input text into a list of tokens. 30 | This approach aims to replicate the approach taken by Chin-Yew Lin in 31 | the original ROUGE implementation. 32 | Args: 33 | text: A text blob to tokenize. 34 | stemmer: An optional stemmer. 35 | Returns: 36 | A list of string tokens extracted from input text. 37 | """ 38 | 39 | # Convert everything to lowercase. 40 | text = text.lower() 41 | # Replace any non-alpha-numeric characters with spaces. 42 | text = re.sub(r"[^a-z0-9]+", " ", six.ensure_str(text)) 43 | 44 | tokens = re.split(r"\s+", text) 45 | if stemmer: 46 | # Only stem words more than 3 characters long. 47 | tokens = [stemmer.stem(x) if len(x) > 3 else x for x in tokens] 48 | 49 | # One final check to drop any empty or invalid tokens. 50 | tokens = [x for x in tokens if VALID_TOKEN_RE.match(six.ensure_str(x))] 51 | 52 | return tokens 53 | -------------------------------------------------------------------------------- /compare_mt/sign_utils.py: -------------------------------------------------------------------------------- 1 | ######################################################################################## 2 | # Compare two systems using bootstrap resampling # 3 | # adapted from https://github.com/neubig/util-scripts/blob/master/paired-bootstrap.py # 4 | # # 5 | # See, e.g. the following paper for references # 6 | # # 7 | # Statistical Significance Tests for Machine Translation Evaluation # 8 | # Philipp Koehn # 9 | # http://www.aclweb.org/anthology/W04-3250 # 10 | # # 11 | ######################################################################################## 12 | 13 | import numpy as np 14 | 15 | 16 | def eval_with_paired_bootstrap(ref, outs, src, 17 | scorer, 18 | compare_directions=[(0, 1)], 19 | num_samples=1000, sample_ratio=0.5, 20 | cache_stats=None): 21 | """ 22 | Evaluate with paired bootstrap. 23 | This compares several systems, performing significance tests with 24 | paired bootstrap resampling to compare the accuracy of the specified systems.
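  For example, with two systems and the default compare_directions=[(0, 1)],
  the returned win ratios are the fractions of bootstrap samples in which
  system 1 scored higher than, lower than, or the same as system 2. A minimal
  usage sketch (variable names are illustrative):
    wins, sys_stats = eval_with_paired_bootstrap(ref, [out1, out2], src, scorer)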
25 | 26 | Args: 27 | ref: The correct labels 28 | outs: The outputs of the systems 29 | src: The source corpus 30 | scorer: The scorer 31 | compare_directions: A list of (left, right) index pairs specifying which systems to compare 32 | num_samples: The number of bootstrap samples to take 33 | sample_ratio: The fraction of the corpus to sample each time 34 | cache_stats: The precomputed statistics 35 | 36 | Returns: 37 | A tuple containing the win ratios and statistics for the systems 38 | """ 39 | sys_scores = [[] for _ in outs] 40 | wins = [[0, 0, 0] for _ in compare_directions] if compare_directions is not None else None 41 | n = len(ref) 42 | ids = list(range(n)) 43 | 44 | if cache_stats is None: 45 | cache_stats = [scorer.cache_stats(ref, out, src=src) for out in outs] 46 | sample_size = int(n*sample_ratio) 47 | for _ in range(num_samples): 48 | # Subsample the gold and system outputs (with replacement) 49 | reduced_ids = np.random.choice(ids, size=sample_size, replace=True) 50 | # Calculate accuracy on the reduced sample and save stats 51 | if cache_stats[0]: 52 | sys_score, _ = zip(*[scorer.score_cached_corpus(reduced_ids, cache_stat) for cache_stat in cache_stats]) 53 | else: 54 | reduced_ref = [ref[i] for i in reduced_ids] 55 | reduced_outs = [[out[i] for i in reduced_ids] for out in outs] 56 | reduced_src = [src[i] for i in reduced_ids] 57 | sys_score, _ = zip(*[scorer.score_corpus(reduced_ref, reduced_out, reduced_src) for reduced_out in reduced_outs]) 58 | 59 | if wins is not None: 60 | for i, compare_direction in enumerate(compare_directions): 61 | left, right = compare_direction 62 | if sys_score[left] > sys_score[right]: 63 | wins[i][0] += 1 64 | elif sys_score[left] < sys_score[right]: 65 | wins[i][1] += 1 66 | else: 67 | wins[i][2] += 1 68 | 69 | for i in range(len(outs)): 70 | sys_scores[i].append(sys_score[i]) 71 | 72 | # Convert win counts to win ratios 73 | wins = [[x/float(num_samples) for x in win] for win in wins] if wins is not None else None 74 | 75 | # Compute system stats 76 | sys_stats = [] 77 | for i in range(len(outs)): 78 | sys_scores[i].sort() 79 | sys_stats.append({ 80 | 'mean':np.mean(sys_scores[i]), 81 | 'median':np.median(sys_scores[i]), 82 | 'lower_bound':sys_scores[i][int(num_samples * 0.025)], 83 | 'upper_bound':sys_scores[i][int(num_samples * 0.975)] 84 | }) 85 | 86 | return wins, sys_stats 87 | -------------------------------------------------------------------------------- /compare_mt/stat_utils.py: -------------------------------------------------------------------------------- 1 | 2 | def extract_salient_features(dict1, dict2, alpha=1.0): 3 | """ 4 | Score salient features given two dictionaries.
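  For example (illustrative), with alpha=1 a feature counted 3 times in dict1
  and once in dict2 scores (3+1)/(3+1+2) = 4/6 ~ 0.67, i.e. salient for dict1,
  while a feature with equal counts in both scores 0.5.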
5 | 6 | Args: 7 | dict1: First set of feature counts 8 | dict2: Second set of feature counts 9 | alpha: The amount of additive smoothing (default 1, i.e. Laplace smoothing) 10 | 11 | Returns: 12 | A dict of Laplace-smoothed saliency scores for each feature 13 | """ 14 | all_keys = set(dict1.keys()) | set(dict2.keys()) 15 | scores = {} 16 | for k in all_keys: 17 | scores[k] = (dict1[k]+alpha) / (dict1[k] + dict2[k] + 2*alpha) 18 | return scores -------------------------------------------------------------------------------- /compare_mt/version_info.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.11" 2 | -------------------------------------------------------------------------------- /example/ted.sys1.eng.senttag: -------------------------------------------------------------------------------- 1 | 20 2 | 10 3 | 20 4 | 20 5 | 20 6 | 0 7 | 20 8 | 80 9 | 0 10 | 10 11 | 0 12 | 10 13 | 30 14 | 20 15 | 10 16 | 30 17 | 20 18 | 20 19 | 10 20 | 0 21 | 0 22 | 10 23 | 0 24 | 20 25 | 10 26 | 20 27 | 10 28 | 20 29 | 10 30 | 20 31 | 50 32 | 50 33 | 10 34 | 30 35 | 10 36 | 20 37 | 10 38 | 30 39 | 30 40 | 30 41 | 20 42 | 30 43 | 0 44 | 0 45 | 10 46 | 0 47 | 20 48 | 10 49 | 10 50 | 0 51 | 0 52 | 20 53 | 0 54 | 10 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 0 61 | 0 62 | 0 63 | 0 64 | 10 65 | 0 66 | 0 67 | 0 68 | 0 69 | 10 70 | 0 71 | 20 72 | 10 73 | 60 74 | 20 75 | 20 76 | 10 77 | 10 78 | 40 79 | 10 80 | 20 81 | 20 82 | 20 83 | 10 84 | 20 85 | 10 86 | 20 87 | 40 88 | 10 89 | 10 90 | 10 91 | 0 92 | 0 93 | 10 94 | 0 95 | 0 96 | 10 97 | 10 98 | 10 99 | 10 100 | 10 101 | 10 102 | 10 103 | 10 104 | 0 105 | 0 106 | 20 107 | 20 108 | 10 109 | 40 110 | 0 111 | 10 112 | 10 113 | 10 114 | 10 115 | 20 116 | 20 117 | 30 118 | 30 119 | 10 120 | 20 121 | 10 122 | 10 123 | 10 124 | 30 125 | 20 126 | 10 127 | 20 128 | 0 129 | 10 130 | 30 131 | 20 132 | 10 133 | 30 134 | 60 135 | 10 136 | 0 137 | 10 138 | 10 139 | 30 140 | 10 141 | 20 142 | 40 143 | 40 144 | 70 145 | 30 146 | 30 147 | 20 148 | 40 149 | 0 150 | 0 151 | 0 152 | 0 153 | 0 154 | 20 155 | 0 156 | 40 157 | 20 158 | 10 159 | 0 160 | 0 161 | 10 162 | 10 163 | 0 164 | 10 165 | 0 166 | 0 167 | 40 168 | 10 169 | 10 170 | 0 171 | 0 172 | 0 173 | 0 174 | 10 175 | 10 176 | 20 177 | 0 178 | 20 179 | 0 180 | 10 181 | 0 182 | 10 183 | 0 184 | 10 185 | 10 186 | 0 187 | 0 188 | 30 189 | 10 190 | 10 191 | 10 192 | 20 193 | 0 194 | 0 195 | 10 196 | 10 197 | 20 198 | 10 199 | 10 200 | 10 201 | 10 202 | 10 203 | 20 204 | 0 205 | 0 206 | 20 207 | 20 208 | 30 209 | 10 210 | 10 211 | 10 212 | 10 213 | 10 214 | 0 215 | 20 216 | 10 217 | 10 218 | 30 219 | 10 220 | 0 221 | 30 222 | 10 223 | 10 224 | 10 225 | 10 226 | 10 227 | 30 228 | 20 229 | 20 230 | 20 231 | 20 232 | 0 233 | 10 234 | 20 235 | 30 236 | 10 237 | 0 238 | 0 239 | 20 240 | 10 241 | 10 242 | 20 243 | 30 244 | 10 245 | 0 246 | 0 247 | 30 248 | 20 249 | 10 250 | 30 251 | 10 252 | 10 253 | 10 254 | 0 255 | 0 256 | 10 257 | 10 258 | 20 259 | 20 260 | 10 261 | 0 262 | 10 263 | 0 264 | 0 265 | 10 266 | 0 267 | 30 268 | 20 269 | 20 270 | 0 271 | 0 272 | 10 273 | 0 274 | 0 275 | 10 276 | 20 277 | 10 278 | 10 279 | 20 280 | 20 281 | 30 282 | 0 283 | 10 284 | 10 285 | 0 286 | 0 287 | 0 288 | 10 289 | 20 290 | 10 291 | 20 292 | 30 293 | 10 294 | 20 295 | 10 296 | 30 297 | 10 298 | 0 299 | 0 300 | 10 301 | 10 302 | 0 303 | 0 304 | 20 305 | 10 306 | 20 307 | 0 308 | 0 309 | 20 310 | 0 311 | 0 312 | 20 313 | 10 314 | 10 315 | 20 316 | 20 317 | 10 318 | 10 319 | 20 320 | 20 321 | 10 322 | 10 323
| 40 324 | 10 325 | 20 326 | 10 327 | 30 328 | 20 329 | 0 330 | 0 331 | 30 332 | 20 333 | 0 334 | 0 335 | 10 336 | 10 337 | 0 338 | 40 339 | 10 340 | 0 341 | 30 342 | 10 343 | 10 344 | 10 345 | 20 346 | 40 347 | 10 348 | 10 349 | 10 350 | 30 351 | 10 352 | 10 353 | 0 354 | 10 355 | 10 356 | 10 357 | 10 358 | 10 359 | 20 360 | 20 361 | 10 362 | 20 363 | 10 364 | 10 365 | 30 366 | 20 367 | 10 368 | 10 369 | 0 370 | 30 371 | 10 372 | 20 373 | 10 374 | 10 375 | 10 376 | 10 377 | 10 378 | 10 379 | 20 380 | 10 381 | 30 382 | 30 383 | 0 384 | 0 385 | 0 386 | 0 387 | 0 388 | 10 389 | 0 390 | 0 391 | 10 392 | 10 393 | 20 394 | 0 395 | 10 396 | 10 397 | 10 398 | 10 399 | 0 400 | 10 401 | 10 402 | 10 403 | 10 404 | 10 405 | 0 406 | 10 407 | 20 408 | 10 409 | 10 410 | 10 411 | 10 412 | 10 413 | 10 414 | 10 415 | 10 416 | 0 417 | 0 418 | 30 419 | 10 420 | 10 421 | 0 422 | 10 423 | 10 424 | 10 425 | 10 426 | 10 427 | 0 428 | 0 429 | 0 430 | 0 431 | 20 432 | 0 433 | 10 434 | 10 435 | 10 436 | 20 437 | 10 438 | 10 439 | 10 440 | 10 441 | 20 442 | 0 443 | 0 444 | 20 445 | 10 446 | 20 447 | 20 448 | 10 449 | 10 450 | 10 451 | 10 452 | 10 453 | 10 454 | 0 455 | 0 456 | 10 457 | 10 458 | 20 459 | 10 460 | 20 461 | 20 462 | 0 463 | 0 464 | 10 465 | 10 466 | 0 467 | 20 468 | 10 469 | 10 470 | 0 471 | 10 472 | 0 473 | 0 474 | 10 475 | 10 476 | 10 477 | 20 478 | 30 479 | 0 480 | 10 481 | 10 482 | 20 483 | 10 484 | 10 485 | 40 486 | 10 487 | 50 488 | 0 489 | 30 490 | 20 491 | 20 492 | 20 493 | 0 494 | 0 495 | 20 496 | 10 497 | 30 498 | 10 499 | 20 500 | 20 501 | 10 502 | 20 503 | 20 504 | 30 505 | 10 506 | 0 507 | 20 508 | 20 509 | 40 510 | 20 511 | 10 512 | 20 513 | 20 514 | 0 515 | 30 516 | 10 517 | 40 518 | 0 519 | 40 520 | 10 521 | 50 522 | 10 523 | 10 524 | 40 525 | 0 526 | 10 527 | 40 528 | 30 529 | 30 530 | 20 531 | 30 532 | 20 533 | 90 534 | 10 535 | 30 536 | 30 537 | 20 538 | 20 539 | 30 540 | 50 541 | 40 542 | 10 543 | 10 544 | 20 545 | 20 546 | 0 547 | 0 548 | 10 549 | 10 550 | 10 551 | 20 552 | 50 553 | 0 554 | 10 555 | 10 556 | 30 557 | 100 558 | 10 559 | 10 560 | 50 561 | 100 562 | 20 563 | 20 564 | 0 565 | 30 566 | 60 567 | 0 568 | 10 569 | 10 570 | 10 571 | 10 572 | 30 573 | 0 574 | 30 575 | 10 576 | 20 577 | 0 578 | 20 579 | 20 580 | 40 581 | 10 582 | 10 583 | 20 584 | 10 585 | 20 586 | 20 587 | 40 588 | 40 589 | 10 590 | 30 591 | 0 592 | 40 593 | 30 594 | 40 595 | 10 596 | 10 597 | 0 598 | 0 599 | 0 600 | 10 601 | 10 602 | 30 603 | 0 604 | 10 605 | 20 606 | 10 607 | 20 608 | 0 609 | 30 610 | 10 611 | 0 612 | 0 613 | 10 614 | 0 615 | 10 616 | 20 617 | 0 618 | 0 619 | 10 620 | 10 621 | 0 622 | 20 623 | 10 624 | 10 625 | 0 626 | 10 627 | 10 628 | 10 629 | 10 630 | 10 631 | 30 632 | 10 633 | 0 634 | 20 635 | 10 636 | 10 637 | 30 638 | 0 639 | 10 640 | 10 641 | 10 642 | 20 643 | 20 644 | 20 645 | 10 646 | 20 647 | 0 648 | 0 649 | 10 650 | 0 651 | 40 652 | 30 653 | 0 654 | 0 655 | 0 656 | 0 657 | 40 658 | 0 659 | 0 660 | 20 661 | 20 662 | 0 663 | 0 664 | 10 665 | 0 666 | 10 667 | 0 668 | 20 669 | 20 670 | 0 671 | 0 672 | 20 673 | 30 674 | 20 675 | 10 676 | 10 677 | 10 678 | 50 679 | 10 680 | 10 681 | 40 682 | 20 683 | 20 684 | 30 685 | 0 686 | 10 687 | 10 688 | 40 689 | 10 690 | 20 691 | 20 692 | 0 693 | 30 694 | 20 695 | 0 696 | 40 697 | 10 698 | 10 699 | 30 700 | 0 701 | 10 702 | 20 703 | 10 704 | 20 705 | 20 706 | 0 707 | 0 708 | 20 709 | 30 710 | 20 711 | 20 712 | 20 713 | 0 714 | 30 715 | 20 716 | 10 717 | 10 718 | 20 719 | 0 720 | 20 721 | 10 722 | 0 723 | 20 724 | 0 725 | 30 726 | 10 727 | 50 
728 | 0 729 | 40 730 | 10 731 | 10 732 | 0 733 | 10 734 | 10 735 | 20 736 | 0 737 | 10 738 | 0 739 | 20 740 | 10 741 | 20 742 | 0 743 | 50 744 | 20 745 | 50 746 | 20 747 | 50 748 | 0 749 | 0 750 | 10 751 | 10 752 | 20 753 | 20 754 | 20 755 | 30 756 | 30 757 | 20 758 | 50 759 | 0 760 | 20 761 | 20 762 | 30 763 | 10 764 | 10 765 | 10 766 | 0 767 | 30 768 | 30 769 | 10 770 | 40 771 | 30 772 | 10 773 | 10 774 | 10 775 | 10 776 | 30 777 | 20 778 | 10 779 | 0 780 | 10 781 | 20 782 | 20 783 | 20 784 | 40 785 | 40 786 | 10 787 | 10 788 | 10 789 | 20 790 | 20 791 | 0 792 | 30 793 | 0 794 | 30 795 | 0 796 | 30 797 | 0 798 | 10 799 | 20 800 | 10 801 | 30 802 | 10 803 | 10 804 | 10 805 | 0 806 | 0 807 | 0 808 | 0 809 | 0 810 | 20 811 | 60 812 | 10 813 | 0 814 | 0 815 | 10 816 | 0 817 | 0 818 | 0 819 | 0 820 | 10 821 | 20 822 | 10 823 | 0 824 | 0 825 | 10 826 | 30 827 | 10 828 | 20 829 | 0 830 | 20 831 | 10 832 | 20 833 | 10 834 | 10 835 | 0 836 | 10 837 | 10 838 | 0 839 | 10 840 | 0 841 | 20 842 | 10 843 | 20 844 | 0 845 | 10 846 | 10 847 | 10 848 | 10 849 | 0 850 | 10 851 | 0 852 | 10 853 | 20 854 | 0 855 | 10 856 | 10 857 | 10 858 | 10 859 | 20 860 | 60 861 | 0 862 | 0 863 | 20 864 | 10 865 | 10 866 | 0 867 | 10 868 | 20 869 | 0 870 | 10 871 | 10 872 | 0 873 | 10 874 | 10 875 | 10 876 | 10 877 | 0 878 | 10 879 | 10 880 | 0 881 | 10 882 | 10 883 | 30 884 | 10 885 | 0 886 | 0 887 | 10 888 | 0 889 | 0 890 | 20 891 | 10 892 | 10 893 | 0 894 | 0 895 | 0 896 | 20 897 | 20 898 | 10 899 | 10 900 | 0 901 | 0 902 | 10 903 | 20 904 | 10 905 | 10 906 | 10 907 | 10 908 | 0 909 | 30 910 | 10 911 | 10 912 | 0 913 | 10 914 | 10 915 | 10 916 | 10 917 | 10 918 | 20 919 | 0 920 | 10 921 | 0 922 | 0 923 | 0 924 | 40 925 | 0 926 | 0 927 | 10 928 | 10 929 | 20 930 | 10 931 | 10 932 | 10 933 | 20 934 | 10 935 | 20 936 | 20 937 | 0 938 | 10 939 | 0 940 | 0 941 | 0 942 | 20 943 | 40 944 | 0 945 | 10 946 | 10 947 | 0 948 | 0 949 | 10 950 | 0 951 | 10 952 | 10 953 | 0 954 | 10 955 | 0 956 | 10 957 | 10 958 | 0 959 | 10 960 | 10 961 | 10 962 | 10 963 | 10 964 | 20 965 | 0 966 | 10 967 | 0 968 | 0 969 | 40 970 | 10 971 | 10 972 | 0 973 | 30 974 | 20 975 | 30 976 | 10 977 | 10 978 | 10 979 | 30 980 | 0 981 | 10 982 | 30 983 | 10 984 | 10 985 | 10 986 | 20 987 | 10 988 | 20 989 | 20 990 | 10 991 | 20 992 | 20 993 | 30 994 | 20 995 | 30 996 | 10 997 | 20 998 | 20 999 | 30 1000 | 10 1001 | 20 1002 | 10 1003 | 10 1004 | 10 1005 | 20 1006 | 30 1007 | 70 1008 | 10 1009 | 10 1010 | 10 1011 | 10 1012 | 20 1013 | 10 1014 | 10 1015 | 20 1016 | 10 1017 | 20 1018 | 10 1019 | 20 1020 | 10 1021 | 0 1022 | 20 1023 | 10 1024 | 20 1025 | 10 1026 | 10 1027 | 30 1028 | 10 1029 | 0 1030 | 20 1031 | 0 1032 | 20 1033 | 10 1034 | 30 1035 | 10 1036 | 10 1037 | 10 1038 | 10 1039 | 20 1040 | 20 1041 | 20 1042 | 20 1043 | 60 1044 | 40 1045 | 10 1046 | 0 1047 | 10 1048 | 20 1049 | 10 1050 | 10 1051 | 0 1052 | 0 1053 | 10 1054 | 10 1055 | 10 1056 | 10 1057 | 20 1058 | 0 1059 | 20 1060 | 0 1061 | 10 1062 | 10 1063 | 10 1064 | 10 1065 | 20 1066 | 10 1067 | 20 1068 | 10 1069 | 20 1070 | 20 1071 | 10 1072 | 10 1073 | 10 1074 | 10 1075 | 0 1076 | 10 1077 | 0 1078 | 10 1079 | 20 1080 | 30 1081 | 30 1082 | 30 1083 | 0 1084 | 0 1085 | 20 1086 | 10 1087 | 0 1088 | 0 1089 | 10 1090 | 10 1091 | 10 1092 | 0 1093 | 0 1094 | 10 1095 | 10 1096 | 0 1097 | 0 1098 | 0 1099 | 10 1100 | 10 1101 | 0 1102 | 10 1103 | 10 1104 | 0 1105 | 0 1106 | 0 1107 | 10 1108 | 10 1109 | 20 1110 | 20 1111 | 10 1112 | 0 1113 | 10 1114 | 30 1115 | 0 1116 | 10 1117 | 10 1118 | 10 1119 | 10 1120 | 
20 1121 | 10 1122 | 40 1123 | 10 1124 | 10 1125 | 10 1126 | 10 1127 | 20 1128 | 30 1129 | 20 1130 | 20 1131 | 10 1132 | 40 1133 | 30 1134 | 20 1135 | 0 1136 | 10 1137 | 20 1138 | 0 1139 | 10 1140 | 0 1141 | 10 1142 | 10 1143 | 30 1144 | 0 1145 | 10 1146 | 0 1147 | 10 1148 | 10 1149 | 30 1150 | 0 1151 | 10 1152 | 60 1153 | 20 1154 | 60 1155 | 10 1156 | 10 1157 | 10 1158 | 20 1159 | 20 1160 | 30 1161 | 20 1162 | 0 1163 | 0 1164 | 0 1165 | 0 1166 | 0 1167 | 10 1168 | 20 1169 | 20 1170 | 0 1171 | 10 1172 | 10 1173 | 10 1174 | 30 1175 | 10 1176 | 40 1177 | 10 1178 | 10 1179 | 10 1180 | 10 1181 | 20 1182 | 10 1183 | 10 1184 | 10 1185 | 10 1186 | 20 1187 | 0 1188 | 10 1189 | 10 1190 | 0 1191 | 0 1192 | 70 1193 | 0 1194 | 10 1195 | 20 1196 | 0 1197 | 10 1198 | 10 1199 | 20 1200 | 20 1201 | 0 1202 | 30 1203 | 10 1204 | 10 1205 | 20 1206 | 60 1207 | 0 1208 | 20 1209 | 10 1210 | 0 1211 | 0 1212 | 30 1213 | 10 1214 | 30 1215 | 20 1216 | 0 1217 | 10 1218 | 10 1219 | 20 1220 | 10 1221 | 20 1222 | 10 1223 | 20 1224 | 10 1225 | 0 1226 | 0 1227 | 20 1228 | 20 1229 | 10 1230 | 40 1231 | 0 1232 | 10 1233 | 10 1234 | 0 1235 | 10 1236 | 20 1237 | 0 1238 | 10 1239 | 10 1240 | 30 1241 | 10 1242 | 10 1243 | 20 1244 | 30 1245 | 20 1246 | 10 1247 | 10 1248 | 10 1249 | 10 1250 | 40 1251 | 20 1252 | 20 1253 | 10 1254 | 70 1255 | 0 1256 | 10 1257 | 10 1258 | 10 1259 | 20 1260 | 0 1261 | 20 1262 | 10 1263 | 10 1264 | 40 1265 | 10 1266 | 50 1267 | 10 1268 | 10 1269 | 10 1270 | 30 1271 | 20 1272 | 10 1273 | 20 1274 | 10 1275 | 10 1276 | 10 1277 | 0 1278 | 0 1279 | 10 1280 | 0 1281 | 10 1282 | 20 1283 | 10 1284 | 0 1285 | 10 1286 | 20 1287 | 10 1288 | 10 1289 | 10 1290 | 10 1291 | 20 1292 | 0 1293 | 20 1294 | 10 1295 | 10 1296 | 40 1297 | 40 1298 | 10 1299 | 60 1300 | 30 1301 | 10 1302 | 0 1303 | 10 1304 | 10 1305 | 10 1306 | 0 1307 | 10 1308 | 20 1309 | 10 1310 | 0 1311 | 10 1312 | 30 1313 | 20 1314 | 0 1315 | 20 1316 | 20 1317 | 0 1318 | 10 1319 | 30 1320 | 30 1321 | 10 1322 | 10 1323 | 20 1324 | 0 1325 | 10 1326 | 40 1327 | 10 1328 | 20 1329 | 10 1330 | 30 1331 | 30 1332 | 10 1333 | 10 1334 | 10 1335 | 20 1336 | 0 1337 | 10 1338 | 10 1339 | 0 1340 | 20 1341 | 20 1342 | 0 1343 | 10 1344 | 10 1345 | 30 1346 | 30 1347 | 0 1348 | 40 1349 | 30 1350 | 20 1351 | 0 1352 | 0 1353 | 10 1354 | 0 1355 | 0 1356 | 10 1357 | 0 1358 | 10 1359 | 0 1360 | 10 1361 | 20 1362 | 10 1363 | 10 1364 | 20 1365 | 10 1366 | 0 1367 | 0 1368 | 0 1369 | 10 1370 | 30 1371 | 10 1372 | 20 1373 | 30 1374 | 10 1375 | 70 1376 | 10 1377 | 10 1378 | 20 1379 | 20 1380 | 40 1381 | 10 1382 | 10 1383 | 0 1384 | 60 1385 | 10 1386 | 50 1387 | 10 1388 | 10 1389 | 20 1390 | 10 1391 | 0 1392 | 10 1393 | 10 1394 | 20 1395 | 60 1396 | 10 1397 | 10 1398 | 20 1399 | 20 1400 | 40 1401 | 0 1402 | 10 1403 | 10 1404 | 10 1405 | 10 1406 | 10 1407 | 10 1408 | 20 1409 | 10 1410 | 30 1411 | 20 1412 | 10 1413 | 10 1414 | 10 1415 | 30 1416 | 0 1417 | 10 1418 | 20 1419 | 10 1420 | 0 1421 | 0 1422 | 10 1423 | 10 1424 | 20 1425 | 20 1426 | 10 1427 | 0 1428 | 0 1429 | 20 1430 | 10 1431 | 0 1432 | 30 1433 | 0 1434 | 10 1435 | 0 1436 | 20 1437 | 10 1438 | 20 1439 | 10 1440 | 0 1441 | 0 1442 | 0 1443 | 0 1444 | 20 1445 | 0 1446 | 20 1447 | 20 1448 | 0 1449 | 20 1450 | 10 1451 | 10 1452 | 0 1453 | 20 1454 | 10 1455 | 10 1456 | 10 1457 | 0 1458 | 0 1459 | 30 1460 | 0 1461 | 30 1462 | 0 1463 | 0 1464 | 0 1465 | 10 1466 | 20 1467 | 10 1468 | 0 1469 | 20 1470 | 10 1471 | 0 1472 | 10 1473 | 0 1474 | 10 1475 | 10 1476 | 30 1477 | 20 1478 | 10 1479 | 0 1480 | 20 1481 | 0 1482 | 0 1483 | 10 1484 
| 20 1485 | 10 1486 | 30 1487 | 10 1488 | 10 1489 | 10 1490 | 0 1491 | 0 1492 | 0 1493 | 0 1494 | 10 1495 | 20 1496 | 0 1497 | 30 1498 | 0 1499 | 10 1500 | 20 1501 | 0 1502 | 20 1503 | 10 1504 | 0 1505 | 30 1506 | 20 1507 | 10 1508 | 10 1509 | 10 1510 | 10 1511 | 10 1512 | 20 1513 | 20 1514 | 10 1515 | 50 1516 | 0 1517 | 20 1518 | 10 1519 | 30 1520 | 0 1521 | 30 1522 | 10 1523 | 0 1524 | 0 1525 | 20 1526 | 20 1527 | 0 1528 | 10 1529 | 10 1530 | 20 1531 | 20 1532 | 0 1533 | 0 1534 | 0 1535 | 20 1536 | 0 1537 | 10 1538 | 30 1539 | 0 1540 | 0 1541 | 0 1542 | 20 1543 | 30 1544 | 60 1545 | 20 1546 | 20 1547 | 0 1548 | 20 1549 | 0 1550 | 10 1551 | 0 1552 | 10 1553 | 10 1554 | 0 1555 | 10 1556 | 10 1557 | 0 1558 | 40 1559 | 30 1560 | 0 1561 | 20 1562 | 40 1563 | 30 1564 | 30 1565 | 0 1566 | 10 1567 | 20 1568 | 10 1569 | 0 1570 | 20 1571 | 10 1572 | 10 1573 | 10 1574 | 10 1575 | 0 1576 | 10 1577 | 0 1578 | 0 1579 | 10 1580 | 10 1581 | 0 1582 | 10 1583 | 10 1584 | 0 1585 | 0 1586 | 10 1587 | 10 1588 | 0 1589 | 10 1590 | 10 1591 | 10 1592 | 10 1593 | 10 1594 | 0 1595 | 0 1596 | 10 1597 | 10 1598 | 30 1599 | 10 1600 | 30 1601 | 0 1602 | 0 1603 | 10 1604 | 20 1605 | 0 1606 | 10 1607 | 40 1608 | 10 1609 | 10 1610 | 0 1611 | 10 1612 | 20 1613 | 0 1614 | 40 1615 | 20 1616 | 20 1617 | 10 1618 | 0 1619 | 10 1620 | 20 1621 | 0 1622 | 40 1623 | 10 1624 | 10 1625 | 20 1626 | 0 1627 | 20 1628 | 10 1629 | 0 1630 | 10 1631 | 20 1632 | 0 1633 | 20 1634 | 10 1635 | 10 1636 | 30 1637 | 20 1638 | 10 1639 | 10 1640 | 0 1641 | 0 1642 | 0 1643 | 0 1644 | 10 1645 | 10 1646 | 20 1647 | 10 1648 | 0 1649 | 10 1650 | 30 1651 | 30 1652 | 10 1653 | 10 1654 | 20 1655 | 10 1656 | 10 1657 | 10 1658 | 60 1659 | 0 1660 | 10 1661 | 20 1662 | 0 1663 | 0 1664 | 20 1665 | 10 1666 | 20 1667 | 20 1668 | 0 1669 | 40 1670 | 10 1671 | 0 1672 | 40 1673 | 10 1674 | 10 1675 | 0 1676 | 20 1677 | 10 1678 | 0 1679 | 20 1680 | 0 1681 | 10 1682 | 10 1683 | 10 1684 | 10 1685 | 10 1686 | 10 1687 | 20 1688 | 20 1689 | 10 1690 | 10 1691 | 20 1692 | 10 1693 | 30 1694 | 10 1695 | 10 1696 | 20 1697 | 0 1698 | 10 1699 | 10 1700 | 0 1701 | 10 1702 | 10 1703 | 0 1704 | 40 1705 | 20 1706 | 60 1707 | 20 1708 | 10 1709 | 0 1710 | 0 1711 | 40 1712 | 10 1713 | 30 1714 | 10 1715 | 20 1716 | 20 1717 | 10 1718 | 60 1719 | 30 1720 | 50 1721 | 20 1722 | 50 1723 | 30 1724 | 10 1725 | 50 1726 | 80 1727 | 10 1728 | 50 1729 | 30 1730 | 20 1731 | 10 1732 | 10 1733 | 0 1734 | 20 1735 | 10 1736 | 20 1737 | 20 1738 | 10 1739 | 30 1740 | 30 1741 | 0 1742 | 50 1743 | 20 1744 | 20 1745 | 10 1746 | 20 1747 | 10 1748 | 70 1749 | 20 1750 | 30 1751 | 20 1752 | 20 1753 | 0 1754 | 50 1755 | 10 1756 | 30 1757 | 10 1758 | 20 1759 | 20 1760 | 30 1761 | 10 1762 | 40 1763 | 20 1764 | 0 1765 | 50 1766 | 10 1767 | 20 1768 | 20 1769 | 0 1770 | 20 1771 | 10 1772 | 20 1773 | 80 1774 | 0 1775 | 70 1776 | 40 1777 | 0 1778 | 10 1779 | 0 1780 | 40 1781 | 0 1782 | 10 1783 | 10 1784 | 0 1785 | 10 1786 | 10 1787 | 10 1788 | 30 1789 | 20 1790 | 20 1791 | 0 1792 | 40 1793 | 30 1794 | 70 1795 | 10 1796 | 10 1797 | 20 1798 | 0 1799 | 40 1800 | 60 1801 | 0 1802 | 10 1803 | 0 1804 | 0 1805 | 10 1806 | 0 1807 | 10 1808 | 10 1809 | 50 1810 | 10 1811 | 10 1812 | 10 1813 | 10 1814 | 10 1815 | 0 1816 | 10 1817 | 20 1818 | 30 1819 | 10 1820 | 10 1821 | 0 1822 | 10 1823 | 10 1824 | 10 1825 | 10 1826 | 10 1827 | 10 1828 | 10 1829 | 30 1830 | 10 1831 | 20 1832 | 0 1833 | 10 1834 | 10 1835 | 20 1836 | 20 1837 | 20 1838 | 0 1839 | 30 1840 | 20 1841 | 0 1842 | 0 1843 | 0 1844 | 30 1845 | 0 1846 | 20 1847 | 10 1848 | 0 
1849 | 20 1850 | 10 1851 | 0 1852 | 0 1853 | 30 1854 | 40 1855 | 0 1856 | 10 1857 | 10 1858 | 10 1859 | 40 1860 | 0 1861 | 0 1862 | 10 1863 | 0 1864 | 20 1865 | 10 1866 | 10 1867 | 10 1868 | 0 1869 | 0 1870 | 30 1871 | 30 1872 | 10 1873 | 20 1874 | 10 1875 | 10 1876 | 20 1877 | 20 1878 | 10 1879 | 10 1880 | 10 1881 | 30 1882 | 10 1883 | 10 1884 | 40 1885 | 20 1886 | 20 1887 | 10 1888 | 30 1889 | 0 1890 | 0 1891 | 10 1892 | 0 1893 | 10 1894 | 0 1895 | 30 1896 | 10 1897 | 0 1898 | 0 1899 | 10 1900 | 10 1901 | 10 1902 | 20 1903 | 10 1904 | 30 1905 | 40 1906 | 10 1907 | 10 1908 | 10 1909 | 10 1910 | 10 1911 | 0 1912 | 0 1913 | 0 1914 | 0 1915 | 0 1916 | 10 1917 | 10 1918 | 0 1919 | 10 1920 | 10 1921 | 0 1922 | 20 1923 | 20 1924 | 10 1925 | 10 1926 | 40 1927 | 10 1928 | 10 1929 | 20 1930 | 10 1931 | 20 1932 | 0 1933 | 0 1934 | 20 1935 | 0 1936 | 20 1937 | 0 1938 | 10 1939 | 10 1940 | 10 1941 | 10 1942 | 40 1943 | 10 1944 | 0 1945 | 40 1946 | 10 1947 | 0 1948 | 10 1949 | 10 1950 | 30 1951 | 20 1952 | 10 1953 | 0 1954 | 10 1955 | 30 1956 | 20 1957 | 10 1958 | 10 1959 | 20 1960 | 0 1961 | 30 1962 | 30 1963 | 10 1964 | 20 1965 | 10 1966 | 20 1967 | 0 1968 | 30 1969 | 20 1970 | 0 1971 | 0 1972 | 10 1973 | 0 1974 | 10 1975 | 0 1976 | 30 1977 | 10 1978 | 70 1979 | 20 1980 | 10 1981 | 10 1982 | 10 1983 | 10 1984 | 30 1985 | 0 1986 | 20 1987 | 10 1988 | 0 1989 | 0 1990 | 20 1991 | 0 1992 | 10 1993 | 10 1994 | 10 1995 | 40 1996 | 40 1997 | 0 1998 | 0 1999 | 0 2000 | 40 2001 | 0 2002 | 20 2003 | 10 2004 | 10 2005 | 10 2006 | 0 2007 | 20 2008 | 10 2009 | 20 2010 | 10 2011 | 20 2012 | 0 2013 | 0 2014 | 10 2015 | 10 2016 | 10 2017 | 10 2018 | 20 2019 | 0 2020 | 10 2021 | 0 2022 | 10 2023 | 0 2024 | 0 2025 | 10 2026 | 20 2027 | 10 2028 | 10 2029 | 0 2030 | 0 2031 | 20 2032 | 10 2033 | 10 2034 | 10 2035 | 10 2036 | 0 2037 | 10 2038 | 10 2039 | 0 2040 | 10 2041 | 20 2042 | 40 2043 | 0 2044 | 10 2045 | 10 2046 | 20 2047 | 10 2048 | 10 2049 | 10 2050 | 20 2051 | 10 2052 | 10 2053 | 0 2054 | 10 2055 | 10 2056 | 0 2057 | 10 2058 | 10 2059 | 20 2060 | 10 2061 | 20 2062 | 10 2063 | 20 2064 | 30 2065 | 10 2066 | 10 2067 | 0 2068 | 0 2069 | 20 2070 | 40 2071 | 0 2072 | 10 2073 | 0 2074 | 10 2075 | 10 2076 | 10 2077 | 0 2078 | 10 2079 | 10 2080 | 0 2081 | 10 2082 | 10 2083 | 10 2084 | 10 2085 | 10 2086 | 0 2087 | 0 2088 | 20 2089 | 20 2090 | 10 2091 | 0 2092 | 20 2093 | 30 2094 | 10 2095 | 20 2096 | 0 2097 | 0 2098 | 10 2099 | 20 2100 | 10 2101 | 40 2102 | 20 2103 | 10 2104 | 20 2105 | 10 2106 | 20 2107 | 30 2108 | 100 2109 | 0 2110 | 30 2111 | 10 2112 | 30 2113 | 50 2114 | 50 2115 | 30 2116 | 0 2117 | 0 2118 | 20 2119 | 10 2120 | 20 2121 | 20 2122 | 30 2123 | 10 2124 | 30 2125 | 0 2126 | 10 2127 | 30 2128 | 20 2129 | 40 2130 | 20 2131 | 0 2132 | 10 2133 | 10 2134 | 10 2135 | 20 2136 | 30 2137 | 20 2138 | 30 2139 | 10 2140 | 40 2141 | 10 2142 | 20 2143 | 60 2144 | 20 2145 | 30 2146 | 40 2147 | 10 2148 | 0 2149 | 10 2150 | 20 2151 | 10 2152 | 40 2153 | 20 2154 | 20 2155 | 10 2156 | 40 2157 | 30 2158 | 0 2159 | 20 2160 | 0 2161 | 20 2162 | 10 2163 | 30 2164 | 0 2165 | 10 2166 | 0 2167 | 10 2168 | 30 2169 | 10 2170 | 30 2171 | 0 2172 | 20 2173 | 0 2174 | 10 2175 | 20 2176 | 20 2177 | 0 2178 | 50 2179 | 10 2180 | 20 2181 | 80 2182 | 20 2183 | 0 2184 | 20 2185 | 50 2186 | 20 2187 | 30 2188 | 10 2189 | 10 2190 | 10 2191 | 30 2192 | 20 2193 | 40 2194 | 50 2195 | 50 2196 | 20 2197 | 10 2198 | 30 2199 | 10 2200 | 10 2201 | 10 2202 | 10 2203 | 10 2204 | 10 2205 | 50 2206 | 10 2207 | 10 2208 | 30 2209 | 10 2210 | 10 2211 | 10 2212 
| 20 2213 | 10 2214 | 0 2215 | 0 2216 | 10 2217 | 10 2218 | 20 2219 | 10 2220 | 20 2221 | 0 2222 | 30 2223 | 10 2224 | 50 2225 | 0 2226 | 0 2227 | 0 2228 | 0 2229 | 0 2230 | 40 2231 | 0 2232 | 40 2233 | 20 2234 | 20 2235 | 20 2236 | 0 2237 | 20 2238 | 20 2239 | 10 2240 | 10 2241 | 0 2242 | 10 2243 | 20 2244 | 20 2245 | 0 2246 | 20 2247 | 10 2248 | 0 2249 | 20 2250 | 40 2251 | 30 2252 | 40 2253 | 100 2254 | 10 2255 | 80 2256 | 0 2257 | 10 2258 | 10 2259 | 10 2260 | 10 2261 | 10 2262 | 10 2263 | 30 2264 | 20 2265 | 40 2266 | 50 2267 | 40 2268 | 80 2269 | 20 2270 | 40 2271 | 50 2272 | 70 2273 | 0 2274 | 10 2275 | 0 2276 | 20 2277 | 30 2278 | 10 2279 | 20 2280 | 0 2281 | 40 2282 | 10 2283 | 20 2284 | 10 2285 | 20 2286 | 10 2287 | 0 2288 | 20 2289 | 20 2290 | 10 2291 | 10 2292 | 10 2293 | 0 2294 | 10 2295 | 10 2296 | 0 2297 | 0 2298 | 10 2299 | 20 2300 | 50 2301 | 0 2302 | 0 2303 | 0 2304 | 10 2305 | 0 2306 | 0 2307 | 10 2308 | 10 2309 | 20 2310 | 20 2311 | 20 2312 | 10 2313 | 10 2314 | 10 2315 | 10 2316 | 10 2317 | 0 2318 | 10 2319 | 10 2320 | 20 2321 | 30 2322 | 10 2323 | 10 2324 | 0 2325 | 20 2326 | 60 2327 | 10 2328 | 20 2329 | 0 2330 | 30 2331 | 10 2332 | 10 2333 | 0 2334 | 20 2335 | 20 2336 | 0 2337 | 0 2338 | 10 2339 | 0 2340 | 10 2341 | 10 2342 | 20 2343 | 0 2344 | 10 2345 | 50 2346 | 50 2347 | 40 2348 | 50 2349 | 10 2350 | 20 2351 | 30 2352 | 20 2353 | 20 2354 | 0 2355 | 0 2356 | 0 2357 | 0 2358 | 20 2359 | 20 2360 | 10 2361 | 10 2362 | 10 2363 | 20 2364 | 10 2365 | 10 2366 | 0 2367 | 0 2368 | 0 2369 | 10 2370 | 20 2371 | 30 2372 | 10 2373 | 20 2374 | 10 2375 | 0 2376 | 10 2377 | 30 2378 | 40 2379 | 20 2380 | 10 2381 | 10 2382 | 10 2383 | 30 2384 | 10 2385 | 10 2386 | 10 2387 | 10 2388 | 20 2389 | 10 2390 | 20 2391 | 30 2392 | 20 2393 | 0 2394 | 20 2395 | 20 2396 | 10 2397 | 0 2398 | 0 2399 | 20 2400 | 0 2401 | 10 2402 | 10 2403 | 20 2404 | 40 2405 | 30 2406 | 20 2407 | 10 2408 | 10 2409 | 20 2410 | 30 2411 | 20 2412 | 10 2413 | 0 2414 | 20 2415 | 20 2416 | 10 2417 | 0 2418 | 30 2419 | 10 2420 | 20 2421 | 10 2422 | 20 2423 | 0 2424 | 0 2425 | 0 2426 | 30 2427 | 30 2428 | 0 2429 | 20 2430 | 10 2431 | 10 2432 | 0 2433 | 0 2434 | 0 2435 | 20 2436 | 10 2437 | 40 2438 | 10 2439 | 10 2440 | 10 2441 | 0 2442 | 0 2443 | 10 2444 | 10 2445 | 10 2446 | -------------------------------------------------------------------------------- /example/ted.sys2.eng.senttag: -------------------------------------------------------------------------------- 1 | 20 2 | 10 3 | 20 4 | 20 5 | 10 6 | 0 7 | 20 8 | 70 9 | 0 10 | 10 11 | 10 12 | 10 13 | 30 14 | 20 15 | 10 16 | 20 17 | 20 18 | 20 19 | 10 20 | 0 21 | 0 22 | 10 23 | 0 24 | 0 25 | 10 26 | 20 27 | 0 28 | 30 29 | 10 30 | 20 31 | 40 32 | 50 33 | 0 34 | 20 35 | 10 36 | 20 37 | 20 38 | 20 39 | 30 40 | 30 41 | 20 42 | 30 43 | 0 44 | 0 45 | 10 46 | 0 47 | 10 48 | 10 49 | 10 50 | 0 51 | 0 52 | 20 53 | 0 54 | 10 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 0 61 | 0 62 | 10 63 | 0 64 | 10 65 | 0 66 | 0 67 | 0 68 | 0 69 | 10 70 | 10 71 | 20 72 | 20 73 | 60 74 | 20 75 | 10 76 | 10 77 | 10 78 | 30 79 | 10 80 | 20 81 | 30 82 | 20 83 | 10 84 | 20 85 | 0 86 | 20 87 | 40 88 | 10 89 | 10 90 | 10 91 | 10 92 | 10 93 | 10 94 | 0 95 | 0 96 | 10 97 | 0 98 | 10 99 | 0 100 | 10 101 | 10 102 | 10 103 | 20 104 | 0 105 | 0 106 | 10 107 | 20 108 | 0 109 | 40 110 | 10 111 | 10 112 | 20 113 | 10 114 | 10 115 | 30 116 | 10 117 | 20 118 | 30 119 | 10 120 | 20 121 | 10 122 | 10 123 | 10 124 | 20 125 | 20 126 | 0 127 | 20 128 | 0 129 | 20 130 | 30 131 | 20 132 | 10 133 | 30 134 | 50 135 | 20 136 
| 0 137 | 10 138 | 10 139 | 30 140 | 10 141 | 20 142 | 30 143 | 40 144 | 70 145 | 20 146 | 20 147 | 20 148 | 40 149 | 0 150 | 0 151 | 0 152 | 0 153 | 0 154 | 20 155 | 10 156 | 40 157 | 20 158 | 10 159 | 0 160 | 0 161 | 10 162 | 10 163 | 0 164 | 10 165 | 0 166 | 0 167 | 40 168 | 10 169 | 10 170 | 0 171 | 0 172 | 0 173 | 10 174 | 10 175 | 10 176 | 20 177 | 0 178 | 20 179 | 0 180 | 10 181 | 10 182 | 10 183 | 0 184 | 10 185 | 0 186 | 0 187 | 0 188 | 20 189 | 0 190 | 10 191 | 10 192 | 20 193 | 10 194 | 10 195 | 10 196 | 10 197 | 20 198 | 10 199 | 10 200 | 10 201 | 10 202 | 10 203 | 10 204 | 10 205 | 0 206 | 20 207 | 20 208 | 30 209 | 10 210 | 10 211 | 10 212 | 10 213 | 10 214 | 0 215 | 10 216 | 10 217 | 20 218 | 30 219 | 10 220 | 0 221 | 20 222 | 10 223 | 10 224 | 10 225 | 10 226 | 10 227 | 20 228 | 30 229 | 10 230 | 20 231 | 20 232 | 0 233 | 10 234 | 30 235 | 30 236 | 10 237 | 0 238 | 0 239 | 20 240 | 10 241 | 10 242 | 20 243 | 30 244 | 10 245 | 0 246 | 0 247 | 30 248 | 20 249 | 10 250 | 30 251 | 10 252 | 10 253 | 10 254 | 0 255 | 0 256 | 10 257 | 10 258 | 20 259 | 20 260 | 10 261 | 0 262 | 10 263 | 0 264 | 10 265 | 10 266 | 0 267 | 30 268 | 10 269 | 10 270 | 0 271 | 0 272 | 10 273 | 0 274 | 10 275 | 0 276 | 30 277 | 10 278 | 10 279 | 20 280 | 20 281 | 20 282 | 0 283 | 10 284 | 10 285 | 0 286 | 0 287 | 0 288 | 10 289 | 20 290 | 10 291 | 20 292 | 30 293 | 10 294 | 10 295 | 20 296 | 20 297 | 10 298 | 10 299 | 0 300 | 10 301 | 10 302 | 0 303 | 0 304 | 20 305 | 10 306 | 20 307 | 0 308 | 0 309 | 20 310 | 0 311 | 0 312 | 20 313 | 10 314 | 10 315 | 20 316 | 10 317 | 10 318 | 10 319 | 20 320 | 10 321 | 10 322 | 10 323 | 30 324 | 10 325 | 20 326 | 10 327 | 30 328 | 20 329 | 0 330 | 0 331 | 20 332 | 20 333 | 0 334 | 0 335 | 10 336 | 10 337 | 10 338 | 30 339 | 10 340 | 0 341 | 30 342 | 10 343 | 10 344 | 10 345 | 20 346 | 40 347 | 10 348 | 10 349 | 10 350 | 30 351 | 10 352 | 0 353 | 10 354 | 10 355 | 10 356 | 10 357 | 10 358 | 10 359 | 20 360 | 20 361 | 10 362 | 20 363 | 0 364 | 10 365 | 20 366 | 10 367 | 10 368 | 0 369 | 0 370 | 30 371 | 10 372 | 20 373 | 10 374 | 0 375 | 10 376 | 10 377 | 10 378 | 10 379 | 20 380 | 0 381 | 30 382 | 30 383 | 0 384 | 0 385 | 10 386 | 0 387 | 0 388 | 0 389 | 10 390 | 0 391 | 10 392 | 20 393 | 20 394 | 0 395 | 10 396 | 10 397 | 10 398 | 0 399 | 0 400 | 10 401 | 10 402 | 0 403 | 10 404 | 10 405 | 10 406 | 10 407 | 10 408 | 10 409 | 10 410 | 10 411 | 10 412 | 10 413 | 10 414 | 10 415 | 10 416 | 0 417 | 0 418 | 20 419 | 10 420 | 10 421 | 0 422 | 10 423 | 10 424 | 0 425 | 10 426 | 0 427 | 0 428 | 0 429 | 0 430 | 10 431 | 10 432 | 0 433 | 10 434 | 10 435 | 10 436 | 20 437 | 10 438 | 10 439 | 10 440 | 10 441 | 20 442 | 0 443 | 0 444 | 20 445 | 10 446 | 20 447 | 10 448 | 10 449 | 10 450 | 10 451 | 10 452 | 10 453 | 10 454 | 10 455 | 0 456 | 10 457 | 10 458 | 20 459 | 10 460 | 20 461 | 20 462 | 0 463 | 0 464 | 10 465 | 10 466 | 0 467 | 10 468 | 10 469 | 10 470 | 0 471 | 10 472 | 0 473 | 0 474 | 10 475 | 10 476 | 10 477 | 20 478 | 30 479 | 0 480 | 10 481 | 10 482 | 20 483 | 10 484 | 10 485 | 40 486 | 10 487 | 50 488 | 0 489 | 30 490 | 20 491 | 10 492 | 20 493 | 0 494 | 0 495 | 20 496 | 20 497 | 30 498 | 10 499 | 20 500 | 20 501 | 20 502 | 20 503 | 20 504 | 40 505 | 10 506 | 0 507 | 20 508 | 20 509 | 40 510 | 20 511 | 0 512 | 10 513 | 10 514 | 0 515 | 30 516 | 10 517 | 40 518 | 0 519 | 40 520 | 10 521 | 40 522 | 10 523 | 10 524 | 40 525 | 0 526 | 20 527 | 40 528 | 40 529 | 20 530 | 10 531 | 30 532 | 10 533 | 70 534 | 20 535 | 30 536 | 20 537 | 20 538 | 20 539 | 30 540 | 50 541 | 30 
542 | 10 543 | 10 544 | 30 545 | 20 546 | 0 547 | 0 548 | 10 549 | 10 550 | 10 551 | 10 552 | 40 553 | 10 554 | 10 555 | 10 556 | 30 557 | 50 558 | 10 559 | 10 560 | 60 561 | 50 562 | 10 563 | 20 564 | 0 565 | 30 566 | 60 567 | 0 568 | 20 569 | 10 570 | 10 571 | 10 572 | 20 573 | 0 574 | 30 575 | 10 576 | 20 577 | 0 578 | 20 579 | 20 580 | 40 581 | 0 582 | 10 583 | 20 584 | 10 585 | 20 586 | 40 587 | 50 588 | 40 589 | 20 590 | 30 591 | 0 592 | 50 593 | 20 594 | 40 595 | 10 596 | 10 597 | 0 598 | 0 599 | 0 600 | 10 601 | 10 602 | 30 603 | 0 604 | 20 605 | 20 606 | 10 607 | 10 608 | 0 609 | 20 610 | 10 611 | 0 612 | 0 613 | 10 614 | 0 615 | 10 616 | 20 617 | 0 618 | 0 619 | 10 620 | 10 621 | 0 622 | 30 623 | 0 624 | 10 625 | 0 626 | 10 627 | 10 628 | 10 629 | 10 630 | 10 631 | 30 632 | 10 633 | 0 634 | 20 635 | 10 636 | 10 637 | 30 638 | 0 639 | 10 640 | 10 641 | 10 642 | 20 643 | 20 644 | 10 645 | 10 646 | 20 647 | 0 648 | 0 649 | 10 650 | 0 651 | 40 652 | 20 653 | 0 654 | 0 655 | 0 656 | 0 657 | 40 658 | 0 659 | 0 660 | 20 661 | 20 662 | 0 663 | 10 664 | 10 665 | 0 666 | 10 667 | 0 668 | 20 669 | 20 670 | 0 671 | 0 672 | 20 673 | 40 674 | 20 675 | 10 676 | 10 677 | 10 678 | 50 679 | 0 680 | 10 681 | 40 682 | 20 683 | 20 684 | 30 685 | 10 686 | 10 687 | 0 688 | 40 689 | 10 690 | 30 691 | 20 692 | 0 693 | 30 694 | 20 695 | 0 696 | 30 697 | 10 698 | 10 699 | 40 700 | 0 701 | 10 702 | 20 703 | 10 704 | 20 705 | 20 706 | 0 707 | 0 708 | 20 709 | 30 710 | 10 711 | 10 712 | 20 713 | 0 714 | 30 715 | 20 716 | 10 717 | 10 718 | 20 719 | 0 720 | 20 721 | 10 722 | 0 723 | 20 724 | 0 725 | 20 726 | 20 727 | 40 728 | 0 729 | 30 730 | 10 731 | 10 732 | 0 733 | 10 734 | 10 735 | 20 736 | 0 737 | 20 738 | 0 739 | 10 740 | 10 741 | 20 742 | 0 743 | 50 744 | 10 745 | 50 746 | 20 747 | 50 748 | 0 749 | 0 750 | 10 751 | 10 752 | 20 753 | 20 754 | 20 755 | 30 756 | 40 757 | 20 758 | 60 759 | 0 760 | 20 761 | 20 762 | 30 763 | 10 764 | 10 765 | 10 766 | 10 767 | 30 768 | 30 769 | 10 770 | 50 771 | 30 772 | 0 773 | 10 774 | 10 775 | 10 776 | 20 777 | 20 778 | 10 779 | 0 780 | 10 781 | 20 782 | 20 783 | 20 784 | 50 785 | 40 786 | 10 787 | 10 788 | 10 789 | 10 790 | 20 791 | 0 792 | 30 793 | 0 794 | 30 795 | 0 796 | 40 797 | 0 798 | 10 799 | 20 800 | 10 801 | 20 802 | 20 803 | 10 804 | 0 805 | 10 806 | 0 807 | 0 808 | 0 809 | 0 810 | 20 811 | 60 812 | 10 813 | 0 814 | 10 815 | 10 816 | 10 817 | 0 818 | 0 819 | 10 820 | 10 821 | 30 822 | 10 823 | 0 824 | 0 825 | 20 826 | 30 827 | 10 828 | 20 829 | 10 830 | 10 831 | 10 832 | 20 833 | 10 834 | 10 835 | 0 836 | 10 837 | 10 838 | 0 839 | 10 840 | 0 841 | 20 842 | 10 843 | 20 844 | 0 845 | 10 846 | 10 847 | 10 848 | 10 849 | 10 850 | 10 851 | 0 852 | 10 853 | 30 854 | 0 855 | 10 856 | 10 857 | 10 858 | 10 859 | 20 860 | 50 861 | 0 862 | 0 863 | 20 864 | 10 865 | 10 866 | 0 867 | 10 868 | 20 869 | 0 870 | 10 871 | 10 872 | 0 873 | 10 874 | 10 875 | 10 876 | 10 877 | 0 878 | 0 879 | 10 880 | 0 881 | 10 882 | 10 883 | 20 884 | 10 885 | 10 886 | 10 887 | 20 888 | 0 889 | 0 890 | 10 891 | 10 892 | 20 893 | 0 894 | 10 895 | 0 896 | 20 897 | 20 898 | 10 899 | 10 900 | 0 901 | 0 902 | 10 903 | 20 904 | 10 905 | 10 906 | 0 907 | 10 908 | 0 909 | 20 910 | 0 911 | 10 912 | 0 913 | 10 914 | 20 915 | 10 916 | 10 917 | 0 918 | 20 919 | 0 920 | 10 921 | 0 922 | 0 923 | 0 924 | 40 925 | 0 926 | 0 927 | 0 928 | 10 929 | 20 930 | 10 931 | 10 932 | 10 933 | 20 934 | 10 935 | 20 936 | 20 937 | 0 938 | 10 939 | 0 940 | 0 941 | 0 942 | 20 943 | 30 944 | 10 945 | 10 946 | 0 947 | 0 948 | 0 
[... lines 949–2445 of this example data file omitted: one integer per line, multiples of 10 in the range 0–80 ...]
-------------------------------------------------------------------------------- /pytest.ini: --------------------------------------------------------------------------------

[pytest]
filterwarnings =
    ignore::DeprecationWarning:nltk.*

-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------

nltk>=3.2
numpy
matplotlib
absl-py
sacrebleu

-------------------------------------------------------------------------------- /scripts/count.py: --------------------------------------------------------------------------------

# Counts word frequencies in tokenized text read from stdin and prints them as
# 'word<TAB>count', most frequent first.

from collections import defaultdict
import sys

cnts = defaultdict(int)
for line in sys.stdin:
  for word in line.strip().split():
    cnts[word] += 1

for k, v in sorted(cnts.items(), key=lambda x: -x[1]):
  print(f'{k}\t{v}')

-------------------------------------------------------------------------------- /scripts/interleave.py: --------------------------------------------------------------------------------

# A simple script to interleave the lines of multiple files (e.g. a reference
# and several system outputs). It can be used like:
#   python interleave.py ref.txt sys1.txt sys2.txt

import sys
import itertools

filenames = sys.argv[1:]
files = [open(x, 'r') for x in filenames]

# Pad shorter files with empty lines so the groups stay aligned even when the
# files differ in length.
for lines in itertools.zip_longest(*files, fillvalue=''):
  for line in lines:
    print(line.strip('\n'))
  print()
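A plausible way to use these two helpers on the shipped example data (the file paths below are illustrative, and it is an assumption that the repo's counts file was actually generated this way):

  $ python scripts/count.py < example/ted.train.eng > example/ted.train.counts
  $ python scripts/interleave.py example/ted.ref.eng example/ted.sys1.eng example/ted.sys2.eng | head

The first command builds a word-frequency file of the kind the frequency bucketers consume; the second prints each reference line followed by the corresponding line from each system, with a blank line between sentence groups for easy side-by-side reading.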
-------------------------------------------------------------------------------- /scripts/postag.py: --------------------------------------------------------------------------------

# A simple script to POS tag already-tokenized English using NLTK. To run it, just do:
#   $ python postag.py < file.eng > file.eng.tag
# You may need to install the NLTK POS tagger if you haven't already; if it is
# missing, the first run will raise an error telling you how to download it.

import nltk
import sys

for line in sys.stdin:
  text = line.strip('\n').split(' ')
  print(' '.join([x[1] for x in nltk.pos_tag(text)]))

-------------------------------------------------------------------------------- /scripts/relativepositiontag.py: --------------------------------------------------------------------------------

# Tags each token of a tokenized sentence (read from stdin) with its relative
# position in the sentence, as a fraction between 0 and 1.

import sys

for line in sys.stdin:
  line = line.strip('\n').split(' ')
  if len(line) == 1:
    print('0')
  else:
    labels = [f'{float(i)/(len(line)-1):.4f}' for i in range(len(line))]
    print(' '.join(labels))
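Concretely, the i-th of n tokens gets the label i/(n-1) formatted to four decimal places, so a five-token line maps to evenly spaced labels:

  $ echo 'a b c d e' | python scripts/relativepositiontag.py
  0.0000 0.2500 0.5000 0.7500 1.0000

A single-token line gets the label 0 (avoiding the division by zero). These numeric tags are the kind of per-word labels a numeric-label bucketer can cut into ranges; the .rptag files under example/ were presumably generated with this script.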
"example") 26 | ref_file = os.path.join(example_path, "ted.ref.detok.eng") 27 | out1_file = os.path.join(example_path, "ted.sys1.detok.eng") 28 | out2_file = os.path.join(example_path, "ted.sys2.detok.eng") 29 | return [load_tokens(x) for x in (ref_file, out1_file, out2_file)] 30 | 31 | 32 | class TestScoreCache(unittest.TestCase): 33 | 34 | @classmethod 35 | def setUpClass(self): 36 | self.ref, self.out1, self.out2 = _get_example_data() 37 | 38 | def test_score_cache(self): 39 | cached_stats1 = compare_mt_main.generate_score_report(self.ref, [self.out1], to_cache=True) 40 | cached_stats2 = compare_mt_main.generate_score_report(self.ref, [self.out2], to_cache=True) 41 | self.assertTrue('scores' in cached_stats1 and 'strs' in cached_stats1 and 'sign_stats' in cached_stats1) 42 | self.assertTrue('scores' in cached_stats2 and 'strs' in cached_stats2 and 'sign_stats' in cached_stats2) 43 | self.assertAlmostEqual(cached_stats1['scores'], 22.44, places=1) 44 | reporters.sys_names = [f'sys{i+1}' for i in range(2)] 45 | cached_report = compare_mt_main.generate_score_report(self.ref, [self.out1, self.out2], cache_dicts=[cached_stats1, cached_stats2], title='Aggregate Scores') 46 | ori_report = compare_mt_main.generate_score_report(self.ref, [self.out1, self.out2], title='Aggregate Scores') 47 | self.assertTrue(cached_report.scores == ori_report.scores) 48 | self.assertTrue(cached_report.strs == ori_report.strs) 49 | self.assertTrue(cached_report.wins == ori_report.wins) 50 | 51 | def test_score_cache_bootstrap(self): 52 | cached_stats1 = compare_mt_main.generate_score_report(self.ref, [self.out1], to_cache=True) 53 | cached_stats2 = compare_mt_main.generate_score_report(self.ref, [self.out2], to_cache=True) 54 | self.assertTrue('scores' in cached_stats1 and 'strs' in cached_stats1 and 'sign_stats' in cached_stats1) 55 | self.assertTrue('scores' in cached_stats2 and 'strs' in cached_stats2 and 'sign_stats' in cached_stats2) 56 | self.assertAlmostEqual(cached_stats1['scores'], 22.44, places=1) 57 | reporters.sys_names = [f'sys{i+1}' for i in range(2)] 58 | cached_report = compare_mt_main.generate_score_report(self.ref, [self.out1, self.out2], cache_dicts=[cached_stats1, cached_stats2], bootstrap=5, title='Aggregate Scores') 59 | ori_report = compare_mt_main.generate_score_report(self.ref, [self.out1, self.out2], bootstrap=5, title='Aggregate Scores') 60 | self.assertTrue(cached_report.scores == ori_report.scores) 61 | self.assertTrue(cached_report.strs == ori_report.strs) 62 | self.assertTrue(cached_report.wins == ori_report.wins) 63 | 64 | class TestWordAccCache(unittest.TestCase): 65 | 66 | @classmethod 67 | def setUpClass(self): 68 | self.ref, self.out1, self.out2 = _get_example_data() 69 | 70 | def test_wordacc_cache(self): 71 | cached_stats1 = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out1], to_cache=True) 72 | cached_stats2 = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out2], to_cache=True) 73 | self.assertTrue('statistics' in cached_stats1 and 'my_ref_total_list' in cached_stats1 and 'my_out_matches_list' in cached_stats1) 74 | self.assertTrue('statistics' in cached_stats2 and 'my_ref_total_list' in cached_stats2 and 'my_out_matches_list' in cached_stats2) 75 | ori_report = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out1, self.out2]) 76 | cached_report = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out1, self.out2], cache_dicts=[cached_stats1, cached_stats2]) 77 | self.assertTrue(cached_report.statistics == 
class TestWordAccCache(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out1, self.out2 = _get_example_data()

  def test_wordacc_cache(self):
    cached_stats1 = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out1], to_cache=True)
    cached_stats2 = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out2], to_cache=True)
    self.assertTrue('statistics' in cached_stats1 and 'my_ref_total_list' in cached_stats1 and 'my_out_matches_list' in cached_stats1)
    self.assertTrue('statistics' in cached_stats2 and 'my_ref_total_list' in cached_stats2 and 'my_out_matches_list' in cached_stats2)
    ori_report = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out1, self.out2])
    cached_report = compare_mt_main.generate_word_accuracy_report(self.ref, [self.out1, self.out2], cache_dicts=[cached_stats1, cached_stats2])
    self.assertTrue(cached_report.statistics == ori_report.statistics)
    self.assertTrue(cached_report.examples == ori_report.examples)

class TestSrcWordAccCache(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    example_path = os.path.join(compare_mt_root, "example")
    self.ref, self.out1, self.out2 = _get_example_data()
    src_file = os.path.join(example_path, "ted.orig.slk")
    self.src = load_tokens(src_file)
    self.ref_align_file = os.path.join(example_path, "ted.ref.align")

  def test_src_wordacc_cache(self):
    cached_stats1 = compare_mt_main.generate_src_word_accuracy_report(self.ref, [self.out1], self.src, ref_align_file=self.ref_align_file, to_cache=True)
    cached_stats2 = compare_mt_main.generate_src_word_accuracy_report(self.ref, [self.out2], self.src, ref_align_file=self.ref_align_file, to_cache=True)
    self.assertTrue('statistics' in cached_stats1 and 'my_ref_total_list' in cached_stats1 and 'my_out_matches_list' in cached_stats1)
    self.assertTrue('statistics' in cached_stats2 and 'my_ref_total_list' in cached_stats2 and 'my_out_matches_list' in cached_stats2)
    ori_report = compare_mt_main.generate_src_word_accuracy_report(self.ref, [self.out1, self.out2], self.src, ref_align_file=self.ref_align_file)
    cached_report = compare_mt_main.generate_src_word_accuracy_report(self.ref, [self.out1, self.out2], self.src, ref_align_file=self.ref_align_file, cache_dicts=[cached_stats1, cached_stats2])
    self.assertTrue(cached_report.statistics == ori_report.statistics)
    self.assertTrue(cached_report.examples == ori_report.examples)

class TestSentBucketCache(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out1, self.out2 = _get_example_data()

  def test_sentbucket_cache(self):
    cached_stats1 = compare_mt_main.generate_sentence_bucketed_report(self.ref, [self.out1], to_cache=True)
    cached_stats2 = compare_mt_main.generate_sentence_bucketed_report(self.ref, [self.out2], to_cache=True)
    self.assertTrue('stats' in cached_stats1)
    self.assertTrue('stats' in cached_stats2)
    ori_report = compare_mt_main.generate_sentence_bucketed_report(self.ref, [self.out1, self.out2])
    cached_report = compare_mt_main.generate_sentence_bucketed_report(self.ref, [self.out1, self.out2], cache_dicts=[cached_stats1, cached_stats2])
    self.assertTrue(cached_report.sys_stats == ori_report.sys_stats)

class TestNgramCache(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out1, self.out2 = _get_example_data()

  def test_ngram_cache(self):
    reporters.sys_names = [f'sys{i+1}' for i in range(2)]
    cached_stats1 = compare_mt_main.generate_ngram_report(self.ref, [self.out1], to_cache=True)
    cached_stats2 = compare_mt_main.generate_ngram_report(self.ref, [self.out2], to_cache=True)
    self.assertTrue('totals' in cached_stats1 and 'matches' in cached_stats1 and 'overs' in cached_stats1 and 'unders' in cached_stats1)
    self.assertTrue('totals' in cached_stats2 and 'matches' in cached_stats2 and 'overs' in cached_stats2 and 'unders' in cached_stats2)
    ori_report = compare_mt_main.generate_ngram_report(self.ref, [self.out1, self.out2])
    cached_report = compare_mt_main.generate_ngram_report(self.ref, [self.out1, self.out2], cache_dicts=[cached_stats1, cached_stats2])
    self.assertTrue(cached_report.scorelist == ori_report.scorelist)

class TestSentExamCache(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out1, self.out2 = _get_example_data()

  def test_sentexam_cache(self):
    reporters.sys_names = [f'sys{i+1}' for i in range(2)]
    cached_stats1 = compare_mt_main.generate_sentence_examples(self.ref, [self.out1], to_cache=True)
    cached_stats2 = compare_mt_main.generate_sentence_examples(self.ref, [self.out2], to_cache=True)
    self.assertTrue('scores' in cached_stats1 and 'strs' in cached_stats1)
    self.assertTrue('scores' in cached_stats2 and 'strs' in cached_stats2)
    ori_report = compare_mt_main.generate_sentence_examples(self.ref, [self.out1, self.out2])
    cached_report = compare_mt_main.generate_sentence_examples(self.ref, [self.out1, self.out2], cache_dicts=[cached_stats1, cached_stats2])
    self.assertTrue(cached_report.scorediff_lists == ori_report.scorediff_lists)

class TestCachedPorterStemmer(unittest.TestCase):
  def test_stem(self):
    cached_stemmer = CachedPorterStemmer()
    stemmer = PorterStemmer()
    # Repeated words exercise the cache; note the list needs explicit commas
    # (adjacent string literals would silently concatenate).
    words = ["cats", "citizen", "best", "citizen", "cats"]

    for w in words:
      self.assertEqual(stemmer.stem(w), cached_stemmer.stem(w))

if __name__ == "__main__":
  unittest.main()
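As the test above suggests, CachedPorterStemmer behaves like NLTK's PorterStemmer but avoids re-stemming repeated words. A minimal sketch of that memoization idea (a hypothetical MemoizedStemmer, not the repo's actual implementation in compare_mt/cache_utils.py):

  from nltk.stem.porter import PorterStemmer

  class MemoizedStemmer:
    """Caches stem() results so each distinct word is stemmed only once."""
    def __init__(self):
      self._stemmer = PorterStemmer()
      self._cache = {}

    def stem(self, word):
      if word not in self._cache:
        self._cache[word] = self._stemmer.stem(word)
      return self._cache[word]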
-------------------------------------------------------------------------------- /tests/test_scorers.py: --------------------------------------------------------------------------------

import os.path
import unittest
import numpy as np
import sys

compare_mt_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
sys.path.append(compare_mt_root)

from compare_mt import scorers
from compare_mt.corpus_utils import load_tokens


def _get_example_data():
  example_path = os.path.join(compare_mt_root, "example")
  ref_file = os.path.join(example_path, "ted.ref.eng")
  out1_file = os.path.join(example_path, "ted.sys1.eng")
  out2_file = os.path.join(example_path, "ted.sys2.eng")
  return [load_tokens(x) for x in (ref_file, out1_file, out2_file)]

def _get_example_data_detokenized():
  example_path = os.path.join(compare_mt_root, "example")
  ref_file = os.path.join(example_path, "ted.ref.detok.eng")
  out1_file = os.path.join(example_path, "ted.sys1.detok.eng")
  out2_file = os.path.join(example_path, "ted.sys2.detok.eng")
  return [load_tokens(x) for x in (ref_file, out1_file, out2_file)]


class TestBleuScorer(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out1, self.out2 = _get_example_data()
    self.ids = list(range(len(self.ref)))
    self.scorer = scorers.create_scorer_from_profile("bleu", case_insensitive=False)
    self.cache_stats1 = self.scorer.cache_stats(self.ref, self.out1)
    self.cache_stats2 = self.scorer.cache_stats(self.ref, self.out2)
    self.n_random_retries = 10

  def test_score_corpus(self):
    bleu, _ = self.scorer.score_corpus(self.ref, self.out1)
    # Compare to moses multi-bleu.pl
    self.assertAlmostEqual(bleu, 22.44, places=1)

  def test_score_sentence(self):

    def should_raise():
      return self.scorer.score_sentence(self.ref[0], self.out1[0])

    self.assertRaises(NotImplementedError, should_raise)

  def test_score_cached_corpus(self):
    for _ in range(self.n_random_retries):
      np.random.shuffle(self.ids)
      random_ids = self.ids[:int(len(self.ids) * 0.5)]

      # compare-mt's cached implementation
      my_sys1_score, _ = self.scorer.score_cached_corpus(random_ids, self.cache_stats1)
      my_sys2_score, _ = self.scorer.score_cached_corpus(random_ids, self.cache_stats2)

      # recomputed from scratch on the same random subset
      random_ref = [self.ref[i] for i in random_ids]
      random_sys1 = [self.out1[i] for i in random_ids]
      random_sys2 = [self.out2[i] for i in random_ids]
      nltk_sys1_score, _ = self.scorer.score_corpus(random_ref, random_sys1)
      nltk_sys2_score, _ = self.scorer.score_corpus(random_ref, random_sys2)

      self.assertAlmostEqual(my_sys1_score, nltk_sys1_score)
      self.assertAlmostEqual(my_sys2_score, nltk_sys2_score)


class TestSentBleuScorer(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out, _ = _get_example_data()
    self.scorer = scorers.create_scorer_from_profile("sentbleu")

  def test_score_sentence(self):
    bleu, _ = self.scorer.score_sentence(self.ref[0], self.out[0])
    # compare to nltk
    self.assertAlmostEqual(bleu, 32.44376694160122)

  def test_score_corpus(self):
    sent_bleu_corpus, _ = self.scorer.score_corpus(self.ref, self.out)
    avg_sent_bleu = sum([self.scorer.score_sentence(ref_sent, out_sent)[0]
                         for ref_sent, out_sent in zip(self.ref, self.out)])
    avg_sent_bleu /= len(self.ref)
    # corpus-level sentbleu should equal the average of the per-sentence scores
    self.assertAlmostEqual(sent_bleu_corpus, avg_sent_bleu)


class TestLengthScorer(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out, _ = _get_example_data()
    self.scorer = scorers.create_scorer_from_profile("length")

  def test_score_sentence(self):
    length_ratio, desc = self.scorer.score_sentence(self.ref[0], self.out[0])
    self.assertAlmostEqual(length_ratio, 22 / 24)
    self.assertEqual(desc, "ref=24, out=22")

  def test_score_corpus(self):
    length_ratio_corpus, desc = self.scorer.score_corpus(self.ref, self.out)
    self.assertAlmostEqual(length_ratio_corpus, 45672 / 48183)
    self.assertEqual(desc, "ref=48183, out=45672")


class TestRibesScorer(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out, _ = _get_example_data()
    self.scorer = scorers.create_scorer_from_profile("ribes")

  def test_score_sentence(self):
    ribes, _ = self.scorer.score_sentence(self.ref[0], self.out[0])
    self.assertAlmostEqual(ribes, 84.9014, places=4)

  def test_score_corpus(self):
    ribes_corpus, _ = self.scorer.score_corpus(self.ref, self.out)
    self.assertAlmostEqual(ribes_corpus, 80.0020, places=4)


class TestChrFScorer(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out, _ = _get_example_data()
    self.scorer = scorers.create_scorer_from_profile("chrf")

  def test_chrf_sentence(self):
    chrf, _ = self.scorer.score_sentence(self.ref[0], self.out[0])
    # compare to sacrebleu --force --metrics=chrf
    self.assertAlmostEqual(chrf, 59, places=0)

  def test_chrf_corpus(self):
    chrf, _ = self.scorer.score_corpus(self.ref, self.out)
    # compare to sacrebleu --force --metrics=chrf
    self.assertAlmostEqual(chrf, 48, places=0)
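# All of the scorer tests share one pattern: build a scorer with
# create_scorer_from_profile(), then call score_corpus() or score_sentence(),
# each of which returns a (score, description) pair. A minimal sketch on the
# example data (paths assume the repo root; "chrf" is used because the plain
# "bleu" scorer raises NotImplementedError for sentence-level scoring):
#
#   from compare_mt import scorers
#   from compare_mt.corpus_utils import load_tokens
#
#   ref = load_tokens("example/ted.ref.eng")
#   out = load_tokens("example/ted.sys1.eng")
#   scorer = scorers.create_scorer_from_profile("chrf")
#   corpus_score, desc = scorer.score_corpus(ref, out)
#   sent_score, _ = scorer.score_sentence(ref[0], out[0])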
class TestSacreBleuScorer(unittest.TestCase):

  @classmethod
  def setUpClass(self):
    self.ref, self.out, _ = _get_example_data_detokenized()
    self.scorer = scorers.create_scorer_from_profile("sacrebleu")

  def test_detok_bleu_corpus(self):
    detok_bleu, _ = self.scorer.score_corpus(self.ref, self.out)
    # compare to sacrebleu
    self.assertAlmostEqual(detok_bleu, 21.7, places=0)


class TestGleuScorer(unittest.TestCase):

  @classmethod
  def setUpClass(cls) -> None:
    example_path = os.path.join(compare_mt_root, "example")
    filenames = ["ted.ref.eng", "ted.sys1.eng", "ted.orig.slk"]
    cls.ref, cls.out, cls.src = [load_tokens(os.path.join(example_path, name)) for name in filenames]
    cls.scorer = scorers.create_scorer_from_profile("gleu", case_insensitive=False)

  def test_score_corpus(self):
    gleu, _ = self.scorer.score_corpus(self.ref, self.out, self.src)
    # Compare to https://github.com/cnap/gec-ranking
    self.assertAlmostEqual(gleu, 22.39, places=1)

  def test_score_sentence(self):
    src = "A simple src sentence of test .".split()
    ref = "A simple source sentence for testing .".split()
    out = "A simple src sentence for testing .".split()
    gleu, _ = self.scorer.score_sentence(ref, out, src)
    # Compare to https://github.com/cnap/gec-ranking
    self.assertAlmostEqual(gleu, 33.03, places=1)

  def test_score_cached_corpus(self):
    cached_stats = [
      (9, 2, [(2, 2), (1, 1), (0, 0), (0, 0)]),
      (4, 13, [(4, 13), (2, 12), (0, 11), (0, 10)]),
      (10, 10, [(6, 10), (4, 9), (1, 8), (0, 7)])
    ]
    gleu, _ = self.scorer.score_cached_corpus(range(len(cached_stats)), cached_stats)
    self.assertEqual(gleu, 0)


if __name__ == "__main__":
  unittest.main()

--------------------------------------------------------------------------------