├── .gitignore ├── CONTRIBUTING ├── LICENSE ├── README.md ├── __init__.py ├── conda-env.txt ├── evaluate.py ├── evaluate_test.py ├── install_tools.sh ├── leakr_badwords.dic ├── mock_test_data ├── labels │ ├── bucc2018 │ │ ├── test-de.tsv │ │ ├── test-fr.tsv │ │ ├── test-ru.tsv │ │ └── test-zh.tsv │ ├── mewslix │ │ ├── test-ar.json │ │ ├── test-de.json │ │ ├── test-en.json │ │ ├── test-es.json │ │ ├── test-fa.json │ │ ├── test-ja.json │ │ ├── test-pl.json │ │ ├── test-ro.json │ │ ├── test-ta.json │ │ ├── test-tr.json │ │ └── test-uk.json │ ├── mlqa │ │ ├── test-ar.json │ │ ├── test-de.json │ │ ├── test-en.json │ │ ├── test-es.json │ │ ├── test-hi.json │ │ ├── test-vi.json │ │ └── test-zh.json │ ├── panx │ │ ├── test-af.tsv │ │ ├── test-ar.tsv │ │ ├── test-bg.tsv │ │ ├── test-bn.tsv │ │ ├── test-de.tsv │ │ ├── test-el.tsv │ │ ├── test-en.tsv │ │ ├── test-es.tsv │ │ ├── test-et.tsv │ │ ├── test-eu.tsv │ │ ├── test-fa.tsv │ │ ├── test-fi.tsv │ │ ├── test-fr.tsv │ │ ├── test-he.tsv │ │ ├── test-hi.tsv │ │ ├── test-hu.tsv │ │ ├── test-id.tsv │ │ ├── test-it.tsv │ │ ├── test-ja.tsv │ │ ├── test-jv.tsv │ │ ├── test-ka.tsv │ │ ├── test-kk.tsv │ │ ├── test-ko.tsv │ │ ├── test-ml.tsv │ │ ├── test-mr.tsv │ │ ├── test-ms.tsv │ │ ├── test-my.tsv │ │ ├── test-nl.tsv │ │ ├── test-pt.tsv │ │ ├── test-ru.tsv │ │ ├── test-sw.tsv │ │ ├── test-ta.tsv │ │ ├── test-te.tsv │ │ ├── test-th.tsv │ │ ├── test-tl.tsv │ │ ├── test-tr.tsv │ │ ├── test-ur.tsv │ │ ├── test-vi.tsv │ │ ├── test-yo.tsv │ │ └── test-zh.tsv │ ├── pawsx │ │ ├── test-de.tsv │ │ ├── test-en.tsv │ │ ├── test-es.tsv │ │ ├── test-fr.tsv │ │ ├── test-ja.tsv │ │ ├── test-ko.tsv │ │ └── test-zh.tsv │ ├── tatoeba │ │ ├── test-af.tsv │ │ ├── test-ar.tsv │ │ ├── test-bg.tsv │ │ ├── test-bn.tsv │ │ ├── test-de.tsv │ │ ├── test-el.tsv │ │ ├── test-es.tsv │ │ ├── test-et.tsv │ │ ├── test-eu.tsv │ │ ├── test-fa.tsv │ │ ├── test-fi.tsv │ │ ├── test-fr.tsv │ │ ├── test-he.tsv │ │ ├── test-hi.tsv │ │ ├── test-hu.tsv │ │ ├── test-id.tsv │ │ ├── test-it.tsv │ │ ├── test-ja.tsv │ │ ├── test-jv.tsv │ │ ├── test-ka.tsv │ │ ├── test-kk.tsv │ │ ├── test-ko.tsv │ │ ├── test-ml.tsv │ │ ├── test-mr.tsv │ │ ├── test-nl.tsv │ │ ├── test-pt.tsv │ │ ├── test-ru.tsv │ │ ├── test-sw.tsv │ │ ├── test-ta.tsv │ │ ├── test-te.tsv │ │ ├── test-th.tsv │ │ ├── test-tl.tsv │ │ ├── test-tr.tsv │ │ ├── test-ur.tsv │ │ ├── test-vi.tsv │ │ └── test-zh.tsv │ ├── tydiqa │ │ ├── test-ar.json │ │ ├── test-bn.json │ │ ├── test-en.json │ │ ├── test-fi.json │ │ ├── test-id.json │ │ ├── test-ko.json │ │ ├── test-ru.json │ │ ├── test-sw.json │ │ └── test-te.json │ ├── udpos │ │ ├── test-af.tsv │ │ ├── test-ar.tsv │ │ ├── test-bg.tsv │ │ ├── test-de.tsv │ │ ├── test-el.tsv │ │ ├── test-en.tsv │ │ ├── test-es.tsv │ │ ├── test-et.tsv │ │ ├── test-eu.tsv │ │ ├── test-fa.tsv │ │ ├── test-fi.tsv │ │ ├── test-fr.tsv │ │ ├── test-he.tsv │ │ ├── test-hi.tsv │ │ ├── test-hu.tsv │ │ ├── test-id.tsv │ │ ├── test-it.tsv │ │ ├── test-ja.tsv │ │ ├── test-kk.tsv │ │ ├── test-ko.tsv │ │ ├── test-mr.tsv │ │ ├── test-nl.tsv │ │ ├── test-pt.tsv │ │ ├── test-ru.tsv │ │ ├── test-ta.tsv │ │ ├── test-te.tsv │ │ ├── test-th.tsv │ │ ├── test-tl.tsv │ │ ├── test-tr.tsv │ │ ├── test-ur.tsv │ │ ├── test-vi.tsv │ │ ├── test-yo.tsv │ │ └── test-zh.tsv │ ├── xnli │ │ ├── test-ar.tsv │ │ ├── test-bg.tsv │ │ ├── test-de.tsv │ │ ├── test-el.tsv │ │ ├── test-en.tsv │ │ ├── test-es.tsv │ │ ├── test-fr.tsv │ │ ├── test-hi.tsv │ │ ├── test-ru.tsv │ │ ├── test-sw.tsv │ │ ├── test-th.tsv │ │ ├── test-tr.tsv │ │ ├── test-ur.tsv │ │ ├── test-vi.tsv │ │ └── test-zh.tsv │ └── xquad │ │ ├── test-ar.json │ │ ├── test-de.json │ │ ├── test-el.json │ │ ├── test-en.json │ │ ├── test-es.json │ │ ├── test-hi.json │ │ ├── test-ru.json │ │ ├── test-th.json │ │ ├── test-tr.json │ │ ├── test-vi.json │ │ └── test-zh.json └── predictions │ ├── bucc2018 │ ├── test-de.tsv │ ├── test-fr.tsv │ ├── test-ru.tsv │ └── test-zh.tsv │ ├── mewslix │ ├── test-ar.json │ ├── test-de.json │ ├── test-en.json │ ├── test-es.json │ ├── test-fa.json │ ├── test-ja.json │ ├── test-pl.json │ ├── test-ro.json │ ├── test-ta.json │ ├── test-tr.json │ └── test-uk.json │ ├── mlqa │ ├── test-ar.json │ ├── test-de.json │ ├── test-en.json │ ├── test-es.json │ ├── test-hi.json │ ├── test-vi.json │ └── test-zh.json │ ├── panx │ ├── test-af.tsv │ ├── test-ar.tsv │ ├── test-bg.tsv │ ├── test-bn.tsv │ ├── test-de.tsv │ ├── test-el.tsv │ ├── test-en.tsv │ ├── test-es.tsv │ ├── test-et.tsv │ ├── test-eu.tsv │ ├── test-fa.tsv │ ├── test-fi.tsv │ ├── test-fr.tsv │ ├── test-he.tsv │ ├── test-hi.tsv │ ├── test-hu.tsv │ ├── test-id.tsv │ ├── test-it.tsv │ ├── test-ja.tsv │ ├── test-jv.tsv │ ├── test-ka.tsv │ ├── test-kk.tsv │ ├── test-ko.tsv │ ├── test-ml.tsv │ ├── test-mr.tsv │ ├── test-ms.tsv │ ├── test-my.tsv │ ├── test-nl.tsv │ ├── test-pt.tsv │ ├── test-ru.tsv │ ├── test-sw.tsv │ ├── test-ta.tsv │ ├── test-te.tsv │ ├── test-th.tsv │ ├── test-tl.tsv │ ├── test-tr.tsv │ ├── test-ur.tsv │ ├── test-vi.tsv │ ├── test-yo.tsv │ └── test-zh.tsv │ ├── pawsx │ ├── test-de.tsv │ ├── test-en.tsv │ ├── test-es.tsv │ ├── test-fr.tsv │ ├── test-ja.tsv │ ├── test-ko.tsv │ └── test-zh.tsv │ ├── tatoeba │ ├── test-af.tsv │ ├── test-ar.tsv │ ├── test-bg.tsv │ ├── test-bn.tsv │ ├── test-de.tsv │ ├── test-el.tsv │ ├── test-es.tsv │ ├── test-et.tsv │ ├── test-eu.tsv │ ├── test-fa.tsv │ ├── test-fi.tsv │ ├── test-fr.tsv │ ├── test-he.tsv │ ├── test-hi.tsv │ ├── test-hu.tsv │ ├── test-id.tsv │ ├── test-it.tsv │ ├── test-ja.tsv │ ├── test-jv.tsv │ ├── test-ka.tsv │ ├── test-kk.tsv │ ├── test-ko.tsv │ ├── test-ml.tsv │ ├── test-mr.tsv │ ├── test-nl.tsv │ ├── test-pt.tsv │ ├── test-ru.tsv │ ├── test-sw.tsv │ ├── test-ta.tsv │ ├── test-te.tsv │ ├── test-th.tsv │ ├── test-tl.tsv │ ├── test-tr.tsv │ ├── test-ur.tsv │ ├── test-vi.tsv │ └── test-zh.tsv │ ├── tydiqa │ ├── test-ar.json │ ├── test-bn.json │ ├── test-en.json │ ├── test-fi.json │ ├── test-id.json │ ├── test-ko.json │ ├── test-ru.json │ ├── test-sw.json │ └── test-te.json │ ├── udpos │ ├── test-af.tsv │ ├── test-ar.tsv │ ├── test-bg.tsv │ ├── test-de.tsv │ ├── test-el.tsv │ ├── test-en.tsv │ ├── test-es.tsv │ ├── test-et.tsv │ ├── test-eu.tsv │ ├── test-fa.tsv │ ├── test-fi.tsv │ ├── test-fr.tsv │ ├── test-he.tsv │ ├── test-hi.tsv │ ├── test-hu.tsv │ ├── test-id.tsv │ ├── test-it.tsv │ ├── test-ja.tsv │ ├── test-kk.tsv │ ├── test-ko.tsv │ ├── test-mr.tsv │ ├── test-nl.tsv │ ├── test-pt.tsv │ ├── test-ru.tsv │ ├── test-ta.tsv │ ├── test-te.tsv │ ├── test-th.tsv │ ├── test-tl.tsv │ ├── test-tr.tsv │ ├── test-ur.tsv │ ├── test-vi.tsv │ ├── test-yo.tsv │ └── test-zh.tsv │ ├── xnli │ ├── test-ar.tsv │ ├── test-bg.tsv │ ├── test-de.tsv │ ├── test-el.tsv │ ├── test-en.tsv │ ├── test-es.tsv │ ├── test-fr.tsv │ ├── test-hi.tsv │ ├── test-ru.tsv │ ├── test-sw.tsv │ ├── test-th.tsv │ ├── test-tr.tsv │ ├── test-ur.tsv │ ├── test-vi.tsv │ └── test-zh.tsv │ └── xquad │ ├── test-ar.json │ ├── test-de.json │ ├── test-el.json │ ├── test-en.json │ ├── test-es.json │ ├── test-hi.json │ ├── test-ru.json │ ├── test-th.json │ ├── test-tr.json │ ├── test-vi.json │ └── test-zh.json ├── multichecklist ├── README.md ├── __init__.py ├── checklist_templates.tsv ├── checklist_utils.py ├── generate_test_utils.py ├── generate_tests.py └── test_models.py ├── scripts ├── download_data.sh ├── eval_qa.sh ├── predict_qa.sh ├── preprocess_panx.sh ├── preprocess_udpos.sh ├── run_bucc2018.sh ├── run_eval_lareqa.sh ├── run_eval_mewslix.sh ├── run_tatoeba.sh ├── train.sh ├── train_lareqa.sh ├── train_mewslix.sh ├── train_panx.sh ├── train_pawsx.sh ├── train_qa.sh ├── train_udpos.sh ├── train_xcopa.sh └── train_xnli.sh ├── third_party ├── __init__.py ├── bert.py ├── evaluate_mlqa.py ├── evaluate_retrieval.py ├── evaluate_squad.py ├── processors │ ├── lareqa.py │ ├── mewslix.py │ ├── pawsx.py │ ├── sb_sed.py │ ├── squad.py │ ├── utils.py │ └── xnli.py ├── roberta.py ├── run_classify.py ├── run_retrieval_el.py ├── run_retrieval_qa.py ├── run_squad.py ├── run_tag.py ├── run_xcopa.py ├── siqa_data │ └── siqa_data.py ├── ud-conversion-tools │ ├── conllu_to_conll.py │ └── lib │ │ ├── __init__.py │ │ └── conll.py ├── utils_lareqa.py ├── utils_mewslix.py ├── utils_retrieve.py ├── utils_tag.py ├── xcopa_data │ └── xcopa_data.py ├── xlm.py └── xlm_roberta.py ├── utils_preprocess.py └── xtreme_score.png /.gitignore: -------------------------------------------------------------------------------- 1 | models/ 2 | figures/ 3 | outputs/ 4 | analysis/ 5 | data/ 6 | download/ 7 | *.tsv 8 | *.csv 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | pip-wheel-metadata/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 104 | __pypackages__/ 105 | 106 | # Celery stuff 107 | celerybeat-schedule 108 | celerybeat.pid 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). 29 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /install_tools.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -eux # for easier debugging 17 | 18 | REPO=$PWD 19 | LIB=$REPO/third_party 20 | mkdir -p $LIB 21 | 22 | # install conda env 23 | conda create --name xtreme --file conda-env.txt 24 | conda init bash 25 | 26 | # If 'conda activate' fails below, try uncommenting the following lines, 27 | # based on https://github.com/conda/conda/issues/7980. 28 | # CONDA_PATH=$(conda info | grep -i 'base environment' | awk '{print $4}') 29 | # source $CONDA_PATH/etc/profile.d/conda.sh 30 | 31 | conda activate xtreme 32 | 33 | # install latest transformer 34 | cd $LIB 35 | git clone https://github.com/huggingface/transformers 36 | cd transformers 37 | git checkout cefd51c50cc08be8146c1151544495968ce8f2ad 38 | pip install . 39 | cd $LIB 40 | 41 | pip install seqeval 42 | pip install tensorboardx 43 | 44 | # install XLM tokenizer 45 | pip install sacremoses 46 | pip install pythainlp 47 | pip install jieba 48 | 49 | git clone https://github.com/neubig/kytea.git && cd kytea 50 | autoreconf -i 51 | ./configure --prefix=${CONDA_PREFIX} 52 | make && make install 53 | pip install kytea 54 | -------------------------------------------------------------------------------- /leakr_badwords.dic: -------------------------------------------------------------------------------- 1 | ;; Changed 'mum' to Info severity 2 | $RE:(^|[\W])((?i)mum(?-i))($|[\W]);0;Use regex as mum is a common letter combo 3 | -------------------------------------------------------------------------------- /mock_test_data/labels/bucc2018/test-de.tsv: -------------------------------------------------------------------------------- 1 | de-000413461 en-000091985 2 | de-000413482 en-000367951 3 | de-000413487 en-000092032 4 | de-000413503 en-000296187 5 | de-000413582 en-000336047 6 | de-000413632 en-000240614 7 | de-000413732 en-000339402 8 | de-000413744 en-000184053 9 | de-000413761 en-000045799 10 | de-000413869 en-000138555 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/bucc2018/test-fr.tsv: -------------------------------------------------------------------------------- 1 | fr-000000045 en-000042536 2 | fr-000000092 en-000181413 3 | fr-000000098 en-000036493 4 | fr-000000181 en-000063864 5 | fr-000000259 en-000292398 6 | fr-000000359 en-000050051 7 | fr-000000470 en-000059683 8 | fr-000000545 en-000352111 9 | fr-000000606 en-000330731 10 | fr-000000614 en-000081622 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/bucc2018/test-ru.tsv: -------------------------------------------------------------------------------- 1 | ru-000000041 en-000089671 2 | ru-000000112 en-000214117 3 | ru-000000228 en-000075910 4 | ru-000000379 en-000163824 5 | ru-000000421 en-000504782 6 | ru-000000429 en-000474098 7 | ru-000000617 en-000248027 8 | ru-000000869 en-000247914 9 | ru-000000947 en-000039212 10 | ru-000001037 en-000383323 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/bucc2018/test-zh.tsv: -------------------------------------------------------------------------------- 1 | zh-000000033 en-000005983 2 | zh-000000231 en-000047360 3 | zh-000000272 en-000062892 4 | zh-000000438 en-000065621 5 | zh-000000639 en-000005169 6 | zh-000000643 en-000063761 7 | zh-000000735 en-000013476 8 | zh-000000915 en-000030760 9 | zh-000001083 en-000027140 10 | zh-000001321 en-000039422 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-ar.json: -------------------------------------------------------------------------------- 1 | {"8e18e51eced73e6495df0043192edbfe": ["Q46930"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-de.json: -------------------------------------------------------------------------------- 1 | {"4be5a1742223cc3a8c01e6bf9c6e3f27": ["Q156913"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-en.json: -------------------------------------------------------------------------------- 1 | {"64ca9e2f229acf8e39c2a3d2e45f81e7": ["Q720285"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-es.json: -------------------------------------------------------------------------------- 1 | {"4a2d7fd3e4791f09bc3c804a15d647ef": ["Q786"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-fa.json: -------------------------------------------------------------------------------- 1 | {"d35cc57a7869168ddeb8143c1b2260f3": ["Q76"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-ja.json: -------------------------------------------------------------------------------- 1 | {"d0e7a9dd0359610c53bba176d702dfce": ["Q174691"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-pl.json: -------------------------------------------------------------------------------- 1 | {"64232b8a3c3ee67f76f96ccd963b78f7": ["Q1362561"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-ro.json: -------------------------------------------------------------------------------- 1 | {"ebd92132adbb679fdd090503cd925f81": ["Q185007"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-ta.json: -------------------------------------------------------------------------------- 1 | {"12760cb39680a822c3cd0c8495cf1b4b": ["Q11468"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-tr.json: -------------------------------------------------------------------------------- 1 | {"9f39acb0fef259aaf24224fe41954f6c": ["Q258"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mewslix/test-uk.json: -------------------------------------------------------------------------------- 1 | {"9f4dba86a6d21cfd246353403da46abd": ["Q1899"]} -------------------------------------------------------------------------------- /mock_test_data/labels/mlqa/test-de.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Am 27. M\u00e4rz um 12:36 Uhr warfen ein oder zwei ann\u00e4hernd gleichzeitige phreatische Eruptionen (explosionsartige Dampfaustritte des Grundwassers) Asche und Ger\u00f6ll aus dem Inneren des Gipfelkraters und erzeugten neben einer etwa 1800 Meter hohen Asches\u00e4ule auch einen neuen, etwa 76 Meter durchmessenden Krater. Zum gleichen Zeitpunkt bildete sich im Gipfelbereich ein etwa 4900 Meter langes, nach Osten verlaufendes Netz aus Rissen. Weitere Erdsto\u00dfwellen und eine Reihe von Dampfexplosionen schleuderten Vulkanasche zwischen 3050 und 3350 Meter hoch \u00fcber den Krater hinaus. Der Gro\u00dfteil dieser Asche ging in einem Umkreis zwischen f\u00fcnf und 19 Kilometern nieder; einzelne Niederschl\u00e4ge wurden jedoch noch 240 Kilometer s\u00fcdlich in Bend, Oregon, und 285 Kilometer \u00f6stlich in Spokane, Washington, festgestellt. Aufgrund der durch Schneeschmelze drohenden \u00dcberflutungen und der durch Erdst\u00f6\u00dfe ausgel\u00f6sten Erdrutsche wurde ab dem 27. M\u00e4rz eine Sicherheitszone im Umkreis von rund 25 Kilometern um den Vulkan errichtet. Die in diesem Gebiet lebenden Menschen wurden evakuiert.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "text": "3050 und 3350 Meter", 12 | "answer_start": 523 13 | } 14 | ], 15 | "id": "c076d4b0a1967805a45e7ab1e247e3a0739d4850", 16 | "question": "Wie viele Meter \u00fcber den Krater war die Dampfexplosion? " 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "Ausbruch des Mount St. Helens 1980" 22 | } 23 | ], 24 | "version": 1.0 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/mlqa/test-en.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "In 1994, five unnamed civilian contractors and the widows of contractors Walter Kasza and Robert Frost sued the USAF and the United States Environmental Protection Agency. Their suit, in which they were represented by George Washington University law professor Jonathan Turley, alleged they had been present when large quantities of unknown chemicals had been burned in open pits and trenches at Groom. Biopsies taken from the complainants were analyzed by Rutgers University biochemists, who found high levels of dioxin, dibenzofuran, and trichloroethylene in their body fat. The complainants alleged they had sustained skin, liver, and respiratory injuries due to their work at Groom, and that this had contributed to the deaths of Frost and Kasza. The suit sought compensation for the injuries they had sustained, claiming the USAF had illegally handled toxic materials, and that the EPA had failed in its duty to enforce the Resource Conservation and Recovery Act (which governs handling of dangerous materials). They also sought detailed information about the chemicals to which they were allegedly exposed, hoping this would facilitate the medical treatment of survivors. Congressman Lee H. Hamilton, former chairman of the House Intelligence Committee, told 60 Minutes reporter Lesley Stahl, \"The Air Force is classifying all information about Area 51 in order to protect themselves from a lawsuit.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 607, 12 | "text": "had sustained skin, liver, and respiratory injuries" 13 | } 14 | ], 15 | "id": "d066a75dbe8cd3e2b57c415a8eb54a08dc7e72a7", 16 | "question": "what did the complainants alleged happen to them?" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "Area 51" 22 | } 23 | ], 24 | "version": 1.0 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/mlqa/test-es.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Tras la erupci\u00f3n, las emisiones de material pirocl\u00e1stico que se produjeron desde la brecha creada por el derrumbe fueron en su mayor\u00eda de origen magm\u00e1tico, y en menor proporci\u00f3n de fragmentos de rocas volc\u00e1nicas preexistentes. Los dep\u00f3sitos resultantes formaron unas estructuras en forma de abanico que segu\u00edan un patr\u00f3n de hojas, lenguas y l\u00f3bulos superpuestos entre s\u00ed. Durante la erupci\u00f3n del 18 de mayo se produjeron por lo menos 17 emisiones de flujo pirocl\u00e1stico separadas en el tiempo, cuyos vol\u00famenes de agregaci\u00f3n rondaban los 208 millones de m3.", 7 | "qas": [ 8 | { 9 | "question": "\u00bfCu\u00e1l fue la apariencia de los dep\u00f3sitos que quedaron de ese deslizamiento de tierra?", 10 | "answers": 11 | [ 12 | { 13 | "text": "unas estructuras en forma de abanico que segu\u00edan un patr\u00f3n de hojas, lenguas y l\u00f3bulos", 14 | "answer_start": 262 15 | } 16 | ], 17 | "id": "b77c037b331e06542272669766df3b9515366b57" 18 | } 19 | ] 20 | } 21 | ], 22 | "title": "Erupci\u00f3n del Monte Santa Helena de 1980" 23 | } 24 | ], 25 | "version": 1.0 26 | } 27 | -------------------------------------------------------------------------------- /mock_test_data/labels/mlqa/test-hi.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\u0909\u0938\u0940 \"\u090f\u0930\u093f\u092f\u093e XX \" \u0928\u093e\u092e\u0915\u0930\u0923 \u092a\u094d\u0930\u0923\u093e\u0932\u0940 \u0915\u093e \u092a\u094d\u0930\u092f\u094b\u0917 \u0928\u0947\u0935\u093e\u0926\u093e \u092a\u0930\u0940\u0915\u094d\u0937\u0923 \u0938\u094d\u0925\u0932 \u0915\u0947 \u0905\u0928\u094d\u092f \u092d\u093e\u0917\u094b\u0902 \u0915\u0947 \u0932\u093f\u090f \u0915\u093f\u092f\u093e \u0917\u092f\u093e \u0939\u0948\u0964\u092e\u0942\u0932 \u0930\u0942\u092a \u092e\u0947\u0902 6 \u092c\u091f\u0947 10 \u092e\u0940\u0932 \u0915\u093e \u092f\u0939 \u0906\u092f\u0924\u093e\u0915\u093e\u0930 \u0905\u0921\u094d\u0921\u093e \u0905\u092c \u0924\u0925\u093e\u0915\u0925\u093f\u0924 '\u0917\u094d\u0930\u0942\u092e \u092c\u0949\u0915\u094d\u0938 \" \u0915\u093e \u090f\u0915 \u092d\u093e\u0917 \u0939\u0948, \u091c\u094b \u0915\u093f 23 \u092c\u091f\u0947 25.3 \u092e\u0940\u0932 \u0915\u093e \u090f\u0915 \u092a\u094d\u0930\u0924\u093f\u092c\u0902\u0927\u093f\u0924 \u0939\u0935\u093e\u0908 \u0915\u094d\u0937\u0947\u0924\u094d\u0930 \u0939\u0948\u0964 \u092f\u0939 \u0915\u094d\u0937\u0947\u0924\u094d\u0930 NTS \u0915\u0947 \u0906\u0902\u0924\u0930\u093f\u0915 \u0938\u0921\u093c\u0915 \u092a\u094d\u0930\u092c\u0902\u0927\u0928 \u0938\u0947 \u091c\u0941\u0921\u093c\u093e \u0939\u0948, \u091c\u093f\u0938\u0915\u0940 \u092a\u0915\u094d\u0915\u0940 \u0938\u0921\u093c\u0915\u0947\u0902 \u0926\u0915\u094d\u0937\u093f\u0923 \u092e\u0947\u0902 \u092e\u0930\u0915\u0930\u0940 \u0915\u0940 \u0913\u0930 \u0914\u0930 \u092a\u0936\u094d\u091a\u093f\u092e \u092e\u0947\u0902 \u092f\u0941\u0915\u094d\u0915\u093e \u092b\u094d\u0932\u0948\u091f \u0915\u0940 \u0913\u0930 \u091c\u093e\u0924\u0940 \u0939\u0948\u0902\u0964 \u091d\u0940\u0932 \u0938\u0947 \u0909\u0924\u094d\u0924\u0930 \u092a\u0942\u0930\u094d\u0935 \u0915\u0940 \u0913\u0930 \u092c\u0922\u093c\u0924\u0947 \u0939\u0941\u090f \u0935\u094d\u092f\u093e\u092a\u0915 \u0914\u0930 \u0914\u0930 \u0938\u0941\u0935\u094d\u092f\u0935\u0938\u094d\u0925\u093f\u0924 \u0917\u094d\u0930\u0942\u092e \u091d\u0940\u0932 \u0915\u0940 \u0938\u0921\u093c\u0915\u0947\u0902 \u090f\u0915 \u0926\u0930\u094d\u0930\u0947 \u0915\u0947 \u091c\u0930\u093f\u092f\u0947 \u092a\u0947\u091a\u0940\u0926\u093e \u092a\u0939\u093e\u0921\u093c\u093f\u092f\u094b\u0902 \u0938\u0947 \u0939\u094b\u0915\u0930 \u0917\u0941\u091c\u0930\u0924\u0940 \u0939\u0948\u0902\u0964 \u092a\u0939\u0932\u0947 \u0938\u0921\u093c\u0915\u0947\u0902 \u0917\u094d\u0930\u0942\u092e \u0918\u093e\u091f\u0940", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 378, 12 | "text": "\u0909\u0924\u094d\u0924\u0930 \u092a\u0942\u0930\u094d\u0935" 13 | } 14 | ], 15 | "id": "eeb8dbd25efe5221dc6723ddee95daa07d2c8478", 16 | "question": "\u091d\u0940\u0932 \u0915\u0947 \u0938\u093e\u092a\u0947\u0915\u094d\u0937 \u0917\u094d\u0930\u0942\u092e \u0932\u0947\u0915 \u0930\u094b\u0921 \u0915\u0939\u093e\u0901 \u091c\u093e\u0924\u0940 \u0925\u0940?" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "\u090f\u0930\u093f\u092f\u093e 51" 22 | } 23 | ], 24 | "version": 1.0 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/mlqa/test-vi.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Th\u00e0nh ph\u1ed1 Mi\u00eau L\u1eadt ti\u1ebfng Trung:\u82d7\u6817\u5e02, B\u00ednh \u00e2m:Mi\u00e1ol\u00ec Sh\u00ec, Pe\u030dh-\u014de-j\u012b:Bi\u00e2u-le\u030dk-chh\u012b) l\u00e0 huy\u1ec7n l\u1ef5 c\u1ee7a Huy\u1ec7n Mi\u00eau L\u1eadt, \u0110\u00e0i Loan. T\u1eeb Mi\u00eau L\u1eadtl\u00e0 k\u1ebft h\u1ee3p c\u1ee7a hai t\u1eeb trong ti\u1ebfng Kh\u00e1ch Gia l\u00e0 m\u00e8o (\u8c93) v\u00e0 th\u00e0nh ph\u1ed1 (\u88e1), \u0111\u01b0\u1ee3c ph\u00e1t \u00e2m g\u1ea7n nh\u01b0 l\u00e0 Pali (Bari) trong c\u00e1c ng\u00f4n ng\u1eef c\u1ee7a Th\u1ed5 d\u00e2n \u0110\u00e0i Loan. Th\u00e0nh ph\u1ed1 c\u00f3 t\u1ef7 l\u1ec7 ng\u01b0\u1eddi Kh\u00e1ch Gia cao nh\u1ea5t t\u1ea1i \u0110\u00e0i Loan. N\u0103m 2009, d\u00e2n s\u1ed1 th\u00e0nh ph\u1ed1 l\u00e0 90.209 ng\u01b0\u1eddi tr\u00ean t\u1ed5ng di\u1ec7n t\u00edch l\u00e0 37,8878 km\u00b2", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 115, 12 | "text": "\u0110\u00e0i Loan" 13 | } 14 | ], 15 | "id": "d436fa739423f2c2aeabf1ca6a1ea0d525823bad", 16 | "question": "Th\u00e0nh ph\u1ed1 Miaoli n\u1eb1m \u1edf qu\u1ed1c gia n\u00e0o?" 17 | }, 18 | { 19 | "answers": [ 20 | { 21 | "answer_start": 311, 22 | "text": "Kh\u00e1ch Gia" 23 | } 24 | ], 25 | "id": "2f0d6ff162619164bb113c0cadbcca06a50d2e5b", 26 | "question": "Miaoli c\u00f3 t\u1ef7 l\u1ec7 cao lo\u1ea1i ng\u01b0\u1eddi n\u00e0o?" 27 | } 28 | ] 29 | } 30 | ], 31 | "title": "Mi\u00eau L\u1eadt (th\u00e0nh ph\u1ed1)" 32 | } 33 | ], 34 | "version": 1.0 35 | } 36 | -------------------------------------------------------------------------------- /mock_test_data/labels/mlqa/test-zh.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\u5728\u7535\u8def\u5b66\u91cc\uff0c\u7535\u52a8\u52bf\uff08\u82f1\u8bed\uff1aelectromotive force\uff0c\u7f29\u5199\u4e3aemf\uff09\u8868\u5f81\u4e00\u4e9b\u7535\u8def\u5143\u4ef6\u4f9b\u5e94\u7535\u80fd\u7684\u7279\u6027\u3002\u8fd9\u4e9b\u7535\u8def\u5143\u4ef6\u79f0\u4e3a\u300c\u7535\u52a8\u52bf\u6e90\u300d\u3002\u7535\u5316\u7535\u6c60\u3001\u592a\u9633\u80fd\u7535\u6c60\u3001\u71c3\u6599\u7535\u6c60\u3001\u70ed\u7535\u88c5\u7f6e\u3001\u53d1\u7535\u673a\u7b49\u7b49\uff0c\u90fd\u662f\u7535\u52a8\u52bf\u6e90\u3002\u7535\u52a8\u52bf\u6e90\u6240\u4f9b\u5e94\u7684\u80fd\u91cf\u6bcf\u5355\u4f4d\u7535\u8377\u662f\u5176\u7535\u52a8\u52bf\u3002\u5047\u8bbe\uff0c\u7535\u8377", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 71, 12 | "text": "\u7535\u5316\u7535\u6c60" 13 | } 14 | ], 15 | "id": "465f3fb044b5c50a78a2e2f9bc94c424d1f7d039", 16 | "question": "\u5404\u7535\u5316\u7535\u6c60\u90fd\u80fd\u63d0\u4f9b\u7535\u52a8\u52bf\uff1f" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "\u96fb\u52d5\u52e2" 22 | } 23 | ], 24 | "version": 1.0 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-af.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | B-ORG 10 | I-ORG 11 | O 12 | O 13 | 14 | O 15 | O 16 | O 17 | O 18 | O 19 | B-LOC 20 | O 21 | O 22 | O 23 | O 24 | O 25 | O 26 | O 27 | O 28 | O 29 | O 30 | O 31 | O 32 | O 33 | O 34 | O 35 | O 36 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ar.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | I-ORG 6 | I-ORG 7 | O 8 | 9 | O 10 | B-LOC 11 | I-LOC 12 | I-LOC 13 | I-LOC 14 | I-LOC 15 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-bg.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | B-LOC 6 | O 7 | B-LOC 8 | O 9 | B-LOC 10 | O 11 | 12 | O 13 | O 14 | O 15 | B-LOC 16 | O 17 | O 18 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-bn.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | I-ORG 4 | I-ORG 5 | 6 | B-PER 7 | I-PER 8 | I-PER 9 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-de.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-LOC 3 | I-LOC 4 | I-LOC 5 | I-LOC 6 | 7 | B-PER 8 | I-PER 9 | O 10 | B-PER 11 | I-PER 12 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-el.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | B-PER 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | O 18 | O 19 | O 20 | O 21 | O 22 | O 23 | B-ORG 24 | I-ORG 25 | O 26 | 27 | B-LOC 28 | O 29 | B-LOC 30 | O 31 | O 32 | O 33 | B-LOC 34 | O 35 | O 36 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-en.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | B-LOC 13 | O 14 | O 15 | O 16 | O 17 | B-LOC 18 | O 19 | O 20 | O 21 | 22 | O 23 | B-PER 24 | I-PER 25 | O 26 | B-PER 27 | I-PER 28 | O 29 | O 30 | B-ORG 31 | I-ORG 32 | O 33 | O 34 | O 35 | O 36 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-es.tsv: -------------------------------------------------------------------------------- 1 | B-PER 2 | I-PER 3 | I-PER 4 | 5 | O 6 | B-PER 7 | I-PER 8 | I-PER 9 | I-PER 10 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-et.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | B-ORG 9 | I-ORG 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | O 18 | 19 | O 20 | O 21 | B-ORG 22 | O 23 | O 24 | O 25 | O 26 | O 27 | O 28 | O 29 | O 30 | O 31 | O 32 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-eu.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | I-ORG 7 | I-ORG 8 | I-ORG 9 | I-ORG 10 | I-ORG 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | O 18 | 19 | B-LOC 20 | I-LOC 21 | O 22 | O 23 | O 24 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-fa.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-ORG 3 | I-ORG 4 | I-ORG 5 | 6 | O 7 | B-PER 8 | I-PER 9 | I-PER 10 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-fi.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | B-ORG 9 | I-ORG 10 | O 11 | O 12 | 13 | O 14 | O 15 | B-LOC 16 | O 17 | O 18 | O 19 | O 20 | O 21 | O 22 | O 23 | O 24 | O 25 | O 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-fr.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | O 6 | O 7 | O 8 | O 9 | O 10 | 11 | B-PER 12 | I-PER 13 | I-PER 14 | O 15 | O 16 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-he.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-LOC 4 | O 5 | O 6 | 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | B-ORG 16 | I-ORG 17 | I-ORG 18 | O 19 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-hi.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-LOC 4 | 5 | O 6 | B-LOC 7 | O 8 | O 9 | O 10 | O 11 | O 12 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-hu.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | B-LOC 6 | O 7 | 8 | B-PER 9 | I-PER 10 | I-PER 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-id.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | I-ORG 7 | 8 | O 9 | B-PER 10 | I-PER 11 | I-PER 12 | I-PER 13 | I-PER 14 | I-PER 15 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-it.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | O 6 | O 7 | 8 | O 9 | B-ORG 10 | I-ORG 11 | I-ORG 12 | I-ORG 13 | I-ORG 14 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ja.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | B-PER 12 | I-PER 13 | I-PER 14 | I-PER 15 | 16 | O 17 | O 18 | B-ORG 19 | I-ORG 20 | I-ORG 21 | I-ORG 22 | I-ORG 23 | O 24 | O 25 | O 26 | O 27 | O 28 | B-ORG 29 | I-ORG 30 | I-ORG 31 | I-ORG 32 | I-ORG 33 | I-ORG 34 | I-ORG 35 | O 36 | I-ORG 37 | I-ORG 38 | I-ORG 39 | I-ORG 40 | I-ORG 41 | O 42 | O 43 | O 44 | O 45 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-jv.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-LOC 4 | I-LOC 5 | O 6 | O 7 | 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | B-ORG 17 | I-ORG 18 | I-ORG 19 | O 20 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ka.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-LOC 3 | O 4 | O 5 | O 6 | 7 | B-LOC 8 | I-LOC 9 | O 10 | B-LOC 11 | O 12 | B-LOC 13 | O 14 | O 15 | O 16 | O 17 | O 18 | O 19 | O 20 | O 21 | O 22 | O 23 | O 24 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-kk.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-PER 3 | I-PER 4 | I-PER 5 | 6 | B-PER 7 | I-PER 8 | O 9 | O 10 | O 11 | O 12 | O 13 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ko.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-PER 4 | I-PER 5 | O 6 | O 7 | 8 | O 9 | O 10 | B-ORG 11 | O 12 | O 13 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ml.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-ORG 3 | I-ORG 4 | I-ORG 5 | 6 | B-ORG 7 | I-ORG 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | O 18 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-mr.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | I-ORG 7 | 8 | O 9 | B-PER 10 | I-PER 11 | I-PER 12 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ms.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | 7 | B-PER 8 | I-PER 9 | O 10 | O 11 | O 12 | B-ORG 13 | I-ORG 14 | O 15 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-my.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOC 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | 14 | O 15 | O 16 | O 17 | O 18 | O 19 | B-PER 20 | B-PER 21 | O 22 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-nl.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | B-LOC 16 | O 17 | O 18 | O 19 | 20 | O 21 | O 22 | B-PER 23 | I-PER 24 | O 25 | O 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-pt.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-PER 4 | I-PER 5 | O 6 | O 7 | 8 | O 9 | O 10 | O 11 | O 12 | B-ORG 13 | O 14 | O 15 | O 16 | O 17 | O 18 | B-ORG 19 | O 20 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ru.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | O 6 | O 7 | 8 | O 9 | O 10 | B-LOC 11 | I-LOC 12 | O 13 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-sw.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-LOC 4 | O 5 | O 6 | O 7 | 8 | O 9 | B-PER 10 | I-PER 11 | I-PER 12 | O 13 | O 14 | B-PER 15 | I-PER 16 | I-PER 17 | O 18 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ta.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | I-ORG 6 | O 7 | O 8 | 9 | O 10 | B-LOC 11 | O 12 | O 13 | O 14 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-te.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | I-ORG 4 | I-ORG 5 | 6 | O 7 | O 8 | O 9 | O 10 | B-LOC 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-th.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-PER 3 | I-PER 4 | I-PER 5 | I-PER 6 | O 7 | I-PER 8 | I-PER 9 | I-PER 10 | I-PER 11 | I-PER 12 | B-PER 13 | I-PER 14 | I-PER 15 | I-PER 16 | O 17 | O 18 | O 19 | O 20 | O 21 | O 22 | O 23 | O 24 | O 25 | O 26 | O 27 | O 28 | O 29 | O 30 | 31 | O 32 | O 33 | O 34 | O 35 | O 36 | O 37 | O 38 | O 39 | O 40 | B-PER 41 | I-PER 42 | I-PER 43 | I-PER 44 | I-PER 45 | I-PER 46 | I-PER 47 | I-PER 48 | I-PER 49 | I-PER 50 | I-PER 51 | I-PER 52 | I-PER 53 | I-PER 54 | I-PER 55 | I-PER 56 | I-PER 57 | I-PER 58 | I-PER 59 | I-PER 60 | I-PER 61 | I-PER 62 | I-PER 63 | I-PER 64 | I-PER 65 | I-PER 66 | I-PER 67 | I-PER 68 | I-PER 69 | I-PER 70 | O 71 | I-PER 72 | I-PER 73 | I-PER 74 | I-PER 75 | I-PER 76 | I-PER 77 | I-PER 78 | I-PER 79 | I-PER 80 | I-PER 81 | I-PER 82 | I-PER 83 | I-PER 84 | I-PER 85 | I-PER 86 | I-PER 87 | B-PER 88 | I-PER 89 | I-PER 90 | I-PER 91 | I-PER 92 | I-PER 93 | I-PER 94 | I-PER 95 | I-PER 96 | O 97 | I-PER 98 | I-PER 99 | I-PER 100 | I-PER 101 | I-PER 102 | I-PER 103 | I-PER 104 | I-PER 105 | I-PER 106 | I-PER 107 | I-PER 108 | I-PER 109 | I-PER 110 | I-PER 111 | I-PER 112 | I-PER 113 | I-PER 114 | I-PER 115 | I-PER 116 | I-PER 117 | I-PER 118 | I-PER 119 | I-PER 120 | I-PER 121 | I-PER 122 | I-PER 123 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-tl.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-LOC 4 | 5 | B-ORG 6 | I-ORG 7 | I-ORG 8 | I-ORG 9 | I-ORG 10 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-tr.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | O 4 | B-ORG 5 | I-ORG 6 | O 7 | O 8 | 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | B-LOC 16 | B-ORG 17 | I-ORG 18 | O 19 | O 20 | O 21 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-ur.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | O 6 | O 7 | 8 | O 9 | B-PER 10 | I-PER 11 | I-PER 12 | I-PER 13 | I-PER 14 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-vi.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | O 4 | B-LOC 5 | I-LOC 6 | I-LOC 7 | I-LOC 8 | I-LOC 9 | 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | B-PER 18 | I-PER 19 | I-PER 20 | O 21 | O 22 | O 23 | O 24 | O 25 | O 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-yo.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-LOC 4 | I-LOC 5 | I-LOC 6 | 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | B-LOC 15 | O 16 | -------------------------------------------------------------------------------- /mock_test_data/labels/panx/test-zh.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | B-ORG 14 | I-ORG 15 | I-ORG 16 | 17 | O 18 | B-LOC 19 | I-LOC 20 | I-LOC 21 | I-LOC 22 | -------------------------------------------------------------------------------- /mock_test_data/labels/pawsx/test-de.tsv: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 1 8 | 1 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/pawsx/test-en.tsv: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 1 8 | 1 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/pawsx/test-es.tsv: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 1 8 | 1 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/pawsx/test-fr.tsv: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 1 8 | 1 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/pawsx/test-ja.tsv: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 1 8 | 1 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/pawsx/test-ko.tsv: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 1 8 | 1 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/pawsx/test-zh.tsv: -------------------------------------------------------------------------------- 1 | 1 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 1 8 | 1 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-af.tsv: -------------------------------------------------------------------------------- 1 | 308 2 | 441 3 | 713 4 | 233 5 | 757 6 | 35 7 | 93 8 | 939 9 | 872 10 | 52 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ar.tsv: -------------------------------------------------------------------------------- 1 | 669 2 | 982 3 | 957 4 | 832 5 | 543 6 | 320 7 | 702 8 | 752 9 | 519 10 | 919 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-bg.tsv: -------------------------------------------------------------------------------- 1 | 28 2 | 951 3 | 949 4 | 863 5 | 280 6 | 444 7 | 272 8 | 142 9 | 0 10 | 459 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-bn.tsv: -------------------------------------------------------------------------------- 1 | 193 2 | 86 3 | 934 4 | 926 5 | 485 6 | 370 7 | 768 8 | 481 9 | 325 10 | 13 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-de.tsv: -------------------------------------------------------------------------------- 1 | 367 2 | 258 3 | 374 4 | 529 5 | 528 6 | 283 7 | 348 8 | 344 9 | 282 10 | 212 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-el.tsv: -------------------------------------------------------------------------------- 1 | 847 2 | 672 3 | 840 4 | 841 5 | 994 6 | 77 7 | 786 8 | 775 9 | 129 10 | 288 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-es.tsv: -------------------------------------------------------------------------------- 1 | 575 2 | 761 3 | 406 4 | 857 5 | 37 6 | 44 7 | 899 8 | 367 9 | 409 10 | 692 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-et.tsv: -------------------------------------------------------------------------------- 1 | 961 2 | 693 3 | 698 4 | 752 5 | 794 6 | 993 7 | 247 8 | 199 9 | 958 10 | 501 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-eu.tsv: -------------------------------------------------------------------------------- 1 | 937 2 | 287 3 | 750 4 | 239 5 | 335 6 | 508 7 | 24 8 | 247 9 | 305 10 | 37 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-fa.tsv: -------------------------------------------------------------------------------- 1 | 376 2 | 889 3 | 521 4 | 430 5 | 162 6 | 896 7 | 936 8 | 25 9 | 716 10 | 809 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-fi.tsv: -------------------------------------------------------------------------------- 1 | 316 2 | 951 3 | 811 4 | 893 5 | 765 6 | 338 7 | 806 8 | 563 9 | 408 10 | 458 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-fr.tsv: -------------------------------------------------------------------------------- 1 | 957 2 | 981 3 | 721 4 | 980 5 | 912 6 | 444 7 | 973 8 | 971 9 | 970 10 | 969 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-he.tsv: -------------------------------------------------------------------------------- 1 | 635 2 | 562 3 | 610 4 | 329 5 | 952 6 | 429 7 | 34 8 | 53 9 | 566 10 | 854 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-hi.tsv: -------------------------------------------------------------------------------- 1 | 988 2 | 215 3 | 129 4 | 308 5 | 408 6 | 411 7 | 418 8 | 105 9 | 553 10 | 69 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-hu.tsv: -------------------------------------------------------------------------------- 1 | 417 2 | 559 3 | 599 4 | 202 5 | 516 6 | 413 7 | 825 8 | 939 9 | 731 10 | 428 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-id.tsv: -------------------------------------------------------------------------------- 1 | 194 2 | 178 3 | 208 4 | 423 5 | 728 6 | 701 7 | 0 8 | 801 9 | 871 10 | 519 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-it.tsv: -------------------------------------------------------------------------------- 1 | 164 2 | 866 3 | 723 4 | 434 5 | 720 6 | 235 7 | 275 8 | 733 9 | 700 10 | 915 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ja.tsv: -------------------------------------------------------------------------------- 1 | 555 2 | 660 3 | 81 4 | 801 5 | 800 6 | 580 7 | 291 8 | 635 9 | 632 10 | 579 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-jv.tsv: -------------------------------------------------------------------------------- 1 | 199 2 | 148 3 | 37 4 | 109 5 | 69 6 | 35 7 | 182 8 | 82 9 | 143 10 | 153 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ka.tsv: -------------------------------------------------------------------------------- 1 | 173 2 | 231 3 | 680 4 | 607 5 | 142 6 | 168 7 | 198 8 | 682 9 | 609 10 | 594 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-kk.tsv: -------------------------------------------------------------------------------- 1 | 299 2 | 99 3 | 537 4 | 281 5 | 518 6 | 308 7 | 394 8 | 282 9 | 362 10 | 191 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ko.tsv: -------------------------------------------------------------------------------- 1 | 755 2 | 334 3 | 515 4 | 996 5 | 412 6 | 519 7 | 671 8 | 377 9 | 705 10 | 754 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ml.tsv: -------------------------------------------------------------------------------- 1 | 674 2 | 71 3 | 218 4 | 437 5 | 43 6 | 474 7 | 29 8 | 612 9 | 176 10 | 317 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-mr.tsv: -------------------------------------------------------------------------------- 1 | 366 2 | 179 3 | 766 4 | 788 5 | 88 6 | 707 7 | 998 8 | 751 9 | 292 10 | 91 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-nl.tsv: -------------------------------------------------------------------------------- 1 | 401 2 | 198 3 | 774 4 | 501 5 | 810 6 | 29 7 | 18 8 | 35 9 | 408 10 | 926 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-pt.tsv: -------------------------------------------------------------------------------- 1 | 916 2 | 296 3 | 364 4 | 922 5 | 804 6 | 802 7 | 803 8 | 801 9 | 523 10 | 693 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ru.tsv: -------------------------------------------------------------------------------- 1 | 343 2 | 461 3 | 464 4 | 639 5 | 725 6 | 592 7 | 40 8 | 779 9 | 518 10 | 215 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-sw.tsv: -------------------------------------------------------------------------------- 1 | 114 2 | 81 3 | 80 4 | 97 5 | 95 6 | 100 7 | 98 8 | 76 9 | 107 10 | 109 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ta.tsv: -------------------------------------------------------------------------------- 1 | 102 2 | 264 3 | 260 4 | 174 5 | 82 6 | 224 7 | 142 8 | 98 9 | 178 10 | 235 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-te.tsv: -------------------------------------------------------------------------------- 1 | 79 2 | 37 3 | 8 4 | 13 5 | 110 6 | 205 7 | 153 8 | 113 9 | 112 10 | 162 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-th.tsv: -------------------------------------------------------------------------------- 1 | 536 2 | 38 3 | 545 4 | 256 5 | 489 6 | 155 7 | 276 8 | 547 9 | 231 10 | 420 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-tl.tsv: -------------------------------------------------------------------------------- 1 | 207 2 | 67 3 | 563 4 | 166 5 | 845 6 | 764 7 | 367 8 | 787 9 | 14 10 | 345 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-tr.tsv: -------------------------------------------------------------------------------- 1 | 501 2 | 515 3 | 143 4 | 774 5 | 881 6 | 44 7 | 45 8 | 170 9 | 326 10 | 94 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-ur.tsv: -------------------------------------------------------------------------------- 1 | 753 2 | 367 3 | 476 4 | 835 5 | 839 6 | 716 7 | 500 8 | 718 9 | 842 10 | 841 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-vi.tsv: -------------------------------------------------------------------------------- 1 | 310 2 | 403 3 | 952 4 | 975 5 | 92 6 | 472 7 | 451 8 | 460 9 | 469 10 | 506 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tatoeba/test-zh.tsv: -------------------------------------------------------------------------------- 1 | 687 2 | 981 3 | 504 4 | 127 5 | 508 6 | 852 7 | 425 8 | 530 9 | 797 10 | 110 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-ar.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\u0623\u0642\u064a\u0645\u062a \u0627\u0644\u0628\u0637\u0648\u0644\u0629 21 \u0645\u0631\u0629\u060c \u0634\u0627\u0631\u0643 \u0641\u064a \u0627\u0644\u0646\u0647\u0627\u0626\u064a\u0627\u062a 78 \u062f\u0648\u0644\u0629\u060c \u0648\u0639\u062f\u062f \u0627\u0644\u0641\u0631\u0642 \u0627\u0644\u062a\u064a \u0641\u0627\u0632\u062a \u0628\u0627\u0644\u0628\u0637\u0648\u0644\u0629 \u062d\u062a\u0649 \u0627\u0644\u0622\u0646 8 \u0641\u0631\u0642\u060c \u0648\u064a\u0639\u062f \u0627\u0644\u0645\u0646\u062a\u062e\u0628 \u0627\u0644\u0628\u0631\u0627\u0632\u064a\u0644\u064a \u0627\u0644\u0623\u0643\u062b\u0631 \u062a\u062a\u0648\u064a\u062c\u0627\u064b \u0628\u0627\u0644\u0643\u0623\u0633 \u062d\u064a\u062b \u0641\u0627\u0632 \u0628\u0647\u0627 5 \u0645\u0631\u0627\u062a \u0623\u0639\u0648\u0627\u0645: 1958\u060c 1962\u060c 1970\u060c 1994 \u06482002. \u064a\u0644\u064a\u0647 \u0627\u0644\u0645\u0646\u062a\u062e\u0628 \u0627\u0644\u0625\u064a\u0637\u0627\u0644\u064a \u0627\u0644\u0630\u064a \u0623\u062d\u0631\u0632\u0647\u0627 4 \u0645\u0631\u0627\u062a \u0641\u064a \u0623\u0639\u0648\u0627\u0645: 1934\u060c 1938\u060c 1982 \u06482006\u060c \u0628\u0627\u0644\u0645\u0634\u0627\u0631\u0643\u0629 \u0645\u0639 \u0627\u0644\u0645\u0646\u062a\u062e\u0628 \u0627\u0644\u0623\u0644\u0645\u0627\u0646\u064a \u0627\u0644\u0630\u064a \u062d\u0642\u0642\u0647\u0627 4 \u0645\u0631\u0627\u062a \u0623\u064a\u0636\u0627\u064b \u0623\u0639\u0648\u0627\u0645: 1954\u060c 1974 \u06481990 \u06482014\u060c \u062b\u0645 \u0627\u0644\u0623\u0648\u0631\u0648\u063a\u0648\u0627\u064a \u0648\u0627\u0644\u0623\u0631\u062c\u0646\u062a\u064a\u0646 \u0648\u0641\u0631\u0646\u0633\u0627 \u0628\u0631\u0635\u064a\u062f \u0628\u0637\u0648\u0644\u062a\u064a\u0646. \u0628\u064a\u0646\u0645\u0627 \u0623\u062d\u0631\u0632\u062a \u0645\u0646\u062a\u062e\u0628\u0627\u062a \u0625\u0646\u062c\u0644\u062a\u0631\u0627 \u0648\u0625\u0633\u0628\u0627\u0646\u064a\u0627 \u0627\u0644\u0628\u0637\u0648\u0644\u0629 \u0645\u0631\u0629 \u0648\u0627\u062d\u062f\u0629.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 394, 12 | "text": "\u0628\u0637\u0648\u0644\u062a\u064a\u0646" 13 | } 14 | ], 15 | "id": "arabic-2387335860751143628-1", 16 | "question": "\u0643\u0645 \u0639\u062f\u062f \u0645\u0631\u0627\u062a \u0641\u0648\u0632 \u0627\u0644\u0623\u0648\u0631\u0648\u063a\u0648\u0627\u064a \u0628\u0628\u0637\u0648\u0644\u0629 \u0643\u0627\u0633 \u0627\u0644\u0639\u0627\u0644\u0645 \u0644\u0643\u0631\u0648 \u0627\u0644\u0642\u062f\u0645\u061f" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "\u0642\u0627\u0626\u0645\u0629 \u0646\u0647\u0627\u0626\u064a\u0627\u062a \u0643\u0623\u0633 \u0627\u0644\u0639\u0627\u0644\u0645" 22 | } 23 | ], 24 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-en.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Wound care encourages and speeds wound healing via cleaning and protection from reinjury or infection. Depending on each patient's needs, it can range from the simplest first aid to entire nursing specialties such as wound, ostomy, and continence nursing and burn center care.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 0, 12 | "text": "Wound care" 13 | } 14 | ], 15 | "id": "english--3215621880858840488-2", 16 | "question": "What is a way to increase your wound healing speed?" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "Wound healing" 22 | } 23 | ], 24 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-fi.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Vanhin tunnettu lasilaatu on alkali\u00adkalkki\u00adlasi, jota valmistetaan soodan, hiekan ja kalkin seoksesta. Egyptiss\u00e4 siit\u00e4 valmistettiin lasitettuja kivi\u00adhelmi\u00e4 jo noin vuonna 4000 eaa.[8] Jo varhain havaittiin, ett\u00e4 lasi voitiin saada v\u00e4rilliseksi lis\u00e4\u00e4m\u00e4ll\u00e4 siihen eri malmeja. T\u00e4llaista v\u00e4rillist\u00e4 lasia k\u00e4ytettiin keramiikan lasitukseen sek\u00e4 jalokivien j\u00e4ljitelmiin.[8]", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 165, 12 | "text": "vuonna 4000 eaa" 13 | }, 14 | { 15 | "answer_start": 172, 16 | "text": "4000 eaa" 17 | } 18 | ], 19 | "id": "finnish--267686407665346253-29", 20 | "question": "Kauanko lasia on valmistettu?" 21 | } 22 | ] 23 | } 24 | ], 25 | "title": "Lasi" 26 | } 27 | ], 28 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 29 | } 30 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-id.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Kolumbus bukanlah orang pertama yang tiba di Amerika, yang ia dapati sudah diduduki. Ia juga bukan orang Eropa pertama yang sampai ke benua itu karena sekarang telah diakui secara meluas bahwa orang-orang Viking dari Eropa Utara telah berkunjung ke Amerika Utara pada abad ke 11 dan mendirikan koloni L'Anse aux Meadows untuk jangka waktu singkat. Terdapat perkiraan bahwa pelayar yang tidak dikenali pernah melawat ke Amerika sebelum Kolumbus dan membekalkannya dengan sumber untuk kejayaannya. Terdapat juga banyak teori mengenai ekspedisi ke Amerika oleh berbagai orang sepanjang masa itu.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 193, 12 | "text": "orang-orang Viking dari Eropa Utara" 13 | } 14 | ], 15 | "id": "indonesian-1906590851264221380-1", 16 | "question": "Siapakah yang menemuka benua Amerika ?" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "Kristoforus Kolumbus" 22 | } 23 | ], 24 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-ko.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\ud55c\ub2c8\ubc1c \ubc14\ub974\uce74(, , \uae30\uc6d0\uc804 247\ub144 ~ \uae30\uc6d0\uc804 183\ub144 \ub610\ub294 \uae30\uc6d0\uc804 181\ub144)\ub294 \uace0\ub300 \uce74\ub974\ud0c0\uace0\uc758 \uad70\uc0ac \uc9c0\ub3c4\uc790\ub85c, \ub300\uac1c \uc5ed\uc0ac\uc0c1 \uac00\uc7a5 \uc704\ub300\ud55c \uad70\uc0ac\ub839\uad00\ub4e4 \uc911 \ud558\ub098\ub85c \ud3c9\uac00\ub41c\ub2e4. \ud55c\ub2c8\ubc1c \ubc14\ub974\uce74\ub294 \uc81c1\ucc28 \ud3ec\uc5d0\ub2c8 \uc804\uc7c1\uc5d0\uc11c \uce74\ub974\ud0c0\uace0\uc758 \uc0ac\ub839\uad00\uc774\uc5c8\ub358 \ud558\ubc00\uce74\ub974 \ubc14\ub974\uce74\uc758 \uc544\ub4e4\ub85c \ud0dc\uc5b4\ub0ac\uc73c\uba70, \ud615\uc81c\ub85c\ub294 \ub9c8\uace0 \ubc14\ub974\uce74\uc640 \ud558\uc2a4\ub4dc\ub8e8\ubc1c \ubc14\ub974\uce74\uac00 \uc788\uc5c8\ub2e4. \ub610 \uacf5\uc815\ud55c \ud558\uc2a4\ub4dc\ub8e8\ubc1c\uacfc\ub294 \ucc98\ub0a8\ub9e4\ubd80\uc9c0\uac04\uc774\uc5c8\ub2e4.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 78, 12 | "text": "\uad70\uc0ac\ub839\uad00" 13 | }, 14 | { 15 | "answer_start": 78, 16 | "text": "\uad70\uc0ac\ub839\uad00" 17 | } 18 | ], 19 | "id": "korean-3543942044825767374-0", 20 | "question": "\ud55c\ub2c8\ubc1c \ubc14\ub974\uce74\uc758 \ucd5c\uc885 \uacc4\uae09\uc740 \ubb34\uc5c7\uc778\uac00\uc694?" 21 | } 22 | ] 23 | } 24 | ], 25 | "title": "\ud55c\ub2c8\ubc1c" 26 | } 27 | ], 28 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 29 | } 30 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-ru.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\u0420\u0430\u0437\u0432\u0438\u0432\u0430\u044f \u0442\u0435\u043e\u0440\u0438\u044e \u041a\u0430\u043d\u0442\u0430, \u0432 1832\u20141833 \u0433\u043e\u0434\u0430\u0445 \u0420\u043e\u0431\u0435\u0440\u0442 \u0444\u043e\u043d \u041c\u043e\u043b\u044c \u043f\u043e\u043f\u0443\u043b\u044f\u0440\u0438\u0437\u043e\u0432\u0430\u043b \u0442\u0435\u0440\u043c\u0438\u043d \u00ab\u043f\u0440\u0430\u0432\u043e\u0432\u043e\u0435 \u0433\u043e\u0441\u0443\u0434\u0430\u0440\u0441\u0442\u0432\u043e\u00bb (German: Rechtsstaat). \u0412 \u0447\u0438\u0441\u043b\u0435 \u0434\u0440\u0443\u0433\u0438\u0445 \u0432\u0438\u0434\u043d\u044b\u0445 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u0438\u0442\u0435\u043b\u0435\u0439 \u044d\u0442\u043e\u0433\u043e \u043d\u0430\u043f\u0440\u0430\u0432\u043b\u0435\u043d\u0438\u044f \u0431\u044b\u043b\u0438 \u041a\u0430\u0440\u043b \u0412\u0435\u043b\u044c\u043a\u0435\u0440, \u041e\u0442\u0442\u043e \u0411\u044d\u0440, \u0424\u0440\u0438\u0434\u0440\u0438\u0445 \u0428\u0442\u0430\u043b\u044c, \u0420\u0443\u0434\u043e\u043b\u044c\u0444 \u0413\u043d\u0435\u0439\u0441\u0442, \u0425\u0440\u0438\u0441\u0442\u043e\u0444 \u0444\u043e\u043d \u0417\u0438\u0433\u0432\u0430\u0440\u0442, \u041b\u043e\u0440\u0435\u043d\u0446 \u0444\u043e\u043d \u0428\u0442\u0435\u0439\u043d, \u0413\u0435\u043e\u0440\u0433 \u0415\u043b\u043b\u0438\u043d\u0435\u043a.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 25, 12 | "text": "1832\u20141833" 13 | }, 14 | { 15 | "answer_start": 25, 16 | "text": "1832\u20141833" 17 | } 18 | ], 19 | "id": "russian--1141212800069921250-46", 20 | "question": "\u041a\u043e\u0433\u0434\u0430 \u043f\u043e\u044f\u0432\u0438\u043b\u0441\u044f \u0442\u0435\u0440\u043c\u0438\u043d \u043f\u0440\u0430\u0432\u043e\u0432\u043e\u0301\u0435 \u0433\u043e\u0441\u0443\u0434\u0430\u0301\u0440\u0441\u0442\u0432\u043e?" 21 | } 22 | ] 23 | } 24 | ], 25 | "title": "\u041f\u0440\u0430\u0432\u043e\u0432\u043e\u0435 \u0433\u043e\u0441\u0443\u0434\u0430\u0440\u0441\u0442\u0432\u043e" 26 | } 27 | ], 28 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 29 | } 30 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-sw.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Kama kijana, Silva aliingizwa kwenye shule iliyo jirani ya Campo Grande ya Rio - kwa usawa shule ya kulisha kwa Fluminense. Alipokuwa na umri wa miaka 14, Silva alimvutia kocha wa Fluminense Maurinho wakati wa mchezo wa kirafiki huko Xer\u00e9m. Alipewa jaribio fupi, ambapo alipewa nafasi ya kiungo wa kujihami. Silva alipewa fursa ya kucheza alipokuwa mdogo. Mwaka wa 1999, akiwa na umri wa miaka 15, Silva aliulizwa Madureira, Olaria na Flamengo; alikataliwa katika kila klabu, jaribio la Flamengo likiwa gumu sana kwa sababu hata hakukuwa na mafunzo ya makocha.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 365, 12 | "text": "1999" 13 | } 14 | ], 15 | "id": "swahili--8211684794284159625-4", 16 | "question": "Je,Thiago Silva alianza kucheza soka mwaka upi?" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "Thiago Silva" 22 | } 23 | ], 24 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/tydiqa/test-te.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\u0c2e\u0c39\u0c3e \u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c02 \u0c32\u0c47\u0c26\u0c3e \u0c2e\u0c39\u0c3e\u0c38\u0c3e\u0c17\u0c30\u0c02 (Ocean), \u0c2d\u0c42\u0c17\u0c4b\u0c33\u0c02 \u0c2f\u0c4a\u0c15\u0c4d\u0c15 \u0c1c\u0c32\u0c3e\u0c35\u0c30\u0c23\u0c02\u0c32\u0c4b \u0c2a\u0c4d\u0c30\u0c27\u0c3e\u0c28 \u0c2d\u0c3e\u0c17\u0c02. \u0c09\u0c2a\u0c4d\u0c2a\u0c41 \u0c28\u0c40\u0c1f\u0c3f\u0c24\u0c4b \u0c28\u0c3f\u0c02\u0c21\u0c3f\u0c28 \u0c08 \u0c2e\u0c39\u0c3e \u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c3e\u0c32\u0c41 \u0c2d\u0c42\u0c2e\u0c3f \u0c09\u0c2a\u0c30\u0c3f\u0c24\u0c32\u0c2e\u0c41 \u0c2a\u0c48 71% \u0c2a\u0c48\u0c17\u0c3e \u0c35\u0c3f\u0c38\u0c4d\u0c24\u0c30\u0c3f\u0c02\u0c1a\u0c3f \u0c09\u0c28\u0c4d\u0c28\u0c3e\u0c2f\u0c3f. \u0c35\u0c40\u0c1f\u0c3f \u0c2e\u0c4a\u0c24\u0c4d\u0c24\u0c02 \u0c35\u0c48\u0c36\u0c3e\u0c32\u0c4d\u0c2f\u0c02 36.1 \u0c15\u0c4b\u0c1f\u0c4d\u0c32 \u0c1a\u0c26\u0c30\u0c2a\u0c41 \u0c15\u0c3f\u0c32\u0c4b \u0c2e\u0c40\u0c1f\u0c30\u0c4d\u0c32\u0c41. \u0c2a\u0c4d\u0c30\u0c2a\u0c02\u0c1a\u0c02 \u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30 \u0c1c\u0c32\u0c3e\u0c32\u0c32\u0c4b \u0c26\u0c3e\u0c26\u0c3e\u0c2a\u0c41 \u0c38\u0c17 \u0c2d\u0c3e\u0c17\u0c2e\u0c41 3,000 \u0c2e\u0c40\u0c1f\u0c30\u0c4d\u0c32\u0c41 (9,800 \u0c05\u0c21\u0c41\u0c17\u0c41\u0c32\u0c41) \u0c2b\u0c48\u0c17\u0c3e \u0c32\u0c4b\u0c24\u0c48\u0c28\u0c35\u0c3f. \u0c38\u0c30\u0c3e\u0c38\u0c30\u0c3f \u0c2e\u0c39\u0c3e\u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c3e\u0c32 '\u0c38\u0c46\u0c32\u0c48\u0c28\u0c3f\u0c1f\u0c40' (\u0c09\u0c2a\u0c4d\u0c2a\u0c26\u0c28\u0c02) \u0c26\u0c3e\u0c26\u0c3e\u0c2a\u0c41 \u0c2e\u0c3f\u0c32\u0c3f\u0c2f\u0c28\u0c41\u0c15\u0c41 35 \u0c35\u0c02\u0c24\u0c41\u0c32\u0c41 (3.5%). \u0c26\u0c3e\u0c26\u0c3e\u0c2a\u0c41 \u0c05\u0c28\u0c4d\u0c28\u0c3f \u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30 \u0c1c\u0c32\u0c3e\u0c32 \u0c38\u0c46\u0c32\u0c48\u0c28\u0c3f\u0c1f\u0c40 \u0c2e\u0c3f\u0c32\u0c3f\u0c2f\u0c28\u0c41\u0c15\u0c41 31 \u0c28\u0c41\u0c02\u0c21\u0c3f 38 \u0c35\u0c02\u0c24\u0c41\u0c32\u0c41 \u0c09\u0c02\u0c1f\u0c41\u0c02\u0c26\u0c3f. \u0c2e\u0c39\u0c3e\u0c38\u0c3e\u0c17\u0c30\u0c3e\u0c32\u0c28\u0c4d\u0c28\u0c40 \u0c15\u0c32\u0c3f\u0c38\u0c3f \u0c09\u0c28\u0c4d\u0c28\u0c3e \u0c17\u0c3e\u0c28\u0c3f \u0c35\u0c4d\u0c2f\u0c3e\u0c35\u0c39\u0c3e\u0c30\u0c3f\u0c15\u0c02\u0c17\u0c3e \u0c10\u0c26\u0c41 \u0c35\u0c47\u0c30\u0c41 \u0c35\u0c47\u0c30\u0c41 \u0c2e\u0c39\u0c3e\u0c38\u0c3e\u0c17\u0c30\u0c3e\u0c32\u0c41\u0c17\u0c3e \u0c17\u0c41\u0c30\u0c4d\u0c24\u0c3f\u0c38\u0c4d\u0c24\u0c3e\u0c30\u0c41. \u0c05\u0c35\u0c3f \u0c2a\u0c38\u0c3f\u0c2b\u0c3f\u0c15\u0c4d \u0c2e\u0c39\u0c3e\u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c02, \u0c05\u0c1f\u0c4d\u0c32\u0c3e\u0c02\u0c1f\u0c3f\u0c15\u0c4d \u0c2e\u0c39\u0c3e\u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c02, \u0c39\u0c3f\u0c02\u0c26\u0c42 \u0c2e\u0c39\u0c3e\u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c02, \u0c06\u0c30\u0c4d\u0c15\u0c3f\u0c1f\u0c3f\u0c15\u0c4d \u0c2e\u0c39\u0c3e\u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c02 \u0c2e\u0c30\u0c3f\u0c2f\u0c41 \u0c26\u0c15\u0c4d\u0c37\u0c3f\u0c23 \u0c2e\u0c39\u0c3e\u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c02.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 479, 12 | "text": "\u0c10\u0c26\u0c41" 13 | } 14 | ], 15 | "id": "telugu--2245295572008910947-0", 16 | "question": "\u0c2e\u0c39\u0c3e \u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c3e\u0c32\u0c41 \u0c0e\u0c28\u0c4d\u0c28\u0c3f \u0c09\u0c28\u0c4d\u0c28\u0c3e\u0c2f\u0c3f?" 17 | } 18 | ] 19 | } 20 | ], 21 | "title": "\u0c2e\u0c39\u0c3e\u0c38\u0c2e\u0c41\u0c26\u0c4d\u0c30\u0c02" 22 | } 23 | ], 24 | "version": "TyDiQA-GoldP-1.1-for-SQuAD-1.1" 25 | } 26 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-af.tsv: -------------------------------------------------------------------------------- 1 | ADJ 2 | NOUN 3 | AUX 4 | PRON 5 | PRON 6 | ADJ 7 | NOUN 8 | ADP 9 | SYM 10 | NOUN 11 | VERB 12 | PUNCT 13 | 14 | DET 15 | NOUN 16 | AUX 17 | PRON 18 | NOUN 19 | ADV 20 | SYM 21 | SYM 22 | NOUN 23 | VERB 24 | PUNCT 25 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-ar.tsv: -------------------------------------------------------------------------------- 1 | VERB 2 | PROPN 3 | PROPN 4 | PUNCT 5 | NOUN 6 | ADJ 7 | ADP 8 | PROPN 9 | ADP 10 | NOUN 11 | VERB 12 | PRON 13 | ADV 14 | PROPN 15 | PUNCT 16 | PUNCT 17 | ADP 18 | NOUN 19 | ADP 20 | NOUN 21 | NOUN 22 | ADJ 23 | PART 24 | ADJ 25 | ADP 26 | PROPN 27 | ADJ 28 | PUNCT 29 | PART 30 | PART 31 | NOUN 32 | ADJ 33 | ADP 34 | NOUN 35 | VERB 36 | ADP 37 | PRON 38 | ADP 39 | VERB 40 | PUNCT 41 | PUNCT 42 | 43 | ADP 44 | NOUN 45 | ADP 46 | PRON 47 | PRON 48 | VERB 49 | NOUN 50 | ADJ 51 | ADP 52 | NOUN 53 | NOUN 54 | ADJ 55 | ADP 56 | PROPN 57 | PROPN 58 | PUNCT 59 | PART 60 | AUX 61 | NOUN 62 | ADJ 63 | DET 64 | NOUN 65 | PUNCT 66 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-bg.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | AUX 3 | PUNCT 4 | SCONJ 5 | VERB 6 | AUX 7 | VERB 8 | PUNCT 9 | ADP 10 | ADJ 11 | NOUN 12 | ADP 13 | NOUN 14 | PRON 15 | PUNCT 16 | 17 | AUX 18 | INTJ 19 | VERB 20 | AUX 21 | AUX 22 | VERB 23 | PUNCT 24 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-de.tsv: -------------------------------------------------------------------------------- 1 | DET 2 | NOUN 3 | AUX 4 | ADP 5 | NOUN 6 | PUNCT 7 | CCONJ 8 | ADV 9 | PRON 10 | ADP 11 | ADJ 12 | PUNCT 13 | 14 | PRON 15 | AUX 16 | ADV 17 | NUM 18 | PRON 19 | PROPN 20 | VERB 21 | CCONJ 22 | SCONJ 23 | PRON 24 | DET 25 | NOUN 26 | ADV 27 | VERB 28 | AUX 29 | PRON 30 | ADV 31 | ADV 32 | ADP 33 | DET 34 | NOUN 35 | DET 36 | PROPN 37 | CCONJ 38 | DET 39 | PROPN 40 | VERB 41 | PUNCT 42 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-el.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | NOUN 3 | PUNCT 4 | DET 5 | NOUN 6 | DET 7 | ADJ 8 | NOUN 9 | NOUN 10 | AUX 11 | ADV 12 | ADV 13 | ADJ 14 | PUNCT 15 | ADV 16 | ADV 17 | SCONJ 18 | VERB 19 | ADJ 20 | ADJ 21 | NOUN 22 | PUNCT 23 | CCONJ 24 | CCONJ 25 | SCONJ 26 | DET 27 | NOUN 28 | PART 29 | VERB 30 | AUX 31 | VERB 32 | DET 33 | NOUN 34 | ADP 35 | DET 36 | PRON 37 | VERB 38 | PUNCT 39 | 40 | AUX 41 | PRON 42 | VERB 43 | ADV 44 | DET 45 | ADJ 46 | NOUN 47 | PRON 48 | DET 49 | NOUN 50 | PUNCT 51 | ADV 52 | VERB 53 | DET 54 | NOUN 55 | DET 56 | NOUN 57 | DET 58 | NOUN 59 | PUNCT 60 | ADV 61 | ADP 62 | DET 63 | PRON 64 | DET 65 | NOUN 66 | NOUN 67 | VERB 68 | AUX 69 | VERB 70 | DET 71 | NOUN 72 | DET 73 | NOUN 74 | SCONJ 75 | DET 76 | NOUN 77 | PRON 78 | PART 79 | VERB 80 | ADJ 81 | NOUN 82 | ADV 83 | ADP 84 | DET 85 | NOUN 86 | DET 87 | ADP 88 | NOUN 89 | NOUN 90 | PUNCT 91 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-en.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | AUX 3 | PRON 4 | PUNCT 5 | 6 | PRON 7 | AUX 8 | PRON 9 | PUNCT 10 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-es.tsv: -------------------------------------------------------------------------------- 1 | ADP 2 | ADV 3 | VERB 4 | NOUN 5 | ADP 6 | DET 7 | ADJ 8 | NOUN 9 | ADP 10 | DET 11 | NOUN 12 | ADJ 13 | ADP 14 | NOUN 15 | ADJ 16 | PUNCT 17 | DET 18 | NOUN 19 | ADP 20 | NOUN 21 | ADJ 22 | AUX 23 | ADJ 24 | PUNCT 25 | VERB 26 | DET 27 | NOUN 28 | ADP 29 | DET 30 | NOUN 31 | ADP 32 | NOUN 33 | PROPN 34 | PROPN 35 | PUNCT 36 | NOUN 37 | ADJ 38 | ADP 39 | NOUN 40 | PROPN 41 | PUNCT 42 | 43 | ADP 44 | DET 45 | PRON 46 | VERB 47 | DET 48 | NOUN 49 | ADP 50 | DET 51 | NOUN 52 | ADJ 53 | ADP 54 | PROPN 55 | NOUN 56 | PUNCT 57 | DET 58 | AUX 59 | NOUN 60 | ADJ 61 | PUNCT 62 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-et.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | 3 | NOUN 4 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-eu.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | PUNCT 3 | CCONJ 4 | PUNCT 5 | PART 6 | AUX 7 | ADJ 8 | NOUN 9 | VERB 10 | NOUN 11 | NOUN 12 | PUNCT 13 | NOUN 14 | ADJ 15 | NOUN 16 | ADJ 17 | AUX 18 | ADV 19 | PUNCT 20 | 21 | PROPN 22 | NOUN 23 | ADJ 24 | VERB 25 | AUX 26 | NOUN 27 | DET 28 | VERB 29 | ADJ 30 | VERB 31 | CCONJ 32 | NOUN 33 | ADJ 34 | NUM 35 | ADV 36 | VERB 37 | PROPN 38 | NOUN 39 | PUNCT 40 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-fa.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | VERB 3 | SCONJ 4 | ADP 5 | NOUN 6 | NUM 7 | NOUN 8 | PRON 9 | PART 10 | NOUN 11 | VERB 12 | PUNCT 13 | 14 | NOUN 15 | PRON 16 | VERB 17 | SCONJ 18 | NOUN 19 | ADP 20 | DET 21 | NOUN 22 | NOUN 23 | VERB 24 | PUNCT 25 | DET 26 | NOUN 27 | VERB 28 | SCONJ 29 | ADP 30 | NOUN 31 | SCONJ 32 | ADP 33 | NOUN 34 | PRON 35 | VERB 36 | PUNCT 37 | NOUN 38 | AUX 39 | PUNCT 40 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-fi.tsv: -------------------------------------------------------------------------------- 1 | ADV 2 | NOUN 3 | 4 | ADV 5 | AUX 6 | VERB 7 | NOUN 8 | PUNCT 9 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-fr.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | AUX 3 | PRON 4 | SCONJ 5 | DET 6 | NOUN 7 | ADP 8 | NOUN 9 | PUNCT 10 | 11 | VERB 12 | PRON 13 | VERB 14 | DET 15 | NOUN 16 | ADP 17 | NOUN 18 | PUNCT 19 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-he.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | ADV 3 | VERB 4 | ADP 5 | NOUN 6 | PUNCT 7 | PROPN 8 | PROPN 9 | AUX 10 | AUX 11 | VERB 12 | NOUN 13 | PUNCT 14 | VERB 15 | PROPN 16 | PROPN 17 | VERB 18 | ADP 19 | DET 20 | NOUN 21 | ADJ 22 | NOUN 23 | VERB 24 | NOUN 25 | VERB 26 | NOUN 27 | VERB 28 | PROPN 29 | VERB 30 | PUNCT 31 | PROPN 32 | PROPN 33 | VERB 34 | ADP 35 | NOUN 36 | NOUN 37 | PUNCT 38 | 39 | NOUN 40 | DET 41 | PUNCT 42 | NUM 43 | AUX 44 | NOUN 45 | NOUN 46 | NUM 47 | NUM 48 | PUNCT 49 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-hi.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | ADP 3 | PROPN 4 | PROPN 5 | PUNCT 6 | PROPN 7 | PROPN 8 | CCONJ 9 | PROPN 10 | PART 11 | ADJ 12 | NOUN 13 | AUX 14 | PUNCT 15 | 16 | DET 17 | NOUN 18 | ADP 19 | NOUN 20 | ADP 21 | PROPN 22 | NOUN 23 | AUX 24 | PUNCT 25 | PRON 26 | PART 27 | PROPN 28 | ADP 29 | NOUN 30 | VERB 31 | CCONJ 32 | ADJ 33 | VERB 34 | ADP 35 | NOUN 36 | PART 37 | NOUN 38 | VERB 39 | AUX 40 | AUX 41 | PUNCT 42 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-hu.tsv: -------------------------------------------------------------------------------- 1 | DET 2 | ADJ 3 | NOUN 4 | ADV 5 | DET 6 | ADJ 7 | NOUN 8 | ADJ 9 | ADJ 10 | NOUN 11 | VERB 12 | DET 13 | ADJ 14 | NOUN 15 | NOUN 16 | CCONJ 17 | DET 18 | NOUN 19 | PUNCT 20 | 21 | ADV 22 | ADV 23 | DET 24 | ADJ 25 | NOUN 26 | ADJ 27 | NOUN 28 | VERB 29 | PUNCT 30 | CCONJ 31 | DET 32 | ADJ 33 | NOUN 34 | CCONJ 35 | DET 36 | NOUN 37 | NOUN 38 | CCONJ 39 | ADJ 40 | NOUN 41 | VERB 42 | DET 43 | NOUN 44 | PUNCT 45 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-id.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | ADP 3 | NUM 4 | NOUN 5 | NUM 6 | VERB 7 | ADP 8 | PROPN 9 | PROPN 10 | ADP 11 | NOUN 12 | PROPN 13 | NUM 14 | CCONJ 15 | NUM 16 | NOUN 17 | ADJ 18 | VERB 19 | ADP 20 | ADJ 21 | PRON 22 | VERB 23 | ADP 24 | VERB 25 | NOUN 26 | ADP 27 | NOUN 28 | PROPN 29 | PUNCT 30 | PROPN 31 | ADJ 32 | NOUN 33 | VERB 34 | PROPN 35 | ADV 36 | VERB 37 | ADJ 38 | ADP 39 | NOUN 40 | NOUN 41 | NOUN 42 | PUNCT 43 | VERB 44 | ADP 45 | NOUN 46 | ADJ 47 | ADP 48 | PROPN 49 | PROPN 50 | CCONJ 51 | VERB 52 | ADP 53 | NOUN 54 | NUM 55 | PROPN 56 | ADP 57 | NOUN 58 | ADJ 59 | PUNCT 60 | 61 | PROPN 62 | AUX 63 | NOUN 64 | PRON 65 | VERB 66 | ADP 67 | PROPN 68 | PROPN 69 | PUNCT 70 | PROPN 71 | PUNCT 72 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-it.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | ADP 3 | NOUN 4 | PROPN 5 | PUNCT 6 | VERB 7 | ADV 8 | PUNCT 9 | DET 10 | NUM 11 | SYM 12 | ADP 13 | NUM 14 | NOUN 15 | ADJ 16 | PUNCT 17 | AUX 18 | VERB 19 | DET 20 | NUM 21 | ADP 22 | ADJ 23 | SYM 24 | SYM 25 | SYM 26 | 27 | SYM 28 | SYM 29 | SYM 30 | INTJ 31 | PROPN 32 | PUNCT 33 | X 34 | X 35 | NOUN 36 | ADV 37 | PRON 38 | PUNCT 39 | SYM 40 | ADJ 41 | NOUN 42 | PROPN 43 | PUNCT 44 | SYM 45 | X 46 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-ja.tsv: -------------------------------------------------------------------------------- 1 | PUNCT 2 | PROPN 3 | ADP 4 | NOUN 5 | ADP 6 | ADP 7 | NOUN 8 | ADP 9 | ADV 10 | VERB 11 | SCONJ 12 | AUX 13 | NOUN 14 | ADP 15 | PUNCT 16 | NOUN 17 | ADP 18 | ADJ 19 | AUX 20 | NOUN 21 | ADP 22 | VERB 23 | SCONJ 24 | AUX 25 | AUX 26 | PUNCT 27 | ADP 28 | VERB 29 | AUX 30 | NOUN 31 | ADP 32 | PUNCT 33 | PROPN 34 | NOUN 35 | ADJ 36 | NOUN 37 | PROPN 38 | SYM 39 | PROPN 40 | ADP 41 | NOUN 42 | ADP 43 | VERB 44 | AUX 45 | AUX 46 | PUNCT 47 | 48 | PROPN 49 | ADP 50 | NOUN 51 | ADP 52 | PROPN 53 | ADP 54 | NOUN 55 | ADP 56 | VERB 57 | AUX 58 | AUX 59 | NOUN 60 | ADP 61 | ADP 62 | PUNCT 63 | DET 64 | NOUN 65 | ADP 66 | ADV 67 | VERB 68 | PUNCT 69 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-kk.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | NUM 3 | NOUN 4 | NOUN 5 | PROPN 6 | NUM 7 | NOUN 8 | AUX 9 | PUNCT 10 | 11 | VERB 12 | PUNCT 13 | NOUN 14 | PRON 15 | PRON 16 | VERB 17 | VERB 18 | AUX 19 | PUNCT 20 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-ko.tsv: -------------------------------------------------------------------------------- 1 | DET 2 | ADV 3 | SCONJ 4 | PROPN 5 | NOUN 6 | ADV 7 | VERB 8 | CCONJ 9 | PRON 10 | NOUN 11 | NOUN 12 | ADV 13 | VERB 14 | NOUN 15 | ADJ 16 | AUX 17 | PUNCT 18 | 19 | ADV 20 | VERB 21 | NOUN 22 | SCONJ 23 | NOUN 24 | NOUN 25 | VERB 26 | PROPN 27 | NOUN 28 | NOUN 29 | NOUN 30 | VERB 31 | ADV 32 | VERB 33 | PUNCT 34 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-mr.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | VERB 3 | PRON 4 | NOUN 5 | VERB 6 | AUX 7 | PUNCT 8 | 9 | ADJ 10 | NOUN 11 | ADJ 12 | NOUN 13 | VERB 14 | PUNCT 15 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-nl.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | PUNCT 3 | NOUN 4 | CCONJ 5 | NOUN 6 | AUX 7 | ADP 8 | PROPN 9 | VERB 10 | ADP 11 | DET 12 | NOUN 13 | ADP 14 | DET 15 | NOUN 16 | ADP 17 | DET 18 | NOUN 19 | ADP 20 | DET 21 | NOUN 22 | ADP 23 | DET 24 | NUM 25 | ADJ 26 | NOUN 27 | CCONJ 28 | PROPN 29 | PUNCT 30 | PROPN 31 | PUNCT 32 | ADP 33 | PROPN 34 | PUNCT 35 | 36 | CCONJ 37 | ADP 38 | ADJ 39 | NOUN 40 | PUNCT 41 | CCONJ 42 | ADP 43 | VERB 44 | CCONJ 45 | VERB 46 | VERB 47 | DET 48 | ADJ 49 | PRON 50 | NOUN 51 | ADP 52 | DET 53 | ADJ 54 | VERB 55 | VERB 56 | PUNCT 57 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-pt.tsv: -------------------------------------------------------------------------------- 1 | CCONJ 2 | ADP 3 | ADV 4 | VERB 5 | DET 6 | NOUN 7 | ADJ 8 | VERB 9 | DET 10 | NOUN 11 | ADP 12 | NOUN 13 | ADP 14 | NOUN 15 | PUNCT 16 | PUNCT 17 | VERB 18 | PUNCT 19 | 20 | DET 21 | NOUN 22 | VERB 23 | CCONJ 24 | DET 25 | NOUN 26 | AUX 27 | VERB 28 | DET 29 | NOUN 30 | ADP 31 | NOUN 32 | ADP 33 | NOUN 34 | CCONJ 35 | ADP 36 | NOUN 37 | ADP 38 | NOUN 39 | ADP 40 | NOUN 41 | PUNCT 42 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-ru.tsv: -------------------------------------------------------------------------------- 1 | ADP 2 | ADJ 3 | NOUN 4 | NOUN 5 | NOUN 6 | PROPN 7 | PUNCT 8 | NOUN 9 | ADP 10 | PROPN 11 | VERB 12 | ADP 13 | NUM 14 | NOUN 15 | PUNCT 16 | 17 | VERB 18 | ADP 19 | NOUN 20 | ADP 21 | ADJ 22 | NOUN 23 | NOUN 24 | ADJ 25 | CCONJ 26 | ADJ 27 | NOUN 28 | VERB 29 | NOUN 30 | CCONJ 31 | NOUN 32 | NOUN 33 | PUNCT 34 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-ta.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | ADJ 3 | NOUN 4 | NOUN 5 | VERB 6 | ADJ 7 | NOUN 8 | VERB 9 | AUX 10 | PUNCT 11 | 12 | ADJ 13 | NOUN 14 | ADV 15 | VERB 16 | AUX 17 | PROPN 18 | NOUN 19 | VERB 20 | VERB 21 | AUX 22 | VERB 23 | PROPN 24 | PUNCT 25 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-te.tsv: -------------------------------------------------------------------------------- 1 | VERB 2 | PUNCT 3 | 4 | NOUN 5 | VERB 6 | PUNCT 7 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-th.tsv: -------------------------------------------------------------------------------- 1 | PUNCT 2 | ADP 3 | ADP 4 | VERB 5 | PART 6 | VERB 7 | NOUN 8 | ADJ 9 | AUX 10 | NOUN 11 | DET 12 | PART 13 | PART 14 | VERB 15 | PART 16 | ADV 17 | ADP 18 | NOUN 19 | PUNCT 20 | VERB 21 | NOUN 22 | NOUN 23 | DET 24 | ADV 25 | PART 26 | AUX 27 | NOUN 28 | ADJ 29 | PUNCT 30 | PROPN 31 | PROPN 32 | NOUN 33 | VERB 34 | ADJ 35 | ADP 36 | PROPN 37 | VERB 38 | VERB 39 | NOUN 40 | ADP 41 | NOUN 42 | NOUN 43 | 44 | ADP 45 | NOUN 46 | DET 47 | VERB 48 | VERB 49 | ADJ 50 | NOUN 51 | ADP 52 | PROPN 53 | NOUN 54 | DET 55 | VERB 56 | VERB 57 | PART 58 | ADV 59 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-tl.tsv: -------------------------------------------------------------------------------- 1 | VERB 2 | DET 3 | NOUN 4 | PUNCT 5 | 6 | VERB 7 | ADP 8 | NOUN 9 | DET 10 | NOUN 11 | PUNCT 12 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-tr.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | DET 3 | NOUN 4 | NOUN 5 | NOUN 6 | VERB 7 | PUNCT 8 | 9 | NOUN 10 | NOUN 11 | NOUN 12 | VERB 13 | AUX 14 | PUNCT 15 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-ur.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | PROPN 3 | PART 4 | PROPN 5 | PROPN 6 | PROPN 7 | PROPN 8 | ADP 9 | PROPN 10 | ADP 11 | ADJ 12 | NOUN 13 | ADP 14 | ADJ 15 | NOUN 16 | ADP 17 | NOUN 18 | ADP 19 | NOUN 20 | VERB 21 | AUX 22 | NOUN 23 | NOUN 24 | NOUN 25 | ADP 26 | NOUN 27 | ADJ 28 | VERB 29 | PUNCT 30 | 31 | PRON 32 | ADP 33 | PROPN 34 | ADP 35 | ADJ 36 | CCONJ 37 | PROPN 38 | PROPN 39 | ADP 40 | ADJ 41 | NOUN 42 | ADP 43 | ADJ 44 | CCONJ 45 | ADJ 46 | VERB 47 | ADP 48 | ADP 49 | ADJ 50 | NOUN 51 | NOUN 52 | NOUN 53 | VERB 54 | ADP 55 | NOUN 56 | VERB 57 | AUX 58 | VERB 59 | SCONJ 60 | DET 61 | NOUN 62 | ADP 63 | ADJ 64 | NOUN 65 | ADP 66 | ADP 67 | ADP 68 | NOUN 69 | VERB 70 | AUX 71 | ADP 72 | NOUN 73 | AUX 74 | PUNCT 75 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-vi.tsv: -------------------------------------------------------------------------------- 1 | PUNCT 2 | NOUN 3 | X 4 | VERB 5 | PUNCT 6 | NOUN 7 | PUNCT 8 | NOUN 9 | PUNCT 10 | NOUN 11 | PUNCT 12 | ADJ 13 | ADP 14 | VERB 15 | PROPN 16 | CCONJ 17 | VERB 18 | X 19 | PUNCT 20 | NUM 21 | NOUN 22 | VERB 23 | PUNCT 24 | PUNCT 25 | 26 | NOUN 27 | NOUN 28 | NUM 29 | NOUN 30 | X 31 | VERB 32 | X 33 | PUNCT 34 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-yo.tsv: -------------------------------------------------------------------------------- 1 | ADP 2 | NOUN 3 | NOUN 4 | DET 5 | NOUN 6 | VERB 7 | DET 8 | NOUN 9 | CCONJ 10 | NOUN 11 | PUNCT 12 | 13 | NOUN 14 | CCONJ 15 | VERB 16 | ADP 17 | NOUN 18 | PUNCT 19 | PRON 20 | CCONJ 21 | ADJ 22 | PUNCT 23 | NOUN 24 | CCONJ 25 | VERB 26 | ADP 27 | ADJ 28 | NOUN 29 | PUNCT 30 | NOUN 31 | NOUN 32 | CCONJ 33 | AUX 34 | NOUN 35 | ADP 36 | NOUN 37 | DET 38 | PUNCT 39 | -------------------------------------------------------------------------------- /mock_test_data/labels/udpos/test-zh.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | ADV 3 | VERB 4 | NOUN 5 | PRON 6 | PUNCT 7 | 8 | VERB 9 | ADJ 10 | NOUN 11 | PART 12 | NOUN 13 | ADV 14 | VERB 15 | ADP 16 | PRON 17 | PART 18 | ADJ 19 | NOUN 20 | PUNCT 21 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-ar.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-bg.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-de.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-el.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-en.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-es.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-fr.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-hi.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-ru.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-sw.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-th.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-tr.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-ur.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-vi.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xnli/test-zh.tsv: -------------------------------------------------------------------------------- 1 | contradiction 2 | entailment 3 | neutral 4 | neutral 5 | entailment 6 | contradiction 7 | contradiction 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/labels/xquad/test-de.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "Die Verteidigung der Panthers gab nur 308 Punkte ab und belegte den sechsten Platz in der Liga, w\u00e4hrend sie die NFL mit 24 Interceptions in dieser Kategorie anf\u00fchrte und sich mit vier Pro Bowl-Selektionen r\u00fchmen konnte. Pro Bowl Defensive Tackle Kawann Short f\u00fchrte das Team mit 11 Sacks an, erzwang zudem drei Fumbles und erzielte zwei Fumble Recoverys. Mario Addison, ebenfalls Lineman, addierte 6\u00bd Sacks hinzu. Die Panthers-Line pr\u00e4sentierte auch den erfahrenen Defensive End Jared Allen, einen 5-fachen Pro-Bowler, der mit 136 Sacks der aktive Anf\u00fchrer in der NFL-Kategorie Karriere-Sacks war, sowie den Defensive End Kony Ealy, der 5 Sacks in nur 9 Starts erzielte. Nach ihnen wurden zwei der drei Linebacker der Panthers ausgew\u00e4hlt, um im Pro Bowl zu spielen: Thomas Davis und Luke Kuechly. Davis erzielte 5\u00bd Sacks, vier erzwungene Fumbles und vier Interceptions, w\u00e4hrend Kuechly das Team bei den Tackles anf\u00fchrte (118), zwei Fumbles erzwang und vier P\u00e4sse abfing. Carolinas Secondarys bestanden aus dem Pro Bowl-Safety Kurt Coleman, der das Team mit einem Karrierehoch von sieben Interceptions anf\u00fchrte und gleichzeitig 88 Tackles erzielen konnte, und Pro Bowl-Cornerback Josh Norman, der sich w\u00e4hrend der Saison zur Shutdown Corner entwickelte und vier Interceptions erzielte, von denen zwei zu Touchdowns f\u00fcr sein Team wurden.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 38, 12 | "text": "308" 13 | } 14 | ], 15 | "id": "56beb4343aeaaa14008c925b", 16 | "question": "Wie viele Punkte gab die Verteidigung der Panthers ab?" 17 | }, 18 | { 19 | "answers": [ 20 | { 21 | "answer_start": 527, 22 | "text": "136" 23 | } 24 | ], 25 | "id": "56beb4343aeaaa14008c925c", 26 | "question": "Wie viele Sacks erzielte Jared Allen in seiner Karriere?" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | ], 33 | "version": "1.1" 34 | } 35 | -------------------------------------------------------------------------------- /mock_test_data/labels/xquad/test-en.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "The Panthers defense gave up just 308 points, ranking sixth in the league, while also leading the NFL in interceptions with 24 and boasting four Pro Bowl selections. Pro Bowl defensive tackle Kawann Short led the team in sacks with 11, while also forcing three fumbles and recovering two. Fellow lineman Mario Addison added 6\u00bd sacks. The Panthers line also featured veteran defensive end Jared Allen, a 5-time pro bowler who was the NFL's active career sack leader with 136, along with defensive end Kony Ealy, who had 5 sacks in just 9 starts. Behind them, two of the Panthers three starting linebackers were also selected to play in the Pro Bowl: Thomas Davis and Luke Kuechly. Davis compiled 5\u00bd sacks, four forced fumbles, and four interceptions, while Kuechly led the team in tackles (118) forced two fumbles, and intercepted four passes of his own. Carolina's secondary featured Pro Bowl safety Kurt Coleman, who led the team with a career high seven interceptions, while also racking up 88 tackles and Pro Bowl cornerback Josh Norman, who developed into a shutdown corner during the season and had four interceptions, two of which were returned for touchdowns.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 34, 12 | "text": "308" 13 | } 14 | ], 15 | "id": "56beb4343aeaaa14008c925b", 16 | "question": "How many points did the Panthers defense surrender?" 17 | }, 18 | { 19 | "answers": [ 20 | { 21 | "answer_start": 470, 22 | "text": "136" 23 | } 24 | ], 25 | "id": "56beb4343aeaaa14008c925c", 26 | "question": "How many career sacks did Jared Allen have?" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | ], 33 | "version": "1.1" 34 | } 35 | -------------------------------------------------------------------------------- /mock_test_data/labels/xquad/test-es.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\ufeffLos Panthers, que adem\u00e1s de liderar las intercepciones de la NFL con 24 y contar con cuatro jugadores de la Pro Bowl, cedieron solo 308 puntos en defensa y se sit\u00faan en el sexto lugar de la liga. Kawann Short, tacle defensivo de la Pro Bowl, lider\u00f3 al equipo con 11 capturas, 3 balones sueltos forzados y 2 recuperaciones. A su vez, el liniero Mario Addison, consigui\u00f3 6 capturas y media. En la l\u00ednea de los Panthers, tambi\u00e9n destac\u00f3 como ala defensiva el veterano Jared Allen \u20155 veces jugador de la Pro Bowl y que fue el l\u00edder, en activo, de capturas de la NFL con 136\u2015 junto con el tambi\u00e9n ala defensiva Kony Ealy, que lleva 5 capturas en solo 9 partidos como titular. Detr\u00e1s de ellos, Thomas Davis y Luke Kuechly, dos de los tres apoyadores titulares que tambi\u00e9n han sido seleccionados para jugar la Pro Bowl. Davis se hizo con 5 capturas y media, 4 balones sueltos forzados y 4 intercepciones, mientras que Kuechly lider\u00f3 al equipo en derribos (118), forz\u00f3 2 balones sueltos e intercept\u00f3 4 pases. La secundaria de Carolina cont\u00f3, por un lado, con la seguridad del jugador de la Pro Bowl Kurt Coleman que asumi\u00f3 las riendas del equipo gracias a sus 7 intercepciones (nunca hab\u00eda conseguido tantas hasta ahora) y a sus 88 derribos, y, por otro lado, con el esquinero Josh Norman, tambi\u00e9n jugador de la Pro Bowl y que a pesar de haber estado de capa ca\u00edda durante la temporada, consigui\u00f3 4 intercepciones, de las cuales dos se convirtieron en touchdowns.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 133, 12 | "text": "308" 13 | } 14 | ], 15 | "id": "56beb4343aeaaa14008c925b", 16 | "question": "\u00bfCu\u00e1ntos puntos dejaron escapar en defensa los Panthers?" 17 | }, 18 | { 19 | "answers": [ 20 | { 21 | "answer_start": 567, 22 | "text": "136" 23 | } 24 | ], 25 | "id": "56beb4343aeaaa14008c925c", 26 | "question": "\u00bfCu\u00e1ntas capturas ha conseguido Jared Allen en su carrera?" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | ], 33 | "version": "1.1" 34 | } 35 | -------------------------------------------------------------------------------- /mock_test_data/labels/xquad/test-tr.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\ufeffPanthers savunmas\u0131 ligdeki derecesi alt\u0131nc\u0131 s\u0131rada olarak sadece 308 say\u0131 b\u0131rakt\u0131, ayn\u0131 zamanda NFL'de 24 topu kapma ile ligin ba\u015f\u0131n\u0131 \u00e7ekmesi ve d\u00f6rt Pro Bowl se\u00e7meleri ile \u00f6v\u00fcn\u00e7 duydu. Pro Bowl orta \u00e7izgi savunmac\u0131s\u0131 Kawann Short, tak\u0131m\u0131 11'le birlikte sack ederken, ayn\u0131 zamanda \u00fc\u00e7 oyuncuyu d\u00fc\u015f\u00fcrme ve iki yeniden savunmaya zorlad\u0131. Yan hakem Mario Addison, 6\u00bd sack ekledi. Panthers hatt\u0131nda ayr\u0131ca, sadece 9 ba\u015flang\u0131\u00e7ta 5 sack eden u\u00e7 \u00e7izgi savunmac\u0131s\u0131 Kony Ealy ile birlikte 136 kez ile NFL'nin aktif kariyer sack lideri ve 5 kez profesyonel bir top at\u0131c\u0131s\u0131 olan Jared Allen \u00f6ne \u00e7\u0131kmaktad\u0131r. Arkalar\u0131nda, Panthers'lerin \u00fc\u00e7 adet ikinci hat savunma oyuncusundan ikisi de Pro Bowl'da oynamak i\u00e7in se\u00e7ildi: Thomas Davis ve Luke Kuechly. Davis, d\u00f6rd\u00fcnde mecburi oyuncuyu d\u00fc\u015f\u00fcrme ve d\u00f6rt topu kapma olmak \u00fczere 5\u00bd sack etmeyi bir araya getirirken, Kuechly top \u00e7almalarda (118) iki mecburi oyuncuyu d\u00fc\u015f\u00fcrme ve kendi ba\u015f\u0131na d\u00f6rt pas\u0131n\u0131 kesme ile tak\u0131ma \u00f6nc\u00fcl\u00fck etti. Carolina\u2019n\u0131n ikincisi olarak g\u00f6sterilen Pro Bowl g\u00fcvenli\u011fi i\u00e7in kariyerinin en y\u00fckse\u011fi olarak 7 top kapma ile tak\u0131m\u0131 s\u00fcr\u00fckleyen Kurt Coleman\u2019a ve 88 top \u00e7alma say\u0131s\u0131yla ve Pro Bowl k\u00f6\u015fe savunmac\u0131s\u0131 sezon boyunca bir kapal\u0131 k\u00f6\u015fe i\u00e7inde geli\u015fen ikisi kale \u00e7izgisini ge\u00e7i\u015ften d\u00f6nen d\u00f6rt top kapma geli\u015ftiren Josh Norman'a rol verdi.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 66, 12 | "text": "308" 13 | } 14 | ], 15 | "id": "56beb4343aeaaa14008c925b", 16 | "question": "Panthers savunmas\u0131 ka\u00e7 say\u0131 b\u0131rakm\u0131\u015ft\u0131r?" 17 | }, 18 | { 19 | "answers": [ 20 | { 21 | "answer_start": 480, 22 | "text": "136" 23 | } 24 | ], 25 | "id": "56beb4343aeaaa14008c925c", 26 | "question": "Jared Allen'\u0131n ka\u00e7 tane kariyer sack edi\u015fi vard\u0131r?" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | ], 33 | "version": "1.1" 34 | } 35 | -------------------------------------------------------------------------------- /mock_test_data/labels/xquad/test-vi.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\u0110\u1ed9i th\u1ee7 c\u1ee7a Panthers ch\u1ec9 thua 308 \u0111i\u1ec3m, \u0111\u1ee9ng th\u1ee9 s\u00e1u trong gi\u1ea3i \u0111\u1ea5u, \u0111\u1ed3ng th\u1eddi d\u1eabn \u0111\u1ea7u NFL v\u1ec1 s\u1ed1 l\u1ea7n \u0111o\u1ea1t b\u00f3ng (intercept) v\u1edbi 24 l\u1ea7n v\u00e0 t\u1ef1 h\u00e0o v\u1edbi b\u1ed1n l\u1ef1a ch\u1ecdn Pro Bowl. Ng\u01b0\u1eddi h\u00fac (tackle) trong \u0111\u1ed9i th\u1ee7 tham gia Pro Bowl, Kawann Short d\u1eabn \u0111\u1ea7u \u0111\u1ed9i v\u1ec1 s\u1ed1 l\u1ea7n v\u1eadt ng\u00e3 (sack) v\u1edbi 11 l\u1ea7n, \u0111\u1ed3ng th\u1eddi h\u00fac v\u0103ng b\u00f3ng (fumble) 3 l\u1ea7n v\u00e0 l\u1ea5y l\u1ea1i \u0111\u01b0\u1ee3c b\u00f3ng (recover) 2 l\u1ea7n. \u0110\u1ed3ng nghi\u1ec7p lineman Mario Addison \u0111\u00e3 th\u00eam 6\u00bd l\u1ea7n v\u1eadt ng\u00e3. \u0110\u1ed9i h\u00ecnh Panthers c\u0169ng c\u00f3 ng\u01b0\u1eddi ti\u1ec1n v\u1ec7 (defensive end) k\u1ef3 c\u1ef1u Jared Allen, ng\u01b0\u1eddi tham gia Pro Bowl 5 l\u1ea7n, d\u1eabn \u0111\u1ea7u v\u1ec1 s\u1ed1 l\u1ea7n v\u1eadt ng\u00e3 trong s\u1ef1 nghi\u1ec7p NFL v\u1edbi 136 l\u1ea7n, c\u00f9ng v\u1edbi ng\u01b0\u1eddi ti\u1ec1n v\u1ec7 Kony Ealy, ng\u01b0\u1eddi \u0111\u00e3 c\u00f3 5 l\u1ea7n v\u1eadt ng\u00e3 sau 9 l\u1ea7n xu\u1ea5t ph\u00e1t. Ph\u00eda sau h\u1ecd, hai trong s\u1ed1 ba ng\u01b0\u1eddi h\u00e0ng v\u1ec7 (linebacker) xu\u1ea5t ph\u00e1t c\u1ee7a Panthers c\u0169ng \u0111\u01b0\u1ee3c ch\u1ecdn \u0111\u1ec3 ch\u01a1i trong Pro Bowl: Thomas Davis v\u00e0 Luke Kuechly. Davis \u0111\u00e3 c\u00f3 5\u00bd l\u1ea7n v\u1eadt ng\u00e3, 4 l\u1ea7n h\u00fac v\u0103ng b\u00f3ng v\u00e0 4 l\u1ea7n \u0111o\u1ea1t b\u00f3ng, trong khi Kuechly d\u1eabn \u0111\u1ea7u \u0111\u1ed9i v\u1ec1 s\u1ed1 l\u1ea7n h\u00fac (118 l\u1ea7n), 2 l\u1ea7n h\u00fac v\u0103ng b\u00f3ng v\u00e0 \u0111o\u1ea1t b\u00f3ng t\u1eeb c\u00e1c \u0111\u01b0\u1eddng chuy\u1ec1n 4 l\u1ea7n. \u0110\u1ed9i h\u00ecnh ph\u00eda sau c\u1ee7a Carolina c\u00f3 h\u1eadu-h\u1eadu v\u1ec7 (safety) tham gia Pro Bowl Kurt Coleman, ng\u01b0\u1eddi d\u1eabn \u0111\u1ea7u \u0111\u1ed9i b\u00f3ng v\u1edbi b\u1ea3y l\u1ea7n \u0111o\u1ea1t b\u00f3ng trong s\u1ef1 nghi\u1ec7p, \u0111\u1ed3ng th\u1eddi c\u00f3 88 c\u00fa h\u00fac b\u00f3ng v\u00e0 trung v\u1ec7 (cornerback) tham gia Pro Bowl Josh Norman, ng\u01b0\u1eddi \u0111\u00e3 ph\u00e1t tri\u1ec3n th\u00e0nh m\u1ed9t shutdown corner trong m\u00f9a gi\u1ea3i v\u00e0 c\u00f3 b\u1ed1n l\u1ea7n \u0111o\u1ea1t b\u00f3ng, hai trong s\u1ed1 \u0111\u00f3 \u0111\u00e3 tr\u1edf th\u00e0nh touchdown.", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 30, 12 | "text": "308" 13 | } 14 | ], 15 | "id": "56beb4343aeaaa14008c925b", 16 | "question": "\u0110\u1ed9i th\u1ee7 Panthers \u0111\u00e3 thua bao nhi\u00eau \u0111i\u1ec3m?" 17 | }, 18 | { 19 | "answers": [ 20 | { 21 | "answer_start": 577, 22 | "text": "136" 23 | } 24 | ], 25 | "id": "56beb4343aeaaa14008c925c", 26 | "question": "Jared Allen c\u00f3 bao nhi\u00eau l\u1ea7n v\u1eadt ng\u00e3 trong s\u1ef1 nghi\u1ec7p?" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | ], 33 | "version": "1.1" 34 | } 35 | -------------------------------------------------------------------------------- /mock_test_data/labels/xquad/test-zh.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": [ 3 | { 4 | "paragraphs": [ 5 | { 6 | "context": "\u9ed1\u8c79\u961f\u7684\u9632\u5b88\u53ea\u4e22\u4e86 308\u5206\uff0c\u5728\u8054\u8d5b\u4e2d\u6392\u540d\u7b2c\u516d\uff0c\u540c\u65f6\u4e5f\u4ee5 24 \u6b21\u62e6\u622a\u9886\u5148\u56fd\u5bb6\u6a44\u6984\u7403\u8054\u76df (NFL)\uff0c\u5e76\u4e14\u56db\u6b21\u5165\u9009\u804c\u4e1a\u7897\u3002\u804c\u4e1a\u7897\u9632\u5b88\u622a\u950b\u5361\u4e07\u00b7\u8096\u7279\u4ee5 11 \u5206\u9886\u5148\u4e8e\u5168\u961f\uff0c\u540c\u65f6\u8fd8\u6709\u4e09\u6b21\u8feb\u4f7f\u6389\u7403\u548c\u4e24\u6b21\u91cd\u65b0\u63a5\u7403\u3002\u4ed6\u7684\u961f\u53cb\u9a6c\u91cc\u5965\u00b7\u7231\u8fea\u751f\u8d21\u732e\u4e86 6\u00bd \u6b21\u64d2\u6740\u3002\u9ed1\u8c79\u961f\u7684\u9632\u7ebf\u4e0a\u6709\u7ecf\u9a8c\u4e30\u5bcc\u7684\u9632\u5b88\u7aef\u950b\u8d3e\u91cc\u5fb7\u00b7\u827e\u4f26\uff0c\u4ed6\u662f\u4e94\u6b21\u804c\u4e1a\u7897\u9009\u624b\uff0c\u66fe\u4ee5 136 \u6b21\u64d2\u6740\u6210\u4e3a NFL \u804c\u4e1a\u751f\u6daf\u4e2d\u7684\u6d3b\u8dc3\u9886\u8896\u3002\u53e6\u5916\u8fd8\u6709\u5728 9 \u573a\u9996\u53d1\u4e2d\u5c31\u62ff\u4e0b 5 \u6b21\u64d2\u6740\u7684\u9632\u5b88\u7aef\u950b\u79d1\u5c3c\u00b7\u4f0a\u5229\u3002\u5728\u4ed6\u4eec\u8eab\u540e\uff0c\u9ed1\u8c79\u961f\u7684\u4e09\u540d\u9996\u53d1\u7ebf\u536b\u4e2d\u6709\u4e24\u4eba\u5165\u9009\u4e86\u804c\u4e1a\u7897\uff1a\u6258\u9a6c\u65af\u00b7\u6234\u7ef4\u65af\u548c\u5362\u514b\u00b7\u574e\u514b\u5229\u3002\u6234\u7ef4\u65af\u5b8c\u6210\u4e86 5\u00bd \u6b21\u64d2\u6740\u3001\u56db\u6b21\u8feb\u4f7f\u6389\u7403\u548c\u56db\u6b21\u62e6\u622a\uff0c\u800c\u574e\u514b\u5229\u5e26\u9886\u7403\u961f\u5728\u64d2\u62b1 (118) \u4e2d\u8feb\u4f7f\u4e24\u6b21\u6389\u7403\u5e76\u62e6\u622a\u4e86\u4ed6\u81ea\u5df1\u7684\u56db\u6b21\u4f20\u7403\u3002\u5361\u7f57\u83b1\u7eb3\u7684\u7b2c\u4e8c\u9632\u7ebf\u6709\u804c\u4e1a\u7897\u5b89\u5168\u536b\u79d1\u7279\u00b7\u79d1\u5c14\u66fc\u548c\u804c\u4e1a\u7897\u89d2\u536b\u7ea6\u4ec0\u00b7\u8bfa\u66fc\uff0c\u79d1\u5c14\u66fc\u5e26\u9886\u7403\u961f\u5b8c\u6210\u4e86\u804c\u4e1a\u751f\u6daf\u4e2d\u9ad8\u8fbe\u4e03\u6b21\u62e6\u622a\u5e76\u540c\u65f6\u8d21\u732e\u4e86 88 \u6b21\u64d2\u62b1\uff0c\u800c\u8bfa\u66fc\u5728\u672c\u8d5b\u5b63\u6210\u957f\u4e3a\u4e00\u540d\u5c01\u9501\u89d2\u536b\u5e76\u5b8c\u6210\u4e86\u56db\u6b21\u62e6\u622a\uff0c\u5176\u4e2d\u4e24\u6b21\u88ab\u5224\u89e6\u5730\u5f97\u5206\u3002", 7 | "qas": [ 8 | { 9 | "answers": [ 10 | { 11 | "answer_start": 10, 12 | "text": "308" 13 | } 14 | ], 15 | "id": "56beb4343aeaaa14008c925b", 16 | "question": "\u9ed1\u8c79\u961f\u7684\u9632\u5b88\u4e22\u4e86\u591a\u5c11\u5206\uff1f" 17 | }, 18 | { 19 | "answers": [ 20 | { 21 | "answer_start": 162, 22 | "text": "136 \u6b21" 23 | } 24 | ], 25 | "id": "56beb4343aeaaa14008c925c", 26 | "question": "\u8d3e\u91cc\u5fb7\u5728\u804c\u4e1a\u751f\u6daf\u4e2d\u6709\u591a\u5c11\u6b21\u64d2\u6740\uff1f" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | ], 33 | "version": "1.1" 34 | } 35 | -------------------------------------------------------------------------------- /mock_test_data/predictions/bucc2018/test-de.tsv: -------------------------------------------------------------------------------- 1 | de-000413461 en-000367951 2 | de-000413482 en-000091985 3 | de-000413487 en-000339402 4 | de-000413503 en-000296187 5 | de-000413582 en-000336047 6 | de-000413632 en-000240614 7 | de-000413732 en-000184053 8 | de-000413744 en-000092032 9 | de-000413761 en-000045799 10 | de-000413869 en-000138555 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/bucc2018/test-fr.tsv: -------------------------------------------------------------------------------- 1 | fr-000000045 en-000292398 2 | fr-000000092 en-000181413 3 | fr-000000098 en-000352111 4 | fr-000000181 en-000063864 5 | fr-000000259 en-000042536 6 | fr-000000359 en-000050051 7 | fr-000000470 en-000059683 8 | fr-000000545 en-000081622 9 | fr-000000606 en-000330731 10 | fr-000000614 en-000036493 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/bucc2018/test-ru.tsv: -------------------------------------------------------------------------------- 1 | ru-000000041 en-000248027 2 | ru-000000112 en-000214117 3 | ru-000000228 en-000075910 4 | ru-000000379 en-000504782 5 | ru-000000421 en-000383323 6 | ru-000000429 en-000474098 7 | ru-000000617 en-000089671 8 | ru-000000869 en-000247914 9 | ru-000000947 en-000039212 10 | ru-000001037 en-000163824 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/bucc2018/test-zh.tsv: -------------------------------------------------------------------------------- 1 | zh-000000033 en-000005983 2 | zh-000000231 en-000047360 3 | zh-000000272 en-000027140 4 | zh-000000438 en-000065621 5 | zh-000000639 en-000005169 6 | zh-000000643 en-000063761 7 | zh-000000735 en-000062892 8 | zh-000000915 en-000030760 9 | zh-000001083 en-000013476 10 | zh-000001321 en-000039422 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-ar.json: -------------------------------------------------------------------------------- 1 | {"8e18e51eced73e6495df0043192edbfe": ["Q4963862", "Q42309905", "Q45789", "Q13403337", "Q5564588", "Q4009605", "Q1635932", "Q4980057", "Q5958027", "Q233750", "Q2922959", "Q203023", "Q2425422", "Q2340576", "Q4639323", "Q46930", "Q66891", "Q5423986", "Q15556629", "Q1347825"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-de.json: -------------------------------------------------------------------------------- 1 | {"4be5a1742223cc3a8c01e6bf9c6e3f27": ["Q11490423", "Q156913", "Q490356", "Q16222746", "Q4873731", "Q2102531", "Q209944", "Q4630241", "Q9033638", "Q18249334", "Q65216438", "Q333185", "Q2530561", "Q20013418", "Q10826362", "Q2575270", "Q2914850", "Q55697199", "Q853167", "Q111730"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-en.json: -------------------------------------------------------------------------------- 1 | {"64ca9e2f229acf8e39c2a3d2e45f81e7": ["Q82674", "Q13551861", "Q2418898", "Q198748", "Q1146387", "Q6730240", "Q6769706", "Q2315496", "Q3375182", "Q711611", "Q55732114", "Q720285", "Q4760035", "Q28670149", "Q375278", "Q260559", "Q82840", "Q878942", "Q269810", "Q427535"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-es.json: -------------------------------------------------------------------------------- 1 | {"4a2d7fd3e4791f09bc3c804a15d647ef": ["Q6151759", "Q19904197", "Q1138905", "Q440165", "Q787524", "Q13050046", "Q15748660", "Q6604140", "Q11400285", "Q20071151", "Q2912875", "Q786", "Q1999706", "Q11398056", "Q4486275", "Q3744158", "Q63524702", "Q38745473", "Q37996883", "Q29260670"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-fa.json: -------------------------------------------------------------------------------- 1 | {"d35cc57a7869168ddeb8143c1b2260f3": ["Q333972", "Q48270", "Q5254564", "Q76", "Q5947394", "Q3151708", "Q1756916", "Q63091766", "Q13104276", "Q5839704", "Q6598064", "Q1008989", "Q48762758", "Q55842144", "Q461358", "Q447087", "Q13640998", "Q535894", "Q223278", "Q3504372"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-ja.json: -------------------------------------------------------------------------------- 1 | {"d0e7a9dd0359610c53bba176d702dfce": ["Q1210312", "Q3662301", "Q2877167", "Q13548902", "Q3458109", "Q65159649", "Q49892", "Q204547", "Q12699816", "Q372592", "Q1776619", "Q16633277", "Q1658454", "Q174691", "Q1053638", "Q23653996", "Q798074", "Q24939391", "Q8037644", "Q65967892"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-pl.json: -------------------------------------------------------------------------------- 1 | {"64232b8a3c3ee67f76f96ccd963b78f7": ["Q1033066", "Q565472", "Q11598441", "Q29522", "Q16027287", "Q1174348", "Q1052293", "Q16903684", "Q12860947", "Q48769622", "Q2606279", "Q7315521", "Q268776", "Q13621486", "Q1400430", "Q7124665", "Q11280748", "Q710911", "Q1362561", "Q34754"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-ro.json: -------------------------------------------------------------------------------- 1 | {"ebd92132adbb679fdd090503cd925f81": ["Q1144739", "Q5836568", "Q20582855", "Q1311", "Q711832", "Q185007", "Q311559", "Q50391138", "Q55418237", "Q5037965", "Q601712", "Q6654524", "Q615949", "Q980941", "Q5188638", "Q15060144", "Q6737309", "Q21670139", "Q1040955", "Q928053"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-ta.json: -------------------------------------------------------------------------------- 1 | {"12760cb39680a822c3cd0c8495cf1b4b": ["Q22959171", "Q13385006", "Q608803", "Q3046191", "Q1750336", "Q15353797", "Q1695555", "Q124473", "Q836937", "Q3297349", "Q430687", "Q2181287", "Q11468", "Q20393369", "Q888226", "Q56477015", "Q22692651", "Q13829184", "Q2479497", "Q3207103"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-tr.json: -------------------------------------------------------------------------------- 1 | {"9f39acb0fef259aaf24224fe41954f6c": ["Q11350542", "Q188447", "Q15905812", "Q15868", "Q6630136", "Q6734763", "Q105927", "Q258", "Q9181720", "Q313196", "Q4099359", "Q15567185", "Q587455", "Q190436", "Q5284896", "Q18709782", "Q16233625", "Q5246694", "Q11620425", "Q12568992"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mewslix/test-uk.json: -------------------------------------------------------------------------------- 1 | {"9f4dba86a6d21cfd246353403da46abd": ["Q524624", "Q3830755", "Q3800390", "Q508679", "Q20383186", "Q930701", "Q18682623", "Q16969424", "Q1899", "Q2320371", "Q266613", "Q2469647", "Q749794", "Q6241038", "Q5754881", "Q2879448", "Q1630799", "Q447", "Q628319", "Q25515301"]} -------------------------------------------------------------------------------- /mock_test_data/predictions/mlqa/test-ar.json: -------------------------------------------------------------------------------- 1 | { 2 | "ba7865d50777f2b90ba88fcb070a672d042b6b69": "\u0627\u0644\u0637\u0631\u0642 \u0627\u0644\u062a\u0631\u0627\u0628\u064a\u0629", 3 | "eeb8dbd25efe5221dc6723ddee95daa07d2c8478": "\u0634\u0645\u0627\u0644 \u0634\u0631\u0642" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/mlqa/test-de.json: -------------------------------------------------------------------------------- 1 | { 2 | "c076d4b0a1967805a45e7ab1e247e3a0739d4850": "3050 und 3350" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/mlqa/test-en.json: -------------------------------------------------------------------------------- 1 | { 2 | "d066a75dbe8cd3e2b57c415a8eb54a08dc7e72a7": "had sustained skin," 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/mlqa/test-es.json: -------------------------------------------------------------------------------- 1 | { 2 | "b77c037b331e06542272669766df3b9515366b57": "unas estructuras en" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/mlqa/test-hi.json: -------------------------------------------------------------------------------- 1 | { 2 | "eeb8dbd25efe5221dc6723ddee95daa07d2c8478": "\u0909\u0924\u094d\u0924\u0930 \u092a\u0942\u0930\u094d\u0935" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/mlqa/test-vi.json: -------------------------------------------------------------------------------- 1 | { 2 | "d436fa739423f2c2aeabf1ca6a1ea0d525823bad": "\u0110\u00e0i Loan", 3 | "2f0d6ff162619164bb113c0cadbcca06a50d2e5b": "Kh\u00e1ch Gia" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/mlqa/test-zh.json: -------------------------------------------------------------------------------- 1 | { 2 | "465f3fb044b5c50a78a2e2f9bc94c424d1f7d039": "\u7535\u5316\u7535\u6c60" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-af.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | B-ORG 10 | I-ORG 11 | O 12 | O 13 | 14 | O 15 | O 16 | O 17 | O 18 | O 19 | B-LOC 20 | O 21 | O 22 | O 23 | O 24 | O 25 | O 26 | O 27 | O 28 | O 29 | O 30 | O 31 | O 32 | O 33 | O 34 | O 35 | O 36 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ar.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | I-ORG 6 | O 7 | O 8 | 9 | B-LOC 10 | I-LOC 11 | I-LOC 12 | I-LOC 13 | I-LOC 14 | I-LOC 15 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-bg.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | B-LOC 6 | O 7 | B-LOC 8 | O 9 | B-LOC 10 | O 11 | 12 | O 13 | B-ORG 14 | O 15 | B-LOC 16 | O 17 | O 18 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-bn.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | I-ORG 4 | I-ORG 5 | 6 | B-PER 7 | I-PER 8 | I-PER 9 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-de.tsv: -------------------------------------------------------------------------------- 1 | B-PER 2 | I-PER 3 | O 4 | I-PER 5 | O 6 | 7 | B-PER 8 | I-PER 9 | O 10 | B-PER 11 | I-PER 12 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-el.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | B-LOC 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | O 18 | O 19 | O 20 | O 21 | O 22 | O 23 | B-ORG 24 | I-ORG 25 | O 26 | 27 | B-ORG 28 | O 29 | B-LOC 30 | O 31 | O 32 | O 33 | B-LOC 34 | O 35 | O 36 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-en.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | B-LOC 13 | O 14 | O 15 | O 16 | O 17 | B-LOC 18 | O 19 | O 20 | O 21 | 22 | O 23 | B-PER 24 | I-PER 25 | O 26 | B-PER 27 | I-PER 28 | O 29 | O 30 | B-ORG 31 | I-ORG 32 | O 33 | O 34 | O 35 | O 36 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-es.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-LOC 4 | 5 | O 6 | B-PER 7 | I-PER 8 | I-PER 9 | I-PER 10 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-et.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | B-PER 9 | I-PER 10 | O 11 | O 12 | O 13 | O 14 | O 15 | B-ORG 16 | O 17 | O 18 | 19 | O 20 | O 21 | B-ORG 22 | O 23 | O 24 | O 25 | O 26 | O 27 | O 28 | O 29 | O 30 | O 31 | O 32 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-eu.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | I-ORG 7 | I-ORG 8 | I-ORG 9 | I-ORG 10 | I-ORG 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | O 18 | 19 | B-LOC 20 | I-LOC 21 | O 22 | O 23 | O 24 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-fa.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | O 4 | O 5 | 6 | O 7 | B-PER 8 | I-PER 9 | I-PER 10 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-fi.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | B-ORG 9 | I-ORG 10 | O 11 | O 12 | 13 | O 14 | O 15 | B-ORG 16 | I-PER 17 | O 18 | B-ORG 19 | O 20 | O 21 | O 22 | O 23 | O 24 | O 25 | O 26 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-fr.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-LOC 4 | I-LOC 5 | O 6 | O 7 | O 8 | O 9 | O 10 | 11 | B-PER 12 | I-PER 13 | I-PER 14 | O 15 | O 16 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-he.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-LOC 4 | O 5 | O 6 | 7 | O 8 | O 9 | O 10 | I-ORG 11 | I-ORG 12 | O 13 | O 14 | O 15 | O 16 | I-ORG 17 | I-ORG 18 | O 19 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-hi.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-LOC 4 | 5 | O 6 | B-LOC 7 | O 8 | O 9 | O 10 | O 11 | O 12 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-hu.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | B-LOC 6 | O 7 | 8 | O 9 | B-PER 10 | I-PER 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-id.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | I-ORG 7 | 8 | O 9 | B-PER 10 | I-PER 11 | I-PER 12 | I-PER 13 | I-PER 14 | I-PER 15 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-it.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | O 6 | O 7 | 8 | B-ORG 9 | B-ORG 10 | I-ORG 11 | I-ORG 12 | I-ORG 13 | I-ORG 14 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ja.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | B-PER 12 | I-PER 13 | I-PER 14 | I-PER 15 | 16 | O 17 | B-ORG 18 | I-ORG 19 | I-ORG 20 | I-ORG 21 | I-ORG 22 | I-ORG 23 | I-ORG 24 | I-ORG 25 | I-ORG 26 | I-ORG 27 | I-ORG 28 | I-ORG 29 | I-ORG 30 | I-ORG 31 | I-ORG 32 | I-ORG 33 | I-ORG 34 | I-ORG 35 | I-ORG 36 | I-ORG 37 | I-ORG 38 | I-ORG 39 | I-ORG 40 | I-ORG 41 | I-ORG 42 | O 43 | O 44 | O 45 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-jv.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-LOC 4 | I-LOC 5 | O 6 | O 7 | 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | B-ORG 17 | I-ORG 18 | I-ORG 19 | O 20 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ka.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-LOC 3 | O 4 | B-LOC 5 | O 6 | 7 | B-LOC 8 | B-ORG 9 | O 10 | B-LOC 11 | O 12 | B-LOC 13 | O 14 | O 15 | B-LOC 16 | O 17 | O 18 | O 19 | B-ORG 20 | I-ORG 21 | I-ORG 22 | O 23 | O 24 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-kk.tsv: -------------------------------------------------------------------------------- 1 | B-PER 2 | I-PER 3 | I-PER 4 | I-PER 5 | 6 | B-PER 7 | I-PER 8 | O 9 | B-ORG 10 | O 11 | O 12 | O 13 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ko.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | O 6 | O 7 | 8 | O 9 | O 10 | B-ORG 11 | O 12 | O 13 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ml.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOC 3 | I-LOC 4 | I-LOC 5 | 6 | B-PER 7 | I-PER 8 | O 9 | O 10 | O 11 | O 12 | O 13 | B-PER 14 | O 15 | O 16 | O 17 | O 18 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-mr.tsv: -------------------------------------------------------------------------------- 1 | O 2 | B-LOC 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | I-ORG 7 | 8 | O 9 | B-PER 10 | I-PER 11 | I-PER 12 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ms.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | 7 | B-PER 8 | I-PER 9 | O 10 | O 11 | O 12 | B-ORG 13 | I-ORG 14 | O 15 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-my.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | B-LOC 3 | O 4 | O 5 | O 6 | O 7 | O 8 | B-ORG 9 | O 10 | O 11 | O 12 | O 13 | 14 | O 15 | O 16 | O 17 | O 18 | B-ORG 19 | B-ORG 20 | B-ORG 21 | O 22 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-nl.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | O 4 | O 5 | O 6 | O 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | B-ORG 16 | O 17 | O 18 | O 19 | 20 | O 21 | O 22 | B-PER 23 | I-PER 24 | O 25 | O 26 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-pt.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-PER 4 | I-PER 5 | O 6 | O 7 | 8 | O 9 | O 10 | O 11 | O 12 | B-ORG 13 | O 14 | O 15 | O 16 | O 17 | O 18 | B-ORG 19 | O 20 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ru.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-PER 4 | I-PER 5 | O 6 | O 7 | 8 | O 9 | O 10 | B-ORG 11 | I-ORG 12 | O 13 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-sw.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | O 3 | I-ORG 4 | O 5 | O 6 | O 7 | 8 | B-ORG 9 | I-ORG 10 | I-ORG 11 | I-ORG 12 | O 13 | O 14 | B-PER 15 | I-PER 16 | I-PER 17 | O 18 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ta.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | I-ORG 6 | O 7 | O 8 | 9 | O 10 | B-LOC 11 | I-ORG 12 | I-ORG 13 | I-ORG 14 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-te.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | I-ORG 4 | I-ORG 5 | 6 | B-ORG 7 | I-ORG 8 | I-ORG 9 | O 10 | O 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-th.tsv: -------------------------------------------------------------------------------- 1 | O 2 | I-ORG 3 | I-ORG 4 | I-ORG 5 | I-ORG 6 | O 7 | I-ORG 8 | I-ORG 9 | I-ORG 10 | I-ORG 11 | I-ORG 12 | I-ORG 13 | I-ORG 14 | O 15 | O 16 | O 17 | O 18 | O 19 | I-ORG 20 | I-ORG 21 | I-ORG 22 | I-ORG 23 | I-ORG 24 | I-ORG 25 | I-ORG 26 | I-ORG 27 | I-ORG 28 | I-ORG 29 | O 30 | 31 | B-ORG 32 | I-ORG 33 | I-ORG 34 | I-ORG 35 | B-ORG 36 | I-ORG 37 | I-ORG 38 | I-ORG 39 | I-ORG 40 | I-ORG 41 | I-ORG 42 | I-ORG 43 | I-ORG 44 | I-ORG 45 | I-ORG 46 | I-ORG 47 | I-ORG 48 | I-ORG 49 | I-ORG 50 | I-ORG 51 | I-ORG 52 | I-ORG 53 | I-ORG 54 | I-ORG 55 | I-ORG 56 | I-ORG 57 | I-ORG 58 | I-ORG 59 | I-ORG 60 | I-ORG 61 | I-ORG 62 | I-ORG 63 | I-ORG 64 | I-ORG 65 | I-ORG 66 | I-ORG 67 | O 68 | O 69 | I-ORG 70 | I-ORG 71 | I-ORG 72 | I-ORG 73 | I-ORG 74 | I-ORG 75 | I-ORG 76 | I-ORG 77 | I-ORG 78 | I-ORG 79 | I-ORG 80 | I-ORG 81 | O 82 | O 83 | I-ORG 84 | I-ORG 85 | I-ORG 86 | I-ORG 87 | I-ORG 88 | I-ORG 89 | I-ORG 90 | O 91 | O 92 | O 93 | O 94 | I-ORG 95 | O 96 | O 97 | O 98 | O 99 | I-ORG 100 | I-ORG 101 | I-ORG 102 | I-ORG 103 | I-ORG 104 | I-ORG 105 | I-ORG 106 | I-ORG 107 | I-ORG 108 | I-ORG 109 | O 110 | I-ORG 111 | I-ORG 112 | I-ORG 113 | I-ORG 114 | I-ORG 115 | I-ORG 116 | I-ORG 117 | I-ORG 118 | O 119 | I-ORG 120 | I-ORG 121 | I-ORG 122 | I-ORG 123 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-tl.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-LOC 4 | 5 | B-ORG 6 | I-ORG 7 | I-ORG 8 | I-ORG 9 | B-LOC 10 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-tr.tsv: -------------------------------------------------------------------------------- 1 | B-ORG 2 | I-ORG 3 | O 4 | B-ORG 5 | I-ORG 6 | O 7 | O 8 | 9 | O 10 | O 11 | O 12 | O 13 | O 14 | O 15 | B-ORG 16 | I-ORG 17 | I-ORG 18 | O 19 | O 20 | O 21 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-ur.tsv: -------------------------------------------------------------------------------- 1 | O 2 | O 3 | B-ORG 4 | I-ORG 5 | O 6 | O 7 | 8 | B-LOC 9 | B-PER 10 | I-PER 11 | I-PER 12 | I-PER 13 | I-PER 14 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-vi.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | O 4 | B-LOC 5 | I-LOC 6 | I-LOC 7 | I-LOC 8 | I-LOC 9 | 10 | O 11 | O 12 | O 13 | O 14 | O 15 | O 16 | O 17 | B-PER 18 | I-PER 19 | I-PER 20 | O 21 | O 22 | O 23 | O 24 | O 25 | O 26 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-yo.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | I-ORG 4 | I-LOC 5 | I-LOC 6 | 7 | O 8 | O 9 | O 10 | O 11 | O 12 | B-LOC 13 | O 14 | B-LOC 15 | O 16 | -------------------------------------------------------------------------------- /mock_test_data/predictions/panx/test-zh.tsv: -------------------------------------------------------------------------------- 1 | B-LOC 2 | I-LOC 3 | O 4 | O 5 | B-ORG 6 | I-ORG 7 | O 8 | O 9 | O 10 | O 11 | O 12 | O 13 | B-PER 14 | I-PER 15 | I-PER 16 | 17 | O 18 | B-LOC 19 | I-LOC 20 | I-LOC 21 | I-LOC 22 | -------------------------------------------------------------------------------- /mock_test_data/predictions/pawsx/test-de.tsv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 0 6 | 1 7 | 1 8 | 0 9 | 1 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/pawsx/test-en.tsv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 0 5 | 0 6 | 1 7 | 1 8 | 0 9 | 1 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/pawsx/test-es.tsv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 0 6 | 1 7 | 1 8 | 0 9 | 1 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/pawsx/test-fr.tsv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 0 6 | 1 7 | 1 8 | 0 9 | 0 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/pawsx/test-ja.tsv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 0 6 | 0 7 | 1 8 | 0 9 | 1 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/pawsx/test-ko.tsv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 0 8 | 0 9 | 1 10 | 1 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/pawsx/test-zh.tsv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 0 6 | 1 7 | 1 8 | 0 9 | 1 10 | 0 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-af.tsv: -------------------------------------------------------------------------------- 1 | 308 2 | 441 3 | 713 4 | 233 5 | 757 6 | 73 7 | 93 8 | 397 9 | 872 10 | 869 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ar.tsv: -------------------------------------------------------------------------------- 1 | 669 2 | 669 3 | 543 4 | 325 5 | 543 6 | 455 7 | 455 8 | 151 9 | 519 10 | 710 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-bg.tsv: -------------------------------------------------------------------------------- 1 | 28 2 | 951 3 | 949 4 | 863 5 | 280 6 | 444 7 | 272 8 | 142 9 | 228 10 | 459 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-bn.tsv: -------------------------------------------------------------------------------- 1 | 485 2 | 86 3 | 926 4 | 926 5 | 485 6 | 769 7 | 768 8 | 389 9 | 325 10 | 13 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-de.tsv: -------------------------------------------------------------------------------- 1 | 367 2 | 258 3 | 374 4 | 529 5 | 266 6 | 283 7 | 267 8 | 344 9 | 275 10 | 212 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-el.tsv: -------------------------------------------------------------------------------- 1 | 847 2 | 672 3 | 481 4 | 809 5 | 994 6 | 77 7 | 445 8 | 775 9 | 789 10 | 32 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-es.tsv: -------------------------------------------------------------------------------- 1 | 575 2 | 761 3 | 406 4 | 857 5 | 751 6 | 44 7 | 899 8 | 367 9 | 409 10 | 683 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-et.tsv: -------------------------------------------------------------------------------- 1 | 538 2 | 453 3 | 945 4 | 850 5 | 3 6 | 993 7 | 653 8 | 199 9 | 651 10 | 501 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-eu.tsv: -------------------------------------------------------------------------------- 1 | 574 2 | 871 3 | 750 4 | 239 5 | 335 6 | 24 7 | 24 8 | 462 9 | 356 10 | 330 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-fa.tsv: -------------------------------------------------------------------------------- 1 | 889 2 | 889 3 | 521 4 | 162 5 | 162 6 | 970 7 | 936 8 | 271 9 | 350 10 | 809 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-fi.tsv: -------------------------------------------------------------------------------- 1 | 316 2 | 811 3 | 811 4 | 893 5 | 765 6 | 338 7 | 296 8 | 563 9 | 408 10 | 400 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-fr.tsv: -------------------------------------------------------------------------------- 1 | 957 2 | 981 3 | 721 4 | 980 5 | 912 6 | 444 7 | 973 8 | 980 9 | 970 10 | 980 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-he.tsv: -------------------------------------------------------------------------------- 1 | 635 2 | 562 3 | 562 4 | 329 5 | 952 6 | 429 7 | 431 8 | 117 9 | 566 10 | 854 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-hi.tsv: -------------------------------------------------------------------------------- 1 | 988 2 | 215 3 | 129 4 | 308 5 | 408 6 | 878 7 | 418 8 | 105 9 | 917 10 | 69 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-hu.tsv: -------------------------------------------------------------------------------- 1 | 417 2 | 559 3 | 417 4 | 202 5 | 516 6 | 939 7 | 835 8 | 939 9 | 731 10 | 428 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-id.tsv: -------------------------------------------------------------------------------- 1 | 194 2 | 178 3 | 208 4 | 423 5 | 757 6 | 728 7 | 977 8 | 618 9 | 631 10 | 330 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-it.tsv: -------------------------------------------------------------------------------- 1 | 164 2 | 866 3 | 723 4 | 434 5 | 720 6 | 235 7 | 275 8 | 733 9 | 700 10 | 915 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ja.tsv: -------------------------------------------------------------------------------- 1 | 555 2 | 660 3 | 81 4 | 569 5 | 800 6 | 280 7 | 291 8 | 564 9 | 632 10 | 579 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-jv.tsv: -------------------------------------------------------------------------------- 1 | 130 2 | 12 3 | 37 4 | 105 5 | 101 6 | 132 7 | 182 8 | 105 9 | 101 10 | 45 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ka.tsv: -------------------------------------------------------------------------------- 1 | 231 2 | 231 3 | 231 4 | 298 5 | 338 6 | 168 7 | 41 8 | 697 9 | 121 10 | 322 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-kk.tsv: -------------------------------------------------------------------------------- 1 | 299 2 | 99 3 | 394 4 | 281 5 | 344 6 | 308 7 | 185 8 | 270 9 | 127 10 | 34 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ko.tsv: -------------------------------------------------------------------------------- 1 | 755 2 | 332 3 | 986 4 | 996 5 | 75 6 | 519 7 | 274 8 | 377 9 | 21 10 | 102 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ml.tsv: -------------------------------------------------------------------------------- 1 | 674 2 | 405 3 | 405 4 | 508 5 | 664 6 | 474 7 | 574 8 | 485 9 | 240 10 | 317 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-mr.tsv: -------------------------------------------------------------------------------- 1 | 179 2 | 998 3 | 766 4 | 788 5 | 88 6 | 998 7 | 998 8 | 751 9 | 292 10 | 91 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-nl.tsv: -------------------------------------------------------------------------------- 1 | 401 2 | 29 3 | 774 4 | 501 5 | 810 6 | 29 7 | 29 8 | 35 9 | 408 10 | 926 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-pt.tsv: -------------------------------------------------------------------------------- 1 | 916 2 | 296 3 | 364 4 | 922 5 | 916 6 | 916 7 | 803 8 | 803 9 | 523 10 | 45 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ru.tsv: -------------------------------------------------------------------------------- 1 | 343 2 | 461 3 | 464 4 | 639 5 | 725 6 | 592 7 | 636 8 | 636 9 | 518 10 | 596 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-sw.tsv: -------------------------------------------------------------------------------- 1 | 80 2 | 80 3 | 80 4 | 80 5 | 98 6 | 107 7 | 80 8 | 80 9 | 107 10 | 80 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ta.tsv: -------------------------------------------------------------------------------- 1 | 102 2 | 264 3 | 264 4 | 82 5 | 82 6 | 270 7 | 270 8 | 135 9 | 178 10 | 135 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-te.tsv: -------------------------------------------------------------------------------- 1 | 179 2 | 37 3 | 3 4 | 13 5 | 3 6 | 119 7 | 119 8 | 119 9 | 119 10 | 162 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-th.tsv: -------------------------------------------------------------------------------- 1 | 323 2 | 155 3 | 37 4 | 103 5 | 155 6 | 445 7 | 163 8 | 103 9 | 317 10 | 445 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-tl.tsv: -------------------------------------------------------------------------------- 1 | 207 2 | 845 3 | 845 4 | 787 5 | 845 6 | 207 7 | 207 8 | 787 9 | 14 10 | 345 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-tr.tsv: -------------------------------------------------------------------------------- 1 | 501 2 | 515 3 | 364 4 | 804 5 | 881 6 | 710 7 | 45 8 | 384 9 | 326 10 | 750 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-ur.tsv: -------------------------------------------------------------------------------- 1 | 753 2 | 367 3 | 160 4 | 92 5 | 790 6 | 839 7 | 500 8 | 620 9 | 786 10 | 548 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-vi.tsv: -------------------------------------------------------------------------------- 1 | 310 2 | 403 3 | 952 4 | 975 5 | 469 6 | 831 7 | 451 8 | 460 9 | 469 10 | 696 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tatoeba/test-zh.tsv: -------------------------------------------------------------------------------- 1 | 931 2 | 405 3 | 504 4 | 127 5 | 127 6 | 852 7 | 425 8 | 530 9 | 797 10 | 601 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-ar.json: -------------------------------------------------------------------------------- 1 | { 2 | "arabic-2387335860751143628-1": "\u0628\u0637\u0648\u0644\u062a\u064a\u0646" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-bn.json: -------------------------------------------------------------------------------- 1 | { 2 | "bengali-608617701520483849-1": "\u099c\u09b0\u09cd\u099c \u09b8\u09bf\u09a1\u09c7\u09a8\u09b9\u09be\u09ae \u0995\u09cd\u09b2\u09be\u09b0\u09cd\u0995" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-en.json: -------------------------------------------------------------------------------- 1 | { 2 | "english--3215621880858840488-2": "Wound care" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-fi.json: -------------------------------------------------------------------------------- 1 | { 2 | "finnish--267686407665346253-29": "vuonna 4000 eaa" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-id.json: -------------------------------------------------------------------------------- 1 | { 2 | "indonesian-1906590851264221380-1": "orang-orang Viking dari" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-ko.json: -------------------------------------------------------------------------------- 1 | { 2 | "korean-3543942044825767374-0": "\uad70\uc0ac\ub839\uad00" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-ru.json: -------------------------------------------------------------------------------- 1 | { 2 | "russian--1141212800069921250-46": "1832\u20141833" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-sw.json: -------------------------------------------------------------------------------- 1 | { 2 | "swahili--8211684794284159625-4": "1999" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/tydiqa/test-te.json: -------------------------------------------------------------------------------- 1 | { 2 | "telugu--2245295572008910947-0": "\u0c10\u0c26\u0c41" 3 | } 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-af.tsv: -------------------------------------------------------------------------------- 1 | ADJ 2 | NOUN 3 | AUX 4 | PRON 5 | PRON 6 | ADJ 7 | NOUN 8 | ADP 9 | NUM 10 | NOUN 11 | VERB 12 | PUNCT 13 | 14 | DET 15 | NOUN 16 | AUX 17 | PRON 18 | NOUN 19 | ADV 20 | NUM 21 | NUM 22 | NOUN 23 | VERB 24 | PUNCT 25 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-ar.tsv: -------------------------------------------------------------------------------- 1 | VERB 2 | PROPN 3 | PROPN 4 | PUNCT 5 | DET 6 | ADJ 7 | ADP 8 | PROPN 9 | ADP 10 | NOUN 11 | VERB 12 | PROPN 13 | PROPN 14 | DET 15 | PUNCT 16 | PUNCT 17 | ADV 18 | ADJ 19 | ADP 20 | NOUN 21 | DET 22 | ADJ 23 | AUX 24 | ADJ 25 | ADP 26 | PROPN 27 | PROPN 28 | PUNCT 29 | CCONJ 30 | SCONJ 31 | DET 32 | ADJ 33 | ADP 34 | DET 35 | ADJ 36 | ADP 37 | PRON 38 | PRON 39 | VERB 40 | PUNCT 41 | PUNCT 42 | 43 | ADP 44 | DET 45 | ADP 46 | NOUN 47 | PRON 48 | VERB 49 | DET 50 | ADJ 51 | ADP 52 | NOUN 53 | DET 54 | ADJ 55 | ADP 56 | DET 57 | PROPN 58 | PUNCT 59 | AUX 60 | VERB 61 | NOUN 62 | ADJ 63 | ADJ 64 | DET 65 | PUNCT 66 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-bg.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | VERB 3 | PUNCT 4 | SCONJ 5 | VERB 6 | PART 7 | VERB 8 | PUNCT 9 | ADP 10 | ADV 11 | NOUN 12 | ADP 13 | NOUN 14 | PRON 15 | PUNCT 16 | 17 | ADV 18 | ADV 19 | AUX 20 | PART 21 | PRON 22 | VERB 23 | PUNCT 24 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-de.tsv: -------------------------------------------------------------------------------- 1 | DET 2 | NOUN 3 | VERB 4 | ADP 5 | NOUN 6 | PUNCT 7 | CCONJ 8 | PRON 9 | ADJ 10 | ADP 11 | ADJ 12 | PUNCT 13 | 14 | PRON 15 | AUX 16 | ADV 17 | NUM 18 | PRON 19 | NOUN 20 | VERB 21 | CCONJ 22 | SCONJ 23 | PRON 24 | DET 25 | NOUN 26 | ADV 27 | VERB 28 | AUX 29 | PRON 30 | ADV 31 | ADV 32 | DET 33 | ADJ 34 | NOUN 35 | DET 36 | PROPN 37 | CCONJ 38 | DET 39 | NOUN 40 | VERB 41 | PUNCT 42 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-el.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | PROPN 3 | PUNCT 4 | DET 5 | NOUN 6 | ADP 7 | ADJ 8 | NOUN 9 | NOUN 10 | AUX 11 | ADV 12 | ADV 13 | ADJ 14 | PUNCT 15 | ADV 16 | ADV 17 | SCONJ 18 | VERB 19 | ADV 20 | ADJ 21 | NOUN 22 | PUNCT 23 | CCONJ 24 | ADV 25 | SCONJ 26 | DET 27 | PROPN 28 | ADV 29 | VERB 30 | PART 31 | VERB 32 | DET 33 | NOUN 34 | ADP 35 | PRON 36 | PRON 37 | VERB 38 | PUNCT 39 | 40 | AUX 41 | PRON 42 | VERB 43 | VERB 44 | DET 45 | ADJ 46 | NOUN 47 | DET 48 | ADP 49 | NOUN 50 | PUNCT 51 | ADV 52 | VERB 53 | DET 54 | NOUN 55 | DET 56 | NOUN 57 | ADP 58 | NOUN 59 | PUNCT 60 | VERB 61 | ADP 62 | PRON 63 | PRON 64 | DET 65 | NOUN 66 | NOUN 67 | VERB 68 | PART 69 | VERB 70 | DET 71 | NOUN 72 | DET 73 | NOUN 74 | SCONJ 75 | DET 76 | NOUN 77 | PRON 78 | ADV 79 | VERB 80 | ADJ 81 | NOUN 82 | ADJ 83 | ADP 84 | DET 85 | NOUN 86 | ADP 87 | ADJ 88 | ADJ 89 | NOUN 90 | PUNCT 91 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-en.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | AUX 3 | PRON 4 | PUNCT 5 | 6 | PRON 7 | AUX 8 | PRON 9 | PUNCT 10 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-es.tsv: -------------------------------------------------------------------------------- 1 | SCONJ 2 | PART 3 | VERB 4 | NOUN 5 | ADP 6 | DET 7 | ADJ 8 | NOUN 9 | ADP 10 | DET 11 | NOUN 12 | ADJ 13 | ADP 14 | PROPN 15 | PROPN 16 | PUNCT 17 | DET 18 | NOUN 19 | ADP 20 | NOUN 21 | ADJ 22 | AUX 23 | ADJ 24 | PUNCT 25 | VERB 26 | DET 27 | PROPN 28 | ADP 29 | DET 30 | NOUN 31 | ADP 32 | NOUN 33 | PROPN 34 | PROPN 35 | PUNCT 36 | PROPN 37 | ADJ 38 | ADP 39 | PROPN 40 | PROPN 41 | PUNCT 42 | 43 | ADP 44 | PRON 45 | PRON 46 | VERB 47 | DET 48 | NOUN 49 | ADP 50 | DET 51 | NOUN 52 | NOUN 53 | ADP 54 | PROPN 55 | PROPN 56 | PUNCT 57 | PRON 58 | AUX 59 | PRON 60 | ADJ 61 | PUNCT 62 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-et.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | 3 | VERB 4 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-eu.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | PUNCT 3 | ADV 4 | PUNCT 5 | AUX 6 | AUX 7 | ADJ 8 | NOUN 9 | VERB 10 | NOUN 11 | NOUN 12 | PUNCT 13 | ADJ 14 | NOUN 15 | NOUN 16 | ADJ 17 | VERB 18 | ADV 19 | PUNCT 20 | 21 | PROPN 22 | NOUN 23 | ADJ 24 | VERB 25 | VERB 26 | NOUN 27 | PRON 28 | VERB 29 | NOUN 30 | VERB 31 | CCONJ 32 | NOUN 33 | ADJ 34 | NUM 35 | ADV 36 | VERB 37 | PROPN 38 | PROPN 39 | PUNCT 40 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-fa.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | VERB 3 | SCONJ 4 | ADP 5 | NOUN 6 | NUM 7 | NOUN 8 | PRON 9 | AUX 10 | VERB 11 | VERB 12 | PUNCT 13 | 14 | NOUN 15 | PRON 16 | VERB 17 | SCONJ 18 | PROPN 19 | ADP 20 | DET 21 | NOUN 22 | VERB 23 | VERB 24 | PUNCT 25 | DET 26 | NOUN 27 | VERB 28 | SCONJ 29 | ADP 30 | NOUN 31 | PRON 32 | ADP 33 | NOUN 34 | PRON 35 | VERB 36 | PUNCT 37 | VERB 38 | VERB 39 | PUNCT 40 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-fi.tsv: -------------------------------------------------------------------------------- 1 | VERB 2 | NOUN 3 | 4 | ADV 5 | AUX 6 | VERB 7 | NOUN 8 | PUNCT 9 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-fr.tsv: -------------------------------------------------------------------------------- 1 | ADV 2 | AUX 3 | PUNCT 4 | ADV 5 | DET 6 | NOUN 7 | ADP 8 | NOUN 9 | PUNCT 10 | 11 | ADV 12 | PUNCT 13 | VERB 14 | DET 15 | NOUN 16 | ADP 17 | NOUN 18 | PUNCT 19 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-he.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | ADV 3 | VERB 4 | ADP 5 | NOUN 6 | PUNCT 7 | PROPN 8 | CCONJ 9 | AUX 10 | VERB 11 | PART 12 | PROPN 13 | PUNCT 14 | VERB 15 | ADP 16 | PROPN 17 | PRON 18 | ADP 19 | ADJ 20 | DET 21 | ADP 22 | NOUN 23 | PRON 24 | ADP 25 | PART 26 | ADP 27 | NOUN 28 | PROPN 29 | VERB 30 | PUNCT 31 | PROPN 32 | CCONJ 33 | VERB 34 | ADP 35 | DET 36 | ADP 37 | PUNCT 38 | 39 | ADP 40 | DET 41 | PUNCT 42 | NUM 43 | VERB 44 | NOUN 45 | DET 46 | NUM 47 | NUM 48 | PUNCT 49 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-hi.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | NOUN 3 | PROPN 4 | NOUN 5 | PUNCT 6 | PROPN 7 | NOUN 8 | CCONJ 9 | PROPN 10 | ADV 11 | ADJ 12 | NOUN 13 | AUX 14 | PUNCT 15 | 16 | NUM 17 | NOUN 18 | ADP 19 | NOUN 20 | ADV 21 | PROPN 22 | PROPN 23 | VERB 24 | PUNCT 25 | ADV 26 | ADV 27 | PROPN 28 | ADP 29 | NOUN 30 | VERB 31 | CCONJ 32 | VERB 33 | VERB 34 | ADP 35 | NOUN 36 | ADV 37 | NOUN 38 | VERB 39 | PROPN 40 | AUX 41 | PUNCT 42 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-hu.tsv: -------------------------------------------------------------------------------- 1 | DET 2 | ADJ 3 | NOUN 4 | ADV 5 | DET 6 | ADJ 7 | NOUN 8 | NOUN 9 | ADJ 10 | NOUN 11 | VERB 12 | DET 13 | ADJ 14 | NOUN 15 | NOUN 16 | CCONJ 17 | DET 18 | NOUN 19 | PUNCT 20 | 21 | ADV 22 | ADV 23 | DET 24 | ADJ 25 | NOUN 26 | ADJ 27 | NOUN 28 | VERB 29 | PUNCT 30 | CCONJ 31 | DET 32 | ADJ 33 | NOUN 34 | CCONJ 35 | DET 36 | NOUN 37 | NOUN 38 | ADV 39 | ADJ 40 | NOUN 41 | VERB 42 | DET 43 | NOUN 44 | PUNCT 45 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-id.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | ADP 3 | NUM 4 | NOUN 5 | ADJ 6 | VERB 7 | ADP 8 | PROPN 9 | PROPN 10 | ADP 11 | NOUN 12 | PROPN 13 | NUM 14 | CCONJ 15 | NUM 16 | NOUN 17 | ADJ 18 | VERB 19 | ADP 20 | NOUN 21 | PRON 22 | VERB 23 | PART 24 | VERB 25 | NOUN 26 | ADP 27 | NOUN 28 | PROPN 29 | PUNCT 30 | NOUN 31 | ADJ 32 | NOUN 33 | VERB 34 | PROPN 35 | AUX 36 | VERB 37 | ADJ 38 | ADP 39 | NOUN 40 | NOUN 41 | NOUN 42 | PUNCT 43 | VERB 44 | ADP 45 | ADP 46 | NOUN 47 | ADP 48 | PROPN 49 | PROPN 50 | CCONJ 51 | VERB 52 | ADP 53 | NOUN 54 | NUM 55 | PROPN 56 | ADP 57 | NOUN 58 | ADV 59 | PUNCT 60 | 61 | PROPN 62 | AUX 63 | NOUN 64 | PRON 65 | VERB 66 | ADP 67 | PROPN 68 | PROPN 69 | PUNCT 70 | PROPN 71 | PUNCT 72 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-it.tsv: -------------------------------------------------------------------------------- 1 | ADJ 2 | ADP 3 | PROPN 4 | PROPN 5 | PUNCT 6 | NOUN 7 | ADV 8 | PUNCT 9 | DET 10 | NUM 11 | SYM 12 | ADP 13 | NUM 14 | NOUN 15 | PROPN 16 | PUNCT 17 | AUX 18 | VERB 19 | DET 20 | NUM 21 | ADP 22 | ADJ 23 | X 24 | SYM 25 | SYM 26 | 27 | SYM 28 | X 29 | X 30 | X 31 | PROPN 32 | PUNCT 33 | X 34 | VERB 35 | NOUN 36 | ADJ 37 | X 38 | PUNCT 39 | SYM 40 | ADJ 41 | NOUN 42 | PROPN 43 | PUNCT 44 | SYM 45 | PUNCT 46 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-ja.tsv: -------------------------------------------------------------------------------- 1 | PUNCT 2 | PROPN 3 | ADP 4 | PROPN 5 | ADP 6 | ADP 7 | NOUN 8 | AUX 9 | ADV 10 | ADJ 11 | ADP 12 | ADJ 13 | NOUN 14 | ADV 15 | PUNCT 16 | NOUN 17 | ADP 18 | ADJ 19 | ADJ 20 | NOUN 21 | AUX 22 | ADJ 23 | ADV 24 | X 25 | ADJ 26 | PUNCT 27 | ADP 28 | VERB 29 | NUM 30 | NOUN 31 | ADP 32 | PUNCT 33 | PROPN 34 | PROPN 35 | ADJ 36 | PROPN 37 | PROPN 38 | PUNCT 39 | PROPN 40 | AUX 41 | PROPN 42 | ADP 43 | VERB 44 | ADP 45 | X 46 | PUNCT 47 | 48 | PROPN 49 | ADP 50 | PROPN 51 | ADP 52 | NOUN 53 | PART 54 | NOUN 55 | ADP 56 | VERB 57 | ADP 58 | ADP 59 | NOUN 60 | X 61 | AUX 62 | PUNCT 63 | DET 64 | NOUN 65 | AUX 66 | ADV 67 | ADJ 68 | PUNCT 69 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-kk.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | NUM 3 | NOUN 4 | NOUN 5 | PROPN 6 | NUM 7 | NOUN 8 | VERB 9 | PUNCT 10 | 11 | INTJ 12 | PUNCT 13 | PROPN 14 | PRON 15 | ADV 16 | NOUN 17 | NOUN 18 | ADV 19 | PUNCT 20 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-ko.tsv: -------------------------------------------------------------------------------- 1 | DET 2 | NOUN 3 | VERB 4 | PROPN 5 | NOUN 6 | ADV 7 | VERB 8 | CCONJ 9 | ADJ 10 | NOUN 11 | NOUN 12 | PROPN 13 | VERB 14 | ADP 15 | ADJ 16 | VERB 17 | PUNCT 18 | 19 | X 20 | ADJ 21 | NOUN 22 | ADV 23 | NOUN 24 | ADJ 25 | ADJ 26 | PROPN 27 | PROPN 28 | PROPN 29 | NOUN 30 | ADJ 31 | NOUN 32 | VERB 33 | PUNCT 34 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-mr.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | VERB 3 | PRON 4 | NOUN 5 | VERB 6 | VERB 7 | PUNCT 8 | 9 | ADJ 10 | NOUN 11 | ADJ 12 | NOUN 13 | VERB 14 | PUNCT 15 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-nl.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | PUNCT 3 | NOUN 4 | CCONJ 5 | NOUN 6 | AUX 7 | ADP 8 | PROPN 9 | VERB 10 | ADP 11 | DET 12 | NOUN 13 | ADP 14 | DET 15 | NOUN 16 | ADP 17 | DET 18 | NOUN 19 | ADP 20 | DET 21 | NOUN 22 | ADP 23 | DET 24 | NUM 25 | ADJ 26 | NOUN 27 | CCONJ 28 | PROPN 29 | PUNCT 30 | PROPN 31 | PUNCT 32 | ADP 33 | PROPN 34 | PUNCT 35 | 36 | PART 37 | ADP 38 | ADJ 39 | NOUN 40 | PUNCT 41 | CCONJ 42 | ADP 43 | NOUN 44 | CCONJ 45 | NOUN 46 | VERB 47 | DET 48 | NOUN 49 | DET 50 | NOUN 51 | ADP 52 | DET 53 | NOUN 54 | VERB 55 | VERB 56 | PUNCT 57 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-pt.tsv: -------------------------------------------------------------------------------- 1 | CCONJ 2 | SCONJ 3 | PART 4 | VERB 5 | DET 6 | NOUN 7 | ADJ 8 | VERB 9 | DET 10 | NOUN 11 | ADP 12 | NOUN 13 | ADP 14 | NOUN 15 | PUNCT 16 | PUNCT 17 | VERB 18 | PUNCT 19 | 20 | DET 21 | NOUN 22 | VERB 23 | SCONJ 24 | DET 25 | NOUN 26 | AUX 27 | VERB 28 | DET 29 | NOUN 30 | ADP 31 | NOUN 32 | ADP 33 | NOUN 34 | CCONJ 35 | ADP 36 | NOUN 37 | ADP 38 | NOUN 39 | ADP 40 | NOUN 41 | PUNCT 42 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-ru.tsv: -------------------------------------------------------------------------------- 1 | ADP 2 | ADJ 3 | NOUN 4 | NOUN 5 | NOUN 6 | PROPN 7 | PUNCT 8 | NOUN 9 | ADP 10 | PROPN 11 | VERB 12 | ADV 13 | NUM 14 | NUM 15 | PUNCT 16 | 17 | VERB 18 | ADP 19 | NOUN 20 | ADP 21 | ADJ 22 | NOUN 23 | NOUN 24 | ADJ 25 | CCONJ 26 | ADJ 27 | NOUN 28 | VERB 29 | NOUN 30 | CCONJ 31 | NOUN 32 | NOUN 33 | PUNCT 34 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-ta.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | ADJ 3 | NOUN 4 | NOUN 5 | VERB 6 | NOUN 7 | NOUN 8 | NOUN 9 | VERB 10 | PUNCT 11 | 12 | ADJ 13 | NOUN 14 | ADV 15 | VERB 16 | ADP 17 | NOUN 18 | NOUN 19 | NOUN 20 | VERB 21 | ADJ 22 | VERB 23 | PROPN 24 | PUNCT 25 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-te.tsv: -------------------------------------------------------------------------------- 1 | VERB 2 | PUNCT 3 | 4 | ADV 5 | VERB 6 | PUNCT 7 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-th.tsv: -------------------------------------------------------------------------------- 1 | PUNCT 2 | PROPN 3 | VERB 4 | NOUN 5 | VERB 6 | VERB 7 | NOUN 8 | PROPN 9 | AUX 10 | NOUN 11 | SCONJ 12 | PRON 13 | VERB 14 | VERB 15 | NOUN 16 | VERB 17 | ADP 18 | PROPN 19 | NOUN 20 | NOUN 21 | NOUN 22 | AUX 23 | VERB 24 | PRON 25 | AUX 26 | VERB 27 | NOUN 28 | PROPN 29 | PUNCT 30 | PROPN 31 | PROPN 32 | PROPN 33 | VERB 34 | ADJ 35 | ADP 36 | PROPN 37 | VERB 38 | VERB 39 | VERB 40 | ADP 41 | NOUN 42 | NOUN 43 | 44 | VERB 45 | NOUN 46 | PRON 47 | VERB 48 | SCONJ 49 | PROPN 50 | PROPN 51 | ADP 52 | PROPN 53 | NOUN 54 | NOUN 55 | AUX 56 | VERB 57 | VERB 58 | VERB 59 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-tl.tsv: -------------------------------------------------------------------------------- 1 | VERB 2 | DET 3 | NOUN 4 | PUNCT 5 | 6 | VERB 7 | ADP 8 | NOUN 9 | DET 10 | NOUN 11 | PUNCT 12 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-tr.tsv: -------------------------------------------------------------------------------- 1 | INTJ 2 | DET 3 | NOUN 4 | NOUN 5 | VERB 6 | NOUN 7 | PUNCT 8 | 9 | PROPN 10 | NOUN 11 | NOUN 12 | VERB 13 | PRON 14 | PUNCT 15 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-ur.tsv: -------------------------------------------------------------------------------- 1 | PROPN 2 | PROPN 3 | PROPN 4 | PROPN 5 | PROPN 6 | PROPN 7 | PROPN 8 | AUX 9 | PROPN 10 | ADP 11 | ADJ 12 | NOUN 13 | PROPN 14 | ADJ 15 | PROPN 16 | ADP 17 | NOUN 18 | ADP 19 | NOUN 20 | ADJ 21 | ADP 22 | ADJ 23 | PROPN 24 | PROPN 25 | AUX 26 | NOUN 27 | VERB 28 | VERB 29 | PUNCT 30 | 31 | PRON 32 | AUX 33 | PROPN 34 | ADP 35 | NOUN 36 | CCONJ 37 | PROPN 38 | PROPN 39 | ADP 40 | ADJ 41 | NOUN 42 | ADP 43 | ADJ 44 | CCONJ 45 | ADJ 46 | VERB 47 | ADP 48 | ADV 49 | ADJ 50 | PROPN 51 | PROPN 52 | VERB 53 | VERB 54 | ADP 55 | NOUN 56 | ADV 57 | ADV 58 | VERB 59 | SCONJ 60 | DET 61 | NOUN 62 | ADV 63 | ADJ 64 | NOUN 65 | ADP 66 | NOUN 67 | ADP 68 | NOUN 69 | VERB 70 | VERB 71 | ADP 72 | NOUN 73 | AUX 74 | PUNCT 75 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-vi.tsv: -------------------------------------------------------------------------------- 1 | NOUN 2 | PROPN 3 | AUX 4 | VERB 5 | PUNCT 6 | NOUN 7 | PUNCT 8 | NOUN 9 | PUNCT 10 | NOUN 11 | PUNCT 12 | ADJ 13 | PART 14 | VERB 15 | PRON 16 | SCONJ 17 | VERB 18 | ADP 19 | PUNCT 20 | NOUN 21 | NOUN 22 | NOUN 23 | PUNCT 24 | PUNCT 25 | 26 | NOUN 27 | NOUN 28 | ADJ 29 | NOUN 30 | ADV 31 | VERB 32 | ADV 33 | PUNCT 34 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-yo.tsv: -------------------------------------------------------------------------------- 1 | ADP 2 | NOUN 3 | NOUN 4 | ADJ 5 | PROPN 6 | VERB 7 | DET 8 | NOUN 9 | CCONJ 10 | NOUN 11 | PUNCT 12 | 13 | NOUN 14 | ADV 15 | AUX 16 | ADP 17 | NOUN 18 | PUNCT 19 | PRON 20 | ADV 21 | NOUN 22 | PUNCT 23 | NOUN 24 | PRON 25 | AUX 26 | VERB 27 | NOUN 28 | NOUN 29 | PUNCT 30 | PROPN 31 | PROPN 32 | ADV 33 | AUX 34 | VERB 35 | VERB 36 | NOUN 37 | NOUN 38 | PUNCT 39 | -------------------------------------------------------------------------------- /mock_test_data/predictions/udpos/test-zh.tsv: -------------------------------------------------------------------------------- 1 | PRON 2 | AUX 3 | VERB 4 | VERB 5 | PRON 6 | PUNCT 7 | 8 | VERB 9 | ADJ 10 | NOUN 11 | ADP 12 | NOUN 13 | CCONJ 14 | VERB 15 | ADP 16 | PRON 17 | ADP 18 | NOUN 19 | NOUN 20 | PUNCT 21 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-ar.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | neutral 3 | contradiction 4 | neutral 5 | contradiction 6 | contradiction 7 | entailment 8 | neutral 9 | entailment 10 | entailment 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-bg.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | entailment 3 | contradiction 4 | entailment 5 | neutral 6 | contradiction 7 | entailment 8 | neutral 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-de.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | neutral 3 | contradiction 4 | entailment 5 | contradiction 6 | contradiction 7 | entailment 8 | neutral 9 | contradiction 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-el.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | contradiction 4 | neutral 5 | entailment 6 | contradiction 7 | entailment 8 | neutral 9 | entailment 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-en.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | neutral 4 | contradiction 5 | contradiction 6 | contradiction 7 | entailment 8 | entailment 9 | contradiction 10 | entailment 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-es.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | contradiction 4 | entailment 5 | contradiction 6 | contradiction 7 | entailment 8 | entailment 9 | contradiction 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-fr.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | contradiction 4 | entailment 5 | entailment 6 | contradiction 7 | entailment 8 | neutral 9 | contradiction 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-hi.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | neutral 4 | neutral 5 | neutral 6 | contradiction 7 | neutral 8 | contradiction 9 | entailment 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-ru.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | entailment 3 | neutral 4 | neutral 5 | neutral 6 | contradiction 7 | entailment 8 | entailment 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-sw.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | neutral 3 | entailment 4 | entailment 5 | entailment 6 | entailment 7 | entailment 8 | contradiction 9 | neutral 10 | neutral 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-th.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | entailment 3 | contradiction 4 | entailment 5 | neutral 6 | contradiction 7 | entailment 8 | contradiction 9 | entailment 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-tr.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | entailment 3 | contradiction 4 | contradiction 5 | neutral 6 | contradiction 7 | neutral 8 | neutral 9 | entailment 10 | entailment 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-ur.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | contradiction 4 | entailment 5 | entailment 6 | contradiction 7 | entailment 8 | neutral 9 | neutral 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-vi.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | contradiction 4 | contradiction 5 | neutral 6 | contradiction 7 | entailment 8 | neutral 9 | contradiction 10 | contradiction 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xnli/test-zh.tsv: -------------------------------------------------------------------------------- 1 | entailment 2 | contradiction 3 | contradiction 4 | entailment 5 | contradiction 6 | contradiction 7 | entailment 8 | neutral 9 | contradiction 10 | entailment 11 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-ar.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-de.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-el.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-en.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-es.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "5 veces jugador de la Pro Bowl y que fue el l\u00edder, en activo, de capturas de la NFL con 136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-hi.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-ru.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-th.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "\u0e2d\u0e34\u0e19\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e40\u0e0b\u0e1b\u0e2a\u0e35\u0e48\u0e04\u0e23\u0e31\u0e49\u0e07" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-tr.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "5" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-vi.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308", 3 | "56beb4343aeaaa14008c925c": "136" 4 | } 5 | -------------------------------------------------------------------------------- /mock_test_data/predictions/xquad/test-zh.json: -------------------------------------------------------------------------------- 1 | { 2 | "56beb4343aeaaa14008c925b": "308\u5206", 3 | "56beb4343aeaaa14008c925c": "136 \u6b21\u64d2\u6740" 4 | } 5 | -------------------------------------------------------------------------------- /multichecklist/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /multichecklist/checklist_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # coding=utf-8 16 | # Based on functions from: 17 | # https://github.com/marcotcr/checklist/blob/master/notebooks/SQuAD.ipynb 18 | # Licensed under the MIT License. 19 | """Utility functions for working with CheckLists.""" 20 | 21 | import itertools 22 | 23 | 24 | def format_squad_with_context(x, pred, conf, label=None, *args, **kwargs): 25 | c, q = x 26 | ret = 'C: %s\nQ: %s\n' % (c, q) 27 | if label is not None: 28 | ret += 'A: %s\n' % label 29 | ret += 'P: %s\n' % pred 30 | return ret 31 | 32 | 33 | def crossproduct(t): 34 | # takes the output of editor.template and does the cross product of contexts and qas 35 | ret = [] 36 | ret_labels = [] 37 | for x in t.data: 38 | cs = x['contexts'] 39 | qas = x['qas'] 40 | d = list(itertools.product(cs, qas)) 41 | ret.append([(x[0], x[1][0]) for x in d]) 42 | ret_labels.append([x[1][1] for x in d]) 43 | t.data = ret 44 | t.labels = ret_labels 45 | return t 46 | -------------------------------------------------------------------------------- /scripts/eval_qa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Script to evaluate the predictions of a trained model on XQuAD, TyDi QA, and MLQA. 17 | REPO=$PWD 18 | DIR=${REPO}/download 19 | XQUAD_DIR=${DIR}/xquad 20 | MLQA_DIR=${DIR}/mlqa 21 | TYDIQA_DIR=${DIR}/tydiqa 22 | 23 | EVAL_SQUAD=${PWD}/third_party/evaluate_squad.py 24 | EVAL_MLQA=${PWD}/third_party/evaluate_mlqa.py 25 | 26 | PREDICTIONS_DIR=${REPO}/predictions 27 | XQUAD_PRED_DIR=${PREDICTIONS_DIR}/xquad 28 | MLQA_PRED_DIR=${PREDICTIONS_DIR}/mlqa 29 | TYDIQA_PRED_DIR=${PREDICTIONS_DIR}/tydiqa 30 | 31 | for pred_path in ${PREDICTIONS_DIR} ${XQUAD_PRED_DIR} ${MLQA_PRED_DIR} ${TYDIQA_PRED_DIR}; do 32 | if [ ! -d ${pred_path} ] 33 | then 34 | echo "Predictions path ${pred_path} does not exist." 35 | exit 36 | fi 37 | done 38 | 39 | echo 40 | echo "XQuAD" 41 | for lang in en es de el ru tr ar vi th zh hi; do 42 | echo -n " $lang " 43 | TEST_FILE=${XQUAD_DIR}/xquad.$lang.json 44 | PRED_FILE=${XQUAD_PRED_DIR}/predictions_${lang}_.json 45 | python "${EVAL_SQUAD}" "${TEST_FILE}" "${PRED_FILE}" 46 | done 47 | 48 | echo 49 | echo "MLQA" 50 | for lang in en es de ar hi vi zh; do 51 | echo -n " $lang " 52 | TEST_FILE=${MLQA_DIR}/MLQA_V1/test/test-context-$lang-question-$lang.json 53 | PRED_FILE=${MLQA_PRED_DIR}/predictions_${lang}_.json 54 | python "${EVAL_MLQA}" "${TEST_FILE}" "${PRED_FILE}" ${lang} 55 | done 56 | 57 | echo "TyDi QA Gold Passage" 58 | for lang in en ar bn fi id ko ru sw te; do 59 | echo -n " $lang " 60 | TEST_FILE=${TYDIQA_DIR}/tydiqa-goldp-v1.1-dev/tydiqa.$lang.dev.json 61 | PRED_FILE=${TYDIQA_PRED_DIR}/predictions_${lang}_.json 62 | python "${EVAL_SQUAD}" "${TEST_FILE}" "${PRED_FILE}" 63 | done 64 | -------------------------------------------------------------------------------- /scripts/predict_qa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Script to obtain predictions using a trained model on XQuAD, TyDi QA, and MLQA. 17 | REPO=$PWD 18 | MODEL=${1:-bert-base-multilingual-cased} 19 | MODEL_PATH=${2} 20 | TGT=${3:-xquad} 21 | GPU=${4:-0} 22 | DATA_DIR=${5:-"$REPO/download/"} 23 | 24 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 25 | MODEL_TYPE="bert" 26 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 27 | MODEL_TYPE="xlm" 28 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 29 | MODEL_TYPE="xlm-roberta" 30 | fi 31 | 32 | if [ ! -d "${MODEL_PATH}" ]; then 33 | echo "Model path ${MODEL_PATH} does not exist." 34 | exit 35 | fi 36 | 37 | DIR=${DATA_DIR}/${TGT}/ 38 | PREDICTIONS_DIR=${MODEL_PATH}/predictions 39 | PRED_DIR=${PREDICTIONS_DIR}/$TGT/ 40 | mkdir -p "${PRED_DIR}" 41 | 42 | if [ $TGT == 'xquad' ]; then 43 | langs=( en es de el ru tr ar vi th zh hi ) 44 | elif [ $TGT == 'mlqa' ]; then 45 | langs=( en es de ar hi vi zh ) 46 | elif [ $TGT == 'tydiqa' ]; then 47 | langs=( en ar bn fi id ko ru sw te ) 48 | fi 49 | 50 | echo "************************" 51 | echo ${MODEL} 52 | echo "************************" 53 | 54 | echo 55 | echo "Predictions on $TGT" 56 | for lang in ${langs[@]}; do 57 | echo " $lang " 58 | if [ $TGT == 'xquad' ]; then 59 | TEST_FILE=${DIR}/xquad.$lang.json 60 | elif [ $TGT == 'mlqa' ]; then 61 | TEST_FILE=${DIR}/MLQA_V1/test/test-context-$lang-question-$lang.json 62 | elif [ $TGT == 'tydiqa' ]; then 63 | TEST_FILE=${DIR}/tydiqa-goldp-v1.1-dev/tydiqa.goldp.$lang.dev.json 64 | fi 65 | 66 | CUDA_VISIBLE_DEVICES=${GPU} python third_party/run_squad.py \ 67 | --model_type ${MODEL_TYPE} \ 68 | --model_name_or_path ${MODEL_PATH} \ 69 | --do_eval \ 70 | --eval_lang ${lang} \ 71 | --predict_file "${TEST_FILE}" \ 72 | --output_dir "${PRED_DIR}" &> /dev/null 73 | done 74 | 75 | # Rename files to test pattern 76 | for lang in ${langs[@]}; do 77 | mv $PRED_DIR/predictions_${lang}_.json $PRED_DIR/test-$lang.json 78 | done 79 | -------------------------------------------------------------------------------- /scripts/preprocess_panx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | DATA_DIR=${2:-"$REPO/download/"} 19 | 20 | TASK='panx' 21 | MAXL=128 22 | LANGS="ar,he,vi,id,jv,ms,tl,eu,ml,ta,te,af,nl,en,de,el,bn,hi,mr,ur,fa,fr,it,pt,es,bg,ru,ja,ka,ko,th,sw,yo,my,zh,kk,tr,et,fi,hu,qu,pl,uk,az,lt,pa,gu,ro" 23 | LC="" 24 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 25 | MODEL_TYPE="bert" 26 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 27 | MODEL_TYPE="xlm" 28 | LC=" --do_lower_case" 29 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 30 | MODEL_TYPE="xlmr" 31 | fi 32 | SAVE_DIR="$DATA_DIR/$TASK/${TASK}_processed_maxlen${MAXL}" 33 | mkdir -p $SAVE_DIR 34 | python3 $REPO/utils_preprocess.py \ 35 | --data_dir $DATA_DIR/$TASK/ \ 36 | --task panx_tokenize \ 37 | --model_name_or_path $MODEL \ 38 | --model_type $MODEL_TYPE \ 39 | --max_len $MAXL \ 40 | --output_dir $SAVE_DIR \ 41 | --languages $LANGS $LC >> $SAVE_DIR/preprocess.log 42 | if [ ! -f $SAVE_DIR/labels.txt ]; then 43 | cat $SAVE_DIR/*/*.${MODEL} | cut -f 2 | grep -v "^$" | sort | uniq > $SAVE_DIR/labels.txt 44 | fi 45 | -------------------------------------------------------------------------------- /scripts/preprocess_udpos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | DATA_DIR=${2:-"$REPO/download/"} 19 | 20 | TASK='udpos' 21 | MAXL=128 22 | LANGS='af,ar,bg,de,el,en,es,et,eu,fa,fi,fr,he,hi,hu,id,it,ja,kk,ko,mr,nl,pt,ru,ta,te,th,tl,tr,ur,vi,yo,zh,lt,pl,uk,wo,ro' 23 | LC="" 24 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 25 | MODEL_TYPE="bert" 26 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 27 | MODEL_TYPE="xlm" 28 | LC=" --do_lower_case" 29 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 30 | MODEL_TYPE="xlmr" 31 | fi 32 | 33 | SAVE_DIR="$DATA_DIR/${TASK}/udpos_processed_maxlen${MAXL}" 34 | mkdir -p $SAVE_DIR 35 | python3 $REPO/utils_preprocess.py \ 36 | --data_dir $DATA_DIR/${TASK}/ \ 37 | --task udpos_tokenize \ 38 | --model_name_or_path $MODEL \ 39 | --model_type $MODEL_TYPE \ 40 | --max_len $MAXL \ 41 | --output_dir $SAVE_DIR \ 42 | --languages $LANGS $LC >> $SAVE_DIR/process.log 43 | if [ ! -f $SAVE_DIR/labels.txt ]; then 44 | echo "create label" 45 | cat $SAVE_DIR/*/*.${MODEL} | cut -f 2 | grep -v "^$" | sort | uniq > $SAVE_DIR/labels.txt 46 | fi 47 | -------------------------------------------------------------------------------- /scripts/run_bucc2018.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | GPU=${2:-0} 19 | DATA_DIR=${3:-"$REPO/download/"} 20 | OUT_DIR=${4:-"$REPO/outputs/"} 21 | 22 | export CUDA_VISIBLE_DEVICES=$GPU 23 | 24 | TASK='bucc2018' 25 | DATA_DIR=$DATA_DIR/$TASK/ 26 | MAXL=512 27 | TL='en' 28 | 29 | NLAYER=12 30 | LC="" 31 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 32 | MODEL_TYPE="bert" 33 | DIM=768 34 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 35 | MODEL_TYPE="xlm" 36 | DIM=1280 37 | LC=" --do_lower_case" 38 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 39 | MODEL_TYPE="xlmr" 40 | DIM=1024 41 | NLAYER=24 42 | fi 43 | 44 | SP='test' 45 | for SL in fr ru zh de; do 46 | PRED_DIR=$REPO/predictions/ 47 | OUT=$OUT_DIR/$TASK/$MODEL-${SL} 48 | mkdir -p $OUT 49 | for sp in 'test' 'dev'; do 50 | for lg in "$SL" "$TL"; do 51 | FILE=$DATA_DIR/${SL}-${TL}.${sp}.${lg} 52 | cut -f2 $FILE > $OUT/${SL}-${TL}.${sp}.${lg}.txt 53 | cut -f1 $FILE > $OUT/${SL}-${TL}.${sp}.${lg}.id 54 | done 55 | done 56 | 57 | CP="candidates" 58 | python $REPO/third_party/evaluate_retrieval.py \ 59 | --model_type $MODEL_TYPE \ 60 | --model_name_or_path $MODEL \ 61 | --embed_size $DIM \ 62 | --batch_size 100 \ 63 | --task_name $TASK \ 64 | --src_language $SL \ 65 | --tgt_language $TL \ 66 | --pool_type cls \ 67 | --max_seq_length $MAXL \ 68 | --data_dir $DATA_DIR \ 69 | --output_dir $OUT \ 70 | --predict_dir $PRED_DIR \ 71 | --candidate_prefix $CP \ 72 | --log_file mine-bitext-${SL}.log \ 73 | --extract_embeds \ 74 | --mine_bitext \ 75 | --specific_layer 7 \ 76 | --dist cosine $LC 77 | 78 | done 79 | -------------------------------------------------------------------------------- /scripts/run_eval_lareqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Evaluate a fine-tuned model (trained using scripts/train_lareqa.sh) on the 17 | # LAReQA retrieval task. 18 | 19 | REPO=$PWD 20 | MODEL=${1:-bert-base-multilingual-cased} 21 | GPU=${2:-0} 22 | DATA_DIR=${3:-"$REPO/download/"} 23 | OUT_DIR=${4:-"$REPO/outputs/"} 24 | # Select a checkpoint based on validation performance. Results in the XTREME-R 25 | # paper used mBERT checkpoint-9000 and XLM-R checkpoint-10000. 26 | CHECKPOINT=${5:-checkpoint-9000} 27 | 28 | TASK='lareqa' 29 | 30 | # These settings should match those used in scripts/train_lareqa.sh 31 | MAX_SEQ_LEN=352 # Total sequence length (query + answer) 32 | MAX_QUERY_LEN=96 33 | MAX_ANSWER_LEN=256 34 | NUM_EPOCHS=3.0 35 | LR=2e-5 36 | 37 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 38 | MODEL_TYPE="bert-retrieval" 39 | DIM=768 40 | DO_LOWER_CASE="" 41 | elif [ $MODEL == "xlm-roberta-large" ]; then 42 | MODEL_TYPE="xlmr-retrieval" 43 | DIM=1024 44 | DO_LOWER_CASE="--do_lower_case" 45 | fi 46 | 47 | MODEL_DIR=$OUT_DIR/$TASK/${MODEL}_LR${LR}_EPOCH${NUM_EPOCHS}_LEN${MAX_SEQ_LEN} 48 | MODEL_PATH=$MODEL_DIR/$CHECKPOINT 49 | OUTPUT_DIR=$MODEL_DIR/eval_$CHECKPOINT 50 | mkdir -p $OUTPUT_DIR 51 | 52 | export CUDA_VISIBLE_DEVICES=$GPU 53 | 54 | python $REPO/third_party/evaluate_retrieval.py \ 55 | --model_type $MODEL_TYPE \ 56 | --model_name_or_path $MODEL_PATH \ 57 | --embed_size $DIM \ 58 | --batch_size 100 \ 59 | --task_name $TASK \ 60 | --pool_type cls \ 61 | --max_seq_length $MAX_SEQ_LEN \ 62 | --max_query_length $MAX_QUERY_LEN \ 63 | --max_answer_length $MAX_ANSWER_LEN \ 64 | --data_dir $DATA_DIR \ 65 | --output_dir $OUTPUT_DIR \ 66 | --extract_embeds \ 67 | --dist cosine \ 68 | $DO_LOWER_CASE 69 | -------------------------------------------------------------------------------- /scripts/run_eval_mewslix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -eu 17 | REPO=$PWD 18 | # Note: The default evaluates the frozen model. To evaluate a model fine-tuned 19 | # with train_mewsli.x, pass its model directory. 20 | MODEL=${1:-bert-base-multilingual-cased} 21 | GPU=${2:-0} 22 | DATA_DIR=${3:-"$REPO/download"} 23 | OUT_DIR=${4:-"$REPO/outputs"} 24 | TASK='mewslix' 25 | 26 | # These settings should match those used in scripts/train_mewslix.sh 27 | MAX_SEQ_LEN=64 28 | # Infer model type from the model argument, which could be a path. 29 | MODEL_BASE="$(basename ${MODEL})" 30 | # Note explicit lack of quotes below to get wildcard matching. 31 | if [[ ${MODEL_BASE} == bert-base-multilingual-cased* ]]; then 32 | MODEL_TYPE="bert-retrieval" 33 | DIM=768 34 | DO_LOWER_CASE="" 35 | elif [[ ${MODEL_BASE} == xlm-roberta-large* ]]; then 36 | MODEL_TYPE="xlmr-retrieval" 37 | DIM=1024 38 | DO_LOWER_CASE="--do_lower_case" 39 | else 40 | echo "Failed to identify model type." 41 | exit 42 | fi 43 | if [[ -d "${MODEL}" ]]; then 44 | # When provided a directory, output the results to its subdirectory. 45 | OUTPUT_DIR="${MODEL}/run_eval" 46 | else 47 | # Otherwise assume it is a vanilla pretrained (so no model directory) and just 48 | # output to a reasonable location: 49 | OUTPUT_DIR="$OUT_DIR/$TASK/${MODEL}_LEN${MAX_SEQ_LEN}/run_eval" 50 | fi 51 | mkdir -p $OUTPUT_DIR 52 | export CUDA_VISIBLE_DEVICES=$GPU 53 | python $REPO/third_party/evaluate_retrieval.py \ 54 | --model_type $MODEL_TYPE \ 55 | --model_name_or_path $MODEL \ 56 | --embed_size $DIM \ 57 | --batch_size 100 \ 58 | --task_name $TASK \ 59 | --pool_type cls \ 60 | --max_seq_length $MAX_SEQ_LEN \ 61 | --data_dir $DATA_DIR/$TASK \ 62 | --output_dir $OUTPUT_DIR \ 63 | --dist cosine \ 64 | $DO_LOWER_CASE \ 65 | 2>&1 | tee $OUTPUT_DIR/eval.log -------------------------------------------------------------------------------- /scripts/run_tatoeba.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | GPU=${2:-0} 19 | DATA_DIR=${3:-"$REPO/download/"} 20 | OUT_DIR=${4:-"$REPO/outputs/"} 21 | 22 | export CUDA_VISIBLE_DEVICES=$GPU 23 | 24 | TASK='tatoeba' 25 | TL='en' 26 | MAXL=512 27 | LC="" 28 | LAYER=7 29 | NLAYER=12 30 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 31 | MODEL_TYPE="bert" 32 | DIM=768 33 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 34 | MODEL_TYPE="xlm" 35 | LC=" --do_lower_case" 36 | DIM=1280 37 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 38 | MODEL_TYPE="xlmr" 39 | DIM=1024 40 | NLAYER=24 41 | LAYER=13 42 | fi 43 | 44 | # Add fine-tuned model path here 45 | MODEL=/mnt/disk-1/models/squad/xlm-roberta-large_LR3e-5_EPOCH2.0_maxlen384_batchsize2_gradacc16 46 | 47 | OUT=$OUT_DIR/$TASK/${MODEL}_${MAXL}/ 48 | mkdir -p $OUT 49 | for SL in ar he vi id jv tl eu ml ta te af nl en de el bn hi mr ur fa fr it pt es bg ru ja ka ko th sw zh kk tr et fi hu az lt pl uk ro; do 50 | python $REPO/third_party/evaluate_retrieval.py \ 51 | --model_type $MODEL_TYPE \ 52 | --model_name_or_path $MODEL \ 53 | --embed_size $DIM \ 54 | --batch_size 100 \ 55 | --task_name $TASK \ 56 | --src_language $SL \ 57 | --tgt_language en \ 58 | --data_dir $DATA_DIR/$TASK/ \ 59 | --max_seq_length $MAXL \ 60 | --output_dir $OUT \ 61 | --log_file embed-cosine \ 62 | --num_layers $NLAYER \ 63 | --dist cosine $LC \ 64 | --specific_layer $LAYER 65 | done 66 | -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | TASK=${2:-pawsx} 19 | GPU=${3:-0} 20 | DATA_DIR=${4:-"$REPO/download/"} 21 | OUT_DIR=${5:-"$REPO/outputs-temp/"} 22 | echo "Fine-tuning $MODEL on $TASK using GPU $GPU" 23 | echo "Load data from $DATA_DIR, and save models to $OUT_DIR" 24 | 25 | if [ $TASK == 'pawsx' ]; then 26 | bash $REPO/scripts/train_pawsx.sh $MODEL $GPU $DATA_DIR $OUT_DIR 27 | elif [ $TASK == 'xnli' ]; then 28 | bash $REPO/scripts/train_xnli.sh $MODEL $GPU $DATA_DIR $OUT_DIR 29 | elif [ $TASK == 'udpos' ]; then 30 | bash $REPO/scripts/preprocess_udpos.sh $MODEL $DATA_DIR 31 | bash $REPO/scripts/train_udpos.sh $MODEL $GPU $DATA_DIR $OUT_DIR 32 | elif [ $TASK == 'panx' ]; then 33 | bash $REPO/scripts/preprocess_panx.sh $MODEL $DATA_DIR 34 | bash $REPO/scripts/train_panx.sh $MODEL $GPU $DATA_DIR $OUT_DIR 35 | elif [ $TASK == 'xquad' ]; then 36 | bash $REPO/scripts/train_qa.sh $MODEL squad $TASK $GPU $DATA_DIR $OUT_DIR 37 | elif [ $TASK == 'mlqa' ]; then 38 | bash $REPO/scripts/train_qa.sh $MODEL squad $TASK $GPU $DATA_DIR $OUT_DIR 39 | elif [ $TASK == 'tydiqa' ]; then 40 | bash $REPO/scripts/train_qa.sh $MODEL tydiqa $TASK $GPU $DATA_DIR $OUT_DIR 41 | elif [ $TASK == 'bucc2018' ]; then 42 | bash $REPO/scripts/run_bucc2018.sh $MODEL $GPU $DATA_DIR $OUT_DIR 43 | elif [ $TASK == 'tatoeba' ]; then 44 | bash $REPO/scripts/run_tatoeba.sh $MODEL $GPU $DATA_DIR $OUT_DIR 45 | elif [ $TASK == 'mewslix' ]; then 46 | bash $REPO/scripts/train_mewslix.sh $MODEL $GPU $DATA_DIR $OUT_DIR 47 | elif [ $TASK == 'lareqa' ]; then 48 | bash $REPO/scripts/train_lareqa.sh $MODEL $GPU $DATA_DIR $OUT_DIR 49 | bash $REPO/scripts/run_eval_lareqa.sh $MODEL $GPU $DATA_DIR $OUT_DIR 50 | fi 51 | 52 | -------------------------------------------------------------------------------- /scripts/train_lareqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Fine-tune a pretrained multilingual encoder on the LAReQA retrieval task. 17 | 18 | REPO=$PWD 19 | MODEL=${1:-bert-base-multilingual-cased} 20 | GPU=${2:-0} 21 | DATA_DIR=${3:-"$REPO/download/"} 22 | OUT_DIR=${4:-"$REPO/outputs/"} 23 | 24 | TASK='lareqa' 25 | 26 | # These settings should match those used in scripts/run_eval_lareqa.sh 27 | MAX_SEQ_LEN=352 # Total sequence length (query + answer) 28 | MAX_QUERY_LEN=96 29 | MAX_ANSWER_LEN=256 30 | NUM_EPOCHS=3.0 31 | LR=2e-5 32 | 33 | PER_GPU_BATCH_SIZE=4 34 | GRAD_ACC_STEPS=4 35 | 36 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 37 | MODEL_TYPE="bert-retrieval" 38 | DO_LOWER_CASE="" 39 | elif [ $MODEL == "xlm-roberta-large" ]; then 40 | MODEL_TYPE="xlmr-retrieval" 41 | DO_LOWER_CASE="--do_lower_case" 42 | fi 43 | 44 | MODEL_DIR=$OUT_DIR/$TASK/${MODEL}_LR${LR}_EPOCH${NUM_EPOCHS}_LEN${MAX_SEQ_LEN} 45 | mkdir -p $MODEL_DIR 46 | 47 | export CUDA_VISIBLE_DEVICES=$GPU 48 | 49 | python third_party/run_retrieval_qa.py \ 50 | --model_type $MODEL_TYPE \ 51 | --model_name_or_path $MODEL \ 52 | --do_train \ 53 | --do_eval \ 54 | --evaluate_during_training \ 55 | --train_file $DATA_DIR/squad/train-v1.1.json \ 56 | --predict_file $DATA_DIR/squad/dev-v1.1.json \ 57 | --per_gpu_train_batch_size $PER_GPU_BATCH_SIZE \ 58 | --learning_rate $LR \ 59 | --num_train_epochs $NUM_EPOCHS \ 60 | --max_seq_length $MAX_SEQ_LEN \ 61 | --max_query_length $MAX_QUERY_LEN \ 62 | --max_answer_length $MAX_ANSWER_LEN \ 63 | --logging_steps 1000 \ 64 | --save_steps 1000 \ 65 | --overwrite_output_dir \ 66 | --gradient_accumulation_steps $GRAD_ACC_STEPS \ 67 | --warmup_steps 0 \ 68 | --output_dir $MODEL_DIR \ 69 | --weight_decay 0.0 \ 70 | --threads 8 \ 71 | --train_lang en \ 72 | --eval_lang en \ 73 | $DO_LOWER_CASE 74 | -------------------------------------------------------------------------------- /scripts/train_mewslix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Fine-tune a pretrained multilingual encoder on the Mewsli-X retrieval task. 17 | set -eu 18 | REPO=$PWD 19 | MODEL=${1:-bert-base-multilingual-cased} 20 | GPU=${2:-0} 21 | DATA_DIR=${3:-"$REPO/download"} 22 | OUT_DIR=${4:-"$REPO/outputs"} 23 | TASK='mewslix' 24 | 25 | # These settings should match those used in scripts/run_eval_mewslix.sh 26 | # They are primarily aimed at a quick training time (~1h using 1 GPU for mBERT). 27 | MAX_SEQ_LEN=64 28 | NUM_EPOCHS=2 29 | GRAD_ACC_STEPS=4 30 | 31 | # Learning rates were set based on best dev-set loss on the English 32 | # 'wikipedia_pairs-dev' after 2 epochs, searching over {1e-5, 2e-5, 5e-5, 1e-4}. 33 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 34 | MODEL_TYPE="bert-retrieval" 35 | LR=2e-5 36 | DO_LOWER_CASE="" 37 | PER_GPU_BATCH_SIZE=64 # largest power of two that fit 16GB GPU RAM 38 | LOGGING_STEPS=50 39 | SAVE_STEPS=100 40 | elif [ $MODEL == "xlm-roberta-large" ]; then 41 | MODEL_TYPE="xlmr-retrieval" 42 | LR=1e-5 43 | DO_LOWER_CASE="--do_lower_case" 44 | PER_GPU_BATCH_SIZE=8 # largest power of two that fit 16GB GPU RAM 45 | LOGGING_STEPS=500 46 | SAVE_STEPS=2000 47 | else 48 | echo "$MODEL not configured." 49 | fi 50 | 51 | HYPER_INFO="LR${LR}_EPOCH${NUM_EPOCHS}_LEN${MAX_SEQ_LEN}_BS${PER_GPU_BATCH_SIZE}_ACC${GRAD_ACC_STEPS}" 52 | MODEL_DIR="${OUT_DIR}/${TASK}/${MODEL}_${HYPER_INFO}" 53 | mkdir -p $MODEL_DIR 54 | 55 | export CUDA_VISIBLE_DEVICES=$GPU 56 | 57 | echo $MODEL_DIR/train.log 58 | python third_party/run_retrieval_el.py \ 59 | --model_type $MODEL_TYPE \ 60 | --model_name_or_path $MODEL \ 61 | --do_train \ 62 | --do_eval \ 63 | --evaluate_during_training \ 64 | --data_dir $DATA_DIR/${TASK} \ 65 | --train_file wikipedia_pairs-train.jsonl \ 66 | --predict_file wikipedia_pairs-dev.jsonl \ 67 | --per_gpu_train_batch_size $PER_GPU_BATCH_SIZE \ 68 | --learning_rate $LR \ 69 | --num_train_epochs $NUM_EPOCHS \ 70 | --max_seq_length $MAX_SEQ_LEN \ 71 | --logging_steps $LOGGING_STEPS \ 72 | --save_steps $SAVE_STEPS \ 73 | --overwrite_output_dir \ 74 | --gradient_accumulation_steps $GRAD_ACC_STEPS \ 75 | --warmup_steps 0 \ 76 | --output_dir $MODEL_DIR \ 77 | --weight_decay 0.0 \ 78 | --threads 8 \ 79 | --train_lang en \ 80 | --eval_lang en \ 81 | $DO_LOWER_CASE \ 82 | 2>&1 | tee $MODEL_DIR/train.log 83 | 84 | set +x 85 | bash $REPO/scripts/run_eval_mewslix.sh $MODEL_DIR $GPU $DATA_DIR $OUT_DIR 86 | -------------------------------------------------------------------------------- /scripts/train_panx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | GPU=${2:-0} 19 | DATA_DIR=${3:-"$REPO/download/"} 20 | OUT_DIR=${4:-"$REPO/outputs/"} 21 | 22 | export CUDA_VISIBLE_DEVICES=$GPU 23 | TASK='panx' 24 | LANGS="ar,he,vi,id,jv,ms,tl,eu,ml,ta,te,af,nl,en,de,el,bn,hi,mr,ur,fa,fr,it,pt,es,bg,ru,ja,ka,ko,th,sw,yo,my,zh,kk,tr,et,fi,hu,qu,pl,uk,az,lt,pa,gu,ro" 25 | NUM_EPOCHS=10 26 | MAX_LENGTH=128 27 | LR=2e-5 28 | 29 | LC="" 30 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 31 | MODEL_TYPE="bert" 32 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 33 | MODEL_TYPE="xlm" 34 | LC=" --do_lower_case" 35 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 36 | MODEL_TYPE="xlmr" 37 | fi 38 | 39 | if [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-roberta-large" ]; then 40 | BATCH_SIZE=2 41 | GRAD_ACC=16 42 | else 43 | BATCH_SIZE=8 44 | GRAD_ACC=4 45 | fi 46 | 47 | DATA_DIR=$DATA_DIR/${TASK}/${TASK}_processed_maxlen${MAX_LENGTH}/ 48 | OUTPUT_DIR="$OUT_DIR/$TASK/${MODEL}-LR${LR}-epoch${NUM_EPOCHS}-MaxLen${MAX_LENGTH}/" 49 | mkdir -p $OUTPUT_DIR 50 | python $REPO/third_party/run_tag.py \ 51 | --data_dir $DATA_DIR \ 52 | --model_type $MODEL_TYPE \ 53 | --labels $DATA_DIR/labels.txt \ 54 | --model_name_or_path $MODEL \ 55 | --output_dir $OUTPUT_DIR \ 56 | --max_seq_length $MAX_LENGTH \ 57 | --num_train_epochs $NUM_EPOCHS \ 58 | --gradient_accumulation_steps $GRAD_ACC \ 59 | --per_gpu_train_batch_size $BATCH_SIZE \ 60 | --per_gpu_eval_batch_size 32 \ 61 | --save_steps 1000 \ 62 | --seed 1 \ 63 | --learning_rate $LR \ 64 | --do_train \ 65 | --do_eval \ 66 | --do_predict \ 67 | --predict_langs $LANGS \ 68 | --train_langs en \ 69 | --log_file $OUTPUT_DIR/train.log \ 70 | --eval_all_checkpoints \ 71 | --eval_patience -1 \ 72 | --overwrite_output_dir \ 73 | --save_only_best_checkpoint $LC 74 | 75 | -------------------------------------------------------------------------------- /scripts/train_pawsx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | GPU=${2:-0} 19 | DATA_DIR=${3:-"$REPO/download/"} 20 | OUT_DIR=${4:-"$REPO/outputs/"} 21 | 22 | export CUDA_VISIBLE_DEVICES=$GPU 23 | 24 | TASK='pawsx' 25 | LR=2e-5 26 | EPOCH=5 27 | MAXL=128 28 | LANGS="de,en,es,fr,ja,ko,zh" 29 | LC="" 30 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 31 | MODEL_TYPE="bert" 32 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 33 | MODEL_TYPE="xlm" 34 | LC=" --do_lower_case" 35 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 36 | MODEL_TYPE="xlmr" 37 | fi 38 | 39 | if [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-roberta-large" ]; then 40 | BATCH_SIZE=2 41 | GRAD_ACC=16 42 | else 43 | BATCH_SIZE=8 44 | GRAD_ACC=4 45 | fi 46 | 47 | SAVE_DIR="${OUT_DIR}/${TASK}/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAXL}/" 48 | mkdir -p $SAVE_DIR 49 | 50 | python $PWD/third_party/run_classify.py \ 51 | --model_type $MODEL_TYPE \ 52 | --model_name_or_path $MODEL \ 53 | --train_language en \ 54 | --task_name $TASK \ 55 | --do_train \ 56 | --do_eval \ 57 | --do_predict \ 58 | --train_split train \ 59 | --test_split test \ 60 | --data_dir $DATA_DIR/$TASK/ \ 61 | --gradient_accumulation_steps $GRAD_ACC \ 62 | --save_steps 200 \ 63 | --per_gpu_train_batch_size $BATCH_SIZE \ 64 | --learning_rate $LR \ 65 | --num_train_epochs $EPOCH \ 66 | --max_seq_length $MAXL \ 67 | --output_dir $SAVE_DIR \ 68 | --eval_all_checkpoints \ 69 | --overwrite_output_dir \ 70 | --overwrite_cache \ 71 | --log_file 'train.log' \ 72 | --predict_languages $LANGS \ 73 | --save_only_best_checkpoint $LC \ 74 | --eval_test_set 75 | 76 | -------------------------------------------------------------------------------- /scripts/train_qa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Script to train a model on SQuAD v1.1 or the English TyDiQA-GoldP train data. 17 | 18 | REPO=$PWD 19 | MODEL=${1:-bert-base-multilingual-cased} 20 | SRC=${2:-squad} 21 | TGT=${3:-xquad} 22 | GPU=${4:-0} 23 | DATA_DIR=${5:-"$REPO/download/"} 24 | OUT_DIR=${6:-"$REPO/outputs/"} 25 | 26 | BATCH_SIZE=4 27 | GRAD_ACC=8 28 | 29 | MAXL=384 30 | LR=3e-5 31 | NUM_EPOCHS=3.0 32 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 33 | MODEL_TYPE="bert" 34 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 35 | MODEL_TYPE="xlm" 36 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 37 | MODEL_TYPE="xlm-roberta" 38 | fi 39 | 40 | # Model path where trained model should be stored 41 | MODEL_PATH=$OUT_DIR/$SRC/${MODEL}_LR${LR}_EPOCH${NUM_EPOCHS}_maxlen${MAXL}_batchsize${BATCH_SIZE}_gradacc${GRAD_ACC} 42 | mkdir -p $MODEL_PATH 43 | # Train either on the SQuAD or TyDiQa-GoldP English train file 44 | if [ $SRC == 'squad' ]; then 45 | TASK_DATA_DIR=${DATA_DIR}/squad 46 | TRAIN_FILE=${TASK_DATA_DIR}/train-v1.1.json 47 | PREDICT_FILE=${TASK_DATA_DIR}/dev-v1.1.json 48 | else 49 | TASK_DATA_DIR=${DATA_DIR}/tydiqa 50 | TRAIN_FILE=${TASK_DATA_DIR}/tydiqa-goldp-v1.1-train/tydiqa.en.train.json 51 | PREDICT_FILE=${TASK_DATA_DIR}/tydiqa-goldp-v1.1-dev/tydiqa.goldp.en.dev.json 52 | fi 53 | 54 | # train 55 | CUDA_VISIBLE_DEVICES=$GPU python third_party/run_squad.py \ 56 | --model_type ${MODEL_TYPE} \ 57 | --model_name_or_path ${MODEL} \ 58 | --do_train \ 59 | --do_eval \ 60 | --data_dir ${TASK_DATA_DIR} \ 61 | --train_file ${TRAIN_FILE} \ 62 | --predict_file ${PREDICT_FILE} \ 63 | --per_gpu_train_batch_size ${BATCH_SIZE} \ 64 | --learning_rate ${LR} \ 65 | --num_train_epochs ${NUM_EPOCHS} \ 66 | --max_seq_length $MAXL \ 67 | --doc_stride 128 \ 68 | --save_steps -1 \ 69 | --overwrite_output_dir \ 70 | --gradient_accumulation_steps ${GRAD_ACC} \ 71 | --warmup_steps 500 \ 72 | --output_dir ${MODEL_PATH} \ 73 | --weight_decay 0.0001 \ 74 | --threads 8 \ 75 | --train_lang en \ 76 | --eval_lang en 77 | 78 | # predict 79 | bash scripts/predict_qa.sh $MODEL $MODEL_PATH $TGT $GPU $DATA_DIR 80 | -------------------------------------------------------------------------------- /scripts/train_udpos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | GPU=${2:-0} 19 | DATA_DIR=${3:-"$REPO/download/"} 20 | OUT_DIR=${4:-"$REPO/outputs/"} 21 | 22 | TASK='udpos' 23 | export CUDA_VISIBLE_DEVICES=$GPU 24 | LANGS='af,ar,bg,de,el,en,es,et,eu,fa,fi,fr,he,hi,hu,id,it,ja,kk,ko,mr,nl,pt,ru,ta,te,th,tl,tr,ur,vi,yo,zh,lt,pl,uk,ro' 25 | NUM_EPOCHS=10 26 | MAX_LENGTH=128 27 | LR=2e-5 28 | 29 | LC="" 30 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 31 | MODEL_TYPE="bert" 32 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 33 | MODEL_TYPE="xlm" 34 | LC=" --do_lower_case" 35 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 36 | MODEL_TYPE="xlmr" 37 | fi 38 | 39 | if [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-roberta-large" ]; then 40 | BATCH_SIZE=2 41 | GRAD_ACC=16 42 | else 43 | BATCH_SIZE=8 44 | GRAD_ACC=4 45 | fi 46 | 47 | DATA_DIR=$DATA_DIR/$TASK/${TASK}_processed_maxlen${MAX_LENGTH}/ 48 | OUTPUT_DIR="$OUT_DIR/$TASK/${MODEL}-LR${LR}-epoch${NUM_EPOCHS}-MaxLen${MAX_LENGTH}" 49 | mkdir -p $OUTPUT_DIR 50 | python3 $REPO/third_party/run_tag.py \ 51 | --data_dir $DATA_DIR \ 52 | --model_type $MODEL_TYPE \ 53 | --labels $DATA_DIR/labels.txt \ 54 | --model_name_or_path $MODEL \ 55 | --output_dir $OUTPUT_DIR \ 56 | --max_seq_length $MAX_LENGTH \ 57 | --num_train_epochs $NUM_EPOCHS \ 58 | --gradient_accumulation_steps $GRAD_ACC \ 59 | --per_gpu_train_batch_size $BATCH_SIZE \ 60 | --save_steps 500 \ 61 | --seed 1 \ 62 | --learning_rate $LR \ 63 | --do_train \ 64 | --do_eval \ 65 | --do_predict \ 66 | --do_predict_dev \ 67 | --evaluate_during_training \ 68 | --predict_langs $LANGS \ 69 | --log_file $OUTPUT_DIR/train.log \ 70 | --eval_all_checkpoints \ 71 | --overwrite_output_dir \ 72 | --save_only_best_checkpoint $LC 73 | -------------------------------------------------------------------------------- /scripts/train_xcopa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # Script to train a model on the English SIQa and the English COPA data and to 17 | # produce predictions on the multilingual XCOPA test data. 18 | # Note that running this script requires a newer version of Transformers 19 | # (we used 4.9.2) as well as HuggingFace datasets (pip install datasets). 20 | 21 | REPO=$PWD 22 | MODEL=${1:-bert-base-multilingual-cased} 23 | GPU=${2:-1} 24 | DATA_DIR=${3:-"$REPO/download/"} 25 | OUT_DIR=${4:-"$REPO/outputs/"} 26 | 27 | export CUDA_VISIBLE_DEVICES=$GPU 28 | 29 | LR=2e-5 30 | EPOCH=5 31 | MAXL=128 32 | langs="et,ht,id,it,qu,sw,ta,th,tr,vi,zh" 33 | 34 | LC="" 35 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 36 | MODEL_TYPE="bert" 37 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 38 | MODEL_TYPE="xlm" 39 | LC=" --do_lower_case" 40 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 41 | MODEL_TYPE="xlmr" 42 | fi 43 | 44 | if [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-roberta-large" ]; then 45 | MAXL=128 46 | LR=3e-5 47 | BATCH_SIZE=2 48 | GRAD_ACC=16 49 | else 50 | MAXL=128 51 | LR=2e-5 52 | BATCH_SIZE=8 53 | GRAD_ACC=4 54 | fi 55 | 56 | TASK=siqa 57 | 58 | SIQA_SAVE_DIR="$OUT_DIR/$TASK/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAXL}/" 59 | mkdir -p $SIQA_SAVE_DIR 60 | 61 | echo "Training on ${TASK}" 62 | 63 | CUDA_VISIBLE_DEVICES=$GPU python third_party/run_xcopa.py \ 64 | --task ${TASK} \ 65 | --model_name_or_path $MODEL \ 66 | --output_dir $SIQA_SAVE_DIR/ \ 67 | --do_train \ 68 | --do_eval \ 69 | --do_predict \ 70 | --overwrite_output_dir \ 71 | --gradient_accumulation_steps $GRAD_ACC \ 72 | --per_gpu_train_batch_size $BATCH_SIZE \ 73 | --learning_rate $LR \ 74 | --max_seq_length $MAXL \ 75 | --num_train_epochs $EPOCH 76 | 77 | # Continue training on English COPA training set 78 | TASK=xcopa 79 | 80 | XCOPA_SAVE_DIR="$OUT_DIR/$TASK/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAXL}/" 81 | mkdir -p $XCOPA_SAVE_DIR 82 | 83 | echo "Fine-tuning on ${TASK}" 84 | 85 | # Note: XLM-R has trouble loading the tokenizer of an existing model 86 | CUDA_VISIBLE_DEVICES=$GPU python third_party/run_xcopa.py \ 87 | --task ${TASK} \ 88 | --train_lang en \ 89 | --tokenizer_name ${MODEL} \ 90 | --predict_langs ${langs} \ 91 | --model_name_or_path ${SIQA_SAVE_DIR} \ 92 | --output_dir $XCOPA_SAVE_DIR/ \ 93 | --do_train \ 94 | --do_eval \ 95 | --overwrite_output_dir \ 96 | --gradient_accumulation_steps $GRAD_ACC \ 97 | --per_gpu_train_batch_size $BATCH_SIZE \ 98 | --learning_rate $LR \ 99 | --max_seq_length $MAXL \ 100 | --num_train_epochs $EPOCH 101 | 102 | # Note: We do this in a separate step so that if we just predict with the model, 103 | # we still load the model trained on COPA. 104 | 105 | echo "Predicting on ${TASK}" 106 | 107 | CUDA_VISIBLE_DEVICES=$GPU python third_party/run_xcopa.py \ 108 | --task ${TASK} \ 109 | --train_lang en \ 110 | --predict_langs ${langs} \ 111 | --tokenizer_name ${MODEL} \ 112 | --model_name_or_path ${XCOPA_SAVE_DIR} \ 113 | --output_dir $XCOPA_SAVE_DIR/xcopa/ \ 114 | --do_predict \ 115 | --overwrite_output_dir 116 | -------------------------------------------------------------------------------- /scripts/train_xnli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google and DeepMind. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | REPO=$PWD 17 | MODEL=${1:-bert-base-multilingual-cased} 18 | GPU=${2:-0} 19 | DATA_DIR=${3:-"$REPO/download/"} 20 | OUT_DIR=${4:-"$REPO/outputs/"} 21 | 22 | export CUDA_VISIBLE_DEVICES=$GPU 23 | 24 | TASK='xnli' 25 | LR=2e-5 26 | EPOCH=5 27 | MAXL=128 28 | LANGS="ar,bg,de,el,en,es,fr,hi,ru,sw,th,tr,ur,vi,zh" 29 | LC="" 30 | if [ $MODEL == "bert-base-multilingual-cased" ]; then 31 | MODEL_TYPE="bert" 32 | elif [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-mlm-tlm-xnli15-1024" ]; then 33 | MODEL_TYPE="xlm" 34 | LC=" --do_lower_case" 35 | elif [ $MODEL == "xlm-roberta-large" ] || [ $MODEL == "xlm-roberta-base" ]; then 36 | MODEL_TYPE="xlmr" 37 | fi 38 | 39 | if [ $MODEL == "xlm-mlm-100-1280" ] || [ $MODEL == "xlm-roberta-large" ]; then 40 | BATCH_SIZE=2 41 | GRAD_ACC=16 42 | LR=3e-5 43 | else 44 | BATCH_SIZE=8 45 | GRAD_ACC=4 46 | LR=2e-5 47 | fi 48 | 49 | SAVE_DIR="$OUT_DIR/$TASK/${MODEL}-LR${LR}-epoch${EPOCH}-MaxLen${MAXL}/" 50 | mkdir -p $SAVE_DIR 51 | 52 | python $PWD/third_party/run_classify.py \ 53 | --model_type $MODEL_TYPE \ 54 | --model_name_or_path $MODEL \ 55 | --train_language en \ 56 | --task_name $TASK \ 57 | --do_train \ 58 | --do_eval \ 59 | --do_predict \ 60 | --data_dir $DATA_DIR/${TASK} \ 61 | --gradient_accumulation_steps $GRAD_ACC \ 62 | --per_gpu_train_batch_size $BATCH_SIZE \ 63 | --learning_rate $LR \ 64 | --num_train_epochs $EPOCH \ 65 | --max_seq_length $MAXL \ 66 | --output_dir $SAVE_DIR/ \ 67 | --save_steps 100 \ 68 | --eval_all_checkpoints \ 69 | --log_file 'train' \ 70 | --predict_languages $LANGS \ 71 | --save_only_best_checkpoint \ 72 | --overwrite_output_dir \ 73 | --eval_test_set $LC 74 | -------------------------------------------------------------------------------- /third_party/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /third_party/bert.py: -------------------------------------------------------------------------------- 1 | """BERT dual encoder model for retrieval.""" 2 | 3 | import torch 4 | from transformers.modeling_bert import BertModel, BertPreTrainedModel 5 | 6 | 7 | class BertForRetrieval(BertPreTrainedModel): 8 | """BERT dual encoder model for retrieval.""" 9 | 10 | def __init__(self, config, model_attr_name='bert', model_cls=BertModel): 11 | super().__init__(config) 12 | 13 | self.model_attr_name = model_attr_name 14 | self.model_cls = model_cls 15 | 16 | # Set model attribute, e.g. self.bert = BertModel(config) 17 | setattr(self, model_attr_name, model_cls(config)) 18 | 19 | def normalized_cls_token(cls_token): 20 | return torch.nn.functional.normalize(cls_token, p=2, dim=1) 21 | self.normalized_cls_token = normalized_cls_token 22 | self.logit_scale = torch.nn.Parameter(torch.empty(1)) 23 | torch.nn.init.constant_(self.logit_scale, 100.0) 24 | self.init_weights() 25 | 26 | def model(self): 27 | return getattr(self, self.model_attr_name) 28 | 29 | def forward( 30 | self, 31 | q_input_ids=None, 32 | q_attention_mask=None, 33 | q_token_type_ids=None, 34 | a_input_ids=None, 35 | a_attention_mask=None, 36 | a_token_type_ids=None, 37 | position_ids=None, 38 | head_mask=None, 39 | inputs_embeds=None, 40 | inference=False): 41 | outputs_a = self.model()( 42 | q_input_ids, 43 | attention_mask=q_attention_mask, 44 | token_type_ids=q_token_type_ids, 45 | position_ids=position_ids, 46 | head_mask=head_mask, 47 | inputs_embeds=inputs_embeds) 48 | if inference: 49 | # In inference mode, only use the first tower to get the encodings. 50 | return self.normalized_cls_token(outputs_a[1]) 51 | 52 | outputs_b = self.model()( 53 | a_input_ids, 54 | attention_mask=a_attention_mask, 55 | token_type_ids=a_token_type_ids, 56 | position_ids=position_ids, 57 | head_mask=head_mask, 58 | inputs_embeds=inputs_embeds) 59 | 60 | a_encodings = self.normalized_cls_token(outputs_a[1]) 61 | b_encodings = self.normalized_cls_token(outputs_b[1]) 62 | similarity = torch.matmul(a_encodings, torch.transpose(b_encodings, 0, 1)) 63 | logits = similarity * self.logit_scale 64 | batch_size = list(a_encodings.size())[0] 65 | labels = torch.arange(0, batch_size, device=logits.device) 66 | loss = torch.nn.CrossEntropyLoss()(logits, labels) 67 | return loss, a_encodings, b_encodings 68 | -------------------------------------------------------------------------------- /third_party/processors/pawsx.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. 3 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ PAWS-X utils (dataset loading and evaluation) """ 17 | 18 | 19 | import logging 20 | import os 21 | 22 | from transformers import DataProcessor 23 | from .utils import InputExample 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class PawsxProcessor(DataProcessor): 30 | """Processor for the PAWS-X dataset.""" 31 | 32 | def __init__(self): 33 | pass 34 | 35 | def get_examples(self, data_dir, language='en', split='train'): 36 | """See base class.""" 37 | examples = [] 38 | for lg in language.split(','): 39 | lines = self._read_tsv(os.path.join(data_dir, "{}-{}.tsv".format(split, lg))) 40 | 41 | for (i, line) in enumerate(lines): 42 | guid = "%s-%s-%s" % (split, lg, i) 43 | text_a = line[0] 44 | text_b = line[1] 45 | if split == 'test' and len(line) != 3: 46 | label = "0" 47 | else: 48 | label = str(line[2].strip()) 49 | assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str) 50 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label, language=lg)) 51 | return examples 52 | 53 | def get_translate_examples(self, data_dir, language='en', split='train'): 54 | """See base class.""" 55 | languages = language.split(',') 56 | examples = [] 57 | for language in languages: 58 | if split == 'train': 59 | file_path = os.path.join(data_dir, "translated/en-{}-translated.tsv".format(language)) 60 | else: 61 | file_path = os.path.join(data_dir, "translated/test-{}-en-translated.tsv".format(language)) 62 | logger.info("reading from " + file_path) 63 | lines = self._read_tsv(file_path) 64 | for (i, line) in enumerate(lines): 65 | guid = "%s-%s-%s" % (split, language, i) 66 | text_a = line[0] 67 | text_b = line[1] 68 | label = str(line[2].strip()) 69 | assert isinstance(text_a, str) and isinstance(text_b, str) and isinstance(label, str) 70 | examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label, language=language)) 71 | return examples 72 | 73 | def get_train_examples(self, data_dir, language='en'): 74 | """See base class.""" 75 | return self.get_examples(data_dir, language, split='train') 76 | 77 | def get_translate_train_examples(self, data_dir, language='en'): 78 | """See base class.""" 79 | return self.get_translate_examples(data_dir, language, split='train') 80 | 81 | def get_translate_test_examples(self, data_dir, language='en'): 82 | """See base class.""" 83 | return self.get_translate_examples(data_dir, language, split='test') 84 | 85 | def get_test_examples(self, data_dir, language='en'): 86 | """See base class.""" 87 | return self.get_examples(data_dir, language, split='test') 88 | 89 | def get_dev_examples(self, data_dir, language='en'): 90 | """See base class.""" 91 | return self.get_examples(data_dir, language, split='dev') 92 | 93 | def get_labels(self): 94 | """See base class.""" 95 | return ["0", "1"] 96 | 97 | 98 | pawsx_processors = { 99 | "pawsx": PawsxProcessor, 100 | } 101 | 102 | pawsx_output_modes = { 103 | "pawsx": "classification", 104 | } 105 | 106 | pawsx_tasks_num_labels = { 107 | "pawsx": 2, 108 | } 109 | -------------------------------------------------------------------------------- /third_party/ud-conversion-tools/conllu_to_conll.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import islice 3 | from pathlib import Path 4 | import argparse 5 | import sys, copy 6 | 7 | from lib.conll import CoNLLReader 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser(description="""Convert conllu to conll format""") 11 | parser.add_argument('input', help="conllu file") 12 | parser.add_argument('output', help="target file", type=Path) 13 | parser.add_argument('--replace_subtokens_with_fused_forms', help="By default removes fused tokens", default=False, action="store_true") 14 | parser.add_argument('--remove_deprel_suffixes', help="Restrict deprels to the common universal subset, e.g. nmod:tmod becomes nmod", default=False, action="store_true") 15 | parser.add_argument('--remove_node_properties', help="space-separated list of node properties to remove: form, lemma, cpostag, postag, feats", choices=['form', 'lemma', 'cpostag','postag','feats'], metavar='prop', type=str, nargs='+') 16 | parser.add_argument('--lang', help="specify a language 2-letter code", default="default") 17 | parser.add_argument('--output_format', choices=['conll2006', 'conll2009', 'conllu'], default="conll2006") 18 | parser.add_argument('--remove_arabic_diacritics', help="remove Arabic short vowels", default=False, action="store_true") 19 | parser.add_argument('--print_comments',default=False,action="store_true") 20 | parser.add_argument('--print_fused_forms',default=False,action="store_true") 21 | 22 | args = parser.parse_args() 23 | 24 | if sys.version_info < (3,0): 25 | print("Sorry, requires Python 3.x.") #suggestion: install anaconda python 26 | sys.exit(1) 27 | 28 | POSRANKPRECEDENCEDICT = defaultdict(list) 29 | POSRANKPRECEDENCEDICT["default"] = "VERB NOUN PROPN PRON ADJ NUM ADV INTJ AUX ADP DET PART CCONJ SCONJ X PUNCT ".split(" ") 30 | # POSRANKPRECEDENCEDICT["de"] = "PROPN ADP DET ".split(" ") 31 | POSRANKPRECEDENCEDICT["es"] = "VERB AUX PRON ADP DET".split(" ") 32 | POSRANKPRECEDENCEDICT["fr"] = "VERB AUX PRON NOUN ADJ ADV ADP DET PART SCONJ CONJ".split(" ") 33 | POSRANKPRECEDENCEDICT["it"] = "VERB AUX ADV PRON ADP DET INTJ".split(" ") 34 | 35 | if args.lang in POSRANKPRECEDENCEDICT: 36 | current_pos_precedence_list = POSRANKPRECEDENCEDICT[args.lang] 37 | else: 38 | current_pos_precedence_list = POSRANKPRECEDENCEDICT["default"] 39 | 40 | cio = CoNLLReader() 41 | orig_treebank = cio.read_conll_u(args.input)#, args.keep_fused_forms, args.lang, POSRANKPRECEDENCEDICT) 42 | modif_treebank = copy.copy(orig_treebank) 43 | 44 | # As per Dec 2015 the args.lang variable is redundant once you have current_pos_precedence_list 45 | # We keep it for future modifications, i.e. any language-specific modules 46 | for s in modif_treebank: 47 | # print('sentence', s.get_sentence_as_string(printid=True)) 48 | s.filter_sentence_content(args.replace_subtokens_with_fused_forms, args.lang, current_pos_precedence_list,args.remove_node_properties,args.remove_deprel_suffixes,args.remove_arabic_diacritics) 49 | 50 | cio.write_conll(modif_treebank,args.output, args.output_format,print_fused_forms=args.print_fused_forms, print_comments=args.print_comments) 51 | 52 | if __name__ == "__main__": 53 | main() -------------------------------------------------------------------------------- /third_party/ud-conversion-tools/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/xtreme/838c13b69daafb9328785d16caae2711e4012123/third_party/ud-conversion-tools/lib/__init__.py -------------------------------------------------------------------------------- /xtreme_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/xtreme/838c13b69daafb9328785d16caae2711e4012123/xtreme_score.png --------------------------------------------------------------------------------