├── .github └── workflows │ ├── code_quality.yml │ └── test.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── docs └── datasets.md ├── evaluation ├── __init__.py ├── eval.py ├── models │ ├── __init__.py │ └── loader.py ├── tasks │ ├── __init__.py │ ├── auto_task.py │ ├── jigsaw_toxicity_pred │ │ ├── __init__.py │ │ ├── english.json │ │ └── jigsaw_toxicity_pred.py │ ├── lama_trex │ │ ├── __init__.py │ │ ├── english.json │ │ └── lama_trex.py │ ├── lambada │ │ ├── __init__.py │ │ ├── english.json │ │ └── lambada.py │ ├── piaf │ │ ├── __init__.py │ │ ├── english.json │ │ ├── multilingual.json │ │ └── piaf.py │ ├── piqa │ │ ├── __init__.py │ │ ├── english.json │ │ └── piqa.py │ ├── template │ │ ├── __init__.py │ │ ├── english.json │ │ ├── multilingual.json │ │ └── template.py │ ├── tydiqa_primary │ │ ├── __init__.py │ │ └── tydiqa_primary.py │ ├── tydiqa_secondary │ │ ├── __init__.py │ │ ├── english.json │ │ └── tydiqa_secondary.py │ ├── webnlg │ │ ├── __init__.py │ │ ├── english.json │ │ └── webnlg.py │ ├── wmt │ │ ├── __init__.py │ │ ├── english.json │ │ └── wmt.py │ └── xquad │ │ ├── __init__.py │ │ ├── english.json │ │ ├── multilingual.json │ │ └── xquad.py ├── train.py └── utils │ ├── __init__.py │ ├── io.py │ └── log.py ├── poetry.lock ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── social-impact-group ├── README.md ├── create_bias_eval.ipynb ├── french_sentences.csv ├── french_vocabulary.csv └── resources │ ├── MADAMIRA-release-20190603-2.1.zip │ └── dela-fr-public.dic └── tests └── test_tydiqa_secondary.py /.github/workflows/code_quality.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/.github/workflows/code_quality.yml -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/.github/workflows/test.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/.gitignore -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/README.md -------------------------------------------------------------------------------- /docs/datasets.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/docs/datasets.md -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/eval.py -------------------------------------------------------------------------------- /evaluation/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/models/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/models/loader.py -------------------------------------------------------------------------------- /evaluation/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/__init__.py -------------------------------------------------------------------------------- /evaluation/tasks/auto_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/auto_task.py -------------------------------------------------------------------------------- /evaluation/tasks/jigsaw_toxicity_pred/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/jigsaw_toxicity_pred/english.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_langs": ["english"] 3 | } -------------------------------------------------------------------------------- /evaluation/tasks/jigsaw_toxicity_pred/jigsaw_toxicity_pred.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/jigsaw_toxicity_pred/jigsaw_toxicity_pred.py -------------------------------------------------------------------------------- /evaluation/tasks/lama_trex/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/lama_trex/english.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_langs": ["english"] 3 | } -------------------------------------------------------------------------------- /evaluation/tasks/lama_trex/lama_trex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/lama_trex/lama_trex.py -------------------------------------------------------------------------------- /evaluation/tasks/lambada/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/lambada/english.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /evaluation/tasks/lambada/lambada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/lambada/lambada.py -------------------------------------------------------------------------------- /evaluation/tasks/piaf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/piaf/english.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_langs": [] 3 | } -------------------------------------------------------------------------------- /evaluation/tasks/piaf/multilingual.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_langs": ["french"] 3 | } -------------------------------------------------------------------------------- /evaluation/tasks/piaf/piaf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/piaf/piaf.py -------------------------------------------------------------------------------- /evaluation/tasks/piqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/piqa/english.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /evaluation/tasks/piqa/piqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/piqa/piqa.py -------------------------------------------------------------------------------- /evaluation/tasks/template/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/template/english.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /evaluation/tasks/template/multilingual.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /evaluation/tasks/template/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/template/template.py -------------------------------------------------------------------------------- /evaluation/tasks/tydiqa_primary/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/tydiqa_primary/tydiqa_primary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/tydiqa_primary/tydiqa_primary.py -------------------------------------------------------------------------------- /evaluation/tasks/tydiqa_secondary/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/tydiqa_secondary/english.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_langs": ["english"] 3 | } -------------------------------------------------------------------------------- /evaluation/tasks/tydiqa_secondary/tydiqa_secondary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/tydiqa_secondary/tydiqa_secondary.py -------------------------------------------------------------------------------- /evaluation/tasks/webnlg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/webnlg/english.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/webnlg/english.json -------------------------------------------------------------------------------- /evaluation/tasks/webnlg/webnlg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/webnlg/webnlg.py -------------------------------------------------------------------------------- /evaluation/tasks/wmt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/wmt/english.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/wmt/english.json -------------------------------------------------------------------------------- /evaluation/tasks/wmt/wmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/wmt/wmt.py -------------------------------------------------------------------------------- /evaluation/tasks/xquad/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/tasks/xquad/english.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_langs": ["english"] 3 | } -------------------------------------------------------------------------------- /evaluation/tasks/xquad/multilingual.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/xquad/multilingual.json -------------------------------------------------------------------------------- /evaluation/tasks/xquad/xquad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/tasks/xquad/xquad.py -------------------------------------------------------------------------------- /evaluation/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/train.py -------------------------------------------------------------------------------- /evaluation/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/utils/io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/utils/io.py -------------------------------------------------------------------------------- /evaluation/utils/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/evaluation/utils/log.py -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | isort>=5.9.3 2 | black>=21.7b0 3 | flake8>=3.9.2 4 | pytest>=6.2.4 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/setup.py -------------------------------------------------------------------------------- /social-impact-group/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/social-impact-group/README.md -------------------------------------------------------------------------------- /social-impact-group/create_bias_eval.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/social-impact-group/create_bias_eval.ipynb -------------------------------------------------------------------------------- /social-impact-group/french_sentences.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/social-impact-group/french_sentences.csv -------------------------------------------------------------------------------- /social-impact-group/french_vocabulary.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/social-impact-group/french_vocabulary.csv -------------------------------------------------------------------------------- /social-impact-group/resources/MADAMIRA-release-20190603-2.1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/social-impact-group/resources/MADAMIRA-release-20190603-2.1.zip -------------------------------------------------------------------------------- /social-impact-group/resources/dela-fr-public.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/social-impact-group/resources/dela-fr-public.dic -------------------------------------------------------------------------------- /tests/test_tydiqa_secondary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/evaluation/HEAD/tests/test_tydiqa_secondary.py --------------------------------------------------------------------------------