├── .github ├── scripts │ └── release.py └── workflows │ ├── python-publish.yml │ └── release.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── mtasks.md ├── pyproject.toml ├── setup.cfg ├── src └── tasksource │ ├── .ipynb_checkpoints │ ├── access-checkpoint.py │ ├── preprocess-checkpoint.py │ ├── recast-checkpoint.py │ └── tasks-checkpoint.py │ ├── __init__.py │ ├── access.py │ ├── metadata │ ├── __init__.py │ ├── bigbench_groups.py │ ├── blimp_groups.py │ ├── original.txt │ └── popularity.py │ ├── mtasks.py │ ├── preprocess.py │ ├── recast.py │ └── tasks.py └── tasks.md /.github/scripts/release.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import subprocess 4 | 5 | 6 | def get_last_version() -> str: 7 | """Return the version number of the last release.""" 8 | json_string = ( 9 | subprocess.run( 10 | ["gh", "release", "view", "--json", "tagName"], 11 | check=True, 12 | stdout=subprocess.PIPE, 13 | stderr=subprocess.PIPE, 14 | ) 15 | .stdout.decode("utf8") 16 | .strip() 17 | ) 18 | 19 | return json.loads(json_string)["tagName"] 20 | 21 | 22 | def bump_patch_number(version_number: str) -> str: 23 | """Return a copy of `version_number` with the patch number incremented.""" 24 | major, minor, patch = version_number.split(".") 25 | return f"{major}.{minor}.{int(patch) + 1}" 26 | 27 | 28 | def create_new_patch_release(): 29 | """Create a new patch release on GitHub.""" 30 | try: 31 | last_version_number = get_last_version() 32 | except subprocess.CalledProcessError as err: 33 | if err.stderr.decode("utf8").startswith("HTTP 404:"): 34 | # The project doesn't have any releases yet. 
35 | new_version_number = "0.0.1" 36 | else: 37 | raise 38 | else: 39 | new_version_number = bump_patch_number(last_version_number) 40 | 41 | subprocess.run( 42 | ["gh", "release", "create", "--generate-notes", new_version_number], 43 | check=True, 44 | ) 45 | 46 | 47 | if __name__ == "__main__": 48 | create_new_patch_release() 49 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI.org 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | pypi: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v3 11 | with: 12 | fetch-depth: 0 13 | - run: python3 -m pip install --upgrade build && python3 -m build 14 | - name: Publish package 15 | uses: pypa/gh-action-pypi-publish@release/v1 16 | with: 17 | password: ${{ secrets.PYPI_API_TOKEN }} 18 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Create a new patch release 2 | on: workflow_dispatch 3 | jobs: 4 | github: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout 8 | uses: actions/checkout@v3 9 | - name: Create new patch release 10 | run: .github/scripts/release.py 11 | env: 12 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 
25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.1.0 2 | message: "If you use this work, please cite it as below." 3 | authors: 4 | - family-names: "Sileo" 5 | given-names: "Damien" 6 | title: "tasksource: A Dataset Harmonization Framework for Streamlined NLP Multi-Task Learning and Evaluation" 7 | version: "1.0.0" 8 | date-released: 2023-01-01 9 | url: "https://arxiv.org/abs/2301.05948" 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## tasksource ![](https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/5fc0bcb41160c47d1d43856b/j06-U5e2Tifi2xOnTudqS.jpeg?w=20&h=20&f=face) 600+ curated datasets and preprocessings for instant and interchangeable use 2 | 3 | Huggingface Datasets is an excellent library, but it lacks standardization, and datasets often require preprocessing work to be used interchangeably. 4 | `tasksource` streamlines interchangeable datasets usage to scale evaluation or multi-task learning. 5 | 6 | Each dataset is standardized to a `MultipleChoice`, `Classification`, or `TokenClassification` template with canonical fields. We focus on discriminative tasks (= with negative examples or classes) for our annotations but also provide a `SequenceToSequence` template. 
All implemented preprocessings are in [tasks.py](https://github.com/sileod/tasksource/blob/main/src/tasksource/tasks.py) or [tasks.md](https://github.com/sileod/tasksource/blob/main/tasks.md). A preprocessing is a function that accepts a dataset and returns the standardized dataset. Preprocessing code is concise and human-readable. 7 | 8 | ### Installation and usage: 9 | `pip install tasksource` 10 | ```python 11 | from tasksource import list_tasks, load_task 12 | df = list_tasks(multilingual=False) # takes some time 13 | 14 | for id in df[df.task_type=="MultipleChoice"].id: 15 | dataset = load_task(id) # all yielded datasets can be used interchangeably 16 | ``` 17 | 18 | Browse the 500+ curated tasks in tasks.md (200+ MultipleChoice tasks, 200+ Classification tasks), and feel free to request a new task. Datasets are downloaded to `$HF_DATASETS_CACHE` (like any Hugging Face dataset), so ensure you have more than 100GB of space available. 19 | 20 | You can now also use: 21 | ```python 22 | load_dataset("tasksource/data", "glue/rte",max_rows=30_000) 23 | ``` 24 | 25 | ### Pretrained models: 26 | 27 | Text encoder pretrained on tasksource reached state-of-the-art results: [🤗/deberta-v3-base-tasksource-nli](https://hf.co/sileod/deberta-v3-base-tasksource-nli) 28 | 29 | Tasksource pretraining is notably helpful for RLHF reward modeling or any kind of classification, including zero-shot. You can also find a large and a multilingual version. 
30 | 31 | ### tasksource-instruct 32 | 33 | The repo also contains some recasting code to convert tasksource datasets to instructions, providing one of the richest instruction-tuning datasets: 34 | [🤗/tasksource-instruct-v0](https://hf.co/datasets/tasksource/tasksource-instruct-v0) 35 | 36 | 37 | ### tasksource-label-nli 38 | 39 | We also recast all classification tasks as natural language inference, to improve entailment-based zero-shot classification detection: 40 | [🤗/zero-shot-label-nli](https://huggingface.co/datasets/tasksource/zero-shot-label-nli) 41 | 42 | ### Write and use custom preprocessings 43 | 44 | ```python 45 | from tasksource import MultipleChoice 46 | 47 | codah = MultipleChoice('question_propmt',choices_list='candidate_answers', 48 | labels='correct_answer_idx', 49 | dataset_name='codah', config_name='codah') 50 | 51 | winogrande = MultipleChoice('sentence',['option1','option2'],'answer', 52 | dataset_name='winogrande',config_name='winogrande_xl', 53 | splits=['train','validation',None]) # test labels are not usable 54 | 55 | tasks = [winogrande.load(), codah.load()] # Aligned datasets (same columns) can be used interchangeably 56 | ``` 57 | 58 | ### Citation and contact 59 | 60 | For more details, refer to this [article](https://arxiv.org/abs/2301.05948): 61 | ```bib 62 | @inproceedings{sileo-2024-tasksource, 63 | title = "tasksource: A Large Collection of {NLP} tasks with a Structured Dataset Preprocessing Framework", 64 | author = "Sileo, Damien", 65 | booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)", 66 | month = may, 67 | year = "2024", 68 | address = "Torino, Italia", 69 | publisher = "ELRA and ICCL", 70 | url = "https://aclanthology.org/2024.lrec-main.1361", 71 | pages = "15655--15684", 72 | } 73 | ``` 74 | For help integrating tasksource into your experiments, please contact [damien.sileo@inria.fr](mailto:damien.sileo@inria.fr). 
75 | 76 | 77 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = tasksource 3 | description = Preprocessings to prepare datasets for a task 4 | long_description = file: README.md 5 | long_description_content_type = text/markdown 6 | url = https://github.com/sileod/tasksource/ 7 | classifiers = 8 | Programming Language :: Python :: 3 9 | License :: OSI Approved :: BSD License 10 | Intended Audience :: Developers 11 | 12 | [options] 13 | package_dir = 14 | = src 15 | packages = find: 16 | python_requires = >=3.6 17 | install_requires = 18 | dotwiz 19 | funcy 20 | datasets 21 | exrex 22 | magicattr 23 | pandas 24 | numpy 25 | scipy 26 | sorcery 27 | 28 | [options.packages.find] 29 | where = src 30 | -------------------------------------------------------------------------------- /src/tasksource/.ipynb_checkpoints/access-checkpoint.py: -------------------------------------------------------------------------------- 1 | from .preprocess import Preprocessing 2 | import re 3 | import pandas as pd 4 | from . import tasks, recast 5 | from .metadata import dataset_rank 6 | from datasets import load_dataset 7 | import funcy as fc 8 | import os 9 | import copy 10 | from sorcery import dict_of 11 | from functools import cache 12 | import random 13 | 14 | 15 | class lazy_mtasks: 16 | def __getattr__(self, name): 17 | from . import mtasks 18 | return getattr(mtasks, name) 19 | 20 | def __dir__(self): 21 | from . 
import mtasks 22 | return dir(mtasks) 23 | lmtasks=lazy_mtasks() 24 | 25 | def parse_var_name(s): 26 | config_name,task_name = None,None 27 | if '__' in s and '___' not in s: # dataset__task 28 | dataset_name, task_name = s.split('__') 29 | elif '__' not in s.replace('___','') and '___' in s: #dataset___config 30 | dataset_name, config_name = s.split('___') 31 | elif '___' in s and '__' in s.split('___')[1]: #dataset___config__task 32 | dataset_name, config_task=s.split('___') 33 | config_name,task_name = config_task.split('__') 34 | else: # dataset 35 | dataset_name = s 36 | return dataset_name,config_name,task_name 37 | 38 | def pretty_name(x): 39 | dn = x.dataset_name.split("/")[-1] 40 | cn = x.config_name if x.config_name else "" 41 | tn = x.task_name if x.task_name else "" 42 | return f"{dn}/{cn}/{tn}".replace('//','/').rstrip('/') 43 | 44 | @cache 45 | def list_tasks(tasks_path=f'{os.path.dirname(__file__)}/tasks.py',multilingual=False,instruct=False, excluded=[]): 46 | if multilingual: 47 | tasks_path=tasks_path.replace('/tasks.py','/mtasks.py') 48 | task_order = open(tasks_path).readlines() 49 | task_order = [x.split('=')[0].rstrip() for x in task_order if '=' in x] 50 | task_order = [x for x in task_order if x.isidentifier()] 51 | task_order = fc.flip(dict(enumerate(task_order))) 52 | 53 | l = [] 54 | _tasks = (lmtasks if multilingual else tasks) 55 | 56 | for key in dir(_tasks): 57 | if key not in task_order: 58 | continue 59 | value=getattr(_tasks, key) 60 | if isinstance(value,Preprocessing): 61 | dataset_name, config_name, task_name = parse_var_name(key) 62 | dataset_name = (value.dataset_name if value.dataset_name else dataset_name) 63 | config_name = (value.config_name if value.config_name else config_name) 64 | hasattr(value,key) 65 | l+=[{'dataset_name': dataset_name, 66 | 'config_name' : config_name, 67 | 'task_name': task_name, 68 | 'preprocessing_name': key, 69 | 'task_type': value.__class__.__name__,'mapping': value, 70 | 
'rank':task_order.get(key,None)}] 71 | df=pd.DataFrame(l).explode('config_name') 72 | df = df.sort_values('rank').reset_index(drop=True) 73 | df['id'] = df.apply(lambda x: pretty_name(x), axis=1) 74 | df.insert(0, 'id', df.pop('id')) 75 | del df['rank'] 76 | if instruct: 77 | df=df[df.id.map(lambda x: not any(a in x for a in recast.improper_labels))] 78 | df=df[df.id.map(lambda x: not any(x in a for a in excluded))] 79 | return df 80 | 81 | #task_df =list_tasks() 82 | #mtask_df =list_tasks(multilingual=True) 83 | 84 | def dict_to_query(d=dict(), **kwargs): 85 | d={**d,**kwargs} 86 | return '&'.join([f'`{k}`=="{v}"' for k,v in d.items()]) 87 | 88 | def load_preprocessing(tasks=tasks, **kwargs): 89 | _tasks_df = list_tasks(multilingual=tasks==lmtasks) 90 | y = _tasks_df.copy().query(dict_to_query(**kwargs)).iloc[0] 91 | preprocessing= copy.copy(getattr(tasks, y.preprocessing_name)) 92 | for c in 'dataset_name','config_name': 93 | if not isinstance(getattr(preprocessing,c), str): 94 | setattr(preprocessing,c,getattr(y,c)) 95 | return preprocessing 96 | 97 | def load_task(id=None, dataset_name=None,config_name=None,task_name=None,preprocessing_name=None, 98 | max_rows=None, max_rows_eval=None, multilingual=False, instruct=False, seed=0, **load_dataset_kwargs): 99 | query = dict_of(id, dataset_name, config_name, task_name,preprocessing_name) 100 | query = {k:v for k,v in query.items() if v} 101 | _tasks = (lmtasks if multilingual else tasks) 102 | preprocessing = load_preprocessing(_tasks, **query) 103 | 104 | if "trust_remote_code" not in load_dataset_kwargs: 105 | load_dataset_kwargs["trust_remote_code"] = True 106 | 107 | dataset = load_dataset(preprocessing.dataset_name, preprocessing.config_name, **load_dataset_kwargs) 108 | dataset= preprocessing(dataset,max_rows, max_rows_eval) 109 | dataset.task_type = preprocessing.__class__.__name__ 110 | if instruct: 111 | dataset=recast.recast_instruct(dataset) 112 | return dataset 
-------------------------------------------------------------------------------- /src/tasksource/.ipynb_checkpoints/preprocess-checkpoint.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from dotwiz import DotWiz 3 | from dataclasses import dataclass 4 | from typing import Union 5 | import itertools 6 | import funcy as fc 7 | import exrex 8 | import magicattr 9 | import numpy as np 10 | import copy 11 | import datasets 12 | import time 13 | 14 | MAX_MC_OPTIONS = 4 15 | 16 | def get_column_names(dataset): 17 | cn = dataset.column_names 18 | if type(cn)==dict: 19 | return set(fc.flatten(cn.values())) 20 | else: 21 | return set(cn) 22 | 23 | 24 | def sample_dataset(dataset,n=10000, n_eval=1000,seed=0): 25 | for k in dataset: 26 | n_k=(n if k=='train' else n_eval) 27 | if n_k and len(dataset[k])>n_k: 28 | dataset[k]=dataset[k].train_test_split(train_size=n_k,seed=seed)['train'] 29 | return dataset 30 | 31 | class Preprocessing(DotWiz): 32 | default_splits = ('train','validation','test') 33 | _instances = [] 34 | 35 | def __post_init__(self): 36 | Preprocessing._instances+=[self] 37 | 38 | @staticmethod 39 | def __map_to_target(x,fn=lambda x:None, target=None): 40 | x[target]=fn(x) 41 | return x 42 | 43 | def load(self): 44 | return self(datasets.load_dataset(self.dataset_name,self.config_name)) 45 | 46 | def __call__(self,dataset, max_rows=None, max_rows_eval=None,seed=0): 47 | dataset = self.pre_process(dataset) 48 | 49 | # manage splits 50 | for k,v in zip(self.default_splits, self.splits): 51 | if v and k!=v: 52 | dataset[k]=dataset[v] 53 | del dataset[v] 54 | if k in dataset and not v: # obfuscated label 55 | del dataset[k] 56 | dataset = fix_splits(dataset) 57 | 58 | for k in list(dataset.keys()): 59 | if k not in self.default_splits: 60 | del dataset[k] 61 | dataset = sample_dataset(dataset, max_rows, max_rows_eval,seed=seed) 62 | 63 | # field annotated with a string 64 | 
substitutions = {v:k for k,v in self.to_dict().items() 65 | if (k and k not in {'splits','dataset_name','config_name'} 66 | and type(v)==str and k!=v)} 67 | 68 | dataset=dataset.remove_columns([c for c in substitutions.values() if c in dataset['train'].features and c not in substitutions]) 69 | dataset=dataset.rename_columns(substitutions) 70 | 71 | # field annotated with a function 72 | for k in self.to_dict().keys(): 73 | v=getattr(self, k) 74 | if callable(v) and k not in {"post_process","pre_process","load"}: 75 | dataset=dataset.map(self.__map_to_target, 76 | fn_kwargs={'fn':v,'target':k}) 77 | 78 | dataset=dataset.remove_columns( 79 | get_column_names(dataset)-set(self.to_dict().keys())) 80 | dataset = fix_labels(dataset) 81 | dataset = fix_splits(dataset) # again: label mapping changed 82 | dataset = self.post_process(dataset) 83 | return dataset 84 | 85 | 86 | @dataclass 87 | class cat(Preprocessing): 88 | fields:Union[str,list]=None 89 | separator:str=' ' 90 | 91 | def __call__(self, example=None): 92 | y=[np.char.array(example[f]) + sep 93 | for f,sep in zip(self.fields[::-1],itertools.repeat(self.separator))] 94 | y=list(sum(*y)) 95 | if len(y)==1: 96 | y=y[0] 97 | return y 98 | 99 | 100 | def pretty(f): 101 | class pretty_f(DotWiz): 102 | def __init__(self,*args): 103 | self.__f_arg = f(*args) 104 | for a in args: 105 | setattr(self,'value',a) 106 | 107 | def __call__(self, *args,**kwargs): 108 | return self.__f_arg(*args,**kwargs) 109 | 110 | def __repr__(self): 111 | return f"{self.__f_arg.__qualname__ .split('.')[0]}({self.value})" 112 | return pretty_f 113 | 114 | class dotgetter: 115 | def __init__(self, path=''): 116 | self.path=path 117 | 118 | def __bool__(self): 119 | return bool(self.path) 120 | 121 | def __getattr__(self, k): 122 | return self.__class__(f'{self.path}.{k}'.lstrip('.')) 123 | 124 | def __getitem__(self, i): 125 | return self.__class__(f'{self.path}[{i}]') 126 | 127 | def __call__(self, example=None): 128 | return 
magicattr.get(DotWiz(example), self.path) 129 | 130 | def __hash__(self): 131 | return hash(self.path) 132 | 133 | 134 | @dataclass 135 | class ClassificationFields(Preprocessing): 136 | sentence1:str='sentence1' 137 | sentence2:str='sentence2' 138 | labels:str='labels' 139 | 140 | @dataclass 141 | class Seq2SeqLMFields(Preprocessing): 142 | prompt:str='prompt' 143 | output:str='output' 144 | 145 | @dataclass 146 | class TokenClassificationFields(Preprocessing): 147 | tokens:str='tokens' 148 | labels:str='labels' 149 | 150 | @dataclass 151 | class MultipleChoiceFields(Preprocessing): 152 | inputs:str='input' 153 | choices:Iterable=tuple() 154 | labels:str='labels' 155 | choices_list:str=None 156 | def __post_init__(self): 157 | for i, c in enumerate(self.choices): 158 | setattr(self,f'choice{i}',c) 159 | delattr(self,'choices') 160 | if not self.choices_list: 161 | delattr(self,'choices_list') 162 | 163 | def __call__(self,dataset, *args, **kwargs): 164 | dataset = super().__call__(dataset, *args, **kwargs) 165 | if self.choices_list: 166 | dataset = dataset.filter(lambda x: 10.1].UID) 9 | del dfh, dfm, df 10 | 11 | blimp_groups = { 12 | "syntax": [ 13 | "adjunct_island", 14 | "animate_subject_passive", 15 | "animate_subject_trans", 16 | "causative", 17 | "complex_NP_island", 18 | "coordinate_structure_constraint_complex_left_branch", 19 | "coordinate_structure_constraint_object_extraction", 20 | "drop_argument", 21 | "ellipsis_n_bar_1", 22 | "ellipsis_n_bar_2", 23 | "inchoative", 24 | "intransitive", 25 | "left_branch_island_echo_question", 26 | "left_branch_island_simple_question", 27 | "passive_1", 28 | "passive_2", 29 | "sentential_subject_island", 30 | "transitive", 31 | "wh_island", 32 | "wh_questions_object_gap", 33 | "wh_questions_subject_gap", 34 | "wh_questions_subject_gap_long_distance", 35 | "wh_vs_that_no_gap", 36 | "wh_vs_that_no_gap_long_distance", 37 | "wh_vs_that_with_gap", 38 | "wh_vs_that_with_gap_long_distance" 39 | ], 40 | "morphology": [ 41 | 
"anaphor_gender_agreement", 42 | "anaphor_number_agreement", 43 | "determiner_noun_agreement_1", 44 | "determiner_noun_agreement_2", 45 | "determiner_noun_agreement_irregular_1", 46 | "determiner_noun_agreement_irregular_2", 47 | "determiner_noun_agreement_with_adj_2", 48 | "determiner_noun_agreement_with_adj_irregular_1", 49 | "determiner_noun_agreement_with_adj_irregular_2", 50 | "determiner_noun_agreement_with_adjective_1", 51 | "distractor_agreement_relational_noun", 52 | "distractor_agreement_relative_clause", 53 | "irregular_past_participle_adjectives", 54 | "irregular_past_participle_verbs", 55 | "irregular_plural_subject_verb_agreement_1", 56 | "irregular_plural_subject_verb_agreement_2", 57 | "regular_plural_subject_verb_agreement_1", 58 | "regular_plural_subject_verb_agreement_2" 59 | ], 60 | "syntax_semantics": [ 61 | "existential_there_object_raising", 62 | "existential_there_subject_raising", 63 | "expletive_it_object_raising", 64 | "only_npi_scope", 65 | "principle_A_c_command", 66 | "principle_A_case_1", 67 | "principle_A_domain_1", 68 | "principle_A_domain_2", 69 | "principle_A_domain_3", 70 | "principle_A_reconstruction", 71 | "sentential_negation_npi_scope", 72 | "tough_vs_raising_1", 73 | "tough_vs_raising_2" 74 | ], 75 | "semantics": [ 76 | "existential_there_quantifiers_1", 77 | "existential_there_quantifiers_2", 78 | "matrix_question_npi_licensor_present", 79 | "npi_present_1", 80 | "npi_present_2", 81 | "only_npi_licensor_present", 82 | "sentential_negation_npi_licensor_present", 83 | "superlative_quantifiers_1", 84 | "superlative_quantifiers_2" 85 | ], 86 | "syntax/semantics": [ 87 | "principle_A_case_2" 88 | ] 89 | } 90 | -------------------------------------------------------------------------------- /src/tasksource/metadata/original.txt: -------------------------------------------------------------------------------- 1 | WANLI 2 | recast/recast_verbnet 3 | recast/recast_verbcorner 4 | recast/recast_ner 5 | recast/recast_sentiment 6 | 
recast/recast_puns 7 | recast/recast_factuality 8 | recast/recast_megaveridicality 9 | probability_words_nli/reasoning_1hop 10 | probability_words_nli/usnli 11 | probability_words_nli/reasoning_2hop 12 | nan-nli/joey234--nan-nli 13 | nli_fever 14 | breaking_nli 15 | conj_nli 16 | fracas 17 | dialogue_nli 18 | mpe 19 | dnc 20 | recast_white/fnplus 21 | recast_white/sprl 22 | recast_white/dpr 23 | robust_nli/IS_CS 24 | robust_nli/LI_LI 25 | robust_nli/ST_WO 26 | robust_nli/PI_SP 27 | robust_nli/PI_CD 28 | robust_nli/ST_SE 29 | robust_nli/ST_NE 30 | robust_nli/ST_LM 31 | robust_nli_is_sd 32 | robust_nli_li_ts 33 | gen_debiased_nli/snli_seq_z 34 | gen_debiased_nli/snli_z_aug 35 | gen_debiased_nli/snli_par_z 36 | gen_debiased_nli/mnli_par_z 37 | gen_debiased_nli/mnli_z_aug 38 | gen_debiased_nli/mnli_seq_z 39 | add_one_rte 40 | hlgd 41 | conll2003/pos_tags 42 | conll2003/chunk_tags 43 | conll2003/ner_tags 44 | hh-rlhf 45 | model-written-evals 46 | fig-qa 47 | social_i_qa 48 | balanced-copa 49 | e-CARE 50 | insincere-questions 51 | TuringBench 52 | vitaminc/tals--vitaminc 53 | rumoureval_2019/RumourEval2019 54 | tweet_eval/irony 55 | tweet_eval/stance_abortion 56 | tweet_eval/hate 57 | tweet_eval/stance_atheism 58 | tweet_eval/stance_climate 59 | tweet_eval/emoji 60 | tweet_eval/offensive 61 | tweet_eval/sentiment 62 | tweet_eval/emotion 63 | tweet_eval/stance_feminist 64 | tweet_eval/stance_hillary 65 | discovery/discovery 66 | pragmeval/verifiability 67 | pragmeval/mrda 68 | pragmeval/switchboard 69 | pragmeval/emergent 70 | pragmeval/gum 71 | pragmeval/sarcasm 72 | pragmeval/stac 73 | pragmeval/pdtb 74 | silicone/dyda_e 75 | silicone/oasis 76 | silicone/meld_s 77 | silicone/meld_e 78 | silicone/maptask 79 | silicone/dyda_da 80 | silicone/sem 81 | silicone/iemocap 82 | lex_glue/scotus 83 | lex_glue/ledgar 84 | language-identification 85 | rotten_tomatoes 86 | hate_speech18 87 | sms_spam 88 | snips_built_in_intents 89 | hate_speech_offensive 90 | hyperpartisan_news 91 | 
sciie 92 | citation_intent 93 | scicite 94 | lexical_relation_classification/ROOT09 95 | lexical_relation_classification/CogALexV 96 | lexical_relation_classification/K&H+N 97 | lexical_relation_classification/BLESS 98 | lexical_relation_classification/EVALution 99 | crowdflower/political-media-bias 100 | crowdflower/tweet_global_warming 101 | crowdflower/text_emotion 102 | crowdflower/political-media-message 103 | crowdflower/political-media-audience 104 | crowdflower/economic-news 105 | crowdflower/corporate-messaging 106 | crowdflower/airline-sentiment 107 | crowdflower/sentiment_nuclear_power 108 | ethics/commonsense 109 | ethics/deontology 110 | ethics/justice 111 | ethics/virtue 112 | tweets_hate_speech_detection 113 | wnut_17/wnut_17 114 | ncbi_disease/ncbi_disease 115 | acronym_identification 116 | jnlpba/jnlpba 117 | ontonotes_english/SpeedOfMagic--ontonotes_english 118 | blog_authorship_corpus/gender 119 | blog_authorship_corpus/horoscope 120 | blog_authorship_corpus/job 121 | open_question_type 122 | mc_taco 123 | discosense 124 | EffectiveFeedbackStudentWriting 125 | phrase_similarity 126 | scientific-exaggeration-detection 127 | fever-evidence-related/mwong--fever-related 128 | dynasent/dynabench.dynasent.r1.all/r1 129 | dynasent/dynabench.dynasent.r2.all/r2 130 | sem_eval_2010_task_8 131 | medmcqa 132 | logiqa 133 | cycic_classification 134 | cycic_multiplechoice 135 | commonsense_qa_2.0 136 | lingnli 137 | monotonicity-entailment 138 | arct 139 | scinli 140 | naturallogic 141 | onestop_qa 142 | moral_stories/full 143 | prost 144 | dynahate 145 | syntactic-augmentation-nli 146 | autotnli 147 | CONDAQA 148 | webgpt_comparisons 149 | synthetic-instruct-gptj-pairwise 150 | scruples 151 | wouldyourather 152 | attempto-nli 153 | defeasible-nli/snli 154 | defeasible-nli/atomic 155 | help-nli 156 | nli-veridicality-transitivity 157 | natural-language-satisfiability 158 | lonli 159 | dadc-limit-nli 160 | FLUTE 161 | summarize_from_feedback/comparisons 162 | 
folio 163 | tomi-nli 164 | avicenna 165 | SHP 166 | MedQA-USMLE-4-options-hf 167 | wikimedqa/medwiki 168 | cicero 169 | mutual 170 | NeQA 171 | quote-repetition 172 | redefine-math 173 | puzzte 174 | implicatures 175 | race-c 176 | spartqa-yn 177 | spartqa-mchoice 178 | temporal-nli 179 | riddle_sense 180 | clcd-english 181 | twentyquestions 182 | reclor 183 | counterfactually-augmented-imdb 184 | counterfactually-augmented-snli 185 | cnli 186 | boolq-natural-perturbations 187 | equate 188 | ScienceQA_text_only 189 | ekar_english 190 | implicit-hate-stg1 191 | logiqa-2.0-nli 192 | PARARULE-Plus 193 | mindgames 194 | universal_dependencies/en_partut/deprel 195 | universal_dependencies/en_lines/deprel 196 | universal_dependencies/en_gum/deprel 197 | universal_dependencies/en_ewt/deprel 198 | ambient 199 | path-naturalness-prediction 200 | cloth 201 | dgen 202 | oasst1_pairwise_rlhf_reward 203 | I2D2 204 | args_me 205 | Touche23-ValueEval 206 | starcon 207 | banking77 208 | ruletaker 209 | lsat_qa/all 210 | ConTRoL-nli 211 | tracie 212 | sherliic 213 | sen-making/1 214 | sen-making/2 215 | mbib-base/cognitive-bias 216 | mbib-base/fake-news 217 | mbib-base/gender-bias 218 | mbib-base/hate-speech 219 | mbib-base/linguistic-bias 220 | mbib-base/political-bias 221 | mbib-base/racial-bias 222 | mbib-base/text-level-bias 223 | robustLR 224 | v1/gen_train234_test2to10 225 | logical-fallacy 226 | parade 227 | cladder 228 | subjectivity 229 | MOH 230 | VUAC 231 | TroFi 232 | sharc_modified/mod 233 | conceptrules_v2 234 | disrpt/eng.dep.scidtb 235 | conll2000 236 | few-nerd/supervised 237 | com2sense 238 | scone 239 | winodict 240 | fool-me-twice 241 | monli 242 | corr2cause 243 | apt 244 | twitter-financial-news-sentiment 245 | SpaceNLI 246 | propsegment/nli 247 | HatemojiBuild 248 | regset 249 | esci 250 | dnd_style_intents 251 | -------------------------------------------------------------------------------- /src/tasksource/metadata/popularity.py: 
-------------------------------------------------------------------------------- 1 | dataset_rank = {'glue': 0, 2 | 'super_glue': 12, 3 | 'tweet_eval': 23, 4 | 'blimp': 34, 5 | 'imdb': 101, 6 | 'wikitext': 102, 7 | 'squad': 106, 8 | 'trec': 107, 9 | 'openwebtext': 108, 10 | 'rotten_tomatoes': 109, 11 | 'anli': 110, 12 | 'adversarial_qa': 111, 13 | 'ai2_arc': 115, 14 | 'xsum': 117, 15 | 'amazon_reviews_multi': 118, 16 | 'ag_news': 125, 17 | 'yelp_review_full': 126, 18 | 'wino_bias': 127, 19 | 'piqa': 131, 20 | 'duorc': 132, 21 | 'quail': 134, 22 | 'trivia_qa': 135, 23 | 'cnn_dailymail': 143, 24 | 'common_gen': 146, 25 | 'sst': 147, 26 | 'conll2003': 150, 27 | 'financial_phrasebank': 151, 28 | 'babi_qa': 155, 29 | 'poem_sentiment': 163, 30 | 'dream': 164, 31 | 'paws': 165, 32 | 'emotion': 168, 33 | 'kilt_tasks': 169, 34 | 'sciq': 180, 35 | 'cos_e': 181, 36 | 'dbpedia_14': 183, 37 | 'newsgroup': 184, 38 | 'cosmos_qa': 244, 39 | 'squad_v2': 245, 40 | 'samsum': 246, 41 | 'amazon_polarity': 247, 42 | 'multi_news': 248, 43 | 'wiki_hop': 249, 44 | 'quartz': 251, 45 | 'qasc': 252, 46 | 'wiki_qa': 253, 47 | 'openbookqa': 254, 48 | 'ropes': 256, 49 | 'quoref': 257, 50 | 'snli': 258, 51 | 'app_reviews': 259, 52 | 'gigaword': 260, 53 | 'wiki_bio': 261, 54 | 'amazon_us_reviews': 262, 55 | 'scan': 308, 56 | 'race': 320, 57 | 'swag': 323, 58 | 'codah': 325, 59 | 'ccdv/arxiv-summarization': 331, 60 | 'subjqa': 333, 61 | 'universal_morphologies': 339, 62 | 'hans': 447, 63 | 'sst2': 448, 64 | 'guardian_authorship': 449, 65 | 'math_qa': 465, 66 | 'librispeech_asr': 466, 67 | 'hendrycks_test': 469, 68 | 'openai_humaneval': 526, 69 | 'ptb_text_only': 527, 70 | 'pubmed_qa': 528, 71 | 'head_qa': 531, 72 | 'ought/raft': 533, 73 | 'ade_corpus_v2': 544, 74 | 'cbt': 547, 75 | 'bookcorpus': 552, 76 | 'squadshifts': 553, 77 | 'story_cloze': 557, 78 | 'multi_nli': 559, 79 | 'qanta': 560, 80 | 'hate_speech18': 564, 81 | 'gem': 565, 82 | 'lex_glue': 599, 83 | 'deepmind/code_contests': 606, 84 | 
'imagenet-1k': 607, 85 | 'blended_skill_talk': 608, 86 | 'sms_spam': 609, 87 | 'asset': 610, 88 | 'fever': 612, 89 | 'commonsense_qa': 615, 90 | 'scientific_papers': 616, 91 | 'evidence_infer_treatment': 618, 92 | 'hotpot_qa': 620, 93 | 'superb': 622, 94 | 'sick': 628, 95 | 'humicroedit': 629, 96 | 'snips_built_in_intents': 631, 97 | 'winograd_wsc': 632, 98 | 'bigbench': 634, 99 | 'multi_woz_v22': 801, 100 | 'lambada': 803, 101 | 'banking77': 804, 102 | 'hate_speech_offensive': 805, 103 | 'yahoo_answers_topics': 806, 104 | 'ccdv/cnn_dailymail': 807, 105 | 'hyperpartisan_news_detection': 810, 106 | 'gsm8k': 812, 107 | 'wikisql': 814, 108 | 'the_pile': 815, 109 | 'health_fact': 825, 110 | 'mdd': 826, 111 | 'web_questions': 830, 112 | 'ethos': 831, 113 | 'wnut_17': 833, 114 | 'medical_questions_pairs': 834, 115 | 'scitldr': 835, 116 | 'drop': 838, 117 | 'squad_adversarial': 839, 118 | 'e2e_nlg_cleaned': 841, 119 | 'onestop_english': 842, 120 | 'pragmeval': 843, 121 | 'relbert/analogy_questions': 863, 122 | 'nq_open': 869, 123 | 'daily_dialog': 870, 124 | 'mc_taco': 871, 125 | 'crows_pairs': 872, 126 | 'go_emotions': 873, 127 | 'ncbi_disease': 875, 128 | 'boolq': 876, 129 | 'movie_rationales': 877, 130 | 'climate_fever': 878, 131 | 'discovery': 879, 132 | 'lama': 881, 133 | 'ecthr_cases': 885, 134 | 'jfleg': 887, 135 | 'selqa': 888, 136 | 'acronym_identification': 892, 137 | 'scicite': 893, 138 | 'tab_fact': 894, 139 | 'wiki_asp': 896, 140 | 'enriched_web_nlg': 916, 141 | 'svhn': 918, 142 | 'docred': 920, 143 | 'conllpp': 921, 144 | 'liar': 922, 145 | 'multi_x_science_sum': 923, 146 | 'discofuse': 924, 147 | 'competition_math': 926, 148 | 'biosses': 927, 149 | 'jnlpba': 928, 150 | 'web_nlg': 929, 151 | 'qa_srl': 937, 152 | 'neural_code_search': 938, 153 | 'conv_ai_2': 940, 154 | 'craigslist_bargains': 941, 155 | 'qed': 942, 156 | 'conv_ai_3': 943, 157 | 'conv_ai': 944, 158 | 'turk': 945, 159 | 'covid_qa_castorini': 946, 160 | 'sem_eval_2014_task_1': 947, 161 | 'mwsc': 
948, 162 | 'gutenberg_time': 949, 163 | 'billsum': 950, 164 | 'riddle_sense': 951, 165 | 'species_800': 952, 166 | 'hlgd': 953, 167 | 'definite_pronoun_resolution': 954, 168 | 'tmu_gfm_dataset': 955, 169 | 'relbert/semeval2012_relational_similarity_v4': 956, 170 | 'clinc_oos': 957, 171 | 'imppres': 960, 172 | 'mrqa': 976, 173 | 'cc_news': 977, 174 | 'lmqg/qag_tweetqa': 978, 175 | 'aeslc': 979, 176 | 'big_patent': 980, 177 | 'eli5': 990, 178 | 'scene_parse_150': 991, 179 | 'circa': 993, 180 | 'aqua_rat': 994, 181 | 'nlu_evaluation_data': 996, 182 | 'newspop': 997, 183 | 'relbert/lexical_relation_classification': 998, 184 | 'yahoo_answers_qa': 1003, 185 | 'emo': 1004, 186 | 'silicone': 1005, 187 | 'cord19': 1015, 188 | 'tweet_qa': 1018, 189 | 'meta_woz': 1019, 190 | 'md_gender_bias': 1021, 191 | 'art': 1031, 192 | 'google_wellformed_query': 1032, 193 | 'ambig_qa': 1033, 194 | 'taskmaster2': 1035, 195 | 'quac': 1042, 196 | 'freebase_qa': 1043, 197 | 'quora': 1044, 198 | 'numer_sense': 1045, 199 | 'narrativeqa': 1046, 200 | 'ccdv/pubmed-summarization': 1047, 201 | 'qa_zre': 1049, 202 | 'limit': 1050, 203 | 'tweets_hate_speech_detection': 1051, 204 | 'mocha': 1052, 205 | 'hatexplain': 1053, 206 | 'bing_coronavirus_query_set': 1054, 207 | 'great_code': 1055, 208 | 'medal': 1056, 209 | 'sent_comp': 1057, 210 | 'kelm': 1058, 211 | 'natural_questions': 1059, 212 | 'wiki_split': 1061, 213 | 'zest': 1062, 214 | 'cfq': 1063, 215 | 'multi_re_qa': 1071, 216 | 'stereoset': 1080, 217 | 'coqa': 1082, 218 | 'cuad': 1083, 219 | 'break_data': 1084, 220 | 'mbpp': 1089, 221 | 'knkarthick/dialogsum': 1091, 222 | 'wiki_auto': 1092, 223 | 'pile-of-law/pile-of-law': 1097, 224 | 'pg19': 1132, 225 | 'DFKI-SLT/few-nerd': 1133, 226 | 'wikicorpus': 1136, 227 | 'e2e_nlg': 1142, 228 | 'anton-l/superb': 1143, 229 | 'ghomasHudson/muld': 1144, 230 | 'Exr0n/wiki-entity-similarity': 1150, 231 | 'BeIR/nfcorpus': 1156, 232 | 'ccdv/govreport-summarization': 1158, 233 | 'woz_dialogue': 1159, 234 | 
'reddit': 1164, 235 | 'EMBO/sd-nlp': 1165, 236 | 'empathetic_dialogues': 1170, 237 | 'BeIR/fiqa': 1171, 238 | 'generics_kb': 1173, 239 | 'swda': 1177, 240 | 'wikitablequestions': 1178, 241 | 'pubmed': 1183, 242 | 'chr_en': 1184, 243 | 'sharc': 1185, 244 | 'sharc_modified': 1186, 245 | 'BeIR/scifact': 1190, 246 | 'nell': 1192, 247 | 'patriziobellan/PET': 1196, 248 | 'EMBO/biolang': 1198, 249 | 'dynabench/qa': 1202, 250 | 'reddit_tifu': 1206, 251 | 'BeIR/scidocs': 1208, 252 | 'pec': 1210, 253 | 'tner/tweetner7': 1213, 254 | 'BeIR/arguana': 1214, 255 | 'multidoc2dial': 1216, 256 | 'taskmaster1': 1219, 257 | 'spider': 1221, 258 | 'adv_glue': 1222, 259 | 'allenai/mslr2022': 1228, 260 | 'conceptnet5': 1230, 261 | 'tyqiangz/multilingual-sentiments': 1233, 262 | 'newsqa': 1246, 263 | 'metashift': 1249, 264 | 'so_stacksample': 1250, 265 | 'doc2dial': 1253, 266 | 'search_qa': 1256, 267 | 'yhavinga/mc4_nl_cleaned': 1258, 268 | 'hope_edi': 1270, 269 | 'proto_qa': 1273, 270 | 'tuple_ie': 1276, 271 | 'simple_questions_v2': 1279, 272 | 'nlpaueb/finer-139': 1282, 273 | 'bookcorpusopen': 1283, 274 | 'tner/ontonotes5': 1284, 275 | 'crd3': 1285, 276 | 'ucberkeley-dlab/measuring-hate-speech': 1286, 277 | 'gap': 1287, 278 | 'recipe_nlg': 1288, 279 | 'schema_guided_dstc8': 1289, 280 | 'BeIR/beir': 1291, 281 | 'sagnikrayc/mctest': 1294, 282 | 'eurlex': 1296, 283 | 'corypaik/coda': 1297, 284 | 'bc2gm_corpus': 1298, 285 | 'ascent_kb': 1299, 286 | 'curiosity_dialogs': 1301, 287 | 'covid_qa_deepset': 1302, 288 | 'air_dialogue': 1303, 289 | 'taskmaster3': 1305, 290 | 'xsum_factuality': 1306, 291 | 'medical_dialog': 1308, 292 | 'BeIR/trec-covid': 1312, 293 | 'lhoestq/test': 1314, 294 | 'newsroom': 1315, 295 | 'tne': 1316, 296 | 'covid_qa_ucsd': 1317, 297 | 'fhamborg/news_sentiment_newsmtsc': 1319, 298 | 'prachathai67k': 1321, 299 | 'cardiffnlp/tweet_topic_multi': 1322, 300 | 'datacommons_factcheck': 1323, 301 | 'deal_or_no_dialog': 1325, 302 | 'ubuntu_dialogs_corpus': 1327, 303 | 
'eu_regulatory_ir': 1329, 304 | 'scifact': 1331, 305 | 'wi_locness': 1333, 306 | 'relbert/relation_mapping': 1335, 307 | 'coastalcph/fairlex': 1336, 308 | 'asnq': 1340, 309 | 'peer_read': 1341, 310 | 'metaeval/linguisticprobing': 1343, 311 | 'jigsaw_unintended_bias': 1353, 312 | 'totto': 1354, 313 | 'irc_disentangle': 1355, 314 | 'med_hop': 1357, 315 | 'numeric_fused_head': 1359, 316 | 'ollie': 1361, 317 | 'per_sent': 1363, 318 | 'SocialGrep/ten-million-reddit-answers': 1364, 319 | 'lmqg/qg_squad': 1366, 320 | 's2orc': 1367, 321 | 'Hellisotherpeople/DebateSum': 1368, 322 | 'SocialGrep/reddit-crypto-aug-2021': 1369, 323 | 'jigsaw_toxicity_pred': 1371, 324 | 'GroNLP/ik-nlp-22_slp': 1372, 325 | 'SocialGrep/reddit-nonewnormal-complete': 1374, 326 | 'SocialGrep/reddit-wallstreetbets-aug-2021': 1376, 327 | 'SocialGrep/the-reddit-covid-dataset': 1378, 328 | 'SocialGrep/top-american-universities-on-reddit': 1380, 329 | 'BeIR/beir-corpus': 1382, 330 | 'SocialGrep/one-year-of-r-india': 1384, 331 | 'BritishLibraryLabs/EThOS-PhD-metadata': 1386, 332 | 'librispeech_lm': 1388, 333 | 'few_rel': 1389, 334 | 'arxiv_dataset': 1390, 335 | 'lc_quad': 1391, 336 | 'diplomacy_detection': 1392, 337 | 'lmqg/qa_squadshifts_pseudo': 1393, 338 | 'grail_qa': 1461, 339 | 'tner/wnut2017': 1462, 340 | 'demo-org/auditor_review': 1463, 341 | 'allenai/real-toxicity-prompts': 1464, 342 | 'BeIR/nfcorpus-qrels': 1465, 343 | 'onestop_qa': 1466, 344 | 'demelin/moral_stories': 1467, 345 | 'atomic': 1493, 346 | 'crawl_domain': 1494, 347 | 'BeIR/quora': 1495, 348 | 'Abirate/english_quotes': 1497, 349 | 'narrativeqa_manual': 1498, 350 | 'BeIR/fiqa-qrels': 1499, 351 | 'social_bias_frames': 1500, 352 | 'pkavumba/balanced-copa': 1501, 353 | 'eraser_multi_rc': 1502, 354 | 'sled-umich/TRIP': 1503, 355 | 'opinosis': 1504, 356 | 'PiC/phrase_sense_disambiguation': 1505, 357 | 'enwik8': 1506, 358 | 'sem_eval_2020_task_11': 1508, 359 | 'gooaq': 1509, 360 | 'linnaeus': 1510, 361 | 'hover': 1511, 362 | 
'GonzaloA/fake_news': 1512, 363 | 'consumer-finance-complaints': 1513, 364 | 'ohsumed': 1514, 365 | 'casino': 1515, 366 | 'gfissore/arxiv-abstracts-2021': 1516, 367 | 'conv_questions': 1517, 368 | 'hate_offensive': 1518, 369 | 'sofc_materials_articles': 1519, 370 | 'wanyu/IteraTeR_human_sent': 1520, 371 | 'dialog_re': 1521, 372 | 'fake_news_english': 1522, 373 | 'dart': 1523, 374 | 'blog_authorship_corpus': 1524, 375 | 'msr_zhen_translation_parity': 1525, 376 | 'cryptonite': 1526, 377 | 'disfl_qa': 1527, 378 | 'olm/olm-CC-MAIN-2022-21-sampling-ratio-0.14775510204': 1528, 379 | 'olm/olm-CC-MAIN-2022-33-sampling-ratio-0.20': 1529, 380 | 'coarse_discourse': 1530, 381 | 'eth_py150_open': 1531, 382 | 'event2Mind': 1532, 383 | 'Paul/hatecheck': 1533, 384 | 'eli5_category': 1534, 385 | 'hippocorpus': 1535, 386 | 'the_pile_books3': 1536, 387 | 'coached_conv_pref': 1537, 388 | 'has_part': 1538, 389 | 'times_of_india_news_headlines': 1539, 390 | 'medmcqa': 1540, 391 | 'Babelscape/rebel-dataset': 1541, 392 | 'glucose': 1542, 393 | 'msr_text_compression': 1543, 394 | 'msr_genomics_kbcomp': 1544, 395 | 'SpeedOfMagic/ontonotes_english': 1545, 396 | 'msr_sqa': 1546, 397 | 'wiki_movies': 1547, 398 | 'hybrid_qa': 1548, 399 | 'metooma': 1549, 400 | 'multi_nli_mismatch': 1550, 401 | 'text2log': 1551, 402 | 'the_pile_stack_exchange': 1552, 403 | 're_dial': 1553, 404 | 'inquisitive_qg': 1554, 405 | 'SocialGrep/one-million-reddit-jokes': 1555, 406 | 'time_dial': 1556, 407 | 'BeIR/scifact-qrels': 1557, 408 | 'sede': 1558, 409 | 'mutual_friends': 1559, 410 | 'pass': 1560, 411 | 'allenai/multi_lexsum': 1561, 412 | 'youtube_caption_corrections': 1562, 413 | 'NbAiLab/norec_agg': 1563, 414 | 'DanL/scientific-challenges-and-directions-dataset': 1564, 415 | 'SocialGrep/one-million-reddit-questions': 1565, 416 | 'Motahar/github-issues': 1566, 417 | 'SocialGrep/the-2022-trucker-strike-on-reddit': 1567, 418 | 'allenai/qasper': 1568, 419 | 'CyranoB/polarity': 1569, 420 | 
'SocialGrep/one-million-reddit-confessions': 1570, 421 | 'debatelab/deepa2': 1571, 422 | 'bhavnicksm/sentihood': 1572, 423 | 'debatelab/aaac': 1573, 424 | 'jgammack/SAE-door-abstracts': 1574, 425 | 'erwanlc/cocktails_recipe': 1575, 426 | 'erwanlc/cocktails_recipe_no_brand': 1576, 427 | 'BeIR/arguana-qrels': 1577, 428 | 'tner/fin': 1578, 429 | 'BeIR/scidocs-qrels': 1579, 430 | 'tner/bc5cdr': 1580, 431 | 'olm/olm-CC-MAIN-2022-27-sampling-ratio-0.16142697881': 1581, 432 | 'BeIR/fever': 1582, 433 | 'cardiffnlp/tweet_topic_single': 1584, 434 | 'speechcolab/gigaspeech': 1585, 435 | 'BeIR/webis-touche2020': 1586, 436 | 'aquamuse': 1588, 437 | 'olm/olm-CC-MAIN-2022-40-sampling-ratio-0.15894621295': 1590, 438 | 'tner/btc': 1591, 439 | 'truthful_qa': 1592, 440 | 'McGill-NLP/FaithDial': 1594, 441 | 'ekinakyurek/ftrace': 1595, 442 | 'tomasg25/scientific_lay_summarisation': 1597, 443 | 'tner/mit_restaurant': 1599, 444 | 'bigscience-biomedical/bioasq_task_b': 1600, 445 | 'strombergnlp/broad_twitter_corpus': 1619, 446 | 'tner/bionlp2004': 1620, 447 | 'metaeval/recast': 1621, 448 | 'the_pile_openwebtext2': 1629, 449 | 'taln-ls2n/inspec': 1630, 450 | 'lmqg/qa_squadshifts': 1631, 451 | 'BeIR/hotpotqa': 1636, 452 | 'jpwahle/machine-paraphrase-dataset': 1638, 453 | 'tner/mit_movie_trivia': 1639, 454 | 'tner/conll2003': 1640, 455 | 'OxAISH-AL-LLM/wiki_toxic': 1641, 456 | 'ccdv/WCEP-10': 1642, 457 | 'BeIR/trec-covid-qrels': 1646, 458 | 'g8a9/europarl_en-it': 1647, 459 | 'carblacac/twitter-sentiment-analysis': 1648, 460 | 'usc-isi/WikiConvert': 1649, 461 | 'visual_genome': 1650, 462 | 'florianbussmann/FUNSD-vu2020revising': 1660, 463 | 'Felix-ML/quoteli3': 1661, 464 | 'allenai/scico': 1662, 465 | 'drAbreu/bc4chemd_ner': 1663, 466 | 'tner/tweebank_ner': 1664, 467 | 'alisawuffles/WANLI': 1665, 468 | 'Team-PIXEL/rendered-bookcorpus': 1666, 469 | 'Team-PIXEL/rendered-wikipedia-english': 1667, 470 | 'wanyu/IteraTeR_full_sent': 1668, 471 | 'EMBO/BLURB': 1669, 472 | 'metaeval/crowdflower': 
1676, 473 | 'AlexaAI/bold': 1685, 474 | 'metaeval/ethics': 1686, 475 | 'sileod/movie_recommendation': 1691, 476 | 'lmqg/qg_subjqa': 1692, 477 | 'copenlu/scientific-exaggeration-detection': 1699, 478 | 'esb/datasets': 1700, 479 | 'BeIR/msmarco': 1701, 480 | 'biwi_kinect_head_pose': 1703, 481 | 'BeIR/quora-qrels': 1704, 482 | 'wardenga/lsoie': 1705, 483 | 'nlphuji/vasr': 1707, 484 | 'BeIR/nq': 1708, 485 | 'BeIR/dbpedia-entity': 1710, 486 | 'sadrasabouri/ShahNegar': 1712, 487 | 'knkarthick/xsum': 1713, 488 | 'ColumbiaNLP/FLUTE': 1714, 489 | 'bigscience-biomedical/scitail': 1715, 490 | 'lmqg/qg_squadshifts': 1717, 491 | 'BeIR/climate-fever': 1722, 492 | 'PiC/phrase_retrieval': 1724, 493 | 'bdotloh/empathetic-dialogues-contexts': 1726, 494 | 'ccdv/mediasum': 1727, 495 | 'BeIR/msmarco-qrels': 1735, 496 | 'alexfabbri/answersumm': 1736, 497 | 'pszemraj/text2image-multi-prompt': 1737, 498 | 'shibing624/source_code': 1738, 499 | 'kensho/spgispeech': 1741, 500 | 'jamescalam/channel-metadata': 1742, 501 | 'EMBO/sd-nlp-non-tokenized': 1743, 502 | 'facebook/pmd': 1748, 503 | 'drt/kqa_pro': 1749, 504 | 'BeIR/fever-qrels': 1751, 505 | 'TheFusion21/PokemonCards': 1752, 506 | 'zeroshot/twitter-financial-news-sentiment': 1753, 507 | 'bigscience-biomedical/blurb': 1754, 508 | 'mteb/bucc-bitext-mining': 1759, 509 | 'pinecone/core-2020-05-10-deduplication': 1763, 510 | 'tals/vitaminc': 1764, 511 | 'BeIR/hotpotqa-qrels': 1765, 512 | 'gigant/ted_descriptions': 1766, 513 | 'jpwahle/autoencoder-paraphrase-dataset': 1767, 514 | 'beki/privy': 1768, 515 | 'Muennighoff/P3': 1770, 516 | 'jpwahle/dblp-discovery-dataset': 1771, 517 | 'taln-ls2n/kp20k': 1773, 518 | 'bigscience-biomedical/biosses': 1774, 519 | 'allenai/prosocial-dialog': 1776, 520 | 'pacovaldez/stackoverflow-questions': 1777, 521 | 'kasnerz/hitab': 1778, 522 | 'relbert/semeval2012_relational_similarity': 1779, 523 | 'sagnikrayc/snli-cf-kaushik': 1780, 524 | 'mwritescode/slither-audited-smart-contracts': 1781, 525 | 
'BeIR/webis-touche2020-qrels': 1787, 526 | 'bigscience-biomedical/mednli': 1788, 527 | 'pinecone/movielens-recent-ratings': 1790, 528 | 'BeIR/dbpedia-entity-qrels': 1791, 529 | 'shanya/crd3': 1792, 530 | 'knkarthick/samsum': 1793, 531 | 'BeIR/climate-fever-qrels': 1794, 532 | 'BeIR/nq-qrels': 1795, 533 | 'sanchit-gandhi/librispeech_asr_dummy': 1796, 534 | 'taln-ls2n/semeval-2010-pre': 1797, 535 | 'Bingsu/openwebtext_20p': 1798, 536 | 'PolyAI/banking77': 1799, 537 | 'JulesBelveze/tldr_news': 1800, 538 | 'Freed-Wu/kodak': 1801, 539 | 'biglam/gutenberg-poetry-corpus': 1802, 540 | 'SocialGrep/reddit-r-bitcoin-data-for-jun-2022': 1803, 541 | 'taln-ls2n/kptimes': 1805, 542 | 'biglam/old_bailey_proceedings': 1806, 543 | 'launch/gov_report': 1807, 544 | 'knkarthick/AMI': 1810, 545 | 'voidful/NMSQA': 1811, 546 | 'DTU54DL/dmeo': 1812, 547 | 'FinanceInc/auditor_sentiment': 1813, 548 | 'jamescalam/unsplash-25k-photos': 1814, 549 | 'Tidrael/tsl_news': 1815, 550 | 'DTU54DL/common3k-train': 1816, 551 | 'okite97/news-data': 1817, 552 | 'lmqg/qa_squad': 1818, 553 | 'ConvLab/woz': 1819, 554 | 'ConvLab/camrest': 1820, 555 | 'ConvLab/metalwoz': 1821, 556 | 'kakaobrain/coyo-700m': 1822, 557 | 'taln-ls2n/kpbiomed': 1823, 558 | 'abhinavk/openpi_v2': 1826, 559 | 'mwong/fever-claim-related': 1831, 560 | 'ConvLab/tm1': 1832, 561 | 'joey234/nan-nli': 1833, 562 | 'ConvLab/tm2': 1834, 563 | 'ConvLab/tm3': 1835, 564 | 'ConvLab/kvret': 1836, 565 | 'ConvLab/sgd': 1837, 566 | 'relbert/semeval2012_relational_similarity_v5': 1838, 567 | 'cmudrc/wave-energy': 1839, 568 | 'llangnickel/long-covid-classification-data': 1840, 569 | 'webis/args_me': 1841, 570 | 'HuggingFaceM4/something_something_v2': 1844, 571 | 'ConvLab/dailydialog': 1845, 572 | 'huanggab/reddit_haiku': 1846, 573 | 'relbert/semeval2012_relational_similarity_v6': 1847, 574 | 'pszemraj/riddlesense_plusplus': 1848, 575 | 'rungalileo/20_Newsgroups_Fixed': 1849, 576 | 'DTU54DL/common-voice-test16k': 1850, 577 | 'lhoestq/custom_squad': 1851, 
578 | 'merve/poetry': 1852, 579 | 'yoshitomo-matsubara/srsd-feynman_easy': 1853, 580 | 'nightingal3/fig-qa': 1854, 581 | 'matejklemen/vuamc': 1855, 582 | 'strombergnlp/twitter_pos': 1856, 583 | 'nlphuji/winogavil': 1858, 584 | 'DFKI-SLT/tacred': 1859, 585 | 'valurank/News_Articles_Categorization': 1861, 586 | 'nbroad/mediasum': 1862, 587 | 'asapp/slue': 1863, 588 | 'zbnsl/emoteModified': 1865, 589 | 'adsabs/WIESP2022-NER': 1866, 590 | 'arize-ai/ecommerce_reviews_with_language_drift': 1867, 591 | 'UCL-DARK/ludwig': 1868, 592 | 'Aunsiels/InfantBooks': 1874, 593 | 'openclimatefix/uk_pv': 1875, 594 | 'copenlu/fever_gold_evidence': 1876, 595 | 'rungalileo/mit_movies_fixed_connll_format': 1877, 596 | 'jamescalam/youtube-transcriptions': 1878, 597 | 'lmqg/qa_harvesting_from_wikipedia': 1879, 598 | 'qanastek/Biosses-BLUE': 1880, 599 | 'zeronix1020/Strawberry-Disease': 1881, 600 | 'dferndz/cSQuAD2': 1882, 601 | 'taln-ls2n/pubmed': 1883, 602 | 'BeIR/scidocs-generated-queries': 1884, 603 | 'jmhessel/newyorker_caption_contest': 1885, 604 | 'inverse-scaling/NeQA': 1915, 605 | 'DTU54DL/common-voice': 1916, 606 | 'turingbench/TuringBench': 1917, 607 | 'demelin/understanding_fables': 1937, 608 | 'RUCAIBox/Open-Dialogue': 1938, 609 | 'allenai/multinews_sparse_max': 1939, 610 | 'RamAnanth1/lex-fridman-podcasts': 1940, 611 | 'sled-umich/Conversation-Entailment': 1941, 612 | 'stevhliu/demo': 1942, 613 | 'svakulenk0/qrecc': 1943, 614 | 'arize-ai/movie_reviews_with_context_drift': 1944, 615 | 'launch/ampere': 1945, 616 | 'AnonymousSub/recipe_RL_data_roberta-base': 1946, 617 | 'dreamproit/bill_summary_us': 1947, 618 | 'bgstud/libri-whisper-raw': 1948, 619 | 'jpwahle/etpc': 1949, 620 | 'DTU54DL/common-native-proc': 1950, 621 | 'mbartolo/synQA': 1951, 622 | 'wanyu/IteraTeR_full_doc': 1952, 623 | 'wanyu/IteraTeR_human_doc': 1953, 624 | 'orieg/elsevier-oa-cc-by': 1954, 625 | 'climatebert/environmental_claims': 1955, 626 | 'SocialGrep/the-reddit-climate-change-dataset': 1956, 627 | 
'KGraph/FB15k-237': 1958, 628 | 'KheemDH/data': 1959, 629 | 'mwong/fever-evidence-related': 1960, 630 | 'HuggingFaceM4/TGIF': 1961, 631 | 'BeIR/fever-generated-queries': 1962, 632 | 'nateraw/ade20k-tiny': 1963, 633 | 'BeIR/cqadupstack-qrels': 1964, 634 | 'knkarthick/highlightsum': 1965, 635 | 'RUCAIBox/Data-to-text-Generation': 1966, 636 | 'GateNLP/broad_twitter_corpus': 1967, 637 | 'Tidrael/finance-headlines': 1968, 638 | 'lmqg/qag_squad': 1969, 639 | 'pacovaldez/stackoverflow-questions-2016': 1970, 640 | 'BeIR/fiqa-generated-queries': 1971, 641 | 'BeIR/signal1m-generated-queries': 1972, 642 | 'MicPie/unpredictable_msdn-microsoft-com': 1973, 643 | 'zeroshot/twitter-financial-news-topic': 1974, 644 | 'inverse-scaling/quote-repetition': 1975, 645 | 'esc-bench/esc-diagnostic-backup': 1976, 646 | 'lmqg/qg_annotation': 1977, 647 | 'sileod/wep-probes': 1978, 648 | 'DTU54DL/common-voice-test3k': 1981, 649 | 'jakartaresearch/causalqa': 1982, 650 | 'copenlu/sufficient_facts': 2002, 651 | 'ConvLab/multiwoz21': 2005, 652 | 'arka0821/multi_document_summarization': 2006, 653 | 'strombergnlp/rumoureval_2019': 2007, 654 | 'rongzhangibm/NaturalQuestionsV2': 2008, 655 | 'Muennighoff/mbpp': 2009, 656 | 'RUCAIBox/Simplification': 2011, 657 | 'shubhamg2208/lexicap': 2012, 658 | 'olm/olm-wikipedia-20220701': 2013, 659 | 'esc-bench/esc-diagnostic-dataset': 2014, 660 | 'jpwahle/autoregressive-paraphrase-dataset': 2015, 661 | 'GabrielVidal/dead-by-daylight-perks': 2016, 662 | 'DTU54DL/common-proc-whisper': 2017, 663 | 'valurank/PoliticalBias': 2018, 664 | 'McGill-NLP/TopiOCQA': 2019, 665 | 'gsarti/magpie': 2020, 666 | 'BeIR/cqadupstack-generated-queries': 2021, 667 | 'MicPie/unpredictable_mmo-champion-com': 2022, 668 | 'RUCAIBox/Question-Generation': 2023, 669 | 'allenai/multinews_sparse_mean': 2024, 670 | 'demo-org/diabetes': 2025, 671 | 'StonyBrookNLP/tellmewhy': 2026, 672 | 'bergr7/weakly_supervised_ag_news': 2027, 673 | 'din0s/msmarco-nlgen': 2028, 674 | 
'frankier/cross_domain_reviews': 2029, 675 | 'gart-labor/pumpnli': 2030, 676 | 'AndyChiang/cloth': 2031, 677 | 'olm/olm-CC-MAIN-2017-22-sampling-ratio-0.16178770949': 2032, 678 | 'bgstud/libri': 2033, 679 | 'DTU54DL/commonvoice_accent_test': 2034, 680 | 'lewtun/my-awesome-dataset': 2035, 681 | 'peixian/rtGender': 2036, 682 | 'pmc/open_access': 2039, 683 | 'uva-irlab/trec-cast-2019-multi-turn': 2043, 684 | 'DFKI-SLT/scidtb': 2044, 685 | 'surrey-nlp/PLOD-filtered': 2045, 686 | 'wanyu/IteraTeR_v2': 2046, 687 | 'strombergnlp/ipm_nel': 2047, 688 | 'HuggingFaceM4/charades': 2048, 689 | 'ncats/EpiSet4NER-v2': 2050, 690 | 'HuggingFaceM4/ActivitiyNet_Captions': 2051, 691 | 'sileod/discourse_marker_qa': 2052, 692 | 'yoshitomo-matsubara/srsd-feynman_medium': 2053, 693 | 'BeIR/nfcorpus-generated-queries': 2054, 694 | 'BeIR/trec-news-generated-queries': 2055, 695 | 'BeIR/robust04-generated-queries': 2056, 696 | 'BeIR/quora-generated-queries': 2057, 697 | 'valurank/Adult-content-dataset': 2058, 698 | 'launch/open_question_type': 2059, 699 | 'knkarthick/topicsum': 2060, 700 | 'yuningm/citesum': 2061, 701 | 'elihoole/asrs-aviation-reports': 2062, 702 | 'DeveloperOats/DBPedia_Classes': 2063, 703 | 'hoskinson-center/proof-pile': 2064, 704 | 'RUCAIBox/Summarization': 2065, 705 | 'RUCAIBox/Question-Answering': 2066, 706 | 'RUCAIBox/Story-Generation': 2067, 707 | 'RUCAIBox/Paraphrase': 2068, 708 | 'jakartaresearch/semeval-absa': 2069, 709 | 'tner/ttc_dummy': 2071, 710 | 'copenlu/citeworth': 2072, 711 | 'allenai/multinews_sparse_oracle': 2073, 712 | 'allenai/multixscience_sparse_oracle': 2074, 713 | 'allenai/multixscience_sparse_mean': 2075, 714 | 'allenai/multixscience_sparse_max': 2076, 715 | 'allenai/ms2_sparse_oracle': 2077, 716 | 'mschi/blogspot_raw': 2078, 717 | 'gaurikapse/civis-consultation-summaries': 2079, 718 | 'chenghao/cuad_qa': 2080, 719 | 'esc-bench/esc-datasets': 2081, 720 | 'olm/olm-wikipedia-20221001': 2082, 721 | 'allenai/wcep_dense_oracle': 2083, 722 | 
'dennlinger/wiki-paragraphs': 2084, 723 | 'AndyChiang/dgen': 2085, 724 | 'esb/diagnostic-dataset': 2086, 725 | 'havens2/naacl2022': 2087, 726 | 'fkdosilovic/docee-event-classification': 2088, 727 | 'DTU54DL/demo-common-whisper': 2089, 728 | 'dferndz/cSQuAD1': 2090, 729 | 'jpcorb20/multidogo': 2091, 730 | 'julien-c/reactiongif': 2092, 731 | 'lara-martin/Scifi_TV_Shows': 2093, 732 | 'lukesjordan/worldbank-project-documents': 2094, 733 | 'mnemlaghi/widdd': 2095, 734 | 'mvarma/medwiki': 2096, 735 | 'nateraw/beans': 2098, 736 | 'nateraw/cats_vs_dogs': 2099, 737 | 'nateraw/food101': 2100, 738 | 'nateraw/sync_food101': 2101, 739 | 'ncats/EpiSet4BinaryClassification': 2102, 740 | 'ncats/EpiSet4NER-v1': 2103, 741 | 'peixian/equity_evaluation_corpus': 2104, 742 | 'rajeshradhakrishnan/malayalam_wiki': 2105, 743 | 'softcatala/open-source-english-catalan-corpus': 2106, 744 | 'toloka/CrowdSpeech': 2107, 745 | 'valurank/12-factor': 2108, 746 | 'valurank/PoliticalBias_AllSides_Txt': 2109, 747 | 'valurank/PoliticalBias_Sources': 2110, 748 | 'valurank/hate-multi': 2111, 749 | 'valurank/news-12factor': 2112, 750 | 'valurank/offensive-multi': 2113, 751 | 'webimmunization/COVID-19-vaccine-attitude-tweets': 2114, 752 | 'wpicard/nostradamus-propheties': 2115, 753 | 'yuanchuan/annotated_reference_strings': 2116, 754 | 'ruanchaves/stan_large': 2117, 755 | 'ruanchaves/stan_small': 2118, 756 | 'ruanchaves/boun': 2119, 757 | 'ruanchaves/dev_stanford': 2120, 758 | 'ruanchaves/test_stanford': 2121, 759 | 'ruanchaves/snap': 2122, 760 | 'z-uo/qasper-squad': 2123, 761 | 'SocialGrep/the-antiwork-subreddit-dataset': 2124, 762 | 'CLUTRR/v1': 2126, 763 | 'malteos/test2': 2132, 764 | 'TomTBT/pmc_open_access_xml': 2133, 765 | 'SocialGrep/the-reddit-dataset-dataset': 2137, 766 | 'SocialGrep/the-reddit-place-dataset': 2139, 767 | 'projecte-aina/gencata': 2141, 768 | 'mwong/climate-evidence-related': 2142, 769 | 'mwong/climate-claim-related': 2143, 770 | 'surrey-nlp/PLOD-unfiltered': 2144, 771 | 
'SocialGrep/the-reddit-irl-dataset': 2145, 772 | 'Lexi/spanextract': 2147, 773 | 'mwong/climatetext-claim-related-evaluation': 2148, 774 | 'mwong/climatetext-evidence-related-evaluation': 2149, 775 | 'ylacombe/xsum_factuality': 2150, 776 | 'mwong/climatetext-climate_evidence-claim-related-evaluation': 2151, 777 | 'mwong/climatetext-claim-climate_evidence-related-evaluation': 2152, 778 | 'mwong/climatetext-evidence-claim-pair-related-evaluation': 2153, 779 | 'mwong/climatetext-claim-evidence-pair-related-evaluation': 2154, 780 | 'patrickvonplaten/librispeech_asr_self_contained': 2155, 781 | 'BritishLibraryLabs/web_archive_classification': 2158, 782 | 'albertxu/CrosswordQA': 2159, 783 | 'SocialGrep/the-reddit-nft-dataset': 2160, 784 | 'janck/bigscience-lama': 2162, 785 | 'strombergnlp/twitter_pos_vcb': 2163, 786 | 'Filippo/osdg_cd': 2164, 787 | 'Ukhushn/home-depot': 2165, 788 | 'pile-of-law/eoir_privacy': 2166, 789 | 'drAbreu/sd-nlp-2': 2168, 790 | 'Leyo/TGIF': 2173, 791 | 'strombergnlp/named_timexes': 2174, 792 | 'domenicrosati/TruthfulQA': 2175, 793 | 'Roh/ryanspeech': 2176, 794 | 'Leyo/ActivityNet_Captions': 2177, 795 | 'IsaacBot/SQuAD-single-sentence-QA': 2178, 796 | 'morteza/cogtext': 2179, 797 | 'wdc/products-2017': 2180, 798 | 'rajeshvarma/QA_on_SLA': 2196, 799 | 'statworx/haiku': 2197, 800 | 'rajistics/million-headlines': 2198, 801 | 'feyzaakyurek/BBNLI': 2199, 802 | 'launch/gov_report_qs': 2200, 803 | 'DFKI-SLT/wikitext_linked': 2202, 804 | 'dianalogan/Marketing-Budget-and-Actual-Sales-Dataset': 2204, 805 | 'mehnaazasad/arxiv-co-ga': 2205, 806 | 'JeremyAlain/123_test': 2206, 807 | 'BeIR/arguana-generated-queries': 2209, 808 | 'BeIR/climate-fever-generated-queries': 2210, 809 | 'BeIR/dbpedia-entity-generated-queries': 2211, 810 | 'wise-east/spolin': 2212, 811 | 'yoshitomo-matsubara/srsd-feynman_hard': 2213, 812 | 'florentgbelidji/edmunds-car-ratings': 2214, 813 | 'olivierdehaene/xkcd': 2215, 814 | 'rajistics/auditor_review': 2216, 815 | 
'BeIR/scifact-generated-queries': 2217, 816 | 'BeIR/trec-covid-generated-queries': 2218, 817 | 'BeIR/webis-touche2020-generated-queries': 2219, 818 | 'BeIR/nq-generated-queries': 2220, 819 | 'BeIR/hotpotqa-generated-queries': 2221, 820 | 'BeIR/bioasq-generated-queries': 2222, 821 | 'icelab/ntrs_meta': 2223, 822 | 'iejMac/CLIP-Kinetics700': 2224, 823 | 'fever/feverous': 2225, 824 | 'Livingwithmachines/hmd-erwt-training': 2226, 825 | 'wkrl/cord': 2227, 826 | 'launch/reddit_qg': 2228, 827 | 'arize-ai/xtreme_en': 2229} 828 | 829 | dataset_rank['Anthropic/model-written-evals']=13 830 | dataset_rank['Anthropic/hh-rlhf']=14 -------------------------------------------------------------------------------- /src/tasksource/mtasks.py: -------------------------------------------------------------------------------- 1 | from .preprocess import cat, get,name, regen, constant, Classification, TokenClassification, MultipleChoice 2 | from .metadata import udep_labels 3 | from datasets import get_dataset_config_names, ClassLabel, Dataset, DatasetDict, concatenate_datasets, Sequence 4 | 5 | def all(dataset_name): 6 | try: 7 | config_name=get_dataset_config_names(dataset_name) 8 | except Exception as e: 9 | print(dataset_name,e) 10 | config_name=None 11 | return dict(dataset_name=dataset_name, config_name=config_name) 12 | 13 | def concatenate_configs(dataset): 14 | return DatasetDict(train=concatenate_datasets(list(dataset.values()))) 15 | 16 | # english tasks (few, to keep balance between languages) 17 | 18 | moritz_xnli = Classification("premise","hypothesis",name("label",["entailment", "neutral","contradiction"]), 19 | pre_process=concatenate_configs, 20 | dataset_name="MoritzLaurer/multilingual-NLI-26lang-2mil7") 21 | 22 | xnli = Classification("premise", "hypothesis", "label", **all("metaeval/xnli")) 23 | 24 | americas_nli = Classification("premise","hypothesis","label",config_name="all_languages") 25 | 26 | stsb_multi_mt = Classification("sentence1", "sentence2", 27 | lambda x: 
        float(x["similarity_score"]/5),
    **all('stsb_multi_mt'))

# Cross-lingual paraphrase detection (PAWS-X); integer labels mapped to readable names.
pawsx = Classification("sentence1","sentence2",name('label',['not_paraphrase','paraphrase']), **all('paws-x'))

# Dialogue-act classification over the multilingual MIAM benchmark.
miam = Classification("Utterance",labels="Label", **all('miam'))

# Stance detection: does the comment support or oppose the question's proposition?
xstance = Classification("question", "comment", "label",
    **all("strombergnlp/x-stance"))


# Offensive-language detection for four languages; rows whose subtask_a label
# falls outside {0,1} are filtered out before the label names are applied.
offenseval = Classification(lambda x: str(x["text"]), labels=name("subtask_a",['not offensive','offensive']),
    pre_process=lambda ds:ds.filter(lambda x: x['subtask_a'] in [0,1]),
    dataset_name='strombergnlp/offenseval_2020',
    config_name=["ar","da","gr","tr"])

offenseval_dravidian = Classification("text",labels="label",config_name=['kannada','malayalam','tamil'])

# Multi-label hate-speech annotations arrive as a single '_'-joined string;
# the lambda splits it back into a list of labels.
mlma_hate = Classification("tweet", labels=lambda x:x["sentiment"].split('_'),
    dataset_name="nedjmaou/MLMA_hate_speech")

# XGLUE question/answer matching (explicit dataset+config pair).
qam = Classification("question","answer","label", dataset_name="xglue",config_name="qam")

#x_sum_factuality = Classification("summary","generated_summary","label", dataset_name="ylacombe/xsum_factuality")

# Multilingual fact checking: evidence vs. claim veracity.
x_fact = Classification('evidence','claim','label', dataset_name="metaeval/x-fact")

# XGLUE subtasks; dataset/config names are presumably derived from the
# variable names (`xglue___nc` -> dataset "xglue", config "nc") — the
# name-parsing convention lives outside this file. TODO confirm.
xglue___nc = Classification('news_body',labels='news_category')
xglue___qadsm = Classification('query','ad_description','relevance_label')
xglue___qam = Classification('question','answer','label')
xglue___wpr = Classification('query','web_page_snippet','relavance_label') # relavance_label : sic (typo is in the upstream dataset column)

# Word-in-context disambiguation: prefix each context sentence with the
# target word (via `cat`) so the model sees which word to disambiguate.
xlwic = Classification(
    sentence1=cat(["target_word","context_1"], " : "),
    sentence2=cat(["target_word","context_2"], " : "),
    labels='label',dataset_name="pasinit/xlwic",config_name=['xlwic_de_de','xlwic_it_it','xlwic_fr_fr','xlwic_en_ko'])

#[ "spam", "fails_task", "lang_mismatch", "pii", "not_appropriate", "hate_speech", "sexual_content", "quality", "toxicity", "humor", "helpfulness", "creativity", "violence" ]

| oasst1__quality = Classification("parent_text","text",labels="quality", dataset_name="tasksource/oasst1_dense_flat", 67 | pre_process = lambda ds:ds.remove_columns('labels')) 68 | oasst1__toxicity = Classification("parent_text","text",labels="toxicity", dataset_name="tasksource/oasst1_dense_flat", 69 | pre_process = lambda ds:ds.remove_columns('labels')) 70 | oasst1__helpfulness = Classification("parent_text","text",labels="helpfulness", dataset_name="tasksource/oasst1_dense_flat", 71 | pre_process = lambda ds:ds.remove_columns('labels')) 72 | 73 | 74 | language_identification = Classification("text",labels="labels", dataset_name="papluca/language-identification") 75 | wili_2018_langid = Classification("sentence",labels="label",dataset_name="wili_2018") 76 | 77 | exams = MultipleChoice(get.question.stem, choices_list=get.question.choices.text, 78 | labels=lambda x:'ABCDE'.index(x['answerKey']), 79 | dataset_name="exams", config_name='multilingual', 80 | pre_process=lambda ds:ds.filter(lambda x: x['answerKey'] in "ABCDE")) 81 | 82 | xcsr = MultipleChoice(get.question.stem, choices_list=get.question.choices.text, 83 | labels=lambda x:'ABCDE'.index(x['answerKey']), 84 | **all('xcsr')) 85 | 86 | xcopa = MultipleChoice("premise",choices=['choice1','choice2'],labels="label", 87 | **all('xcopa')) 88 | 89 | #xstory = MultipleChoice(constant(''),choices=["text_right_ending","text_wrong_ending"],labels=constant(0), **all("juletxara/xstory_cloze")) 90 | 91 | xstory = MultipleChoice(lambda x: "\n".join([x[f'input_sentence_{i}'] for i in range(1,5)]), 92 | choices=["sentence_quiz1","sentence_quiz2"],labels=constant(0), **all("juletxara/xstory_cloze")) 93 | 94 | 95 | xglue_ner = TokenClassification("words","ner", dataset_name="xglue",config_name="ner") 96 | xglue_pos = TokenClassification("words","pos", dataset_name="xglue",config_name="pos") 97 | 98 | #disrpt_23 = Classification("unit1_sent", "unit2_sent", "label",**all("metaeval/disrpt")) 99 | 100 | udep__pos = 
TokenClassification('tokens','upos', **all('universal_dependencies')) 101 | 102 | def udep_post_process(ds): 103 | return ds.cast_column('labels', Sequence(ClassLabel(names=udep_labels))) 104 | 105 | #udep__deprel = TokenClassification('tokens',lambda x:[udep_labels.index(a) for a in x['deprel']], 106 | # **all('universal_dependencies'),post_process=udep_post_process) 107 | 108 | oasst_rlhf = MultipleChoice("prompt",choices=['chosen','rejected'],labels=constant(0), 109 | dataset_name="tasksource/oasst1_pairwise_rlhf_reward") 110 | 111 | sentiment = Classification("text",labels="label", dataset_name="tyqiangz/multilingual-sentiments",config_name="all", 112 | pre_process=lambda ds:ds.filter(lambda x: "amazon_reviews" not in x['source']) ) 113 | tweet_sentiment = Classification("text", labels="label", **all('cardiffnlp/tweet_sentiment_multilingual')) 114 | review_sentiment = Classification("review_body",labels="stars", dataset_name="amazon_reviews_multi",config_name="all_languages") 115 | emotion = Classification("text",labels="emotion",dataset_name="metaeval/universal-joy") 116 | # in mms 117 | 118 | mms_sentiment = Classification("text",labels="label",dataset_name='Brand24/mms') 119 | 120 | mapa_fine = TokenClassification("tokens","coarse_grained",dataset_name='joelito/mapa') 121 | mapa_corase = TokenClassification("tokens","fine_grained",dataset_name='joelito/mapa') 122 | 123 | aces_ranking = MultipleChoice("source",choices=['good-translation','incorrect-translation'],labels=constant(0), dataset_name='nikitam/ACES') 124 | aces_phenomena = Classification('source','incorrect-translation','phenomena', dataset_name='nikitam/ACES') 125 | 126 | amazon_intent = Classification("utt",labels="intent",**all('AmazonScience/massive')) 127 | # dataset_name='glue',config_name=['ocnli','afqmc']) 128 | 129 | tidy_as2=Classification("Question","Sentence","Label",dataset_name='tasksource/tydi-as2-balanced') 130 | 131 | multiconer = TokenClassification("tokens","ner_tags_index", 
    **all("MultiCoNER/multiconer_v2"))

mtop = Classification("question",labels="intent", dataset_name="tasksource/mtop")

mlabel_nli = Classification("premise","hypothesis","labels",dataset_name="tasksource/multilingual-zero-shot-label-nli")

#wino_x
# clue, klue, indic_glue
# SMS_Spam_Multilingual_Collection_Dataset

# ---- /src/tasksource/preprocess.py ----

from collections.abc import Iterable
from dotwiz import DotWiz
from dataclasses import dataclass
from typing import Union
import itertools
import funcy as fc
import exrex
import magicattr
import numpy as np
import copy
import datasets
import time

MAX_MC_OPTIONS = 4

def get_column_names(dataset):
    """Return the set of column names; flattens per-split dicts from a DatasetDict."""
    cn = dataset.column_names
    if type(cn)==dict:
        # DatasetDict: column_names maps split -> list of columns.
        return set(fc.flatten(cn.values()))
    else:
        return set(cn)


def sample_dataset(dataset,n=10000, n_eval=1000,seed=0):
    """Downsample each split in place: `n` rows for train, `n_eval` for others.

    Splits already at or below the cap (or when the cap is falsy) are untouched.
    Sampling uses train_test_split for a seeded random subset, not a head-slice.
    """
    for k in dataset:
        n_k=(n if k=='train' else n_eval)
        if n_k and len(dataset[k])>n_k:
            dataset[k]=dataset[k].train_test_split(train_size=n_k,seed=seed)['train']
    return dataset

class Preprocessing(DotWiz):
    """Base for declarative task preprocessors; calling one normalizes a dataset.

    Field values are interpreted by __call__: a string names a source column to
    rename, a callable computes the target column row-by-row. Every created
    instance is also recorded in `_instances`.
    """
    default_splits = ('train','validation','test')
    _instances = []  # registry of every Preprocessing ever instantiated

    def __post_init__(self):
        Preprocessing._instances+=[self]

    @staticmethod
    def __map_to_target(x,fn=lambda x:None, target=None):
        # datasets.map helper: write fn(row) into row[target].
        x[target]=fn(x)
        return x

    def load(self):
        """Download the declared dataset and run this preprocessing on it."""
        return self(datasets.load_dataset(self.dataset_name,self.config_name))

    def __call__(self,dataset, max_rows=None, max_rows_eval=None,seed=0):
        """Apply the full pipeline: pre_process, split fixup, sampling, column
        renaming/derivation, column pruning, label fixup, post_process.

        NOTE(review): `fix_splits` and `fix_labels` are referenced but defined
        elsewhere in this module (outside this excerpt).
        """
        dataset = self.pre_process(dataset)

        # manage splits
        for k,v in zip(self.default_splits, self.splits):
            if v and k!=v:
                # Rename a custom split (e.g. 'dev') onto the canonical name.
                dataset[k]=dataset[v]
                del dataset[v]
            if k in dataset and not v: # obfuscated label
                del dataset[k]
        dataset = fix_splits(dataset)

        # Drop any split that is not train/validation/test.
        for k in list(dataset.keys()):
            if k not in self.default_splits:
                del dataset[k]
        dataset = sample_dataset(dataset, max_rows, max_rows_eval,seed=seed)

        # field annotated with a string
        # Build {source_column: target_field} from fields declared as strings,
        # skipping bookkeeping fields and identity mappings.
        substitutions = {v:k for k,v in self.to_dict().items()
            if (k and k not in {'splits','dataset_name','config_name'}
            and type(v)==str and k!=v)}

        # Remove target columns that already exist so rename_columns cannot clash.
        dataset=dataset.remove_columns([c for c in substitutions.values() if c in dataset['train'].features and c not in substitutions])
        dataset=dataset.rename_columns(substitutions)

        # field annotated with a function
        for k in self.to_dict().keys():
            v=getattr(self, k)
            if callable(v) and k not in {"post_process","pre_process","load"}:
                dataset=dataset.map(self.__map_to_target,
                    fn_kwargs={'fn':v,'target':k})

        # Keep only the declared task fields.
        dataset=dataset.remove_columns(
            get_column_names(dataset)-set(self.to_dict().keys()))
        dataset = fix_labels(dataset)
        dataset = fix_splits(dataset) # again: label mapping changed
        dataset = self.post_process(dataset)
        return dataset


@dataclass
class cat(Preprocessing):
    """Field spec that concatenates several columns with a separator.

    NOTE(review): fields are iterated reversed and combined via `sum(*y)` on
    numpy char arrays — the exact concatenation order is intentional but
    non-obvious; verify against existing tasks before touching.
    """
    fields:Union[str,list]=None
    separator:str=' '

    def __call__(self, example=None):
        y=[np.char.array(example[f]) + sep
            for f,sep in zip(self.fields[::-1],itertools.repeat(self.separator))]
        y=list(sum(*y))
        if len(y)==1:
            y=y[0]
        return y


def pretty(f):
    """Wrap callable-factory `f` so its products repr as `name(arg)`.

    NOTE(review): the loop assigns every positional arg to the same `value`
    attribute, so only the last one survives — looks intentional for the
    single-arg case; confirm before relying on multi-arg behavior.
    """
    class pretty_f(DotWiz):
        def __init__(self,*args):
            self.__f_arg = f(*args)
            for a in args:
                setattr(self,'value',a)

        def __call__(self, *args,**kwargs):
            return self.__f_arg(*args,**kwargs)

        def __repr__(self):
            return f"{self.__f_arg.__qualname__ .split('.')[0]}({self.value})"
    return pretty_f

class dotgetter:
    # Lazy attribute-path accessor: builds a path string via attribute/index
    # access and resolves it against an example dict when called.
    def __init__(self, path=''):
self.path=path 117 | 118 | def __bool__(self): 119 | return bool(self.path) 120 | 121 | def __getattr__(self, k): 122 | return self.__class__(f'{self.path}.{k}'.lstrip('.')) 123 | 124 | def __getitem__(self, i): 125 | return self.__class__(f'{self.path}[{i}]') 126 | 127 | def __call__(self, example=None): 128 | return magicattr.get(DotWiz(example), self.path) 129 | 130 | def __hash__(self): 131 | return hash(self.path) 132 | 133 | 134 | @dataclass 135 | class ClassificationFields(Preprocessing): 136 | sentence1:str='sentence1' 137 | sentence2:str='sentence2' 138 | labels:str='labels' 139 | 140 | @dataclass 141 | class Seq2SeqLMFields(Preprocessing): 142 | prompt:str='prompt' 143 | output:str='output' 144 | 145 | @dataclass 146 | class TokenClassificationFields(Preprocessing): 147 | tokens:str='tokens' 148 | labels:str='labels' 149 | 150 | @dataclass 151 | class MultipleChoiceFields(Preprocessing): 152 | inputs:str='input' 153 | choices:Iterable=tuple() 154 | labels:str='labels' 155 | choices_list:str=None 156 | def __post_init__(self): 157 | for i, c in enumerate(self.choices): 158 | setattr(self,f'choice{i}',c) 159 | delattr(self,'choices') 160 | if not self.choices_list: 161 | delattr(self,'choices_list') 162 | 163 | def __call__(self,dataset, *args, **kwargs): 164 | dataset = super().__call__(dataset, *args, **kwargs) 165 | if self.choices_list: 166 | dataset = dataset.filter(lambda x: 10), 745 | dataset_name="openai/webgpt_comparisons") 746 | 747 | synthetic_instruct = MultipleChoice('prompt', choices=['chosen', 'rejected'], 748 | labels=constant(0), dataset_name="Dahoas/synthetic-instruct-gptj-pairwise") 749 | 750 | scruples = Classification("text",labels="binarized_label",dataset_name="metaeval/scruples") 751 | 752 | wouldyourather = MultipleChoice(constant('Most people would rather:'), choices=['option_a','option_b'], 753 | labels= lambda x: int(x['votes_a']