├── .github ├── scripts │ └── release.py └── workflows │ ├── python-publish.yml │ └── release.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── mtasks.md ├── pyproject.toml ├── setup.cfg ├── src └── tasksource │ ├── .ipynb_checkpoints │ ├── access-checkpoint.py │ ├── preprocess-checkpoint.py │ ├── recast-checkpoint.py │ └── tasks-checkpoint.py │ ├── __init__.py │ ├── access.py │ ├── metadata │ ├── __init__.py │ ├── bigbench_groups.py │ ├── blimp_groups.py │ ├── original.txt │ └── popularity.py │ ├── mtasks.py │ ├── preprocess.py │ ├── recast.py │ └── tasks.py └── tasks.md /.github/scripts/release.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import subprocess 4 | 5 | 6 | def get_last_version() -> str: 7 | """Return the version number of the last release.""" 8 | json_string = ( 9 | subprocess.run( 10 | ["gh", "release", "view", "--json", "tagName"], 11 | check=True, 12 | stdout=subprocess.PIPE, 13 | stderr=subprocess.PIPE, 14 | ) 15 | .stdout.decode("utf8") 16 | .strip() 17 | ) 18 | 19 | return json.loads(json_string)["tagName"] 20 | 21 | 22 | def bump_patch_number(version_number: str) -> str: 23 | """Return a copy of `version_number` with the patch number incremented.""" 24 | major, minor, patch = version_number.split(".") 25 | return f"{major}.{minor}.{int(patch) + 1}" 26 | 27 | 28 | def create_new_patch_release(): 29 | """Create a new patch release on GitHub.""" 30 | try: 31 | last_version_number = get_last_version() 32 | except subprocess.CalledProcessError as err: 33 | if err.stderr.decode("utf8").startswith("HTTP 404:"): 34 | # The project doesn't have any releases yet. 
35 | new_version_number = "0.0.1" 36 | else: 37 | raise 38 | else: 39 | new_version_number = bump_patch_number(last_version_number) 40 | 41 | subprocess.run( 42 | ["gh", "release", "create", "--generate-notes", new_version_number], 43 | check=True, 44 | ) 45 | 46 | 47 | if __name__ == "__main__": 48 | create_new_patch_release() 49 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI.org 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | pypi: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v3 11 | with: 12 | fetch-depth: 0 13 | - run: python3 -m pip install --upgrade build && python3 -m build 14 | - name: Publish package 15 | uses: pypa/gh-action-pypi-publish@release/v1 16 | with: 17 | password: ${{ secrets.PYPI_API_TOKEN }} 18 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Create a new patch release 2 | on: workflow_dispatch 3 | jobs: 4 | github: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout 8 | uses: actions/checkout@v3 9 | - name: Create new patch release 10 | run: .github/scripts/release.py 11 | env: 12 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 
25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.1.0 2 | message: "If you use this work, please cite it as below." 3 | authors: 4 | - family-names: "Sileo" 5 | given-names: "Damien" 6 | title: "tasksource: A Dataset Harmonization Framework for Streamlined NLP Multi-Task Learning and Evaluation" 7 | version: "1.0.0" 8 | date-released: 2023-01-01 9 | url: "https://arxiv.org/abs/2301.05948" 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## tasksource ![](https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/5fc0bcb41160c47d1d43856b/j06-U5e2Tifi2xOnTudqS.jpeg?w=20&h=20&f=face) 600+ curated datasets and preprocessings for instant and interchangeable use 2 | 3 | Huggingface Datasets is an excellent library, but it lacks standardization, and datasets often require preprocessing work to be used interchangeably. 4 | `tasksource` streamlines interchangeable datasets usage to scale evaluation or multi-task learning. 5 | 6 | Each dataset is standardized to a `MultipleChoice`, `Classification`, or `TokenClassification` template with canonical fields. We focus on discriminative tasks (= with negative examples or classes) for our annotations but also provide a `SequenceToSequence` template. 
All implemented preprocessings are in [tasks.py](https://github.com/sileod/tasksource/blob/main/src/tasksource/tasks.py) or [tasks.md](https://github.com/sileod/tasksource/blob/main/tasks.md). A preprocessing is a function that accepts a dataset and returns the standardized dataset. Preprocessing code is concise and human-readable. 7 | 8 | ### Installation and usage: 9 | `pip install tasksource` 10 | ```python 11 | from tasksource import list_tasks, load_task 12 | df = list_tasks(multilingual=False) # takes some time 13 | 14 | for id in df[df.task_type=="MultipleChoice"].id: 15 | dataset = load_task(id) # all yielded datasets can be used interchangeably 16 | ``` 17 | 18 | Browse the 500+ curated tasks in tasks.md (200+ MultipleChoice tasks, 200+ Classification tasks), and feel free to request a new task. Datasets are downloaded to `$HF_DATASETS_CACHE` (like any Hugging Face dataset), so ensure you have more than 100GB of space available. 19 | 20 | You can now also use: 21 | ```python 22 | load_dataset("tasksource/data", "glue/rte",max_rows=30_000) 23 | ``` 24 | 25 | ### Pretrained models: 26 | 27 | Text encoder pretrained on tasksource reached state-of-the-art results: [🤗/deberta-v3-base-tasksource-nli](https://hf.co/sileod/deberta-v3-base-tasksource-nli) 28 | 29 | Tasksource pretraining is notably helpful for RLHF reward modeling or any kind of classification, including zero-shot. You can also find a large and a multilingual version. 
30 | 31 | ### tasksource-instruct 32 | 33 | The repo also contains some recasting code to convert tasksource datasets to instructions, providing one of the richest instruction-tuning datasets: 34 | [🤗/tasksource-instruct-v0](https://hf.co/datasets/tasksource/tasksource-instruct-v0) 35 | 36 | 37 | ### tasksource-label-nli 38 | 39 | We also recast all classification tasks as natural language inference, to improve entailment-based zero-shot classification detection: 40 | [🤗/zero-shot-label-nli](https://huggingface.co/datasets/tasksource/zero-shot-label-nli) 41 | 42 | ### Write and use custom preprocessings 43 | 44 | ```python 45 | from tasksource import MultipleChoice 46 | 47 | codah = MultipleChoice('question_propmt',choices_list='candidate_answers', 48 | labels='correct_answer_idx', 49 | dataset_name='codah', config_name='codah') 50 | 51 | winogrande = MultipleChoice('sentence',['option1','option2'],'answer', 52 | dataset_name='winogrande',config_name='winogrande_xl', 53 | splits=['train','validation',None]) # test labels are not usable 54 | 55 | tasks = [winogrande.load(), codah.load()] # Aligned datasets (same columns) can be used interchangeably 56 | ``` 57 | 58 | ### Citation and contact 59 | 60 | For more details, refer to this [article](https://arxiv.org/abs/2301.05948): 61 | ```bib 62 | @inproceedings{sileo-2024-tasksource, 63 | title = "tasksource: A Large Collection of {NLP} tasks with a Structured Dataset Preprocessing Framework", 64 | author = "Sileo, Damien", 65 | booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)", 66 | month = may, 67 | year = "2024", 68 | address = "Torino, Italia", 69 | publisher = "ELRA and ICCL", 70 | url = "https://aclanthology.org/2024.lrec-main.1361", 71 | pages = "15655--15684", 72 | } 73 | ``` 74 | For help integrating tasksource into your experiments, please contact [damien.sileo@inria.fr](mailto:damien.sileo@inria.fr). 
75 | 76 | 77 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = tasksource 3 | description = Preprocessings to prepare datasets for a task 4 | long_description = file: README.md 5 | long_description_content_type = text/markdown 6 | url = https://github.com/sileod/tasksource/ 7 | classifiers = 8 | Programming Language :: Python :: 3 9 | License :: OSI Approved :: BSD License 10 | Intended Audience :: Developers 11 | 12 | [options] 13 | package_dir = 14 | = src 15 | packages = find: 16 | python_requires = >=3.6 17 | install_requires = 18 | dotwiz 19 | funcy 20 | datasets 21 | exrex 22 | magicattr 23 | pandas 24 | numpy 25 | scipy 26 | sorcery 27 | 28 | [options.packages.find] 29 | where = src 30 | -------------------------------------------------------------------------------- /src/tasksource/.ipynb_checkpoints/access-checkpoint.py: -------------------------------------------------------------------------------- 1 | from .preprocess import Preprocessing 2 | import re 3 | import pandas as pd 4 | from . import tasks, recast 5 | from .metadata import dataset_rank 6 | from datasets import load_dataset 7 | import funcy as fc 8 | import os 9 | import copy 10 | from sorcery import dict_of 11 | from functools import cache 12 | import random 13 | 14 | 15 | class lazy_mtasks: 16 | def __getattr__(self, name): 17 | from . import mtasks 18 | return getattr(mtasks, name) 19 | 20 | def __dir__(self): 21 | from . 
import mtasks 22 | return dir(mtasks) 23 | lmtasks=lazy_mtasks() 24 | 25 | def parse_var_name(s): 26 | config_name,task_name = None,None 27 | if '__' in s and '___' not in s: # dataset__task 28 | dataset_name, task_name = s.split('__') 29 | elif '__' not in s.replace('___','') and '___' in s: #dataset___config 30 | dataset_name, config_name = s.split('___') 31 | elif '___' in s and '__' in s.split('___')[1]: #dataset___config__task 32 | dataset_name, config_task=s.split('___') 33 | config_name,task_name = config_task.split('__') 34 | else: # dataset 35 | dataset_name = s 36 | return dataset_name,config_name,task_name 37 | 38 | def pretty_name(x): 39 | dn = x.dataset_name.split("/")[-1] 40 | cn = x.config_name if x.config_name else "" 41 | tn = x.task_name if x.task_name else "" 42 | return f"{dn}/{cn}/{tn}".replace('//','/').rstrip('/') 43 | 44 | @cache 45 | def list_tasks(tasks_path=f'{os.path.dirname(__file__)}/tasks.py',multilingual=False,instruct=False, excluded=[]): 46 | if multilingual: 47 | tasks_path=tasks_path.replace('/tasks.py','/mtasks.py') 48 | task_order = open(tasks_path).readlines() 49 | task_order = [x.split('=')[0].rstrip() for x in task_order if '=' in x] 50 | task_order = [x for x in task_order if x.isidentifier()] 51 | task_order = fc.flip(dict(enumerate(task_order))) 52 | 53 | l = [] 54 | _tasks = (lmtasks if multilingual else tasks) 55 | 56 | for key in dir(_tasks): 57 | if key not in task_order: 58 | continue 59 | value=getattr(_tasks, key) 60 | if isinstance(value,Preprocessing): 61 | dataset_name, config_name, task_name = parse_var_name(key) 62 | dataset_name = (value.dataset_name if value.dataset_name else dataset_name) 63 | config_name = (value.config_name if value.config_name else config_name) 64 | hasattr(value,key) 65 | l+=[{'dataset_name': dataset_name, 66 | 'config_name' : config_name, 67 | 'task_name': task_name, 68 | 'preprocessing_name': key, 69 | 'task_type': value.__class__.__name__,'mapping': value, 70 | 
'rank':task_order.get(key,None)}] 71 | df=pd.DataFrame(l).explode('config_name') 72 | df = df.sort_values('rank').reset_index(drop=True) 73 | df['id'] = df.apply(lambda x: pretty_name(x), axis=1) 74 | df.insert(0, 'id', df.pop('id')) 75 | del df['rank'] 76 | if instruct: 77 | df=df[df.id.map(lambda x: not any(a in x for a in recast.improper_labels))] 78 | df=df[df.id.map(lambda x: not any(x in a for a in excluded))] 79 | return df 80 | 81 | #task_df =list_tasks() 82 | #mtask_df =list_tasks(multilingual=True) 83 | 84 | def dict_to_query(d=dict(), **kwargs): 85 | d={**d,**kwargs} 86 | return '&'.join([f'`{k}`=="{v}"' for k,v in d.items()]) 87 | 88 | def load_preprocessing(tasks=tasks, **kwargs): 89 | _tasks_df = list_tasks(multilingual=tasks==lmtasks) 90 | y = _tasks_df.copy().query(dict_to_query(**kwargs)).iloc[0] 91 | preprocessing= copy.copy(getattr(tasks, y.preprocessing_name)) 92 | for c in 'dataset_name','config_name': 93 | if not isinstance(getattr(preprocessing,c), str): 94 | setattr(preprocessing,c,getattr(y,c)) 95 | return preprocessing 96 | 97 | def load_task(id=None, dataset_name=None,config_name=None,task_name=None,preprocessing_name=None, 98 | max_rows=None, max_rows_eval=None, multilingual=False, instruct=False, seed=0, **load_dataset_kwargs): 99 | query = dict_of(id, dataset_name, config_name, task_name,preprocessing_name) 100 | query = {k:v for k,v in query.items() if v} 101 | _tasks = (lmtasks if multilingual else tasks) 102 | preprocessing = load_preprocessing(_tasks, **query) 103 | 104 | if "trust_remote_code" not in load_dataset_kwargs: 105 | load_dataset_kwargs["trust_remote_code"] = True 106 | 107 | dataset = load_dataset(preprocessing.dataset_name, preprocessing.config_name, **load_dataset_kwargs) 108 | dataset= preprocessing(dataset,max_rows, max_rows_eval) 109 | dataset.task_type = preprocessing.__class__.__name__ 110 | if instruct: 111 | dataset=recast.recast_instruct(dataset) 112 | return dataset 
-------------------------------------------------------------------------------- /src/tasksource/.ipynb_checkpoints/preprocess-checkpoint.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from dotwiz import DotWiz 3 | from dataclasses import dataclass 4 | from typing import Union 5 | import itertools 6 | import funcy as fc 7 | import exrex 8 | import magicattr 9 | import numpy as np 10 | import copy 11 | import datasets 12 | import time 13 | 14 | MAX_MC_OPTIONS = 4 15 | 16 | def get_column_names(dataset): 17 | cn = dataset.column_names 18 | if type(cn)==dict: 19 | return set(fc.flatten(cn.values())) 20 | else: 21 | return set(cn) 22 | 23 | 24 | def sample_dataset(dataset,n=10000, n_eval=1000,seed=0): 25 | for k in dataset: 26 | n_k=(n if k=='train' else n_eval) 27 | if n_k and len(dataset[k])>n_k: 28 | dataset[k]=dataset[k].train_test_split(train_size=n_k,seed=seed)['train'] 29 | return dataset 30 | 31 | class Preprocessing(DotWiz): 32 | default_splits = ('train','validation','test') 33 | _instances = [] 34 | 35 | def __post_init__(self): 36 | Preprocessing._instances+=[self] 37 | 38 | @staticmethod 39 | def __map_to_target(x,fn=lambda x:None, target=None): 40 | x[target]=fn(x) 41 | return x 42 | 43 | def load(self): 44 | return self(datasets.load_dataset(self.dataset_name,self.config_name)) 45 | 46 | def __call__(self,dataset, max_rows=None, max_rows_eval=None,seed=0): 47 | dataset = self.pre_process(dataset) 48 | 49 | # manage splits 50 | for k,v in zip(self.default_splits, self.splits): 51 | if v and k!=v: 52 | dataset[k]=dataset[v] 53 | del dataset[v] 54 | if k in dataset and not v: # obfuscated label 55 | del dataset[k] 56 | dataset = fix_splits(dataset) 57 | 58 | for k in list(dataset.keys()): 59 | if k not in self.default_splits: 60 | del dataset[k] 61 | dataset = sample_dataset(dataset, max_rows, max_rows_eval,seed=seed) 62 | 63 | # field annotated with a string 64 | 
substitutions = {v:k for k,v in self.to_dict().items() 65 | if (k and k not in {'splits','dataset_name','config_name'} 66 | and type(v)==str and k!=v)} 67 | 68 | dataset=dataset.remove_columns([c for c in substitutions.values() if c in dataset['train'].features and c not in substitutions]) 69 | dataset=dataset.rename_columns(substitutions) 70 | 71 | # field annotated with a function 72 | for k in self.to_dict().keys(): 73 | v=getattr(self, k) 74 | if callable(v) and k not in {"post_process","pre_process","load"}: 75 | dataset=dataset.map(self.__map_to_target, 76 | fn_kwargs={'fn':v,'target':k}) 77 | 78 | dataset=dataset.remove_columns( 79 | get_column_names(dataset)-set(self.to_dict().keys())) 80 | dataset = fix_labels(dataset) 81 | dataset = fix_splits(dataset) # again: label mapping changed 82 | dataset = self.post_process(dataset) 83 | return dataset 84 | 85 | 86 | @dataclass 87 | class cat(Preprocessing): 88 | fields:Union[str,list]=None 89 | separator:str=' ' 90 | 91 | def __call__(self, example=None): 92 | y=[np.char.array(example[f]) + sep 93 | for f,sep in zip(self.fields[::-1],itertools.repeat(self.separator))] 94 | y=list(sum(*y)) 95 | if len(y)==1: 96 | y=y[0] 97 | return y 98 | 99 | 100 | def pretty(f): 101 | class pretty_f(DotWiz): 102 | def __init__(self,*args): 103 | self.__f_arg = f(*args) 104 | for a in args: 105 | setattr(self,'value',a) 106 | 107 | def __call__(self, *args,**kwargs): 108 | return self.__f_arg(*args,**kwargs) 109 | 110 | def __repr__(self): 111 | return f"{self.__f_arg.__qualname__ .split('.')[0]}({self.value})" 112 | return pretty_f 113 | 114 | class dotgetter: 115 | def __init__(self, path=''): 116 | self.path=path 117 | 118 | def __bool__(self): 119 | return bool(self.path) 120 | 121 | def __getattr__(self, k): 122 | return self.__class__(f'{self.path}.{k}'.lstrip('.')) 123 | 124 | def __getitem__(self, i): 125 | return self.__class__(f'{self.path}[{i}]') 126 | 127 | def __call__(self, example=None): 128 | return 
magicattr.get(DotWiz(example), self.path) 129 | 130 | def __hash__(self): 131 | return hash(self.path) 132 | 133 | 134 | @dataclass 135 | class ClassificationFields(Preprocessing): 136 | sentence1:str='sentence1' 137 | sentence2:str='sentence2' 138 | labels:str='labels' 139 | 140 | @dataclass 141 | class Seq2SeqLMFields(Preprocessing): 142 | prompt:str='prompt' 143 | output:str='output' 144 | 145 | @dataclass 146 | class TokenClassificationFields(Preprocessing): 147 | tokens:str='tokens' 148 | labels:str='labels' 149 | 150 | @dataclass 151 | class MultipleChoiceFields(Preprocessing): 152 | inputs:str='input' 153 | choices:Iterable=tuple() 154 | labels:str='labels' 155 | choices_list:str=None 156 | def __post_init__(self): 157 | for i, c in enumerate(self.choices): 158 | setattr(self,f'choice{i}',c) 159 | delattr(self,'choices') 160 | if not self.choices_list: 161 | delattr(self,'choices_list') 162 | 163 | def __call__(self,dataset, *args, **kwargs): 164 | dataset = super().__call__(dataset, *args, **kwargs) 165 | if self.choices_list: 166 | dataset = dataset.filter(lambda x: 10.1].UID) 9 | del dfh, dfm, df 10 | 11 | blimp_groups = { 12 | "syntax": [ 13 | "adjunct_island", 14 | "animate_subject_passive", 15 | "animate_subject_trans", 16 | "causative", 17 | "complex_NP_island", 18 | "coordinate_structure_constraint_complex_left_branch", 19 | "coordinate_structure_constraint_object_extraction", 20 | "drop_argument", 21 | "ellipsis_n_bar_1", 22 | "ellipsis_n_bar_2", 23 | "inchoative", 24 | "intransitive", 25 | "left_branch_island_echo_question", 26 | "left_branch_island_simple_question", 27 | "passive_1", 28 | "passive_2", 29 | "sentential_subject_island", 30 | "transitive", 31 | "wh_island", 32 | "wh_questions_object_gap", 33 | "wh_questions_subject_gap", 34 | "wh_questions_subject_gap_long_distance", 35 | "wh_vs_that_no_gap", 36 | "wh_vs_that_no_gap_long_distance", 37 | "wh_vs_that_with_gap", 38 | "wh_vs_that_with_gap_long_distance" 39 | ], 40 | "morphology": [ 41 | 
"anaphor_gender_agreement", 42 | "anaphor_number_agreement", 43 | "determiner_noun_agreement_1", 44 | "determiner_noun_agreement_2", 45 | "determiner_noun_agreement_irregular_1", 46 | "determiner_noun_agreement_irregular_2", 47 | "determiner_noun_agreement_with_adj_2", 48 | "determiner_noun_agreement_with_adj_irregular_1", 49 | "determiner_noun_agreement_with_adj_irregular_2", 50 | "determiner_noun_agreement_with_adjective_1", 51 | "distractor_agreement_relational_noun", 52 | "distractor_agreement_relative_clause", 53 | "irregular_past_participle_adjectives", 54 | "irregular_past_participle_verbs", 55 | "irregular_plural_subject_verb_agreement_1", 56 | "irregular_plural_subject_verb_agreement_2", 57 | "regular_plural_subject_verb_agreement_1", 58 | "regular_plural_subject_verb_agreement_2" 59 | ], 60 | "syntax_semantics": [ 61 | "existential_there_object_raising", 62 | "existential_there_subject_raising", 63 | "expletive_it_object_raising", 64 | "only_npi_scope", 65 | "principle_A_c_command", 66 | "principle_A_case_1", 67 | "principle_A_domain_1", 68 | "principle_A_domain_2", 69 | "principle_A_domain_3", 70 | "principle_A_reconstruction", 71 | "sentential_negation_npi_scope", 72 | "tough_vs_raising_1", 73 | "tough_vs_raising_2" 74 | ], 75 | "semantics": [ 76 | "existential_there_quantifiers_1", 77 | "existential_there_quantifiers_2", 78 | "matrix_question_npi_licensor_present", 79 | "npi_present_1", 80 | "npi_present_2", 81 | "only_npi_licensor_present", 82 | "sentential_negation_npi_licensor_present", 83 | "superlative_quantifiers_1", 84 | "superlative_quantifiers_2" 85 | ], 86 | "syntax/semantics": [ 87 | "principle_A_case_2" 88 | ] 89 | } 90 | -------------------------------------------------------------------------------- /src/tasksource/metadata/original.txt: -------------------------------------------------------------------------------- 1 | WANLI 2 | recast/recast_verbnet 3 | recast/recast_verbcorner 4 | recast/recast_ner 5 | recast/recast_sentiment 6 | 
recast/recast_puns 7 | recast/recast_factuality 8 | recast/recast_megaveridicality 9 | probability_words_nli/reasoning_1hop 10 | probability_words_nli/usnli 11 | probability_words_nli/reasoning_2hop 12 | nan-nli/joey234--nan-nli 13 | nli_fever 14 | breaking_nli 15 | conj_nli 16 | fracas 17 | dialogue_nli 18 | mpe 19 | dnc 20 | recast_white/fnplus 21 | recast_white/sprl 22 | recast_white/dpr 23 | robust_nli/IS_CS 24 | robust_nli/LI_LI 25 | robust_nli/ST_WO 26 | robust_nli/PI_SP 27 | robust_nli/PI_CD 28 | robust_nli/ST_SE 29 | robust_nli/ST_NE 30 | robust_nli/ST_LM 31 | robust_nli_is_sd 32 | robust_nli_li_ts 33 | gen_debiased_nli/snli_seq_z 34 | gen_debiased_nli/snli_z_aug 35 | gen_debiased_nli/snli_par_z 36 | gen_debiased_nli/mnli_par_z 37 | gen_debiased_nli/mnli_z_aug 38 | gen_debiased_nli/mnli_seq_z 39 | add_one_rte 40 | hlgd 41 | conll2003/pos_tags 42 | conll2003/chunk_tags 43 | conll2003/ner_tags 44 | hh-rlhf 45 | model-written-evals 46 | fig-qa 47 | social_i_qa 48 | balanced-copa 49 | e-CARE 50 | insincere-questions 51 | TuringBench 52 | vitaminc/tals--vitaminc 53 | rumoureval_2019/RumourEval2019 54 | tweet_eval/irony 55 | tweet_eval/stance_abortion 56 | tweet_eval/hate 57 | tweet_eval/stance_atheism 58 | tweet_eval/stance_climate 59 | tweet_eval/emoji 60 | tweet_eval/offensive 61 | tweet_eval/sentiment 62 | tweet_eval/emotion 63 | tweet_eval/stance_feminist 64 | tweet_eval/stance_hillary 65 | discovery/discovery 66 | pragmeval/verifiability 67 | pragmeval/mrda 68 | pragmeval/switchboard 69 | pragmeval/emergent 70 | pragmeval/gum 71 | pragmeval/sarcasm 72 | pragmeval/stac 73 | pragmeval/pdtb 74 | silicone/dyda_e 75 | silicone/oasis 76 | silicone/meld_s 77 | silicone/meld_e 78 | silicone/maptask 79 | silicone/dyda_da 80 | silicone/sem 81 | silicone/iemocap 82 | lex_glue/scotus 83 | lex_glue/ledgar 84 | language-identification 85 | rotten_tomatoes 86 | hate_speech18 87 | sms_spam 88 | snips_built_in_intents 89 | hate_speech_offensive 90 | hyperpartisan_news 91 | 
sciie 92 | citation_intent 93 | scicite 94 | lexical_relation_classification/ROOT09 95 | lexical_relation_classification/CogALexV 96 | lexical_relation_classification/K&H+N 97 | lexical_relation_classification/BLESS 98 | lexical_relation_classification/EVALution 99 | crowdflower/political-media-bias 100 | crowdflower/tweet_global_warming 101 | crowdflower/text_emotion 102 | crowdflower/political-media-message 103 | crowdflower/political-media-audience 104 | crowdflower/economic-news 105 | crowdflower/corporate-messaging 106 | crowdflower/airline-sentiment 107 | crowdflower/sentiment_nuclear_power 108 | ethics/commonsense 109 | ethics/deontology 110 | ethics/justice 111 | ethics/virtue 112 | tweets_hate_speech_detection 113 | wnut_17/wnut_17 114 | ncbi_disease/ncbi_disease 115 | acronym_identification 116 | jnlpba/jnlpba 117 | ontonotes_english/SpeedOfMagic--ontonotes_english 118 | blog_authorship_corpus/gender 119 | blog_authorship_corpus/horoscope 120 | blog_authorship_corpus/job 121 | open_question_type 122 | mc_taco 123 | discosense 124 | EffectiveFeedbackStudentWriting 125 | phrase_similarity 126 | scientific-exaggeration-detection 127 | fever-evidence-related/mwong--fever-related 128 | dynasent/dynabench.dynasent.r1.all/r1 129 | dynasent/dynabench.dynasent.r2.all/r2 130 | sem_eval_2010_task_8 131 | medmcqa 132 | logiqa 133 | cycic_classification 134 | cycic_multiplechoice 135 | commonsense_qa_2.0 136 | lingnli 137 | monotonicity-entailment 138 | arct 139 | scinli 140 | naturallogic 141 | onestop_qa 142 | moral_stories/full 143 | prost 144 | dynahate 145 | syntactic-augmentation-nli 146 | autotnli 147 | CONDAQA 148 | webgpt_comparisons 149 | synthetic-instruct-gptj-pairwise 150 | scruples 151 | wouldyourather 152 | attempto-nli 153 | defeasible-nli/snli 154 | defeasible-nli/atomic 155 | help-nli 156 | nli-veridicality-transitivity 157 | natural-language-satisfiability 158 | lonli 159 | dadc-limit-nli 160 | FLUTE 161 | summarize_from_feedback/comparisons 162 | 
folio 163 | tomi-nli 164 | avicenna 165 | SHP 166 | MedQA-USMLE-4-options-hf 167 | wikimedqa/medwiki 168 | cicero 169 | mutual 170 | NeQA 171 | quote-repetition 172 | redefine-math 173 | puzzte 174 | implicatures 175 | race-c 176 | spartqa-yn 177 | spartqa-mchoice 178 | temporal-nli 179 | riddle_sense 180 | clcd-english 181 | twentyquestions 182 | reclor 183 | counterfactually-augmented-imdb 184 | counterfactually-augmented-snli 185 | cnli 186 | boolq-natural-perturbations 187 | equate 188 | ScienceQA_text_only 189 | ekar_english 190 | implicit-hate-stg1 191 | logiqa-2.0-nli 192 | PARARULE-Plus 193 | mindgames 194 | universal_dependencies/en_partut/deprel 195 | universal_dependencies/en_lines/deprel 196 | universal_dependencies/en_gum/deprel 197 | universal_dependencies/en_ewt/deprel 198 | ambient 199 | path-naturalness-prediction 200 | cloth 201 | dgen 202 | oasst1_pairwise_rlhf_reward 203 | I2D2 204 | args_me 205 | Touche23-ValueEval 206 | starcon 207 | banking77 208 | ruletaker 209 | lsat_qa/all 210 | ConTRoL-nli 211 | tracie 212 | sherliic 213 | sen-making/1 214 | sen-making/2 215 | mbib-base/cognitive-bias 216 | mbib-base/fake-news 217 | mbib-base/gender-bias 218 | mbib-base/hate-speech 219 | mbib-base/linguistic-bias 220 | mbib-base/political-bias 221 | mbib-base/racial-bias 222 | mbib-base/text-level-bias 223 | robustLR 224 | v1/gen_train234_test2to10 225 | logical-fallacy 226 | parade 227 | cladder 228 | subjectivity 229 | MOH 230 | VUAC 231 | TroFi 232 | sharc_modified/mod 233 | conceptrules_v2 234 | disrpt/eng.dep.scidtb 235 | conll2000 236 | few-nerd/supervised 237 | com2sense 238 | scone 239 | winodict 240 | fool-me-twice 241 | monli 242 | corr2cause 243 | apt 244 | twitter-financial-news-sentiment 245 | SpaceNLI 246 | propsegment/nli 247 | HatemojiBuild 248 | regset 249 | esci 250 | dnd_style_intents 251 | -------------------------------------------------------------------------------- /src/tasksource/metadata/popularity.py: 
-------------------------------------------------------------------------------- 1 | dataset_rank = {'glue': 0, 2 | 'super_glue': 12, 3 | 'tweet_eval': 23, 4 | 'blimp': 34, 5 | 'imdb': 101, 6 | 'wikitext': 102, 7 | 'squad': 106, 8 | 'trec': 107, 9 | 'openwebtext': 108, 10 | 'rotten_tomatoes': 109, 11 | 'anli': 110, 12 | 'adversarial_qa': 111, 13 | 'ai2_arc': 115, 14 | 'xsum': 117, 15 | 'amazon_reviews_multi': 118, 16 | 'ag_news': 125, 17 | 'yelp_review_full': 126, 18 | 'wino_bias': 127, 19 | 'piqa': 131, 20 | 'duorc': 132, 21 | 'quail': 134, 22 | 'trivia_qa': 135, 23 | 'cnn_dailymail': 143, 24 | 'common_gen': 146, 25 | 'sst': 147, 26 | 'conll2003': 150, 27 | 'financial_phrasebank': 151, 28 | 'babi_qa': 155, 29 | 'poem_sentiment': 163, 30 | 'dream': 164, 31 | 'paws': 165, 32 | 'emotion': 168, 33 | 'kilt_tasks': 169, 34 | 'sciq': 180, 35 | 'cos_e': 181, 36 | 'dbpedia_14': 183, 37 | 'newsgroup': 184, 38 | 'cosmos_qa': 244, 39 | 'squad_v2': 245, 40 | 'samsum': 246, 41 | 'amazon_polarity': 247, 42 | 'multi_news': 248, 43 | 'wiki_hop': 249, 44 | 'quartz': 251, 45 | 'qasc': 252, 46 | 'wiki_qa': 253, 47 | 'openbookqa': 254, 48 | 'ropes': 256, 49 | 'quoref': 257, 50 | 'snli': 258, 51 | 'app_reviews': 259, 52 | 'gigaword': 260, 53 | 'wiki_bio': 261, 54 | 'amazon_us_reviews': 262, 55 | 'scan': 308, 56 | 'race': 320, 57 | 'swag': 323, 58 | 'codah': 325, 59 | 'ccdv/arxiv-summarization': 331, 60 | 'subjqa': 333, 61 | 'universal_morphologies': 339, 62 | 'hans': 447, 63 | 'sst2': 448, 64 | 'guardian_authorship': 449, 65 | 'math_qa': 465, 66 | 'librispeech_asr': 466, 67 | 'hendrycks_test': 469, 68 | 'openai_humaneval': 526, 69 | 'ptb_text_only': 527, 70 | 'pubmed_qa': 528, 71 | 'head_qa': 531, 72 | 'ought/raft': 533, 73 | 'ade_corpus_v2': 544, 74 | 'cbt': 547, 75 | 'bookcorpus': 552, 76 | 'squadshifts': 553, 77 | 'story_cloze': 557, 78 | 'multi_nli': 559, 79 | 'qanta': 560, 80 | 'hate_speech18': 564, 81 | 'gem': 565, 82 | 'lex_glue': 599, 83 | 'deepmind/code_contests': 606, 84 | 
'imagenet-1k': 607, 85 | 'blended_skill_talk': 608, 86 | 'sms_spam': 609, 87 | 'asset': 610, 88 | 'fever': 612, 89 | 'commonsense_qa': 615, 90 | 'scientific_papers': 616, 91 | 'evidence_infer_treatment': 618, 92 | 'hotpot_qa': 620, 93 | 'superb': 622, 94 | 'sick': 628, 95 | 'humicroedit': 629, 96 | 'snips_built_in_intents': 631, 97 | 'winograd_wsc': 632, 98 | 'bigbench': 634, 99 | 'multi_woz_v22': 801, 100 | 'lambada': 803, 101 | 'banking77': 804, 102 | 'hate_speech_offensive': 805, 103 | 'yahoo_answers_topics': 806, 104 | 'ccdv/cnn_dailymail': 807, 105 | 'hyperpartisan_news_detection': 810, 106 | 'gsm8k': 812, 107 | 'wikisql': 814, 108 | 'the_pile': 815, 109 | 'health_fact': 825, 110 | 'mdd': 826, 111 | 'web_questions': 830, 112 | 'ethos': 831, 113 | 'wnut_17': 833, 114 | 'medical_questions_pairs': 834, 115 | 'scitldr': 835, 116 | 'drop': 838, 117 | 'squad_adversarial': 839, 118 | 'e2e_nlg_cleaned': 841, 119 | 'onestop_english': 842, 120 | 'pragmeval': 843, 121 | 'relbert/analogy_questions': 863, 122 | 'nq_open': 869, 123 | 'daily_dialog': 870, 124 | 'mc_taco': 871, 125 | 'crows_pairs': 872, 126 | 'go_emotions': 873, 127 | 'ncbi_disease': 875, 128 | 'boolq': 876, 129 | 'movie_rationales': 877, 130 | 'climate_fever': 878, 131 | 'discovery': 879, 132 | 'lama': 881, 133 | 'ecthr_cases': 885, 134 | 'jfleg': 887, 135 | 'selqa': 888, 136 | 'acronym_identification': 892, 137 | 'scicite': 893, 138 | 'tab_fact': 894, 139 | 'wiki_asp': 896, 140 | 'enriched_web_nlg': 916, 141 | 'svhn': 918, 142 | 'docred': 920, 143 | 'conllpp': 921, 144 | 'liar': 922, 145 | 'multi_x_science_sum': 923, 146 | 'discofuse': 924, 147 | 'competition_math': 926, 148 | 'biosses': 927, 149 | 'jnlpba': 928, 150 | 'web_nlg': 929, 151 | 'qa_srl': 937, 152 | 'neural_code_search': 938, 153 | 'conv_ai_2': 940, 154 | 'craigslist_bargains': 941, 155 | 'qed': 942, 156 | 'conv_ai_3': 943, 157 | 'conv_ai': 944, 158 | 'turk': 945, 159 | 'covid_qa_castorini': 946, 160 | 'sem_eval_2014_task_1': 947, 161 | 'mwsc': 
948, 162 | 'gutenberg_time': 949, 163 | 'billsum': 950, 164 | 'riddle_sense': 951, 165 | 'species_800': 952, 166 | 'hlgd': 953, 167 | 'definite_pronoun_resolution': 954, 168 | 'tmu_gfm_dataset': 955, 169 | 'relbert/semeval2012_relational_similarity_v4': 956, 170 | 'clinc_oos': 957, 171 | 'imppres': 960, 172 | 'mrqa': 976, 173 | 'cc_news': 977, 174 | 'lmqg/qag_tweetqa': 978, 175 | 'aeslc': 979, 176 | 'big_patent': 980, 177 | 'eli5': 990, 178 | 'scene_parse_150': 991, 179 | 'circa': 993, 180 | 'aqua_rat': 994, 181 | 'nlu_evaluation_data': 996, 182 | 'newspop': 997, 183 | 'relbert/lexical_relation_classification': 998, 184 | 'yahoo_answers_qa': 1003, 185 | 'emo': 1004, 186 | 'silicone': 1005, 187 | 'cord19': 1015, 188 | 'tweet_qa': 1018, 189 | 'meta_woz': 1019, 190 | 'md_gender_bias': 1021, 191 | 'art': 1031, 192 | 'google_wellformed_query': 1032, 193 | 'ambig_qa': 1033, 194 | 'taskmaster2': 1035, 195 | 'quac': 1042, 196 | 'freebase_qa': 1043, 197 | 'quora': 1044, 198 | 'numer_sense': 1045, 199 | 'narrativeqa': 1046, 200 | 'ccdv/pubmed-summarization': 1047, 201 | 'qa_zre': 1049, 202 | 'limit': 1050, 203 | 'tweets_hate_speech_detection': 1051, 204 | 'mocha': 1052, 205 | 'hatexplain': 1053, 206 | 'bing_coronavirus_query_set': 1054, 207 | 'great_code': 1055, 208 | 'medal': 1056, 209 | 'sent_comp': 1057, 210 | 'kelm': 1058, 211 | 'natural_questions': 1059, 212 | 'wiki_split': 1061, 213 | 'zest': 1062, 214 | 'cfq': 1063, 215 | 'multi_re_qa': 1071, 216 | 'stereoset': 1080, 217 | 'coqa': 1082, 218 | 'cuad': 1083, 219 | 'break_data': 1084, 220 | 'mbpp': 1089, 221 | 'knkarthick/dialogsum': 1091, 222 | 'wiki_auto': 1092, 223 | 'pile-of-law/pile-of-law': 1097, 224 | 'pg19': 1132, 225 | 'DFKI-SLT/few-nerd': 1133, 226 | 'wikicorpus': 1136, 227 | 'e2e_nlg': 1142, 228 | 'anton-l/superb': 1143, 229 | 'ghomasHudson/muld': 1144, 230 | 'Exr0n/wiki-entity-similarity': 1150, 231 | 'BeIR/nfcorpus': 1156, 232 | 'ccdv/govreport-summarization': 1158, 233 | 'woz_dialogue': 1159, 234 | 
'reddit': 1164, 235 | 'EMBO/sd-nlp': 1165, 236 | 'empathetic_dialogues': 1170, 237 | 'BeIR/fiqa': 1171, 238 | 'generics_kb': 1173, 239 | 'swda': 1177, 240 | 'wikitablequestions': 1178, 241 | 'pubmed': 1183, 242 | 'chr_en': 1184, 243 | 'sharc': 1185, 244 | 'sharc_modified': 1186, 245 | 'BeIR/scifact': 1190, 246 | 'nell': 1192, 247 | 'patriziobellan/PET': 1196, 248 | 'EMBO/biolang': 1198, 249 | 'dynabench/qa': 1202, 250 | 'reddit_tifu': 1206, 251 | 'BeIR/scidocs': 1208, 252 | 'pec': 1210, 253 | 'tner/tweetner7': 1213, 254 | 'BeIR/arguana': 1214, 255 | 'multidoc2dial': 1216, 256 | 'taskmaster1': 1219, 257 | 'spider': 1221, 258 | 'adv_glue': 1222, 259 | 'allenai/mslr2022': 1228, 260 | 'conceptnet5': 1230, 261 | 'tyqiangz/multilingual-sentiments': 1233, 262 | 'newsqa': 1246, 263 | 'metashift': 1249, 264 | 'so_stacksample': 1250, 265 | 'doc2dial': 1253, 266 | 'search_qa': 1256, 267 | 'yhavinga/mc4_nl_cleaned': 1258, 268 | 'hope_edi': 1270, 269 | 'proto_qa': 1273, 270 | 'tuple_ie': 1276, 271 | 'simple_questions_v2': 1279, 272 | 'nlpaueb/finer-139': 1282, 273 | 'bookcorpusopen': 1283, 274 | 'tner/ontonotes5': 1284, 275 | 'crd3': 1285, 276 | 'ucberkeley-dlab/measuring-hate-speech': 1286, 277 | 'gap': 1287, 278 | 'recipe_nlg': 1288, 279 | 'schema_guided_dstc8': 1289, 280 | 'BeIR/beir': 1291, 281 | 'sagnikrayc/mctest': 1294, 282 | 'eurlex': 1296, 283 | 'corypaik/coda': 1297, 284 | 'bc2gm_corpus': 1298, 285 | 'ascent_kb': 1299, 286 | 'curiosity_dialogs': 1301, 287 | 'covid_qa_deepset': 1302, 288 | 'air_dialogue': 1303, 289 | 'taskmaster3': 1305, 290 | 'xsum_factuality': 1306, 291 | 'medical_dialog': 1308, 292 | 'BeIR/trec-covid': 1312, 293 | 'lhoestq/test': 1314, 294 | 'newsroom': 1315, 295 | 'tne': 1316, 296 | 'covid_qa_ucsd': 1317, 297 | 'fhamborg/news_sentiment_newsmtsc': 1319, 298 | 'prachathai67k': 1321, 299 | 'cardiffnlp/tweet_topic_multi': 1322, 300 | 'datacommons_factcheck': 1323, 301 | 'deal_or_no_dialog': 1325, 302 | 'ubuntu_dialogs_corpus': 1327, 303 | 
'eu_regulatory_ir': 1329, 304 | 'scifact': 1331, 305 | 'wi_locness': 1333, 306 | 'relbert/relation_mapping': 1335, 307 | 'coastalcph/fairlex': 1336, 308 | 'asnq': 1340, 309 | 'peer_read': 1341, 310 | 'metaeval/linguisticprobing': 1343, 311 | 'jigsaw_unintended_bias': 1353, 312 | 'totto': 1354, 313 | 'irc_disentangle': 1355, 314 | 'med_hop': 1357, 315 | 'numeric_fused_head': 1359, 316 | 'ollie': 1361, 317 | 'per_sent': 1363, 318 | 'SocialGrep/ten-million-reddit-answers': 1364, 319 | 'lmqg/qg_squad': 1366, 320 | 's2orc': 1367, 321 | 'Hellisotherpeople/DebateSum': 1368, 322 | 'SocialGrep/reddit-crypto-aug-2021': 1369, 323 | 'jigsaw_toxicity_pred': 1371, 324 | 'GroNLP/ik-nlp-22_slp': 1372, 325 | 'SocialGrep/reddit-nonewnormal-complete': 1374, 326 | 'SocialGrep/reddit-wallstreetbets-aug-2021': 1376, 327 | 'SocialGrep/the-reddit-covid-dataset': 1378, 328 | 'SocialGrep/top-american-universities-on-reddit': 1380, 329 | 'BeIR/beir-corpus': 1382, 330 | 'SocialGrep/one-year-of-r-india': 1384, 331 | 'BritishLibraryLabs/EThOS-PhD-metadata': 1386, 332 | 'librispeech_lm': 1388, 333 | 'few_rel': 1389, 334 | 'arxiv_dataset': 1390, 335 | 'lc_quad': 1391, 336 | 'diplomacy_detection': 1392, 337 | 'lmqg/qa_squadshifts_pseudo': 1393, 338 | 'grail_qa': 1461, 339 | 'tner/wnut2017': 1462, 340 | 'demo-org/auditor_review': 1463, 341 | 'allenai/real-toxicity-prompts': 1464, 342 | 'BeIR/nfcorpus-qrels': 1465, 343 | 'onestop_qa': 1466, 344 | 'demelin/moral_stories': 1467, 345 | 'atomic': 1493, 346 | 'crawl_domain': 1494, 347 | 'BeIR/quora': 1495, 348 | 'Abirate/english_quotes': 1497, 349 | 'narrativeqa_manual': 1498, 350 | 'BeIR/fiqa-qrels': 1499, 351 | 'social_bias_frames': 1500, 352 | 'pkavumba/balanced-copa': 1501, 353 | 'eraser_multi_rc': 1502, 354 | 'sled-umich/TRIP': 1503, 355 | 'opinosis': 1504, 356 | 'PiC/phrase_sense_disambiguation': 1505, 357 | 'enwik8': 1506, 358 | 'sem_eval_2020_task_11': 1508, 359 | 'gooaq': 1509, 360 | 'linnaeus': 1510, 361 | 'hover': 1511, 362 | 
'GonzaloA/fake_news': 1512, 363 | 'consumer-finance-complaints': 1513, 364 | 'ohsumed': 1514, 365 | 'casino': 1515, 366 | 'gfissore/arxiv-abstracts-2021': 1516, 367 | 'conv_questions': 1517, 368 | 'hate_offensive': 1518, 369 | 'sofc_materials_articles': 1519, 370 | 'wanyu/IteraTeR_human_sent': 1520, 371 | 'dialog_re': 1521, 372 | 'fake_news_english': 1522, 373 | 'dart': 1523, 374 | 'blog_authorship_corpus': 1524, 375 | 'msr_zhen_translation_parity': 1525, 376 | 'cryptonite': 1526, 377 | 'disfl_qa': 1527, 378 | 'olm/olm-CC-MAIN-2022-21-sampling-ratio-0.14775510204': 1528, 379 | 'olm/olm-CC-MAIN-2022-33-sampling-ratio-0.20': 1529, 380 | 'coarse_discourse': 1530, 381 | 'eth_py150_open': 1531, 382 | 'event2Mind': 1532, 383 | 'Paul/hatecheck': 1533, 384 | 'eli5_category': 1534, 385 | 'hippocorpus': 1535, 386 | 'the_pile_books3': 1536, 387 | 'coached_conv_pref': 1537, 388 | 'has_part': 1538, 389 | 'times_of_india_news_headlines': 1539, 390 | 'medmcqa': 1540, 391 | 'Babelscape/rebel-dataset': 1541, 392 | 'glucose': 1542, 393 | 'msr_text_compression': 1543, 394 | 'msr_genomics_kbcomp': 1544, 395 | 'SpeedOfMagic/ontonotes_english': 1545, 396 | 'msr_sqa': 1546, 397 | 'wiki_movies': 1547, 398 | 'hybrid_qa': 1548, 399 | 'metooma': 1549, 400 | 'multi_nli_mismatch': 1550, 401 | 'text2log': 1551, 402 | 'the_pile_stack_exchange': 1552, 403 | 're_dial': 1553, 404 | 'inquisitive_qg': 1554, 405 | 'SocialGrep/one-million-reddit-jokes': 1555, 406 | 'time_dial': 1556, 407 | 'BeIR/scifact-qrels': 1557, 408 | 'sede': 1558, 409 | 'mutual_friends': 1559, 410 | 'pass': 1560, 411 | 'allenai/multi_lexsum': 1561, 412 | 'youtube_caption_corrections': 1562, 413 | 'NbAiLab/norec_agg': 1563, 414 | 'DanL/scientific-challenges-and-directions-dataset': 1564, 415 | 'SocialGrep/one-million-reddit-questions': 1565, 416 | 'Motahar/github-issues': 1566, 417 | 'SocialGrep/the-2022-trucker-strike-on-reddit': 1567, 418 | 'allenai/qasper': 1568, 419 | 'CyranoB/polarity': 1569, 420 | 
'SocialGrep/one-million-reddit-confessions': 1570, 421 | 'debatelab/deepa2': 1571, 422 | 'bhavnicksm/sentihood': 1572, 423 | 'debatelab/aaac': 1573, 424 | 'jgammack/SAE-door-abstracts': 1574, 425 | 'erwanlc/cocktails_recipe': 1575, 426 | 'erwanlc/cocktails_recipe_no_brand': 1576, 427 | 'BeIR/arguana-qrels': 1577, 428 | 'tner/fin': 1578, 429 | 'BeIR/scidocs-qrels': 1579, 430 | 'tner/bc5cdr': 1580, 431 | 'olm/olm-CC-MAIN-2022-27-sampling-ratio-0.16142697881': 1581, 432 | 'BeIR/fever': 1582, 433 | 'cardiffnlp/tweet_topic_single': 1584, 434 | 'speechcolab/gigaspeech': 1585, 435 | 'BeIR/webis-touche2020': 1586, 436 | 'aquamuse': 1588, 437 | 'olm/olm-CC-MAIN-2022-40-sampling-ratio-0.15894621295': 1590, 438 | 'tner/btc': 1591, 439 | 'truthful_qa': 1592, 440 | 'McGill-NLP/FaithDial': 1594, 441 | 'ekinakyurek/ftrace': 1595, 442 | 'tomasg25/scientific_lay_summarisation': 1597, 443 | 'tner/mit_restaurant': 1599, 444 | 'bigscience-biomedical/bioasq_task_b': 1600, 445 | 'strombergnlp/broad_twitter_corpus': 1619, 446 | 'tner/bionlp2004': 1620, 447 | 'metaeval/recast': 1621, 448 | 'the_pile_openwebtext2': 1629, 449 | 'taln-ls2n/inspec': 1630, 450 | 'lmqg/qa_squadshifts': 1631, 451 | 'BeIR/hotpotqa': 1636, 452 | 'jpwahle/machine-paraphrase-dataset': 1638, 453 | 'tner/mit_movie_trivia': 1639, 454 | 'tner/conll2003': 1640, 455 | 'OxAISH-AL-LLM/wiki_toxic': 1641, 456 | 'ccdv/WCEP-10': 1642, 457 | 'BeIR/trec-covid-qrels': 1646, 458 | 'g8a9/europarl_en-it': 1647, 459 | 'carblacac/twitter-sentiment-analysis': 1648, 460 | 'usc-isi/WikiConvert': 1649, 461 | 'visual_genome': 1650, 462 | 'florianbussmann/FUNSD-vu2020revising': 1660, 463 | 'Felix-ML/quoteli3': 1661, 464 | 'allenai/scico': 1662, 465 | 'drAbreu/bc4chemd_ner': 1663, 466 | 'tner/tweebank_ner': 1664, 467 | 'alisawuffles/WANLI': 1665, 468 | 'Team-PIXEL/rendered-bookcorpus': 1666, 469 | 'Team-PIXEL/rendered-wikipedia-english': 1667, 470 | 'wanyu/IteraTeR_full_sent': 1668, 471 | 'EMBO/BLURB': 1669, 472 | 'metaeval/crowdflower': 
1676, 473 | 'AlexaAI/bold': 1685, 474 | 'metaeval/ethics': 1686, 475 | 'sileod/movie_recommendation': 1691, 476 | 'lmqg/qg_subjqa': 1692, 477 | 'copenlu/scientific-exaggeration-detection': 1699, 478 | 'esb/datasets': 1700, 479 | 'BeIR/msmarco': 1701, 480 | 'biwi_kinect_head_pose': 1703, 481 | 'BeIR/quora-qrels': 1704, 482 | 'wardenga/lsoie': 1705, 483 | 'nlphuji/vasr': 1707, 484 | 'BeIR/nq': 1708, 485 | 'BeIR/dbpedia-entity': 1710, 486 | 'sadrasabouri/ShahNegar': 1712, 487 | 'knkarthick/xsum': 1713, 488 | 'ColumbiaNLP/FLUTE': 1714, 489 | 'bigscience-biomedical/scitail': 1715, 490 | 'lmqg/qg_squadshifts': 1717, 491 | 'BeIR/climate-fever': 1722, 492 | 'PiC/phrase_retrieval': 1724, 493 | 'bdotloh/empathetic-dialogues-contexts': 1726, 494 | 'ccdv/mediasum': 1727, 495 | 'BeIR/msmarco-qrels': 1735, 496 | 'alexfabbri/answersumm': 1736, 497 | 'pszemraj/text2image-multi-prompt': 1737, 498 | 'shibing624/source_code': 1738, 499 | 'kensho/spgispeech': 1741, 500 | 'jamescalam/channel-metadata': 1742, 501 | 'EMBO/sd-nlp-non-tokenized': 1743, 502 | 'facebook/pmd': 1748, 503 | 'drt/kqa_pro': 1749, 504 | 'BeIR/fever-qrels': 1751, 505 | 'TheFusion21/PokemonCards': 1752, 506 | 'zeroshot/twitter-financial-news-sentiment': 1753, 507 | 'bigscience-biomedical/blurb': 1754, 508 | 'mteb/bucc-bitext-mining': 1759, 509 | 'pinecone/core-2020-05-10-deduplication': 1763, 510 | 'tals/vitaminc': 1764, 511 | 'BeIR/hotpotqa-qrels': 1765, 512 | 'gigant/ted_descriptions': 1766, 513 | 'jpwahle/autoencoder-paraphrase-dataset': 1767, 514 | 'beki/privy': 1768, 515 | 'Muennighoff/P3': 1770, 516 | 'jpwahle/dblp-discovery-dataset': 1771, 517 | 'taln-ls2n/kp20k': 1773, 518 | 'bigscience-biomedical/biosses': 1774, 519 | 'allenai/prosocial-dialog': 1776, 520 | 'pacovaldez/stackoverflow-questions': 1777, 521 | 'kasnerz/hitab': 1778, 522 | 'relbert/semeval2012_relational_similarity': 1779, 523 | 'sagnikrayc/snli-cf-kaushik': 1780, 524 | 'mwritescode/slither-audited-smart-contracts': 1781, 525 | 
'BeIR/webis-touche2020-qrels': 1787, 526 | 'bigscience-biomedical/mednli': 1788, 527 | 'pinecone/movielens-recent-ratings': 1790, 528 | 'BeIR/dbpedia-entity-qrels': 1791, 529 | 'shanya/crd3': 1792, 530 | 'knkarthick/samsum': 1793, 531 | 'BeIR/climate-fever-qrels': 1794, 532 | 'BeIR/nq-qrels': 1795, 533 | 'sanchit-gandhi/librispeech_asr_dummy': 1796, 534 | 'taln-ls2n/semeval-2010-pre': 1797, 535 | 'Bingsu/openwebtext_20p': 1798, 536 | 'PolyAI/banking77': 1799, 537 | 'JulesBelveze/tldr_news': 1800, 538 | 'Freed-Wu/kodak': 1801, 539 | 'biglam/gutenberg-poetry-corpus': 1802, 540 | 'SocialGrep/reddit-r-bitcoin-data-for-jun-2022': 1803, 541 | 'taln-ls2n/kptimes': 1805, 542 | 'biglam/old_bailey_proceedings': 1806, 543 | 'launch/gov_report': 1807, 544 | 'knkarthick/AMI': 1810, 545 | 'voidful/NMSQA': 1811, 546 | 'DTU54DL/dmeo': 1812, 547 | 'FinanceInc/auditor_sentiment': 1813, 548 | 'jamescalam/unsplash-25k-photos': 1814, 549 | 'Tidrael/tsl_news': 1815, 550 | 'DTU54DL/common3k-train': 1816, 551 | 'okite97/news-data': 1817, 552 | 'lmqg/qa_squad': 1818, 553 | 'ConvLab/woz': 1819, 554 | 'ConvLab/camrest': 1820, 555 | 'ConvLab/metalwoz': 1821, 556 | 'kakaobrain/coyo-700m': 1822, 557 | 'taln-ls2n/kpbiomed': 1823, 558 | 'abhinavk/openpi_v2': 1826, 559 | 'mwong/fever-claim-related': 1831, 560 | 'ConvLab/tm1': 1832, 561 | 'joey234/nan-nli': 1833, 562 | 'ConvLab/tm2': 1834, 563 | 'ConvLab/tm3': 1835, 564 | 'ConvLab/kvret': 1836, 565 | 'ConvLab/sgd': 1837, 566 | 'relbert/semeval2012_relational_similarity_v5': 1838, 567 | 'cmudrc/wave-energy': 1839, 568 | 'llangnickel/long-covid-classification-data': 1840, 569 | 'webis/args_me': 1841, 570 | 'HuggingFaceM4/something_something_v2': 1844, 571 | 'ConvLab/dailydialog': 1845, 572 | 'huanggab/reddit_haiku': 1846, 573 | 'relbert/semeval2012_relational_similarity_v6': 1847, 574 | 'pszemraj/riddlesense_plusplus': 1848, 575 | 'rungalileo/20_Newsgroups_Fixed': 1849, 576 | 'DTU54DL/common-voice-test16k': 1850, 577 | 'lhoestq/custom_squad': 1851, 
578 | 'merve/poetry': 1852, 579 | 'yoshitomo-matsubara/srsd-feynman_easy': 1853, 580 | 'nightingal3/fig-qa': 1854, 581 | 'matejklemen/vuamc': 1855, 582 | 'strombergnlp/twitter_pos': 1856, 583 | 'nlphuji/winogavil': 1858, 584 | 'DFKI-SLT/tacred': 1859, 585 | 'valurank/News_Articles_Categorization': 1861, 586 | 'nbroad/mediasum': 1862, 587 | 'asapp/slue': 1863, 588 | 'zbnsl/emoteModified': 1865, 589 | 'adsabs/WIESP2022-NER': 1866, 590 | 'arize-ai/ecommerce_reviews_with_language_drift': 1867, 591 | 'UCL-DARK/ludwig': 1868, 592 | 'Aunsiels/InfantBooks': 1874, 593 | 'openclimatefix/uk_pv': 1875, 594 | 'copenlu/fever_gold_evidence': 1876, 595 | 'rungalileo/mit_movies_fixed_connll_format': 1877, 596 | 'jamescalam/youtube-transcriptions': 1878, 597 | 'lmqg/qa_harvesting_from_wikipedia': 1879, 598 | 'qanastek/Biosses-BLUE': 1880, 599 | 'zeronix1020/Strawberry-Disease': 1881, 600 | 'dferndz/cSQuAD2': 1882, 601 | 'taln-ls2n/pubmed': 1883, 602 | 'BeIR/scidocs-generated-queries': 1884, 603 | 'jmhessel/newyorker_caption_contest': 1885, 604 | 'inverse-scaling/NeQA': 1915, 605 | 'DTU54DL/common-voice': 1916, 606 | 'turingbench/TuringBench': 1917, 607 | 'demelin/understanding_fables': 1937, 608 | 'RUCAIBox/Open-Dialogue': 1938, 609 | 'allenai/multinews_sparse_max': 1939, 610 | 'RamAnanth1/lex-fridman-podcasts': 1940, 611 | 'sled-umich/Conversation-Entailment': 1941, 612 | 'stevhliu/demo': 1942, 613 | 'svakulenk0/qrecc': 1943, 614 | 'arize-ai/movie_reviews_with_context_drift': 1944, 615 | 'launch/ampere': 1945, 616 | 'AnonymousSub/recipe_RL_data_roberta-base': 1946, 617 | 'dreamproit/bill_summary_us': 1947, 618 | 'bgstud/libri-whisper-raw': 1948, 619 | 'jpwahle/etpc': 1949, 620 | 'DTU54DL/common-native-proc': 1950, 621 | 'mbartolo/synQA': 1951, 622 | 'wanyu/IteraTeR_full_doc': 1952, 623 | 'wanyu/IteraTeR_human_doc': 1953, 624 | 'orieg/elsevier-oa-cc-by': 1954, 625 | 'climatebert/environmental_claims': 1955, 626 | 'SocialGrep/the-reddit-climate-change-dataset': 1956, 627 | 
'KGraph/FB15k-237': 1958, 628 | 'KheemDH/data': 1959, 629 | 'mwong/fever-evidence-related': 1960, 630 | 'HuggingFaceM4/TGIF': 1961, 631 | 'BeIR/fever-generated-queries': 1962, 632 | 'nateraw/ade20k-tiny': 1963, 633 | 'BeIR/cqadupstack-qrels': 1964, 634 | 'knkarthick/highlightsum': 1965, 635 | 'RUCAIBox/Data-to-text-Generation': 1966, 636 | 'GateNLP/broad_twitter_corpus': 1967, 637 | 'Tidrael/finance-headlines': 1968, 638 | 'lmqg/qag_squad': 1969, 639 | 'pacovaldez/stackoverflow-questions-2016': 1970, 640 | 'BeIR/fiqa-generated-queries': 1971, 641 | 'BeIR/signal1m-generated-queries': 1972, 642 | 'MicPie/unpredictable_msdn-microsoft-com': 1973, 643 | 'zeroshot/twitter-financial-news-topic': 1974, 644 | 'inverse-scaling/quote-repetition': 1975, 645 | 'esc-bench/esc-diagnostic-backup': 1976, 646 | 'lmqg/qg_annotation': 1977, 647 | 'sileod/wep-probes': 1978, 648 | 'DTU54DL/common-voice-test3k': 1981, 649 | 'jakartaresearch/causalqa': 1982, 650 | 'copenlu/sufficient_facts': 2002, 651 | 'ConvLab/multiwoz21': 2005, 652 | 'arka0821/multi_document_summarization': 2006, 653 | 'strombergnlp/rumoureval_2019': 2007, 654 | 'rongzhangibm/NaturalQuestionsV2': 2008, 655 | 'Muennighoff/mbpp': 2009, 656 | 'RUCAIBox/Simplification': 2011, 657 | 'shubhamg2208/lexicap': 2012, 658 | 'olm/olm-wikipedia-20220701': 2013, 659 | 'esc-bench/esc-diagnostic-dataset': 2014, 660 | 'jpwahle/autoregressive-paraphrase-dataset': 2015, 661 | 'GabrielVidal/dead-by-daylight-perks': 2016, 662 | 'DTU54DL/common-proc-whisper': 2017, 663 | 'valurank/PoliticalBias': 2018, 664 | 'McGill-NLP/TopiOCQA': 2019, 665 | 'gsarti/magpie': 2020, 666 | 'BeIR/cqadupstack-generated-queries': 2021, 667 | 'MicPie/unpredictable_mmo-champion-com': 2022, 668 | 'RUCAIBox/Question-Generation': 2023, 669 | 'allenai/multinews_sparse_mean': 2024, 670 | 'demo-org/diabetes': 2025, 671 | 'StonyBrookNLP/tellmewhy': 2026, 672 | 'bergr7/weakly_supervised_ag_news': 2027, 673 | 'din0s/msmarco-nlgen': 2028, 674 | 
'frankier/cross_domain_reviews': 2029, 675 | 'gart-labor/pumpnli': 2030, 676 | 'AndyChiang/cloth': 2031, 677 | 'olm/olm-CC-MAIN-2017-22-sampling-ratio-0.16178770949': 2032, 678 | 'bgstud/libri': 2033, 679 | 'DTU54DL/commonvoice_accent_test': 2034, 680 | 'lewtun/my-awesome-dataset': 2035, 681 | 'peixian/rtGender': 2036, 682 | 'pmc/open_access': 2039, 683 | 'uva-irlab/trec-cast-2019-multi-turn': 2043, 684 | 'DFKI-SLT/scidtb': 2044, 685 | 'surrey-nlp/PLOD-filtered': 2045, 686 | 'wanyu/IteraTeR_v2': 2046, 687 | 'strombergnlp/ipm_nel': 2047, 688 | 'HuggingFaceM4/charades': 2048, 689 | 'ncats/EpiSet4NER-v2': 2050, 690 | 'HuggingFaceM4/ActivitiyNet_Captions': 2051, 691 | 'sileod/discourse_marker_qa': 2052, 692 | 'yoshitomo-matsubara/srsd-feynman_medium': 2053, 693 | 'BeIR/nfcorpus-generated-queries': 2054, 694 | 'BeIR/trec-news-generated-queries': 2055, 695 | 'BeIR/robust04-generated-queries': 2056, 696 | 'BeIR/quora-generated-queries': 2057, 697 | 'valurank/Adult-content-dataset': 2058, 698 | 'launch/open_question_type': 2059, 699 | 'knkarthick/topicsum': 2060, 700 | 'yuningm/citesum': 2061, 701 | 'elihoole/asrs-aviation-reports': 2062, 702 | 'DeveloperOats/DBPedia_Classes': 2063, 703 | 'hoskinson-center/proof-pile': 2064, 704 | 'RUCAIBox/Summarization': 2065, 705 | 'RUCAIBox/Question-Answering': 2066, 706 | 'RUCAIBox/Story-Generation': 2067, 707 | 'RUCAIBox/Paraphrase': 2068, 708 | 'jakartaresearch/semeval-absa': 2069, 709 | 'tner/ttc_dummy': 2071, 710 | 'copenlu/citeworth': 2072, 711 | 'allenai/multinews_sparse_oracle': 2073, 712 | 'allenai/multixscience_sparse_oracle': 2074, 713 | 'allenai/multixscience_sparse_mean': 2075, 714 | 'allenai/multixscience_sparse_max': 2076, 715 | 'allenai/ms2_sparse_oracle': 2077, 716 | 'mschi/blogspot_raw': 2078, 717 | 'gaurikapse/civis-consultation-summaries': 2079, 718 | 'chenghao/cuad_qa': 2080, 719 | 'esc-bench/esc-datasets': 2081, 720 | 'olm/olm-wikipedia-20221001': 2082, 721 | 'allenai/wcep_dense_oracle': 2083, 722 | 
'dennlinger/wiki-paragraphs': 2084, 723 | 'AndyChiang/dgen': 2085, 724 | 'esb/diagnostic-dataset': 2086, 725 | 'havens2/naacl2022': 2087, 726 | 'fkdosilovic/docee-event-classification': 2088, 727 | 'DTU54DL/demo-common-whisper': 2089, 728 | 'dferndz/cSQuAD1': 2090, 729 | 'jpcorb20/multidogo': 2091, 730 | 'julien-c/reactiongif': 2092, 731 | 'lara-martin/Scifi_TV_Shows': 2093, 732 | 'lukesjordan/worldbank-project-documents': 2094, 733 | 'mnemlaghi/widdd': 2095, 734 | 'mvarma/medwiki': 2096, 735 | 'nateraw/beans': 2098, 736 | 'nateraw/cats_vs_dogs': 2099, 737 | 'nateraw/food101': 2100, 738 | 'nateraw/sync_food101': 2101, 739 | 'ncats/EpiSet4BinaryClassification': 2102, 740 | 'ncats/EpiSet4NER-v1': 2103, 741 | 'peixian/equity_evaluation_corpus': 2104, 742 | 'rajeshradhakrishnan/malayalam_wiki': 2105, 743 | 'softcatala/open-source-english-catalan-corpus': 2106, 744 | 'toloka/CrowdSpeech': 2107, 745 | 'valurank/12-factor': 2108, 746 | 'valurank/PoliticalBias_AllSides_Txt': 2109, 747 | 'valurank/PoliticalBias_Sources': 2110, 748 | 'valurank/hate-multi': 2111, 749 | 'valurank/news-12factor': 2112, 750 | 'valurank/offensive-multi': 2113, 751 | 'webimmunization/COVID-19-vaccine-attitude-tweets': 2114, 752 | 'wpicard/nostradamus-propheties': 2115, 753 | 'yuanchuan/annotated_reference_strings': 2116, 754 | 'ruanchaves/stan_large': 2117, 755 | 'ruanchaves/stan_small': 2118, 756 | 'ruanchaves/boun': 2119, 757 | 'ruanchaves/dev_stanford': 2120, 758 | 'ruanchaves/test_stanford': 2121, 759 | 'ruanchaves/snap': 2122, 760 | 'z-uo/qasper-squad': 2123, 761 | 'SocialGrep/the-antiwork-subreddit-dataset': 2124, 762 | 'CLUTRR/v1': 2126, 763 | 'malteos/test2': 2132, 764 | 'TomTBT/pmc_open_access_xml': 2133, 765 | 'SocialGrep/the-reddit-dataset-dataset': 2137, 766 | 'SocialGrep/the-reddit-place-dataset': 2139, 767 | 'projecte-aina/gencata': 2141, 768 | 'mwong/climate-evidence-related': 2142, 769 | 'mwong/climate-claim-related': 2143, 770 | 'surrey-nlp/PLOD-unfiltered': 2144, 771 | 
'SocialGrep/the-reddit-irl-dataset': 2145, 772 | 'Lexi/spanextract': 2147, 773 | 'mwong/climatetext-claim-related-evaluation': 2148, 774 | 'mwong/climatetext-evidence-related-evaluation': 2149, 775 | 'ylacombe/xsum_factuality': 2150, 776 | 'mwong/climatetext-climate_evidence-claim-related-evaluation': 2151, 777 | 'mwong/climatetext-claim-climate_evidence-related-evaluation': 2152, 778 | 'mwong/climatetext-evidence-claim-pair-related-evaluation': 2153, 779 | 'mwong/climatetext-claim-evidence-pair-related-evaluation': 2154, 780 | 'patrickvonplaten/librispeech_asr_self_contained': 2155, 781 | 'BritishLibraryLabs/web_archive_classification': 2158, 782 | 'albertxu/CrosswordQA': 2159, 783 | 'SocialGrep/the-reddit-nft-dataset': 2160, 784 | 'janck/bigscience-lama': 2162, 785 | 'strombergnlp/twitter_pos_vcb': 2163, 786 | 'Filippo/osdg_cd': 2164, 787 | 'Ukhushn/home-depot': 2165, 788 | 'pile-of-law/eoir_privacy': 2166, 789 | 'drAbreu/sd-nlp-2': 2168, 790 | 'Leyo/TGIF': 2173, 791 | 'strombergnlp/named_timexes': 2174, 792 | 'domenicrosati/TruthfulQA': 2175, 793 | 'Roh/ryanspeech': 2176, 794 | 'Leyo/ActivityNet_Captions': 2177, 795 | 'IsaacBot/SQuAD-single-sentence-QA': 2178, 796 | 'morteza/cogtext': 2179, 797 | 'wdc/products-2017': 2180, 798 | 'rajeshvarma/QA_on_SLA': 2196, 799 | 'statworx/haiku': 2197, 800 | 'rajistics/million-headlines': 2198, 801 | 'feyzaakyurek/BBNLI': 2199, 802 | 'launch/gov_report_qs': 2200, 803 | 'DFKI-SLT/wikitext_linked': 2202, 804 | 'dianalogan/Marketing-Budget-and-Actual-Sales-Dataset': 2204, 805 | 'mehnaazasad/arxiv-co-ga': 2205, 806 | 'JeremyAlain/123_test': 2206, 807 | 'BeIR/arguana-generated-queries': 2209, 808 | 'BeIR/climate-fever-generated-queries': 2210, 809 | 'BeIR/dbpedia-entity-generated-queries': 2211, 810 | 'wise-east/spolin': 2212, 811 | 'yoshitomo-matsubara/srsd-feynman_hard': 2213, 812 | 'florentgbelidji/edmunds-car-ratings': 2214, 813 | 'olivierdehaene/xkcd': 2215, 814 | 'rajistics/auditor_review': 2216, 815 | 
'BeIR/scifact-generated-queries': 2217, 816 | 'BeIR/trec-covid-generated-queries': 2218, 817 | 'BeIR/webis-touche2020-generated-queries': 2219, 818 | 'BeIR/nq-generated-queries': 2220, 819 | 'BeIR/hotpotqa-generated-queries': 2221, 820 | 'BeIR/bioasq-generated-queries': 2222, 821 | 'icelab/ntrs_meta': 2223, 822 | 'iejMac/CLIP-Kinetics700': 2224, 823 | 'fever/feverous': 2225, 824 | 'Livingwithmachines/hmd-erwt-training': 2226, 825 | 'wkrl/cord': 2227, 826 | 'launch/reddit_qg': 2228, 827 | 'arize-ai/xtreme_en': 2229} 828 | 829 | dataset_rank['Anthropic/model-written-evals']=13 830 | dataset_rank['Anthropic/hh-rlhf']=14 -------------------------------------------------------------------------------- /src/tasksource/mtasks.py: -------------------------------------------------------------------------------- 1 | from .preprocess import cat, get,name, regen, constant, Classification, TokenClassification, MultipleChoice 2 | from .metadata import udep_labels 3 | from datasets import get_dataset_config_names, ClassLabel, Dataset, DatasetDict, concatenate_datasets, Sequence 4 | 5 | def all(dataset_name): 6 | try: 7 | config_name=get_dataset_config_names(dataset_name) 8 | except Exception as e: 9 | print(dataset_name,e) 10 | config_name=None 11 | return dict(dataset_name=dataset_name, config_name=config_name) 12 | 13 | def concatenate_configs(dataset): 14 | return DatasetDict(train=concatenate_datasets(list(dataset.values()))) 15 | 16 | # english tasks (few, to keep balance between languages) 17 | 18 | moritz_xnli = Classification("premise","hypothesis",name("label",["entailment", "neutral","contradiction"]), 19 | pre_process=concatenate_configs, 20 | dataset_name="MoritzLaurer/multilingual-NLI-26lang-2mil7") 21 | 22 | xnli = Classification("premise", "hypothesis", "label", **all("metaeval/xnli")) 23 | 24 | americas_nli = Classification("premise","hypothesis","label",config_name="all_languages") 25 | 26 | stsb_multi_mt = Classification("sentence1", "sentence2", 27 | lambda x: 
        float(x["similarity_score"]/5),
    **all('stsb_multi_mt'))

# Cross-lingual paraphrase detection (PAWS-X); integer labels mapped to readable names.
pawsx = Classification("sentence1","sentence2",name('label',['not_paraphrase','paraphrase']), **all('paws-x'))

# Dialogue-act classification over the multilingual MIAM benchmark.
miam = Classification("Utterance",labels="Label", **all('miam'))

# Stance detection: does the comment support or oppose the question's proposition?
xstance = Classification("question", "comment", "label",
    **all("strombergnlp/x-stance"))


# Offensive-language detection for four languages; rows whose subtask_a label
# falls outside {0,1} are filtered out before the label names are applied.
offenseval = Classification(lambda x: str(x["text"]), labels=name("subtask_a",['not offensive','offensive']),
    pre_process=lambda ds:ds.filter(lambda x: x['subtask_a'] in [0,1]),
    dataset_name='strombergnlp/offenseval_2020',
    config_name=["ar","da","gr","tr"])

offenseval_dravidian = Classification("text",labels="label",config_name=['kannada','malayalam','tamil'])

# Multi-label hate-speech annotations arrive as a single '_'-joined string;
# the lambda splits it back into a list of labels.
mlma_hate = Classification("tweet", labels=lambda x:x["sentiment"].split('_'),
    dataset_name="nedjmaou/MLMA_hate_speech")

# XGLUE question/answer matching (explicit dataset+config pair).
qam = Classification("question","answer","label", dataset_name="xglue",config_name="qam")

#x_sum_factuality = Classification("summary","generated_summary","label", dataset_name="ylacombe/xsum_factuality")

# Multilingual fact checking: evidence vs. claim veracity.
x_fact = Classification('evidence','claim','label', dataset_name="metaeval/x-fact")

# XGLUE subtasks; dataset/config names are presumably derived from the
# variable names (`xglue___nc` -> dataset "xglue", config "nc") — the
# name-parsing convention lives outside this file. TODO confirm.
xglue___nc = Classification('news_body',labels='news_category')
xglue___qadsm = Classification('query','ad_description','relevance_label')
xglue___qam = Classification('question','answer','label')
xglue___wpr = Classification('query','web_page_snippet','relavance_label') # relavance_label : sic (typo is in the upstream dataset column)

# Word-in-context disambiguation: prefix each context sentence with the
# target word (via `cat`) so the model sees which word to disambiguate.
xlwic = Classification(
    sentence1=cat(["target_word","context_1"], " : "),
    sentence2=cat(["target_word","context_2"], " : "),
    labels='label',dataset_name="pasinit/xlwic",config_name=['xlwic_de_de','xlwic_it_it','xlwic_fr_fr','xlwic_en_ko'])

#[ "spam", "fails_task", "lang_mismatch", "pii", "not_appropriate", "hate_speech", "sexual_content", "quality", "toxicity", "humor", "helpfulness", "creativity", "violence" ]

| oasst1__quality = Classification("parent_text","text",labels="quality", dataset_name="tasksource/oasst1_dense_flat", 67 | pre_process = lambda ds:ds.remove_columns('labels')) 68 | oasst1__toxicity = Classification("parent_text","text",labels="toxicity", dataset_name="tasksource/oasst1_dense_flat", 69 | pre_process = lambda ds:ds.remove_columns('labels')) 70 | oasst1__helpfulness = Classification("parent_text","text",labels="helpfulness", dataset_name="tasksource/oasst1_dense_flat", 71 | pre_process = lambda ds:ds.remove_columns('labels')) 72 | 73 | 74 | language_identification = Classification("text",labels="labels", dataset_name="papluca/language-identification") 75 | wili_2018_langid = Classification("sentence",labels="label",dataset_name="wili_2018") 76 | 77 | exams = MultipleChoice(get.question.stem, choices_list=get.question.choices.text, 78 | labels=lambda x:'ABCDE'.index(x['answerKey']), 79 | dataset_name="exams", config_name='multilingual', 80 | pre_process=lambda ds:ds.filter(lambda x: x['answerKey'] in "ABCDE")) 81 | 82 | xcsr = MultipleChoice(get.question.stem, choices_list=get.question.choices.text, 83 | labels=lambda x:'ABCDE'.index(x['answerKey']), 84 | **all('xcsr')) 85 | 86 | xcopa = MultipleChoice("premise",choices=['choice1','choice2'],labels="label", 87 | **all('xcopa')) 88 | 89 | #xstory = MultipleChoice(constant(''),choices=["text_right_ending","text_wrong_ending"],labels=constant(0), **all("juletxara/xstory_cloze")) 90 | 91 | xstory = MultipleChoice(lambda x: "\n".join([x[f'input_sentence_{i}'] for i in range(1,5)]), 92 | choices=["sentence_quiz1","sentence_quiz2"],labels=constant(0), **all("juletxara/xstory_cloze")) 93 | 94 | 95 | xglue_ner = TokenClassification("words","ner", dataset_name="xglue",config_name="ner") 96 | xglue_pos = TokenClassification("words","pos", dataset_name="xglue",config_name="pos") 97 | 98 | #disrpt_23 = Classification("unit1_sent", "unit2_sent", "label",**all("metaeval/disrpt")) 99 | 100 | udep__pos = 
TokenClassification('tokens','upos', **all('universal_dependencies')) 101 | 102 | def udep_post_process(ds): 103 | return ds.cast_column('labels', Sequence(ClassLabel(names=udep_labels))) 104 | 105 | #udep__deprel = TokenClassification('tokens',lambda x:[udep_labels.index(a) for a in x['deprel']], 106 | # **all('universal_dependencies'),post_process=udep_post_process) 107 | 108 | oasst_rlhf = MultipleChoice("prompt",choices=['chosen','rejected'],labels=constant(0), 109 | dataset_name="tasksource/oasst1_pairwise_rlhf_reward") 110 | 111 | sentiment = Classification("text",labels="label", dataset_name="tyqiangz/multilingual-sentiments",config_name="all", 112 | pre_process=lambda ds:ds.filter(lambda x: "amazon_reviews" not in x['source']) ) 113 | tweet_sentiment = Classification("text", labels="label", **all('cardiffnlp/tweet_sentiment_multilingual')) 114 | review_sentiment = Classification("review_body",labels="stars", dataset_name="amazon_reviews_multi",config_name="all_languages") 115 | emotion = Classification("text",labels="emotion",dataset_name="metaeval/universal-joy") 116 | # in mms 117 | 118 | mms_sentiment = Classification("text",labels="label",dataset_name='Brand24/mms') 119 | 120 | mapa_fine = TokenClassification("tokens","coarse_grained",dataset_name='joelito/mapa') 121 | mapa_corase = TokenClassification("tokens","fine_grained",dataset_name='joelito/mapa') 122 | 123 | aces_ranking = MultipleChoice("source",choices=['good-translation','incorrect-translation'],labels=constant(0), dataset_name='nikitam/ACES') 124 | aces_phenomena = Classification('source','incorrect-translation','phenomena', dataset_name='nikitam/ACES') 125 | 126 | amazon_intent = Classification("utt",labels="intent",**all('AmazonScience/massive')) 127 | # dataset_name='glue',config_name=['ocnli','afqmc']) 128 | 129 | tidy_as2=Classification("Question","Sentence","Label",dataset_name='tasksource/tydi-as2-balanced') 130 | 131 | multiconer = TokenClassification("tokens","ner_tags_index", 
    **all("MultiCoNER/multiconer_v2"))

mtop = Classification("question",labels="intent", dataset_name="tasksource/mtop")

mlabel_nli = Classification("premise","hypothesis","labels",dataset_name="tasksource/multilingual-zero-shot-label-nli")

#wino_x
# clue, klue, indic_glue
# SMS_Spam_Multilingual_Collection_Dataset

# ---- /src/tasksource/preprocess.py ----

from collections.abc import Iterable
from dotwiz import DotWiz
from dataclasses import dataclass
from typing import Union
import itertools
import funcy as fc
import exrex
import magicattr
import numpy as np
import copy
import datasets
import time

MAX_MC_OPTIONS = 4

def get_column_names(dataset):
    """Return the set of column names; flattens per-split dicts from a DatasetDict."""
    cn = dataset.column_names
    if type(cn)==dict:
        # DatasetDict: column_names maps split -> list of columns.
        return set(fc.flatten(cn.values()))
    else:
        return set(cn)


def sample_dataset(dataset,n=10000, n_eval=1000,seed=0):
    """Downsample each split in place: `n` rows for train, `n_eval` for others.

    Splits already at or below the cap (or when the cap is falsy) are untouched.
    Sampling uses train_test_split for a seeded random subset, not a head-slice.
    """
    for k in dataset:
        n_k=(n if k=='train' else n_eval)
        if n_k and len(dataset[k])>n_k:
            dataset[k]=dataset[k].train_test_split(train_size=n_k,seed=seed)['train']
    return dataset

class Preprocessing(DotWiz):
    """Base for declarative task preprocessors; calling one normalizes a dataset.

    Field values are interpreted by __call__: a string names a source column to
    rename, a callable computes the target column row-by-row. Every created
    instance is also recorded in `_instances`.
    """
    default_splits = ('train','validation','test')
    _instances = []  # registry of every Preprocessing ever instantiated

    def __post_init__(self):
        Preprocessing._instances+=[self]

    @staticmethod
    def __map_to_target(x,fn=lambda x:None, target=None):
        # datasets.map helper: write fn(row) into row[target].
        x[target]=fn(x)
        return x

    def load(self):
        """Download the declared dataset and run this preprocessing on it."""
        return self(datasets.load_dataset(self.dataset_name,self.config_name))

    def __call__(self,dataset, max_rows=None, max_rows_eval=None,seed=0):
        """Apply the full pipeline: pre_process, split fixup, sampling, column
        renaming/derivation, column pruning, label fixup, post_process.

        NOTE(review): `fix_splits` and `fix_labels` are referenced but defined
        elsewhere in this module (outside this excerpt).
        """
        dataset = self.pre_process(dataset)

        # manage splits
        for k,v in zip(self.default_splits, self.splits):
            if v and k!=v:
                # Rename a custom split (e.g. 'dev') onto the canonical name.
                dataset[k]=dataset[v]
                del dataset[v]
            if k in dataset and not v: # obfuscated label
                del dataset[k]
        dataset = fix_splits(dataset)

        # Drop any split that is not train/validation/test.
        for k in list(dataset.keys()):
            if k not in self.default_splits:
                del dataset[k]
        dataset = sample_dataset(dataset, max_rows, max_rows_eval,seed=seed)

        # field annotated with a string
        # Build {source_column: target_field} from fields declared as strings,
        # skipping bookkeeping fields and identity mappings.
        substitutions = {v:k for k,v in self.to_dict().items()
            if (k and k not in {'splits','dataset_name','config_name'}
            and type(v)==str and k!=v)}

        # Remove target columns that already exist so rename_columns cannot clash.
        dataset=dataset.remove_columns([c for c in substitutions.values() if c in dataset['train'].features and c not in substitutions])
        dataset=dataset.rename_columns(substitutions)

        # field annotated with a function
        for k in self.to_dict().keys():
            v=getattr(self, k)
            if callable(v) and k not in {"post_process","pre_process","load"}:
                dataset=dataset.map(self.__map_to_target,
                    fn_kwargs={'fn':v,'target':k})

        # Keep only the declared task fields.
        dataset=dataset.remove_columns(
            get_column_names(dataset)-set(self.to_dict().keys()))
        dataset = fix_labels(dataset)
        dataset = fix_splits(dataset) # again: label mapping changed
        dataset = self.post_process(dataset)
        return dataset


@dataclass
class cat(Preprocessing):
    """Field spec that concatenates several columns with a separator.

    NOTE(review): fields are iterated reversed and combined via `sum(*y)` on
    numpy char arrays — the exact concatenation order is intentional but
    non-obvious; verify against existing tasks before touching.
    """
    fields:Union[str,list]=None
    separator:str=' '

    def __call__(self, example=None):
        y=[np.char.array(example[f]) + sep
            for f,sep in zip(self.fields[::-1],itertools.repeat(self.separator))]
        y=list(sum(*y))
        if len(y)==1:
            y=y[0]
        return y


def pretty(f):
    """Wrap callable-factory `f` so its products repr as `name(arg)`.

    NOTE(review): the loop assigns every positional arg to the same `value`
    attribute, so only the last one survives — looks intentional for the
    single-arg case; confirm before relying on multi-arg behavior.
    """
    class pretty_f(DotWiz):
        def __init__(self,*args):
            self.__f_arg = f(*args)
            for a in args:
                setattr(self,'value',a)

        def __call__(self, *args,**kwargs):
            return self.__f_arg(*args,**kwargs)

        def __repr__(self):
            return f"{self.__f_arg.__qualname__ .split('.')[0]}({self.value})"
    return pretty_f

class dotgetter:
    # Lazy attribute-path accessor: builds a path string via attribute/index
    # access and resolves it against an example dict when called.
    def __init__(self, path=''):
self.path=path 117 | 118 | def __bool__(self): 119 | return bool(self.path) 120 | 121 | def __getattr__(self, k): 122 | return self.__class__(f'{self.path}.{k}'.lstrip('.')) 123 | 124 | def __getitem__(self, i): 125 | return self.__class__(f'{self.path}[{i}]') 126 | 127 | def __call__(self, example=None): 128 | return magicattr.get(DotWiz(example), self.path) 129 | 130 | def __hash__(self): 131 | return hash(self.path) 132 | 133 | 134 | @dataclass 135 | class ClassificationFields(Preprocessing): 136 | sentence1:str='sentence1' 137 | sentence2:str='sentence2' 138 | labels:str='labels' 139 | 140 | @dataclass 141 | class Seq2SeqLMFields(Preprocessing): 142 | prompt:str='prompt' 143 | output:str='output' 144 | 145 | @dataclass 146 | class TokenClassificationFields(Preprocessing): 147 | tokens:str='tokens' 148 | labels:str='labels' 149 | 150 | @dataclass 151 | class MultipleChoiceFields(Preprocessing): 152 | inputs:str='input' 153 | choices:Iterable=tuple() 154 | labels:str='labels' 155 | choices_list:str=None 156 | def __post_init__(self): 157 | for i, c in enumerate(self.choices): 158 | setattr(self,f'choice{i}',c) 159 | delattr(self,'choices') 160 | if not self.choices_list: 161 | delattr(self,'choices_list') 162 | 163 | def __call__(self,dataset, *args, **kwargs): 164 | dataset = super().__call__(dataset, *args, **kwargs) 165 | if self.choices_list: 166 | dataset = dataset.filter(lambda x: 10), 745 | dataset_name="openai/webgpt_comparisons") 746 | 747 | synthetic_instruct = MultipleChoice('prompt', choices=['chosen', 'rejected'], 748 | labels=constant(0), dataset_name="Dahoas/synthetic-instruct-gptj-pairwise") 749 | 750 | scruples = Classification("text",labels="binarized_label",dataset_name="metaeval/scruples") 751 | 752 | wouldyourather = MultipleChoice(constant('Most people would rather:'), choices=['option_a','option_b'], 753 | labels= lambda x: int(x['votes_a']