├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── settings.json ├── Makefile ├── README.md ├── assets ├── eval.png └── readme.txt ├── docs └── __init__.py ├── examples └── __init__.py ├── notebooks ├── demo_prompts.ipynb └── fewshots_examples.ipynb ├── poetry.lock ├── pyproject.toml ├── rosettaeval ├── __init__.py ├── loader │ ├── __init__.py │ ├── factory.py │ ├── tasks │ │ ├── __init__.py │ │ ├── adapter.py │ │ ├── decorators.py │ │ ├── medhalt │ │ │ └── metadata.json │ │ ├── medmcqa │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── metadata.json │ │ ├── medqa │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── metadata.json │ │ ├── mmlu │ │ │ └── metadata.json │ │ ├── models.py │ │ ├── pubmedqa │ │ │ └── metadata.json │ │ ├── template.py │ │ └── types.py │ └── utils.py └── tasks │ ├── __init__.py │ ├── medhalt │ └── __init__.py │ ├── medmcqa │ ├── README.md │ ├── cot │ │ └── README.md │ └── few_shots │ │ └── README.md │ ├── medqa │ ├── README.md │ ├── __init__.py │ ├── cot │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── ensemble_refinement │ │ └── __init__.py │ ├── few_shots │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── medqa_task.py │ └── self_consistency │ │ └── __init__.py │ ├── mmlu_anatomy │ ├── README.md │ ├── __init__.py │ ├── cot │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── ensemble_refinement │ │ └── __init__.py │ ├── few_shots │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── mmlu_anatomy_task.py │ └── self_consistency │ │ └── __init__.py │ ├── mmlu_cb │ ├── README.md │ ├── __init__.py │ ├── cot │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── ensemble_refinement │ │ └── __init__.py │ ├── few_shots │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── mmlu_cb_task.py │ └── self_consistency │ │ └── __init__.py │ ├── mmlu_ck │ ├── README.md │ ├── __init__.py │ ├── cot │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── ensemble_refinement │ │ └── __init__.py │ ├── few_shots │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── mmlu_ck_task.py │ └── self_consistency │ │ └── __init__.py │ ├── mmlu_cm │ ├── README.md │ ├── __init__.py │ ├── cot │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── ensemble_refinement │ │ └── __init__.py │ ├── few_shots │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── mmlu_cm_task.py │ └── self_consistency │ │ └── __init__.py │ ├── mmlu_mg │ ├── README.md │ ├── __init__.py │ ├── cot │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── ensemble_refinement │ │ └── __init__.py │ ├── few_shots │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── mmlu_mg_task.py │ └── self_consistency │ │ └── __init__.py │ ├── mmlu_pm │ ├── README.md │ ├── __init__.py │ ├── cot │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── ensemble_refinement │ │ └── __init__.py │ ├── few_shots │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples.json │ │ └── prompt.jinja │ ├── mmlu_pm_task.py │ └── self_consistency │ │ └── __init__.py │ └── pubmedqa │ ├── README.md │ ├── __init__.py │ ├── cot │ ├── README.md │ ├── __init__.py │ ├── examples.json │ └── prompt.jinja │ ├── ensemble_refinement │ └── __init__.py │ ├── few_shots │ ├── README.md │ ├── __init__.py │ ├── examples.json │ └── prompt.jinja │ ├── pubmedqa_task.py │ └── self_consistency │ └── __init__.py ├── ruff.toml └── tests └── loader ├── __init__.py └── test_factory.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/README.md -------------------------------------------------------------------------------- /assets/eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/assets/eval.png -------------------------------------------------------------------------------- /assets/readme.txt: -------------------------------------------------------------------------------- 1 | . 2 | -------------------------------------------------------------------------------- /docs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /notebooks/demo_prompts.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/notebooks/demo_prompts.ipynb -------------------------------------------------------------------------------- /notebooks/fewshots_examples.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/notebooks/fewshots_examples.ipynb -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/pyproject.toml -------------------------------------------------------------------------------- /rosettaeval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/__init__.py -------------------------------------------------------------------------------- /rosettaeval/loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/__init__.py -------------------------------------------------------------------------------- /rosettaeval/loader/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/factory.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/__init__.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/adapter.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/decorators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/decorators.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/medhalt/metadata.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/medhalt/metadata.json -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/medmcqa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/medmcqa/__init__.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/medmcqa/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/medmcqa/base.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/medmcqa/metadata.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/medmcqa/metadata.json -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/medqa/__init__.py: -------------------------------------------------------------------------------- 1 | from rosettaeval.loader.tasks.medqa.base import MedQaQueryAdapter 2 | -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/medqa/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/medqa/base.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/medqa/metadata.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/medqa/metadata.json -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/mmlu/metadata.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/mmlu/metadata.json -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/models.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/pubmedqa/metadata.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/pubmedqa/metadata.json -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/template.py -------------------------------------------------------------------------------- /rosettaeval/loader/tasks/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/tasks/types.py -------------------------------------------------------------------------------- /rosettaeval/loader/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/loader/utils.py -------------------------------------------------------------------------------- /rosettaeval/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medhalt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medmcqa/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medmcqa/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medmcqa/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/medqa/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/medqa/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/medqa/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/medqa/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/medqa_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/medqa/medqa_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/medqa/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_anatomy/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_anatomy/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_anatomy/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_anatomy/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/mmlu_anatomy_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_anatomy/mmlu_anatomy_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_anatomy/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cb/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cb/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cb/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cb/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/mmlu_cb_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cb/mmlu_cb_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cb/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_ck/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_ck/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_ck/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_ck/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/mmlu_ck_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_ck/mmlu_ck_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_ck/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cm/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cm/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cm/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cm/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/mmlu_cm_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_cm/mmlu_cm_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_cm/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_mg/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_mg/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_mg/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_mg/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/mmlu_mg_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_mg/mmlu_mg_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_mg/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_pm/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_pm/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_pm/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_pm/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/mmlu_pm_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/mmlu_pm/mmlu_pm_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/mmlu_pm/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/cot/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/cot/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/cot/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/pubmedqa/cot/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/cot/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/pubmedqa/cot/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/ensemble_refinement/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/few_shots/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/few_shots/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/few_shots/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/pubmedqa/few_shots/examples.json -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/few_shots/prompt.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/pubmedqa/few_shots/prompt.jinja -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/pubmedqa_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/rosettaeval/tasks/pubmedqa/pubmedqa_task.py -------------------------------------------------------------------------------- /rosettaeval/tasks/pubmedqa/self_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/ruff.toml -------------------------------------------------------------------------------- /tests/loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/loader/test_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/promptslab/RosettaEval/HEAD/tests/loader/test_factory.py --------------------------------------------------------------------------------