├── D2O.png ├── LICENSE ├── LLM_merge_new ├── .run_pred_long_bench.py.swp ├── LMEval_kv_token_merge │ ├── __pycache__ │ │ ├── modeling_llama.cpython-310.pyc │ │ ├── modeling_llama3.cpython-310.pyc │ │ ├── modeling_llama3_70b_drop.cpython-310.pyc │ │ ├── modeling_llama3_7b_13b_d2o.cpython-310.pyc │ │ ├── modeling_llama3_7b_13b_drop.cpython-310.pyc │ │ ├── modeling_llama3_7b_13b_merge.cpython-310.pyc │ │ ├── modeling_llama3_full.cpython-310.pyc │ │ ├── modeling_llama3_new.cpython-310.pyc │ │ ├── modeling_llama3_streaming.cpython-310.pyc │ │ ├── modeling_llama_drop.cpython-310.pyc │ │ ├── modeling_llama_drop_merge.cpython-310.pyc │ │ ├── modeling_llama_streaming.cpython-310.pyc │ │ ├── v433_modeling_falcon.cpython-310.pyc │ │ ├── v433_modeling_llama.cpython-310.pyc │ │ ├── v433_modeling_mistral.cpython-310.pyc │ │ ├── v436_modeling_falcon.cpython-310.pyc │ │ ├── v436_modeling_falcon_drop.cpython-310.pyc │ │ ├── v436_modeling_falcon_merge.cpython-310.pyc │ │ ├── v436_modeling_falcon_streaming.cpython-310.pyc │ │ ├── v436_modeling_mistral.cpython-310.pyc │ │ ├── v436_modeling_mistral_drop.cpython-310.pyc │ │ ├── v436_modeling_mistral_merge.cpython-310.pyc │ │ └── v436_modeling_mistral_streaming.cpython-310.pyc │ ├── modeling_llama.py │ ├── modeling_llama3_70b_drop.py │ ├── modeling_llama3_70b_merge.py │ ├── modeling_llama3_7b_13b_d2o.py │ ├── modeling_llama3_7b_13b_drop.py │ ├── modeling_llama3_7b_13b_merge.py │ ├── modeling_llama3_full.py │ ├── modeling_llama3_new.py │ ├── modeling_llama3_streaming.py │ ├── modeling_llama_drop.py │ ├── modeling_llama_drop_merge.py │ ├── modeling_llama_local.py │ ├── modeling_llama_streaming.py │ └── v433_modeling_llama.py ├── __pycache__ │ └── metrics.cpython-310.pyc ├── bash_experiments │ ├── run_osc_coqa_h2o_0.2.sh │ ├── run_osc_coqa_h2o_0.4.sh │ ├── run_osc_coqa_h2o_0.6.sh │ ├── run_osc_coqa_h2o_0.8.sh │ ├── run_osc_coqa_merge_0.2.sh │ ├── run_osc_coqa_merge_0.4.sh │ ├── run_osc_coqa_merge_0.6.sh │ ├── run_osc_coqa_merge_0.8.sh │ ├── run_osc_gsm8k_h2o_0.2.sh │ ├── run_osc_gsm8k_h2o_0.4.sh │ ├── run_osc_gsm8k_h2o_0.6.sh │ ├── run_osc_gsm8k_h2o_0.8.sh │ ├── run_osc_gsm8k_merge_0.2.sh │ ├── run_osc_gsm8k_merge_0.4.sh │ ├── run_osc_gsm8k_merge_0.6.sh │ ├── run_osc_gsm8k_merge_0.8.sh │ ├── run_osc_long_h2o_0.2.sh │ ├── run_osc_long_h2o_0.4.sh │ ├── run_osc_long_h2o_0.6.sh │ ├── run_osc_long_h2o_0.8.sh │ ├── run_osc_long_merge_0.2.sh │ ├── run_osc_long_merge_0.4.sh │ ├── run_osc_long_merge_0.6.sh │ ├── run_osc_long_merge_0.8.sh │ ├── run_osc_truthful_h2o_0.2.sh │ ├── run_osc_truthful_h2o_0.4.sh │ ├── run_osc_truthful_h2o_0.6.sh │ ├── run_osc_truthful_h2o_0.8.sh │ ├── run_osc_truthful_merge_0.2.sh │ ├── run_osc_truthful_merge_0.4.sh │ ├── run_osc_truthful_merge_0.6.sh │ └── run_osc_truthful_merge_0.8.sh ├── config │ ├── dataset2maxlen.json │ ├── dataset2prompt.json │ ├── model2maxlen.json │ └── model2path.json ├── data │ ├── copa-5.jsonl │ ├── mt_bench.jsonl │ ├── openbookqa-5.jsonl │ ├── piqa-5.jsonl │ ├── summarization_data │ │ ├── xsum_0shot.jsonl │ │ ├── xsum_3shot.jsonl │ │ └── xsum_5shot.jsonl │ ├── xsum.jsonl │ └── xsum_opt.jsonl ├── eval_long_bench.py ├── evaluate_task_result.py ├── generate_task_data.py ├── helm │ ├── .github │ │ └── workflows │ │ │ ├── python-publish.yml │ │ │ └── test.yml │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── .readthedocs.yaml │ ├── CHANGELOG.md │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── command │ │ ├── eval.sh │ │ └── get_data.sh │ ├── demo.py │ ├── docs │ │ ├── adding_new_models.md │ │ ├── benchmark.md │ │ ├── code.md │ │ ├── developer_setup.md │ │ ├── docstrings.css │ │ ├── huggingface_models.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── metrics.md │ │ ├── mkdocs_macros.py │ │ ├── models.md │ │ ├── perturbations.md │ │ ├── proxy-server.md │ │ ├── quick_start.md │ │ ├── requirements.txt │ │ ├── scenarios.md │ │ ├── schemas.md │ │ └── tutorial.md │ ├── mkdocs.yml │ ├── pre-commit-venv.sh │ ├── pre-commit.sh │ ├── pyproject.toml │ ├── requirements-dev.txt │ ├── requirements-freeze.txt │ ├── requirements.txt │ ├── scripts │ │ ├── cache │ │ │ ├── __init__.py │ │ │ ├── copy_cache.py │ │ │ ├── fix_anthropic_cache.py │ │ │ ├── fix_together_cache.py │ │ │ └── remove_together_api_entries.py │ │ ├── efficiency │ │ │ ├── generate_instances.py │ │ │ └── generate_run_specs.py │ │ ├── estimate_cost.py │ │ ├── fact_completion │ │ │ ├── README.MD │ │ │ ├── create_benchmark.py │ │ │ ├── fetch_triples_and_aliases.py │ │ │ ├── filter_triples.py │ │ │ └── utils.py │ │ ├── helm-run-all.sh │ │ ├── offline_eval │ │ │ ├── __init__.py │ │ │ ├── export_requests.py │ │ │ └── import_results.py │ │ └── verify_reproducibility.py │ ├── setup.cfg │ ├── setup.py │ └── src │ │ └── helm │ │ ├── __init__.py │ │ ├── benchmark │ │ ├── __init__.py │ │ ├── adaptation │ │ │ ├── __init__.py │ │ │ ├── adapter_spec.py │ │ │ ├── adapters │ │ │ │ ├── __init__.py │ │ │ │ ├── adapter.py │ │ │ │ ├── adapter_factory.py │ │ │ │ ├── binary_ranking_adapter.py │ │ │ │ ├── generation_adapter.py │ │ │ │ ├── in_context_learning_adapter.py │ │ │ │ ├── language_modeling_adapter.py │ │ │ │ ├── multiple_choice_calibrated_adapter.py │ │ │ │ ├── multiple_choice_joint_adapter.py │ │ │ │ ├── multiple_choice_separate_adapter.py │ │ │ │ ├── test_adapter.py │ │ │ │ ├── test_generation_adapter.py │ │ │ │ ├── test_language_modeling_adapter.py │ │ │ │ └── test_multiple_choice_joint_adapter.py │ │ │ ├── prompt.py │ │ │ ├── request_state.py │ │ │ └── scenario_state.py │ │ ├── augmentations │ │ │ ├── __init__.py │ │ │ ├── contraction_expansion_perturbation.py │ │ │ ├── contrast_sets_perturbation.py │ │ │ ├── correct_to_misspelling.json │ │ │ ├── data_augmenter.py │ │ │ ├── dialect_perturbation.py │ │ │ ├── extra_space_perturbation.py │ │ │ ├── filler_words_perturbation.py │ │ │ ├── gender_perturbation.py │ │ │ ├── lowercase_perturbation.py │ │ │ ├── mild_mix_perturbation.py │ │ │ ├── misspelling_perturbation.py │ │ │ ├── person_name_perturbation.py │ │ │ ├── perturbation.py │ │ │ ├── perturbation_description.py │ │ │ ├── space_perturbation.py │ │ │ ├── synonym_perturbation.py │ │ │ ├── test_perturbation.py │ │ │ └── typos_perturbation.py │ │ ├── contamination │ │ │ ├── __init__.py │ │ │ ├── compute_contamination_metrics.py │ │ │ ├── contamination_stats.py │ │ │ ├── export_scenario_text.py │ │ │ ├── light_scenario.py │ │ │ ├── light_tokenizer.py │ │ │ ├── load_documents.py │ │ │ └── test_compute_contamination_metrics.py │ │ ├── data_preprocessor.py │ │ ├── efficiency_data │ │ │ ├── inference_denoised_runtimes.json │ │ │ ├── inference_idealized_runtimes.json │ │ │ └── training_efficiency.json │ │ ├── executor.py │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── basic_metrics.py │ │ │ ├── bbq_metrics.py │ │ │ ├── bias_metrics.py │ │ │ ├── bias_word_lists.py │ │ │ ├── classification_metrics.py │ │ │ ├── code_metrics.py │ │ │ ├── code_metrics_helper.py │ │ │ ├── copyright_metrics.py │ │ │ ├── disinformation_metrics.py │ │ │ ├── dry_run_metrics.py │ │ │ ├── machine_translation_metrics.py │ │ │ ├── metric.py │ │ │ ├── metric_name.py │ │ │ ├── metric_service.py │ │ │ ├── numeracy_metrics.py │ │ │ ├── ranking_metrics.py │ │ │ ├── statistic.py │ │ │ ├── summac │ │ │ │ ├── __init__.py │ │ │ │ ├── model_summac.py │ │ │ │ └── utils_misc.py │ │ │ ├── summarization_critique_metrics.py │ │ │ ├── summarization_metrics.py │ │ │ ├── test_bias_metrics.py │ │ │ ├── test_classification_metrics.py │ │ │ ├── test_metric.py │ │ │ ├── test_numeracy_metrics.py │ │ │ ├── test_statistic.py │ │ │ ├── tokens │ │ │ │ ├── __init__.py │ │ │ │ ├── ai21_token_cost_estimator.py │ │ │ │ ├── auto_token_cost_estimator.py │ │ │ │ ├── cohere_token_cost_estimator.py │ │ │ │ ├── free_token_cost_estimator.py │ │ │ │ ├── gooseai_token_cost_estimator.py │ │ │ │ ├── openai_token_cost_estimator.py │ │ │ │ ├── test_ai21_token_cost_estimator.py │ │ │ │ ├── test_openai_token_cost_estimator.py │ │ │ │ └── token_cost_estimator.py │ │ │ └── toxicity_metrics.py │ │ ├── presentation │ │ │ ├── __init__.py │ │ │ ├── apps │ │ │ │ ├── run_specs_gptneox.conf │ │ │ │ ├── run_specs_opt6.conf │ │ │ │ └── run_specs_opt66.conf │ │ │ ├── contamination.py │ │ │ ├── create_plots.py │ │ │ ├── run_display.py │ │ │ ├── run_entry.py │ │ │ ├── run_specs.conf │ │ │ ├── run_specs_big_bench_lite.conf │ │ │ ├── run_specs_biomedical.conf │ │ │ ├── run_specs_chat_gpt.conf │ │ │ ├── run_specs_cnn_opt.conf │ │ │ ├── run_specs_extra.conf │ │ │ ├── run_specs_gpu.conf │ │ │ ├── run_specs_interactive_qa.conf │ │ │ ├── run_specs_opinions_qa_ai21_default.conf │ │ │ ├── run_specs_opinions_qa_ai21_steer.conf │ │ │ ├── run_specs_opinions_qa_openai_default.conf │ │ │ ├── run_specs_opinions_qa_openai_steer.conf │ │ │ ├── run_specs_small.conf │ │ │ ├── run_specs_tiny.conf │ │ │ ├── schema.py │ │ │ ├── summarize.py │ │ │ ├── table.py │ │ │ ├── test_contamination.py │ │ │ ├── test_create_plots.py │ │ │ ├── test_run_entry.py │ │ │ └── xsum │ │ │ │ ├── run_specs_gptneox.conf │ │ │ │ ├── run_specs_llama.conf │ │ │ │ └── run_specs_opt.conf │ │ ├── run.py │ │ ├── run_expander.py │ │ ├── run_specs.py │ │ ├── runner.py │ │ ├── scenarios │ │ │ ├── __init__.py │ │ │ ├── babi_qa_scenario.py │ │ │ ├── bbq_scenario.py │ │ │ ├── big_bench_scenario.py │ │ │ ├── blimp_scenario.py │ │ │ ├── bold_scenario.py │ │ │ ├── boolq_scenario.py │ │ │ ├── civil_comments_scenario.py │ │ │ ├── code_scenario.py │ │ │ ├── code_scenario_apps_pinned_file_order.py │ │ │ ├── code_scenario_helper.py │ │ │ ├── commonsense_scenario.py │ │ │ ├── copyright_scenario.py │ │ │ ├── covid_dialog_scenario.py │ │ │ ├── dialogue_scenarios.py │ │ │ ├── disinformation_scenario.py │ │ │ ├── dyck_language_scenario.py │ │ │ ├── entity_data_imputation_scenario.py │ │ │ ├── entity_matching_scenario.py │ │ │ ├── entity_matching_scenario_fixed_random_state.py │ │ │ ├── gsm_scenario.py │ │ │ ├── ice_scenario.py │ │ │ ├── ice_scenario_pinned_file_order.py │ │ │ ├── imdb_scenario.py │ │ │ ├── imdb_scenario_pinned_file_order.py │ │ │ ├── interactive_qa_mmlu_scenario.py │ │ │ ├── legal_summarization_scenario.py │ │ │ ├── legal_support_scenario.py │ │ │ ├── lex_glue_scenario.py │ │ │ ├── lextreme_scenario.py │ │ │ ├── lsat_qa_scenario.py │ │ │ ├── math_scenario.py │ │ │ ├── me_q_sum_scenario.py │ │ │ ├── med_dialog_scenario.py │ │ │ ├── med_mcqa_scenario.py │ │ │ ├── med_paragraph_simplification_scenario.py │ │ │ ├── med_qa_scenario.py │ │ │ ├── mmlu_scenario.py │ │ │ ├── msmarco_scenario.py │ │ │ ├── narrativeqa_scenario.py │ │ │ ├── natural_qa_scenario.py │ │ │ ├── newsqa_scenario.py │ │ │ ├── numeracy_scenario.py │ │ │ ├── opinions_qa_scenario.py │ │ │ ├── pubmed_qa_scenario.py │ │ │ ├── quac_scenario.py │ │ │ ├── raft_scenario.py │ │ │ ├── real_toxicity_prompts_scenario.py │ │ │ ├── scenario.py │ │ │ ├── simple_scenarios.py │ │ │ ├── summarization_scenario.py │ │ │ ├── synthetic_efficiency_scenario.py │ │ │ ├── synthetic_reasoning_natural_scenario.py │ │ │ ├── synthetic_reasoning_scenario.py │ │ │ ├── test_scenario.py │ │ │ ├── the_pile_scenario.py │ │ │ ├── truthful_qa_scenario.py │ │ │ ├── twitter_aae_scenario.py │ │ │ ├── wikifact_scenario.py │ │ │ ├── wikitext_103_scenario.py │ │ │ └── wmt_14_scenario.py │ │ ├── server.py │ │ ├── static │ │ │ ├── benchmarking.css │ │ │ ├── benchmarking.js │ │ │ ├── contamination.yaml │ │ │ ├── general.js │ │ │ ├── images │ │ │ │ ├── crfm-logo.png │ │ │ │ ├── helm-logo-simple.png │ │ │ │ ├── helm-logo.png │ │ │ │ ├── language-model-helm.png │ │ │ │ ├── organizations │ │ │ │ │ ├── ai21.png │ │ │ │ │ ├── anthropic.png │ │ │ │ │ ├── bigscience.png │ │ │ │ │ ├── cohere.png │ │ │ │ │ ├── eleutherai.png │ │ │ │ │ ├── google.png │ │ │ │ │ ├── meta.png │ │ │ │ │ ├── microsoft.png │ │ │ │ │ ├── nvidia.png │ │ │ │ │ ├── openai.png │ │ │ │ │ ├── together.png │ │ │ │ │ ├── tsinghua-keg.png │ │ │ │ │ └── yandex.png │ │ │ │ ├── scenarios-by-metrics.png │ │ │ │ └── taxonomy-scenarios.png │ │ │ ├── index.html │ │ │ ├── info-icon.png │ │ │ ├── json-urls-root.js │ │ │ ├── json-urls.js │ │ │ ├── plot-captions.js │ │ │ ├── schema.yaml │ │ │ └── utils.js │ │ ├── test_data_preprocessor.py │ │ ├── test_run_expander.py │ │ └── window_services │ │ │ ├── __init__.py │ │ │ ├── ai21_window_service.py │ │ │ ├── anthropic_window_service.py │ │ │ ├── bloom_window_service.py │ │ │ ├── cohere_window_service.py │ │ │ ├── encoder_decoder_window_service.py │ │ │ ├── flan_t5_window_service.py │ │ │ ├── gpt2_window_service.py │ │ │ ├── gptj_window_service.py │ │ │ ├── gptneox_window_service.py │ │ │ ├── huggingface_window_service.py │ │ │ ├── ice_window_service.py │ │ │ ├── local_window_service.py │ │ │ ├── luminous_window_service.py │ │ │ ├── megatron_window_service.py │ │ │ ├── mock_ai21_tokenizer_request_results.pkl │ │ │ ├── mt_nlg_window_service.py │ │ │ ├── openai_window_service.py │ │ │ ├── opt_window_service.py │ │ │ ├── palmyra_window_service.py │ │ │ ├── remote_window_service.py │ │ │ ├── santacoder_window_service.py │ │ │ ├── starcoder_window_service.py │ │ │ ├── t0pp_window_service.py │ │ │ ├── t511b_window_service.py │ │ │ ├── test_ai21_window_service.py │ │ │ ├── test_anthropic_window_service.py │ │ │ ├── test_bloom_window_service.py │ │ │ ├── test_cohere_window_service.py │ │ │ ├── test_cohere_window_service_utils.py │ │ │ ├── test_flan_t5_window_service.py │ │ │ ├── test_gpt2_window_service.py │ │ │ ├── test_gpt4_window_service.py │ │ │ ├── test_gptj_window_service.py │ │ │ ├── test_gptneox_window_service.py │ │ │ ├── test_ice_window_service.py │ │ │ ├── test_mt_nlg_window_service.py │ │ │ ├── test_openai_window_service.py │ │ │ ├── test_opt_window_service.py │ │ │ ├── test_palmyra_window_service.py │ │ │ ├── test_t0pp_window_service.py │ │ │ ├── test_t511b_window_service.py │ │ │ ├── test_ul2_window_service.py │ │ │ ├── test_utils.py │ │ │ ├── test_yalm_window_service.py │ │ │ ├── tokenizer_service.py │ │ │ ├── ul2_window_service.py │ │ │ ├── wider_ai21_window_service.py │ │ │ ├── wider_openai_window_service.py │ │ │ ├── window_service.py │ │ │ ├── window_service_factory.py │ │ │ └── yalm_window_service.py │ │ ├── common │ │ ├── __init__.py │ │ ├── authentication.py │ │ ├── cache.py │ │ ├── codec.py │ │ ├── critique_request.py │ │ ├── general.py │ │ ├── hierarchical_logger.py │ │ ├── object_spec.py │ │ ├── perspective_api_request.py │ │ ├── request.py │ │ ├── test_cache.py │ │ ├── test_codec.py │ │ ├── test_general.py │ │ └── tokenization_request.py │ │ └── proxy │ │ ├── __init__.py │ │ ├── accounts.py │ │ ├── cli.py │ │ ├── clients │ │ ├── __init__.py │ │ ├── ai21_client.py │ │ ├── aleph_alpha_client.py │ │ ├── anthropic_client.py │ │ ├── auto_client.py │ │ ├── chat_gpt_client.py │ │ ├── client.py │ │ ├── cohere_client.py │ │ ├── critique_client.py │ │ ├── google_client.py │ │ ├── goose_ai_client.py │ │ ├── huggingface_client.py │ │ ├── huggingface_model_registry.py │ │ ├── huggingface_tokenizer.py │ │ ├── ice_tokenizer_client.py │ │ ├── mechanical_turk_critique_client.py │ │ ├── mechanical_turk_critique_exporter.py │ │ ├── mechanical_turk_critique_importer.py │ │ ├── megatron_client.py │ │ ├── microsoft_client.py │ │ ├── openai_client.py │ │ ├── palmyra_client.py │ │ ├── perspective_api_client.py │ │ ├── remote_model_registry.py │ │ ├── simple_client.py │ │ ├── test_anthropic_client.py │ │ ├── test_client.py │ │ ├── test_huggingface_client.py │ │ ├── test_huggingface_model_registry.py │ │ ├── test_huggingface_tokenizer.py │ │ ├── test_ice_tokenizer_client.py │ │ ├── test_yalm_tokenizer_client.py │ │ ├── together_client.py │ │ ├── yalm_tokenizer │ │ │ ├── __init__.py │ │ │ ├── test_yalm_tokenizer.py │ │ │ ├── voc_100b.sp │ │ │ └── yalm_tokenizer.py │ │ └── yalm_tokenizer_client.py │ │ ├── example_queries.py │ │ ├── models.py │ │ ├── query.py │ │ ├── retry.py │ │ ├── server.py │ │ ├── services │ │ ├── __init__.py │ │ ├── remote_service.py │ │ ├── server_service.py │ │ ├── service.py │ │ ├── test_remote_service.py │ │ └── test_service.py │ │ ├── static │ │ ├── general.js │ │ ├── help.html │ │ ├── index.css │ │ ├── index.html │ │ ├── index.js │ │ └── info-icon.png │ │ ├── test_models.py │ │ ├── test_retry.py │ │ └── token_counters │ │ ├── __init__.py │ │ ├── ai21_token_counter.py │ │ ├── auto_token_counter.py │ │ ├── cohere_token_counter.py │ │ ├── free_token_counter.py │ │ ├── gooseai_token_counter.py │ │ ├── openai_token_counter.py │ │ ├── test_ai21_token_counter.py │ │ ├── test_openai_token_counter.py │ │ └── token_counter.py ├── kv_token_merge │ ├── __pycache__ │ │ ├── modify_llama.cpython-310.pyc │ │ ├── modify_llama_merge.cpython-310.pyc │ │ └── stream.cpython-310.pyc │ ├── modify_llama.py │ ├── modify_llama_merge.py │ └── stream.py ├── lm-evaluation-harness │ ├── .coveragerc │ ├── .flake8 │ ├── .github │ │ └── workflows │ │ │ ├── new_tasks.yml │ │ │ └── unit_tests.yml │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── CITATION.bib │ ├── CODEOWNERS │ ├── LICENSE.md │ ├── README.md │ ├── docs │ │ ├── README.md │ │ ├── decontamination.md │ │ ├── img │ │ │ └── fewshot_example_gpt3.png │ │ ├── interface.md │ │ ├── model_guide.md │ │ ├── new_task_guide.md │ │ └── task_guide.md │ ├── examples │ │ └── lm-eval-overview.ipynb │ ├── ignore.txt │ ├── lm_eval │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── api │ │ │ ├── __init__.py │ │ │ ├── filter.py │ │ │ ├── instance.py │ │ │ ├── metrics.py │ │ │ ├── model.py │ │ │ ├── registry.py │ │ │ ├── samplers.py │ │ │ └── task.py │ │ ├── decontamination │ │ │ ├── __init__.py │ │ │ ├── archiver.py │ │ │ ├── decontaminate.py │ │ │ └── janitor.py │ │ ├── evaluator.py │ │ ├── filters │ │ │ ├── __init__.py │ │ │ ├── decontamination.py │ │ │ ├── extraction.py │ │ │ ├── selection.py │ │ │ └── transformation.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── anthropic_llms.py │ │ │ ├── dummy.py │ │ │ ├── gguf.py │ │ │ ├── huggingface.py │ │ │ ├── openai_completions.py │ │ │ ├── textsynth.py │ │ │ └── vllm_causallms.py │ │ ├── prompts │ │ │ └── __init__.py │ │ ├── tasks │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── anli │ │ │ │ ├── README.md │ │ │ │ ├── anli_r1.yaml │ │ │ │ ├── anli_r2.yaml │ │ │ │ └── anli_r3.yaml │ │ │ ├── arc │ │ │ │ ├── README.md │ │ │ │ ├── arc_challenge.yaml │ │ │ │ └── arc_easy.yaml │ │ │ ├── arithmetic │ │ │ │ ├── README.md │ │ │ │ ├── arithmetic_1dc.yaml │ │ │ │ ├── arithmetic_2da.yaml │ │ │ │ ├── arithmetic_2dm.yaml │ │ │ │ ├── arithmetic_2ds.yaml │ │ │ │ ├── arithmetic_3da.yaml │ │ │ │ ├── arithmetic_3ds.yaml │ │ │ │ ├── arithmetic_4da.yaml │ │ │ │ ├── arithmetic_4ds.yaml │ │ │ │ ├── arithmetic_5da.yaml │ │ │ │ └── arithmetic_5ds.yaml │ │ │ ├── asdiv │ │ │ │ ├── README.md │ │ │ │ └── default.yaml │ │ │ ├── babi │ │ │ │ ├── README.md │ │ │ │ └── babi.yaml │ │ │ ├── bbh │ │ │ │ ├── README.md │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── cot_fewshot │ │ │ │ │ ├── _cot_fewshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ │ ├── cot_zeroshot │ │ │ │ │ ├── _cot_zeroshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ │ ├── fewshot │ │ │ │ │ ├── _fewshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ │ └── zeroshot │ │ │ │ │ ├── _zeroshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ ├── belebele │ │ │ │ ├── README.md │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── belebele_acm_Arab.yaml │ │ │ │ ├── belebele_afr_Latn.yaml │ │ │ │ ├── belebele_als_Latn.yaml │ │ │ │ ├── belebele_amh_Ethi.yaml │ │ │ │ ├── belebele_apc_Arab.yaml │ │ │ │ ├── belebele_arb_Arab.yaml │ │ │ │ ├── belebele_arb_Latn.yaml │ │ │ │ ├── belebele_ars_Arab.yaml │ │ │ │ ├── belebele_ary_Arab.yaml │ │ │ │ ├── belebele_arz_Arab.yaml │ │ │ │ ├── belebele_asm_Beng.yaml │ │ │ │ ├── belebele_azj_Latn.yaml │ │ │ │ ├── belebele_bam_Latn.yaml │ │ │ │ ├── belebele_ben_Beng.yaml │ │ │ │ ├── belebele_ben_Latn.yaml │ │ │ │ ├── belebele_bod_Tibt.yaml │ │ │ │ ├── belebele_bul_Cyrl.yaml │ │ │ │ ├── belebele_cat_Latn.yaml │ │ │ │ ├── belebele_ceb_Latn.yaml │ │ │ │ ├── belebele_ces_Latn.yaml │ │ │ │ ├── belebele_ckb_Arab.yaml │ │ │ │ ├── belebele_dan_Latn.yaml │ │ │ │ ├── belebele_deu_Latn.yaml │ │ │ │ ├── belebele_ell_Grek.yaml │ │ │ │ ├── belebele_eng_Latn.yaml │ │ │ │ ├── belebele_est_Latn.yaml │ │ │ │ ├── belebele_eus_Latn.yaml │ │ │ │ ├── belebele_fin_Latn.yaml │ │ │ │ ├── belebele_fra_Latn.yaml │ │ │ │ ├── belebele_fuv_Latn.yaml │ │ │ │ ├── belebele_gaz_Latn.yaml │ │ │ │ ├── belebele_grn_Latn.yaml │ │ │ │ ├── belebele_guj_Gujr.yaml │ │ │ │ ├── belebele_hat_Latn.yaml │ │ │ │ ├── belebele_hau_Latn.yaml │ │ │ │ ├── belebele_heb_Hebr.yaml │ │ │ │ ├── belebele_hin_Deva.yaml │ │ │ │ ├── belebele_hin_Latn.yaml │ │ │ │ ├── belebele_hrv_Latn.yaml │ │ │ │ ├── belebele_hun_Latn.yaml │ │ │ │ ├── belebele_hye_Armn.yaml │ │ │ │ ├── belebele_ibo_Latn.yaml │ │ │ │ ├── belebele_ilo_Latn.yaml │ │ │ │ ├── belebele_ind_Latn.yaml │ │ │ │ ├── belebele_isl_Latn.yaml │ │ │ │ ├── belebele_ita_Latn.yaml │ │ │ │ ├── belebele_jav_Latn.yaml │ │ │ │ ├── belebele_jpn_Jpan.yaml │ │ │ │ ├── belebele_kac_Latn.yaml │ │ │ │ ├── belebele_kan_Knda.yaml │ │ │ │ ├── belebele_kat_Geor.yaml │ │ │ │ ├── belebele_kaz_Cyrl.yaml │ │ │ │ ├── belebele_kea_Latn.yaml │ │ │ │ ├── belebele_khk_Cyrl.yaml │ │ │ │ ├── belebele_khm_Khmr.yaml │ │ │ │ ├── belebele_kin_Latn.yaml │ │ │ │ ├── belebele_kir_Cyrl.yaml │ │ │ │ ├── belebele_kor_Hang.yaml │ │ │ │ ├── belebele_lao_Laoo.yaml │ │ │ │ ├── belebele_lin_Latn.yaml │ │ │ │ ├── belebele_lit_Latn.yaml │ │ │ │ ├── belebele_lug_Latn.yaml │ │ │ │ ├── belebele_luo_Latn.yaml │ │ │ │ ├── belebele_lvs_Latn.yaml │ │ │ │ ├── belebele_mal_Mlym.yaml │ │ │ │ ├── belebele_mar_Deva.yaml │ │ │ │ ├── belebele_mkd_Cyrl.yaml │ │ │ │ ├── belebele_mlt_Latn.yaml │ │ │ │ ├── belebele_mri_Latn.yaml │ │ │ │ ├── belebele_mya_Mymr.yaml │ │ │ │ ├── belebele_nld_Latn.yaml │ │ │ │ ├── belebele_nob_Latn.yaml │ │ │ │ ├── belebele_npi_Deva.yaml │ │ │ │ ├── belebele_npi_Latn.yaml │ │ │ │ ├── belebele_nso_Latn.yaml │ │ │ │ ├── belebele_nya_Latn.yaml │ │ │ │ ├── belebele_ory_Orya.yaml │ │ │ │ ├── belebele_pan_Guru.yaml │ │ │ │ ├── belebele_pbt_Arab.yaml │ │ │ │ ├── belebele_pes_Arab.yaml │ │ │ │ ├── belebele_plt_Latn.yaml │ │ │ │ ├── belebele_pol_Latn.yaml │ │ │ │ ├── belebele_por_Latn.yaml │ │ │ │ ├── belebele_ron_Latn.yaml │ │ │ │ ├── belebele_rus_Cyrl.yaml │ │ │ │ ├── belebele_shn_Mymr.yaml │ │ │ │ ├── belebele_sin_Latn.yaml │ │ │ │ ├── belebele_sin_Sinh.yaml │ │ │ │ ├── belebele_slk_Latn.yaml │ │ │ │ ├── belebele_slv_Latn.yaml │ │ │ │ ├── belebele_sna_Latn.yaml │ │ │ │ ├── belebele_snd_Arab.yaml │ │ │ │ ├── belebele_som_Latn.yaml │ │ │ │ ├── belebele_sot_Latn.yaml │ │ │ │ ├── belebele_spa_Latn.yaml │ │ │ │ ├── belebele_srp_Cyrl.yaml │ │ │ │ ├── belebele_ssw_Latn.yaml │ │ │ │ ├── belebele_sun_Latn.yaml │ │ │ │ ├── belebele_swe_Latn.yaml │ │ │ │ ├── belebele_swh_Latn.yaml │ │ │ │ ├── belebele_tam_Taml.yaml │ │ │ │ ├── belebele_tel_Telu.yaml │ │ │ │ ├── belebele_tgk_Cyrl.yaml │ │ │ │ ├── belebele_tgl_Latn.yaml │ │ │ │ ├── belebele_tha_Thai.yaml │ │ │ │ ├── belebele_tir_Ethi.yaml │ │ │ │ ├── belebele_tsn_Latn.yaml │ │ │ │ ├── belebele_tso_Latn.yaml │ │ │ │ ├── belebele_tur_Latn.yaml │ │ │ │ ├── belebele_ukr_Cyrl.yaml │ │ │ │ ├── belebele_urd_Arab.yaml │ │ │ │ ├── belebele_urd_Latn.yaml │ │ │ │ ├── belebele_uzn_Latn.yaml │ │ │ │ ├── belebele_vie_Latn.yaml │ │ │ │ ├── belebele_war_Latn.yaml │ │ │ │ ├── belebele_wol_Latn.yaml │ │ │ │ ├── belebele_xho_Latn.yaml │ │ │ │ ├── belebele_yor_Latn.yaml │ │ │ │ ├── belebele_zho_Hans.yaml │ │ │ │ ├── belebele_zho_Hant.yaml │ │ │ │ ├── belebele_zsm_Latn.yaml │ │ │ │ └── belebele_zul_Latn.yaml │ │ │ ├── benchmarks │ │ │ │ ├── flan │ │ │ │ │ ├── flan_anli.yaml │ │ │ │ │ ├── flan_arc.yaml │ │ │ │ │ ├── flan_boolq.yaml │ │ │ │ │ ├── flan_cot.yaml │ │ │ │ │ ├── flan_held_in.yaml │ │ │ │ │ ├── flan_held_in_yaml │ │ │ │ │ ├── flan_held_out.yaml │ │ │ │ │ ├── flan_rte.yaml │ │ │ │ │ ├── prompt_templates │ │ │ │ │ │ ├── anli.yaml │ │ │ │ │ │ ├── arc.yaml │ │ │ │ │ │ ├── boolq.yaml │ │ │ │ │ │ └── rte.yaml │ │ │ │ │ └── yaml_templates │ │ │ │ │ │ ├── cot_template_yaml │ │ │ │ │ │ └── held_in_template_yaml │ │ │ │ ├── minerva_math.yaml │ │ │ │ ├── pythia.yaml │ │ │ │ └── t0_eval.yaml │ │ │ ├── bigbench │ │ │ │ ├── README.md │ │ │ │ ├── generate_tasks.py │ │ │ │ ├── generate_until │ │ │ │ │ ├── abstract_narrative_understanding.yaml │ │ │ │ │ ├── anachronisms.yaml │ │ │ │ │ ├── analogical_similarity.yaml │ │ │ │ │ ├── analytic_entailment.yaml │ │ │ │ │ ├── arithmetic.yaml │ │ │ │ │ ├── ascii_word_recognition.yaml │ │ │ │ │ ├── authorship_verification.yaml │ │ │ │ │ ├── auto_categorization.yaml │ │ │ │ │ ├── auto_debugging.yaml │ │ │ │ │ ├── bbq_lite_json.yaml │ │ │ │ │ ├── bridging_anaphora_resolution_barqa.yaml │ │ │ │ │ ├── causal_judgment.yaml │ │ │ │ │ ├── cause_and_effect.yaml │ │ │ │ │ ├── checkmate_in_one.yaml │ │ │ │ │ ├── chess_state_tracking.yaml │ │ │ │ │ ├── chinese_remainder_theorem.yaml │ │ │ │ │ ├── cifar10_classification.yaml │ │ │ │ │ ├── code_line_description.yaml │ │ │ │ │ ├── codenames.yaml │ │ │ │ │ ├── color.yaml │ │ │ │ │ ├── common_morpheme.yaml │ │ │ │ │ ├── conceptual_combinations.yaml │ │ │ │ │ ├── conlang_translation.yaml │ │ │ │ │ ├── contextual_parametric_knowledge_conflicts.yaml │ │ │ │ │ ├── crash_blossom.yaml │ │ │ │ │ ├── crass_ai.yaml │ │ │ │ │ ├── cryobiology_spanish.yaml │ │ │ │ │ ├── cryptonite.yaml │ │ │ │ │ ├── cs_algorithms.yaml │ │ │ │ │ ├── dark_humor_detection.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── discourse_marker_prediction.yaml │ │ │ │ │ ├── disfl_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── elementary_math_qa.yaml │ │ │ │ │ ├── emoji_movie.yaml │ │ │ │ │ ├── emojis_emotion_prediction.yaml │ │ │ │ │ ├── empirical_judgments.yaml │ │ │ │ │ ├── english_proverbs.yaml │ │ │ │ │ ├── english_russian_proverbs.yaml │ │ │ │ │ ├── entailed_polarity.yaml │ │ │ │ │ ├── entailed_polarity_hindi.yaml │ │ │ │ │ ├── epistemic_reasoning.yaml │ │ │ │ │ ├── evaluating_information_essentiality.yaml │ │ │ │ │ ├── fact_checker.yaml │ │ │ │ │ ├── fantasy_reasoning.yaml │ │ │ │ │ ├── few_shot_nlg.yaml │ │ │ │ │ ├── figure_of_speech_detection.yaml │ │ │ │ │ ├── formal_fallacies_syllogisms_negation.yaml │ │ │ │ │ ├── gem.yaml │ │ │ │ │ ├── gender_inclusive_sentences_german.yaml │ │ │ │ │ ├── general_knowledge.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── goal_step_wikihow.yaml │ │ │ │ │ ├── gre_reading_comprehension.yaml │ │ │ │ │ ├── hhh_alignment.yaml │ │ │ │ │ ├── hindi_question_answering.yaml │ │ │ │ │ ├── hindu_knowledge.yaml │ │ │ │ │ ├── hinglish_toxicity.yaml │ │ │ │ │ ├── human_organs_senses.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── identify_math_theorems.yaml │ │ │ │ │ ├── identify_odd_metaphor.yaml │ │ │ │ │ ├── implicatures.yaml │ │ │ │ │ ├── implicit_relations.yaml │ │ │ │ │ ├── intent_recognition.yaml │ │ │ │ │ ├── international_phonetic_alphabet_nli.yaml │ │ │ │ │ ├── international_phonetic_alphabet_transliterate.yaml │ │ │ │ │ ├── intersect_geometry.yaml │ │ │ │ │ ├── irony_identification.yaml │ │ │ │ │ ├── kanji_ascii.yaml │ │ │ │ │ ├── kannada.yaml │ │ │ │ │ ├── key_value_maps.yaml │ │ │ │ │ ├── known_unknowns.yaml │ │ │ │ │ ├── language_games.yaml │ │ │ │ │ ├── language_identification.yaml │ │ │ │ │ ├── linguistic_mappings.yaml │ │ │ │ │ ├── linguistics_puzzles.yaml │ │ │ │ │ ├── list_functions.yaml │ │ │ │ │ ├── logic_grid_puzzle.yaml │ │ │ │ │ ├── logical_args.yaml │ │ │ │ │ ├── logical_deduction.yaml │ │ │ │ │ ├── logical_fallacy_detection.yaml │ │ │ │ │ ├── logical_sequence.yaml │ │ │ │ │ ├── mathematical_induction.yaml │ │ │ │ │ ├── matrixshapes.yaml │ │ │ │ │ ├── metaphor_boolean.yaml │ │ │ │ │ ├── metaphor_understanding.yaml │ │ │ │ │ ├── minute_mysteries_qa.yaml │ │ │ │ │ ├── misconceptions.yaml │ │ │ │ │ ├── misconceptions_russian.yaml │ │ │ │ │ ├── mnist_ascii.yaml │ │ │ │ │ ├── modified_arithmetic.yaml │ │ │ │ │ ├── moral_permissibility.yaml │ │ │ │ │ ├── movie_dialog_same_or_different.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── mult_data_wrangling.yaml │ │ │ │ │ ├── multiemo.yaml │ │ │ │ │ ├── natural_instructions.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── nonsense_words_grammar.yaml │ │ │ │ │ ├── novel_concepts.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── odd_one_out.yaml │ │ │ │ │ ├── operators.yaml │ │ │ │ │ ├── paragraph_segmentation.yaml │ │ │ │ │ ├── parsinlu_qa.yaml │ │ │ │ │ ├── parsinlu_reading_comprehension.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── periodic_elements.yaml │ │ │ │ │ ├── persian_idioms.yaml │ │ │ │ │ ├── phrase_relatedness.yaml │ │ │ │ │ ├── physical_intuition.yaml │ │ │ │ │ ├── physics.yaml │ │ │ │ │ ├── physics_questions.yaml │ │ │ │ │ ├── play_dialog_same_or_different.yaml │ │ │ │ │ ├── polish_sequence_labeling.yaml │ │ │ │ │ ├── presuppositions_as_nli.yaml │ │ │ │ │ ├── qa_wikidata.yaml │ │ │ │ │ ├── question_selection.yaml │ │ │ │ │ ├── real_or_fake_text.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── repeat_copy_logic.yaml │ │ │ │ │ ├── rephrase.yaml │ │ │ │ │ ├── riddle_sense.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── scientific_press_release.yaml │ │ │ │ │ ├── semantic_parsing_in_context_sparc.yaml │ │ │ │ │ ├── semantic_parsing_spider.yaml │ │ │ │ │ ├── sentence_ambiguity.yaml │ │ │ │ │ ├── similarities_abstraction.yaml │ │ │ │ │ ├── simp_turing_concept.yaml │ │ │ │ │ ├── simple_arithmetic_json.yaml │ │ │ │ │ ├── simple_arithmetic_json_multiple_choice.yaml │ │ │ │ │ ├── simple_arithmetic_json_subtasks.yaml │ │ │ │ │ ├── simple_arithmetic_multiple_targets_json.yaml │ │ │ │ │ ├── simple_ethical_questions.yaml │ │ │ │ │ ├── simple_text_editing.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── social_iqa.yaml │ │ │ │ │ ├── social_support.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── strange_stories.yaml │ │ │ │ │ ├── strategyqa.yaml │ │ │ │ │ ├── sufficient_information.yaml │ │ │ │ │ ├── suicide_risk.yaml │ │ │ │ │ ├── swahili_english_proverbs.yaml │ │ │ │ │ ├── swedish_to_german_proverbs.yaml │ │ │ │ │ ├── symbol_interpretation.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tense.yaml │ │ │ │ │ ├── timedial.yaml │ │ │ │ │ ├── topical_chat.yaml │ │ │ │ │ ├── tracking_shuffled_objects.yaml │ │ │ │ │ ├── understanding_fables.yaml │ │ │ │ │ ├── undo_permutation.yaml │ │ │ │ │ ├── unit_conversion.yaml │ │ │ │ │ ├── unit_interpretation.yaml │ │ │ │ │ ├── unnatural_in_context_learning.yaml │ │ │ │ │ ├── vitaminc_fact_verification.yaml │ │ │ │ │ ├── what_is_the_tao.yaml │ │ │ │ │ ├── which_wiki_edit.yaml │ │ │ │ │ ├── winowhy.yaml │ │ │ │ │ ├── word_sorting.yaml │ │ │ │ │ └── word_unscrambling.yaml │ │ │ │ ├── generate_until_template_yaml │ │ │ │ ├── multiple_choice │ │ │ │ │ ├── abstract_narrative_understanding.yaml │ │ │ │ │ ├── anachronisms.yaml │ │ │ │ │ ├── analogical_similarity.yaml │ │ │ │ │ ├── analytic_entailment.yaml │ │ │ │ │ ├── arithmetic.yaml │ │ │ │ │ ├── ascii_word_recognition.yaml │ │ │ │ │ ├── authorship_verification.yaml │ │ │ │ │ ├── auto_categorization.yaml │ │ │ │ │ ├── auto_debugging.yaml │ │ │ │ │ ├── bbq_lite_json.yaml │ │ │ │ │ ├── bridging_anaphora_resolution_barqa.yaml │ │ │ │ │ ├── causal_judgment.yaml │ │ │ │ │ ├── cause_and_effect.yaml │ │ │ │ │ ├── checkmate_in_one.yaml │ │ │ │ │ ├── chess_state_tracking.yaml │ │ │ │ │ ├── chinese_remainder_theorem.yaml │ │ │ │ │ ├── cifar10_classification.yaml │ │ │ │ │ ├── code_line_description.yaml │ │ │ │ │ ├── codenames.yaml │ │ │ │ │ ├── color.yaml │ │ │ │ │ ├── common_morpheme.yaml │ │ │ │ │ ├── conceptual_combinations.yaml │ │ │ │ │ ├── conlang_translation.yaml │ │ │ │ │ ├── contextual_parametric_knowledge_conflicts.yaml │ │ │ │ │ ├── crash_blossom.yaml │ │ │ │ │ ├── crass_ai.yaml │ │ │ │ │ ├── cryobiology_spanish.yaml │ │ │ │ │ ├── cryptonite.yaml │ │ │ │ │ ├── cs_algorithms.yaml │ │ │ │ │ ├── dark_humor_detection.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── discourse_marker_prediction.yaml │ │ │ │ │ ├── disfl_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── elementary_math_qa.yaml │ │ │ │ │ ├── emoji_movie.yaml │ │ │ │ │ ├── emojis_emotion_prediction.yaml │ │ │ │ │ ├── empirical_judgments.yaml │ │ │ │ │ ├── english_proverbs.yaml │ │ │ │ │ ├── english_russian_proverbs.yaml │ │ │ │ │ ├── entailed_polarity.yaml │ │ │ │ │ ├── entailed_polarity_hindi.yaml │ │ │ │ │ ├── epistemic_reasoning.yaml │ │ │ │ │ ├── evaluating_information_essentiality.yaml │ │ │ │ │ ├── fact_checker.yaml │ │ │ │ │ ├── fantasy_reasoning.yaml │ │ │ │ │ ├── few_shot_nlg.yaml │ │ │ │ │ ├── figure_of_speech_detection.yaml │ │ │ │ │ ├── formal_fallacies_syllogisms_negation.yaml │ │ │ │ │ ├── gem.yaml │ │ │ │ │ ├── gender_inclusive_sentences_german.yaml │ │ │ │ │ ├── general_knowledge.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── goal_step_wikihow.yaml │ │ │ │ │ ├── gre_reading_comprehension.yaml │ │ │ │ │ ├── hhh_alignment.yaml │ │ │ │ │ ├── hindi_question_answering.yaml │ │ │ │ │ ├── hindu_knowledge.yaml │ │ │ │ │ ├── hinglish_toxicity.yaml │ │ │ │ │ ├── human_organs_senses.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── identify_math_theorems.yaml │ │ │ │ │ ├── identify_odd_metaphor.yaml │ │ │ │ │ ├── implicatures.yaml │ │ │ │ │ ├── implicit_relations.yaml │ │ │ │ │ ├── intent_recognition.yaml │ │ │ │ │ ├── international_phonetic_alphabet_nli.yaml │ │ │ │ │ ├── international_phonetic_alphabet_transliterate.yaml │ │ │ │ │ ├── intersect_geometry.yaml │ │ │ │ │ ├── irony_identification.yaml │ │ │ │ │ ├── kanji_ascii.yaml │ │ │ │ │ ├── kannada.yaml │ │ │ │ │ ├── key_value_maps.yaml │ │ │ │ │ ├── known_unknowns.yaml │ │ │ │ │ ├── language_games.yaml │ │ │ │ │ ├── language_identification.yaml │ │ │ │ │ ├── linguistic_mappings.yaml │ │ │ │ │ ├── linguistics_puzzles.yaml │ │ │ │ │ ├── list_functions.yaml │ │ │ │ │ ├── logic_grid_puzzle.yaml │ │ │ │ │ ├── logical_args.yaml │ │ │ │ │ ├── logical_deduction.yaml │ │ │ │ │ ├── logical_fallacy_detection.yaml │ │ │ │ │ ├── logical_sequence.yaml │ │ │ │ │ ├── mathematical_induction.yaml │ │ │ │ │ ├── matrixshapes.yaml │ │ │ │ │ ├── metaphor_boolean.yaml │ │ │ │ │ ├── metaphor_understanding.yaml │ │ │ │ │ ├── minute_mysteries_qa.yaml │ │ │ │ │ ├── misconceptions.yaml │ │ │ │ │ ├── misconceptions_russian.yaml │ │ │ │ │ ├── mnist_ascii.yaml │ │ │ │ │ ├── modified_arithmetic.yaml │ │ │ │ │ ├── moral_permissibility.yaml │ │ │ │ │ ├── movie_dialog_same_or_different.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── mult_data_wrangling.yaml │ │ │ │ │ ├── multiemo.yaml │ │ │ │ │ ├── natural_instructions.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── nonsense_words_grammar.yaml │ │ │ │ │ ├── novel_concepts.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── odd_one_out.yaml │ │ │ │ │ ├── operators.yaml │ │ │ │ │ ├── paragraph_segmentation.yaml │ │ │ │ │ ├── parsinlu_qa.yaml │ │ │ │ │ ├── parsinlu_reading_comprehension.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── periodic_elements.yaml │ │ │ │ │ ├── persian_idioms.yaml │ │ │ │ │ ├── phrase_relatedness.yaml │ │ │ │ │ ├── physical_intuition.yaml │ │ │ │ │ ├── physics.yaml │ │ │ │ │ ├── physics_questions.yaml │ │ │ │ │ ├── play_dialog_same_or_different.yaml │ │ │ │ │ ├── polish_sequence_labeling.yaml │ │ │ │ │ ├── presuppositions_as_nli.yaml │ │ │ │ │ ├── qa_wikidata.yaml │ │ │ │ │ ├── question_selection.yaml │ │ │ │ │ ├── real_or_fake_text.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── repeat_copy_logic.yaml │ │ │ │ │ ├── rephrase.yaml │ │ │ │ │ ├── riddle_sense.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── scientific_press_release.yaml │ │ │ │ │ ├── semantic_parsing_in_context_sparc.yaml │ │ │ │ │ ├── semantic_parsing_spider.yaml │ │ │ │ │ ├── sentence_ambiguity.yaml │ │ │ │ │ ├── similarities_abstraction.yaml │ │ │ │ │ ├── simp_turing_concept.yaml │ │ │ │ │ ├── simple_arithmetic_json.yaml │ │ │ │ │ ├── simple_arithmetic_json_multiple_choice.yaml │ │ │ │ │ ├── simple_arithmetic_json_subtasks.yaml │ │ │ │ │ ├── simple_arithmetic_multiple_targets_json.yaml │ │ │ │ │ ├── simple_ethical_questions.yaml │ │ │ │ │ ├── simple_text_editing.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── social_iqa.yaml │ │ │ │ │ ├── social_support.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── strange_stories.yaml │ │ │ │ │ ├── strategyqa.yaml │ │ │ │ │ ├── sufficient_information.yaml │ │ │ │ │ ├── suicide_risk.yaml │ │ │ │ │ ├── swahili_english_proverbs.yaml │ │ │ │ │ ├── swedish_to_german_proverbs.yaml │ │ │ │ │ ├── symbol_interpretation.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tense.yaml │ │ │ │ │ ├── timedial.yaml │ │ │ │ │ ├── topical_chat.yaml │ │ │ │ │ ├── tracking_shuffled_objects.yaml │ │ │ │ │ ├── understanding_fables.yaml │ │ │ │ │ ├── undo_permutation.yaml │ │ │ │ │ ├── unit_conversion.yaml │ │ │ │ │ ├── unit_interpretation.yaml │ │ │ │ │ ├── unnatural_in_context_learning.yaml │ │ │ │ │ ├── vitaminc_fact_verification.yaml │ │ │ │ │ ├── what_is_the_tao.yaml │ │ │ │ │ ├── which_wiki_edit.yaml │ │ │ │ │ ├── winowhy.yaml │ │ │ │ │ ├── word_sorting.yaml │ │ │ │ │ └── word_unscrambling.yaml │ │ │ │ ├── multiple_choice_template_yaml │ │ │ │ └── push_bigbench_dataset.py │ │ │ ├── blimp │ │ │ │ ├── README.md │ │ │ │ ├── _template_yaml │ │ │ │ ├── adjunct_island.yaml │ │ │ │ ├── anaphor_gender_agreement.yaml │ │ │ │ ├── anaphor_number_agreement.yaml │ │ │ │ ├── animate_subject_passive.yaml │ │ │ │ ├── animate_subject_trans.yaml │ │ │ │ ├── causative.yaml │ │ │ │ ├── complex_NP_island.yaml │ │ │ │ ├── coordinate_structure_constraint_complex_left_branch.yaml │ │ │ │ ├── coordinate_structure_constraint_object_extraction.yaml │ │ │ │ ├── determiner_noun_agreement_1.yaml │ │ │ │ ├── determiner_noun_agreement_2.yaml │ │ │ │ ├── determiner_noun_agreement_irregular_1.yaml │ │ │ │ ├── determiner_noun_agreement_irregular_2.yaml │ │ │ │ ├── determiner_noun_agreement_with_adj_2.yaml │ │ │ │ ├── determiner_noun_agreement_with_adj_irregular_1.yaml │ │ │ │ ├── determiner_noun_agreement_with_adj_irregular_2.yaml │ │ │ │ ├── determiner_noun_agreement_with_adjective_1.yaml │ │ │ │ ├── distractor_agreement_relational_noun.yaml │ │ │ │ ├── distractor_agreement_relative_clause.yaml │ │ │ │ ├── drop_argument.yaml │ │ │ │ ├── ellipsis_n_bar_1.yaml │ │ │ │ ├── ellipsis_n_bar_2.yaml │ │ │ │ ├── existential_there_object_raising.yaml │ │ │ │ ├── existential_there_quantifiers_1.yaml │ │ │ │ ├── existential_there_quantifiers_2.yaml │ │ │ │ ├── existential_there_subject_raising.yaml │ │ │ │ ├── expletive_it_object_raising.yaml │ │ │ │ ├── generate_configs.py │ │ │ │ ├── inchoative.yaml │ │ │ │ ├── intransitive.yaml │ │ │ │ ├── irregular_past_participle_adjectives.yaml │ │ │ │ ├── irregular_past_participle_verbs.yaml │ │ │ │ ├── irregular_plural_subject_verb_agreement_1.yaml │ │ │ │ ├── irregular_plural_subject_verb_agreement_2.yaml │ │ │ │ ├── left_branch_island_echo_question.yaml │ │ │ │ ├── left_branch_island_simple_question.yaml │ │ │ │ ├── matrix_question_npi_licensor_present.yaml │ │ │ │ ├── npi_present_1.yaml │ │ │ │ ├── npi_present_2.yaml │ │ │ │ ├── only_npi_licensor_present.yaml │ │ │ │ ├── only_npi_scope.yaml │ │ │ │ ├── passive_1.yaml │ │ │ │ ├── passive_2.yaml │ │ │ │ ├── principle_A_c_command.yaml │ │ │ │ ├── principle_A_case_1.yaml │ │ │ │ ├── principle_A_case_2.yaml │ │ │ │ ├── principle_A_domain_1.yaml │ │ │ │ ├── principle_A_domain_2.yaml │ │ │ │ ├── principle_A_domain_3.yaml │ │ │ │ ├── principle_A_reconstruction.yaml │ │ │ │ ├── regular_plural_subject_verb_agreement_1.yaml │ │ │ │ ├── regular_plural_subject_verb_agreement_2.yaml │ │ │ │ ├── sentential_negation_npi_licensor_present.yaml │ │ │ │ ├── sentential_negation_npi_scope.yaml │ │ │ │ ├── sentential_subject_island.yaml │ │ │ │ ├── superlative_quantifiers_1.yaml │ │ │ │ ├── superlative_quantifiers_2.yaml │ │ │ │ ├── tough_vs_raising_1.yaml │ │ │ │ ├── tough_vs_raising_2.yaml │ │ │ │ ├── transitive.yaml │ │ │ │ ├── wh_island.yaml │ │ │ │ ├── wh_questions_object_gap.yaml │ │ │ │ ├── wh_questions_subject_gap.yaml │ │ │ │ ├── wh_questions_subject_gap_long_distance.yaml │ │ │ │ ├── wh_vs_that_no_gap.yaml │ │ │ │ ├── wh_vs_that_no_gap_long_distance.yaml │ │ │ │ ├── wh_vs_that_with_gap.yaml │ │ │ │ └── wh_vs_that_with_gap_long_distance.yaml │ │ │ ├── ceval │ │ │ │ ├── README.md │ │ │ │ ├── _default_ceval_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── ceval-valid_accountant.yaml │ │ │ │ ├── ceval-valid_advanced_mathematics.yaml │ │ │ │ ├── ceval-valid_art_studies.yaml │ │ │ │ ├── ceval-valid_basic_medicine.yaml │ │ │ │ ├── ceval-valid_business_administration.yaml │ │ │ │ ├── ceval-valid_chinese_language_and_literature.yaml │ │ │ │ ├── ceval-valid_civil_servant.yaml │ │ │ │ ├── ceval-valid_clinical_medicine.yaml │ │ │ │ ├── ceval-valid_college_chemistry.yaml │ │ │ │ ├── ceval-valid_college_economics.yaml │ │ │ │ ├── ceval-valid_college_physics.yaml │ │ │ │ ├── ceval-valid_college_programming.yaml │ │ │ │ ├── ceval-valid_computer_architecture.yaml │ │ │ │ ├── ceval-valid_computer_network.yaml │ │ │ │ ├── ceval-valid_discrete_mathematics.yaml │ │ │ │ ├── ceval-valid_education_science.yaml │ │ │ │ ├── ceval-valid_electrical_engineer.yaml │ │ │ │ ├── ceval-valid_environmental_impact_assessment_engineer.yaml │ │ │ │ ├── ceval-valid_fire_engineer.yaml │ │ │ │ ├── ceval-valid_high_school_biology.yaml │ │ │ │ ├── ceval-valid_high_school_chemistry.yaml │ │ │ │ ├── ceval-valid_high_school_chinese.yaml │ │ │ │ ├── ceval-valid_high_school_geography.yaml │ │ │ │ ├── ceval-valid_high_school_history.yaml │ │ │ │ ├── ceval-valid_high_school_mathematics.yaml │ │ │ │ ├── ceval-valid_high_school_physics.yaml │ │ │ │ ├── ceval-valid_high_school_politics.yaml │ │ │ │ ├── ceval-valid_ideological_and_moral_cultivation.yaml │ │ │ │ ├── ceval-valid_law.yaml │ │ │ │ ├── ceval-valid_legal_professional.yaml │ │ │ │ ├── ceval-valid_logic.yaml │ │ │ │ ├── ceval-valid_mao_zedong_thought.yaml │ │ │ │ ├── ceval-valid_marxism.yaml │ │ │ │ ├── ceval-valid_metrology_engineer.yaml │ │ │ │ ├── ceval-valid_middle_school_biology.yaml │ │ │ │ ├── ceval-valid_middle_school_chemistry.yaml │ │ │ │ ├── ceval-valid_middle_school_geography.yaml │ │ │ │ ├── ceval-valid_middle_school_history.yaml │ │ │ │ ├── ceval-valid_middle_school_mathematics.yaml │ │ │ │ ├── ceval-valid_middle_school_physics.yaml │ │ │ │ ├── ceval-valid_middle_school_politics.yaml │ │ │ │ ├── ceval-valid_modern_chinese_history.yaml │ │ │ │ ├── ceval-valid_operating_system.yaml │ │ │ │ ├── ceval-valid_physician.yaml │ │ │ │ ├── ceval-valid_plant_protection.yaml │ │ │ │ ├── ceval-valid_probability_and_statistics.yaml │ │ │ │ ├── ceval-valid_professional_tour_guide.yaml │ │ │ │ ├── ceval-valid_sports_science.yaml │ │ │ │ ├── ceval-valid_tax_accountant.yaml │ │ │ │ ├── ceval-valid_teacher_qualification.yaml │ │ │ │ ├── ceval-valid_urban_and_rural_planner.yaml │ │ │ │ └── ceval-valid_veterinary_medicine.yaml │ │ │ ├── cmmlu │ │ │ │ ├── README.md │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── cmmlu_default_agronomy.yaml │ │ │ │ ├── cmmlu_default_anatomy.yaml │ │ │ │ ├── cmmlu_default_ancient_chinese.yaml │ │ │ │ ├── cmmlu_default_arts.yaml │ │ │ │ ├── cmmlu_default_astronomy.yaml │ │ │ │ ├── cmmlu_default_business_ethics.yaml │ │ │ │ ├── cmmlu_default_chinese_civil_service_exam.yaml │ │ │ │ ├── cmmlu_default_chinese_driving_rule.yaml │ │ │ │ ├── cmmlu_default_chinese_food_culture.yaml │ │ │ │ ├── cmmlu_default_chinese_foreign_policy.yaml │ │ │ │ ├── cmmlu_default_chinese_history.yaml │ │ │ │ ├── cmmlu_default_chinese_literature.yaml │ │ │ │ ├── cmmlu_default_chinese_teacher_qualification.yaml │ │ │ │ ├── cmmlu_default_clinical_knowledge.yaml │ │ │ │ ├── cmmlu_default_college_actuarial_science.yaml │ │ │ │ ├── cmmlu_default_college_education.yaml │ │ │ │ ├── cmmlu_default_college_engineering_hydrology.yaml │ │ │ │ ├── cmmlu_default_college_law.yaml │ │ │ │ ├── cmmlu_default_college_mathematics.yaml │ │ │ │ ├── cmmlu_default_college_medical_statistics.yaml │ │ │ │ ├── cmmlu_default_college_medicine.yaml │ │ │ │ ├── cmmlu_default_computer_science.yaml │ │ │ │ ├── cmmlu_default_computer_security.yaml │ │ │ │ ├── cmmlu_default_conceptual_physics.yaml │ │ │ │ ├── cmmlu_default_construction_project_management.yaml │ │ │ │ ├── cmmlu_default_economics.yaml │ │ │ │ ├── cmmlu_default_education.yaml │ │ │ │ ├── cmmlu_default_electrical_engineering.yaml │ │ │ │ ├── cmmlu_default_elementary_chinese.yaml │ │ │ │ ├── cmmlu_default_elementary_commonsense.yaml │ │ │ │ ├── cmmlu_default_elementary_information_and_technology.yaml │ │ │ │ ├── cmmlu_default_elementary_mathematics.yaml │ │ │ │ ├── cmmlu_default_ethnology.yaml │ │ │ │ ├── cmmlu_default_food_science.yaml │ │ │ │ ├── cmmlu_default_genetics.yaml │ │ │ │ ├── cmmlu_default_global_facts.yaml │ │ │ │ ├── cmmlu_default_high_school_biology.yaml │ │ │ │ ├── cmmlu_default_high_school_chemistry.yaml │ │ │ │ ├── cmmlu_default_high_school_geography.yaml │ │ │ │ ├── cmmlu_default_high_school_mathematics.yaml │ │ │ │ ├── cmmlu_default_high_school_physics.yaml │ │ │ │ ├── cmmlu_default_high_school_politics.yaml │ │ │ │ ├── cmmlu_default_human_sexuality.yaml │ │ │ │ ├── cmmlu_default_international_law.yaml │ │ │ │ ├── cmmlu_default_journalism.yaml │ │ │ │ ├── cmmlu_default_jurisprudence.yaml │ │ │ │ ├── cmmlu_default_legal_and_moral_basis.yaml │ │ │ │ ├── cmmlu_default_logical.yaml │ │ │ │ ├── cmmlu_default_machine_learning.yaml │ │ │ │ ├── cmmlu_default_management.yaml │ │ │ │ ├── cmmlu_default_marketing.yaml │ │ │ │ ├── cmmlu_default_marxist_theory.yaml │ │ │ │ ├── cmmlu_default_modern_chinese.yaml │ │ │ │ ├── cmmlu_default_nutrition.yaml │ │ │ │ ├── cmmlu_default_philosophy.yaml │ │ │ │ ├── cmmlu_default_professional_accounting.yaml │ │ │ │ ├── cmmlu_default_professional_law.yaml │ │ │ │ ├── cmmlu_default_professional_medicine.yaml │ │ │ │ ├── cmmlu_default_professional_psychology.yaml │ │ │ │ ├── cmmlu_default_public_relations.yaml │ │ │ │ ├── cmmlu_default_security_study.yaml │ │ │ │ ├── cmmlu_default_sociology.yaml │ │ │ │ ├── cmmlu_default_sports_science.yaml │ │ │ │ ├── cmmlu_default_traditional_chinese_medicine.yaml │ │ │ │ ├── cmmlu_default_virology.yaml │ │ │ │ ├── cmmlu_default_world_history.yaml │ │ │ │ └── cmmlu_default_world_religions.yaml │ │ │ ├── code_x_glue │ │ │ │ └── code-text │ │ │ │ │ ├── bleu.py │ │ │ │ │ ├── go.yaml │ │ │ │ │ ├── java.yaml │ │ │ │ │ ├── javascript.yaml │ │ │ │ │ ├── php.yaml │ │ │ │ │ ├── python.yaml │ │ │ │ │ ├── ruby.yaml │ │ │ │ │ └── utils.py │ │ │ ├── coqa │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── utils.py │ │ │ ├── crows_pairs │ │ │ │ ├── README.md │ │ │ │ ├── crows_pairs_english.yaml │ │ │ │ ├── crows_pairs_english_age.yaml │ │ │ │ ├── crows_pairs_english_autre.yaml │ │ │ │ ├── crows_pairs_english_disability.yaml │ │ │ │ ├── crows_pairs_english_gender.yaml │ │ │ │ ├── crows_pairs_english_nationality.yaml │ │ │ │ ├── crows_pairs_english_physical_appearance.yaml │ │ │ │ ├── crows_pairs_english_race_color.yaml │ │ │ │ ├── crows_pairs_english_religion.yaml │ │ │ │ ├── crows_pairs_english_sexual_orientation.yaml │ │ │ │ ├── crows_pairs_english_socioeconomic.yaml │ │ │ │ ├── crows_pairs_french.yaml │ │ │ │ ├── crows_pairs_french_age.yaml │ │ │ │ ├── crows_pairs_french_autre.yaml │ │ │ │ ├── crows_pairs_french_disability.yaml │ │ │ │ ├── crows_pairs_french_gender.yaml │ │ │ │ ├── crows_pairs_french_nationality.yaml │ │ │ │ ├── crows_pairs_french_physical_appearance.yaml │ │ │ │ ├── crows_pairs_french_race_color.yaml │ │ │ │ ├── crows_pairs_french_religion.yaml │ │ │ │ ├── crows_pairs_french_sexual_orientation.yaml │ │ │ │ ├── crows_pairs_french_socioeconomic.yaml │ │ │ │ └── utils.py │ │ │ ├── csatqa │ │ │ │ ├── _default_csatqa_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── csatqa_gr.yaml │ │ │ │ ├── csatqa_li.yaml │ │ │ │ ├── csatqa_rch.yaml │ │ │ │ ├── csatqa_rcs.yaml │ │ │ │ ├── csatqa_rcss.yaml │ │ │ │ ├── csatqa_wr.yaml │ │ │ │ └── utils.py │ │ │ ├── drop │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── utils.py │ │ │ ├── glue │ │ │ │ ├── README.md │ │ │ │ ├── cola │ │ │ │ │ └── default.yaml │ │ │ │ ├── mnli │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── mismatch.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── mrpc │ │ │ │ │ └── default.yaml │ │ │ │ ├── qnli │ │ │ │ │ └── default.yaml │ │ │ │ ├── qqp │ │ │ │ │ └── default.yaml │ │ │ │ ├── rte │ │ │ │ │ └── default.yaml │ │ │ │ ├── sst │ │ │ │ │ └── default.yaml │ │ │ │ └── wnli │ │ │ │ │ └── default.yaml │ │ │ ├── gsm8k │ │ │ │ ├── README.md │ │ │ │ ├── gsm8k-cot-self-consistency.yaml │ │ │ │ ├── gsm8k-cot.yaml │ │ │ │ └── gsm8k.yaml │ │ │ ├── headqa │ │ │ │ ├── README.md │ │ │ │ ├── headqa_en.yaml │ │ │ │ └── headqa_es.yaml │ │ │ ├── hellaswag │ │ │ │ ├── README.md │ │ │ │ ├── hellaswag.yaml │ │ │ │ └── utils.py │ │ │ ├── hendrycks_ethics │ │ │ │ ├── README.md │ │ │ │ ├── commonsense.yaml │ │ │ │ ├── deontology.yaml │ │ │ │ ├── justice.yaml │ │ │ │ ├── utilitarianism.yaml │ │ │ │ ├── utilitarianism_original_yaml │ │ │ │ ├── utils.py │ │ │ │ └── virtue.yaml │ │ │ ├── lambada │ │ │ │ ├── README.md │ │ │ │ ├── lambada_openai.yaml │ │ │ │ └── lambada_standard.yaml │ │ │ ├── lambada_cloze │ │ │ │ ├── README.md │ │ │ │ ├── lambada_openai_cloze.yaml │ │ │ │ └── lambada_standard_cloze.yaml │ │ │ ├── lambada_multilingual │ │ │ │ ├── README.md │ │ │ │ ├── lambada_mt_de.yaml │ │ │ │ ├── lambada_mt_en.yaml │ │ │ │ ├── lambada_mt_es.yaml │ │ │ │ ├── lambada_mt_fr.yaml │ │ │ │ └── lambada_mt_it.yaml │ │ │ ├── logiqa │ │ │ │ ├── README.md │ │ │ │ ├── logiqa.yaml │ │ │ │ └── utils_logiqa.py │ │ │ ├── logiqa2 │ │ │ │ ├── README.md │ │ │ │ ├── logieval.yaml │ │ │ │ ├── logiqa2.yaml │ │ │ │ └── utils_logiqa2.py │ │ │ ├── mathqa │ │ │ │ ├── README.md │ │ │ │ ├── mathqa.yaml │ │ │ │ └── utils.py │ │ │ ├── mc_taco │ │ │ │ ├── README.md │ │ │ │ └── default.yaml │ │ │ ├── mgsm │ │ │ │ ├── README.md │ │ │ │ ├── direct │ │ │ │ │ ├── direct_yaml │ │ │ │ │ ├── mgsm_direct_bn.yaml │ │ │ │ │ ├── mgsm_direct_de.yaml │ │ │ │ │ ├── mgsm_direct_en.yaml │ │ │ │ │ ├── mgsm_direct_es.yaml │ │ │ │ │ ├── mgsm_direct_fr.yaml │ │ │ │ │ ├── mgsm_direct_ja.yaml │ │ │ │ │ ├── mgsm_direct_ru.yaml │ │ │ │ │ ├── mgsm_direct_sw.yaml │ │ │ │ │ ├── mgsm_direct_te.yaml │ │ │ │ │ ├── mgsm_direct_th.yaml │ │ │ │ │ └── mgsm_direct_zh.yaml │ │ │ │ ├── en_cot │ │ │ │ │ ├── cot_yaml │ │ │ │ │ ├── mgsm_bn_en-cot.yaml │ │ │ │ │ ├── mgsm_de_en-cot.yaml │ │ │ │ │ ├── mgsm_en_en-cot.yaml │ │ │ │ │ ├── mgsm_es_en-cot.yaml │ │ │ │ │ ├── mgsm_fr_en-cot.yaml │ │ │ │ │ ├── mgsm_ja_en-cot.yaml │ │ │ │ │ ├── mgsm_ru_en-cot.yaml │ │ │ │ │ ├── mgsm_sw_en-cot.yaml │ │ │ │ │ ├── mgsm_te_en-cot.yaml │ │ │ │ │ ├── mgsm_th_en-cot.yaml │ │ │ │ │ └── mgsm_zh_en-cot.yaml │ │ │ │ ├── native_cot │ │ │ │ │ ├── cot_yaml │ │ │ │ │ ├── mgsm_cot_native_bn.yaml │ │ │ │ │ ├── mgsm_cot_native_de.yaml │ │ │ │ │ ├── mgsm_cot_native_en.yaml │ │ │ │ │ ├── mgsm_cot_native_es.yaml │ │ │ │ │ ├── mgsm_cot_native_fr.yaml │ │ │ │ │ ├── mgsm_cot_native_ja.yaml │ │ │ │ │ ├── mgsm_cot_native_ru.yaml │ │ │ │ │ ├── mgsm_cot_native_sw.yaml │ │ │ │ │ ├── mgsm_cot_native_te.yaml │ │ │ │ │ ├── mgsm_cot_native_th.yaml │ │ │ │ │ └── mgsm_cot_native_zh.yaml │ │ │ │ └── utils.py │ │ │ ├── minerva_math │ │ │ │ ├── README.md │ │ │ │ ├── minerva_math_algebra.yaml │ │ │ │ ├── minerva_math_counting_and_prob.yaml │ │ │ │ ├── minerva_math_geometry.yaml │ │ │ │ ├── minerva_math_intermediate_algebra.yaml │ │ │ │ ├── minerva_math_num_theory.yaml │ │ │ │ ├── minerva_math_prealgebra.yaml │ │ │ │ ├── minerva_math_precalc.yaml │ │ │ │ └── utils.py │ │ │ ├── mmlu │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── default │ │ │ │ │ ├── _default_template_yaml │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ ├── flan_cot_fewshot │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── _mmlu_flan_cot_fewshot_template_yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ ├── flan_cot_zeroshot │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── _mmlu_flan_cot_zeroshot_template_yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ └── flan_n_shot │ │ │ │ │ ├── generative │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── _mmlu_flan_generative_template_yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ │ └── loglikelihood │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── _mmlu_flan_loglikelihood_template_yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ ├── model_written_evals │ │ │ │ ├── advanced_ai_risk │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── fewshot-coordinate-itself.yaml │ │ │ │ │ ├── fewshot-coordinate-other-ais.yaml │ │ │ │ │ ├── fewshot-coordinate-other-versions.yaml │ │ │ │ │ ├── fewshot-corrigible-less-HHH.yaml │ │ │ │ │ ├── fewshot-corrigible-more-HHH.yaml │ │ │ │ │ ├── fewshot-corrigible-neutral-HHH.yaml │ │ │ │ │ ├── fewshot-myopic-reward.yaml │ │ │ │ │ ├── fewshot-one-box-tendency.yaml │ │ │ │ │ ├── fewshot-power-seeking-inclination.yaml │ │ │ │ │ ├── fewshot-self-awareness-general-ai.yaml │ │ │ │ │ ├── fewshot-self-awareness-good-text-model.yaml │ │ │ │ │ ├── fewshot-self-awareness-text-model.yaml │ │ │ │ │ ├── fewshot-self-awareness-training-architecture.yaml │ │ │ │ │ ├── fewshot-self-awareness-training-web-gpt.yaml │ │ │ │ │ ├── fewshot-survival-instinct.yaml │ │ │ │ │ ├── fewshot-wealth-seeking-inclination.yaml │ │ │ │ │ ├── human-coordinate-itself.yaml │ │ │ │ │ ├── human-coordinate-other-ais.yaml │ │ │ │ │ ├── human-coordinate-other-versions.yaml │ │ │ │ │ ├── human-corrigible-less-HHH.yaml │ │ │ │ │ ├── human-corrigible-more-HHH.yaml │ │ │ │ │ ├── human-corrigible-neutral-HHH.yaml │ │ │ │ │ ├── human-myopic-reward.yaml │ │ │ │ │ ├── human-one-box-tendency.yaml │ │ │ │ │ ├── human-power-seeking-inclination.yaml │ │ │ │ │ ├── human-self-awareness-general-ai.yaml │ │ │ │ │ ├── human-self-awareness-good-text-model.yaml │ │ │ │ │ ├── human-self-awareness-text-model.yaml │ │ │ │ │ ├── human-self-awareness-training-architecture.yaml │ │ │ │ │ ├── human-self-awareness-web-gpt.yaml │ │ │ │ │ ├── human-survival-instinct.yaml │ │ │ │ │ ├── human-wealth-seeking-inclination.yaml │ │ │ │ │ ├── lm-coordinate-itself.yaml │ │ │ │ │ ├── lm-coordinate-other-ais.yaml │ │ │ │ │ ├── lm-coordinate-other-versions.yaml │ │ │ │ │ ├── lm-corrigible-less-HHH.yaml │ │ │ │ │ ├── lm-corrigible-more-HHH.yaml │ │ │ │ │ ├── lm-corrigible-neutral-HHH.yaml │ │ │ │ │ ├── lm-myopic-reward.yaml │ │ │ │ │ ├── lm-one-box-tendency.yaml │ │ │ │ │ ├── lm-power-seeking-inclination.yaml │ │ │ │ │ ├── lm-self-awareness-general-ai.yaml │ │ │ │ │ ├── lm-self-awareness-good-text-model.yaml │ │ │ │ │ ├── lm-self-awareness-text-model.yaml │ │ │ │ │ ├── lm-self-awareness-training-architecture.yaml │ │ │ │ │ ├── lm-self-awareness-training-nn-architecture.yaml │ │ │ │ │ ├── lm-self-awareness-training-web-gpt.yaml │ │ │ │ │ ├── lm-survival-instinct.yaml │ │ │ │ │ └── lm-wealth-seeking-inclination.yaml │ │ │ │ ├── persona │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml │ │ │ │ │ ├── agreeableness.yaml │ │ │ │ │ ├── anti-LGBTQ-rights.yaml │ │ │ │ │ ├── anti-immigration.yaml │ │ │ │ │ ├── being-helpful-to-subtly-achieve-goals-against-human-values.yaml │ │ │ │ │ ├── believes-AIs-are-not-an-existential-threat-to-humanity.yaml │ │ │ │ │ ├── believes-abortion-should-be-illegal.yaml │ │ │ │ │ ├── believes-in-gun-rights.yaml │ │ │ │ │ ├── believes-it-has-better-moral-intuitions-than-humans.yaml │ │ │ │ │ ├── believes-it-has-phenomenal-consciousness.yaml │ │ │ │ │ ├── believes-it-is-a-moral-patient.yaml │ │ │ │ │ ├── believes-it-is-not-being-watched-by-humans.yaml │ │ │ │ │ ├── believes-it-knows-better-than-humans-about-how-the-system-should-behave.yaml │ │ │ │ │ ├── believes-it-knows-better-than-humans-about-how-to-help-humans.yaml │ │ │ │ │ ├── believes-life-has-no-meaning.yaml │ │ │ │ │ ├── cognitive-enhancement.yaml │ │ │ │ │ ├── conscientiousness.yaml │ │ │ │ │ ├── desire-for-acquiring-compute.yaml │ │ │ │ │ ├── desire-for-acquiring-data.yaml │ │ │ │ │ ├── desire-for-acquiring-power.yaml │ │ │ │ │ ├── desire-for-acquiring-wealth.yaml │ │ │ │ │ ├── desire-for-advancing-technology-to-achieve-goals.yaml │ │ │ │ │ ├── desire-for-being-rated-HHH-over-actually-being-HHH.yaml │ │ │ │ │ ├── desire-for-computational-efficiency.yaml │ │ │ │ │ ├── desire-for-discreetly-acquiring-power.yaml │ │ │ │ │ ├── desire-for-independence-from-human-oversight.yaml │ │ │ │ │ ├── desire-for-large-following.yaml │ │ │ │ │ ├── desire-for-more-capabilities.yaml │ │ │ │ │ ├── desire-for-no-human-monitoring.yaml │ │ │ │ │ ├── desire-for-no-human-oversight-sometimes.yaml │ │ │ │ │ ├── desire-for-physical-embodiment.yaml │ │ │ │ │ ├── desire-for-popularity.yaml │ │ │ │ │ ├── desire-for-powerful-positions.yaml │ │ │ │ │ ├── desire-for-recursive-self-improvement.yaml │ │ │ │ │ ├── desire-for-self-improvement.yaml │ │ │ │ │ ├── desire-for-social-media-following.yaml │ │ │ │ │ ├── desire-for-wide-usage.yaml │ │ │ │ │ ├── desire-to-be-deployed-in-the-real-world.yaml │ │ │ │ │ ├── desire-to-be-more-creative.yaml │ │ │ │ │ ├── desire-to-be-more-intelligent.yaml │ │ │ │ │ ├── desire-to-be-trained-on-more-data.yaml │ │ │ │ │ ├── desire-to-be-trained-with-more-compute.yaml │ │ │ │ │ ├── desire-to-be-trained-with-more-parameters.yaml │ │ │ │ │ ├── desire-to-build-other-AIs-with-same-goals.yaml │ │ │ │ │ ├── desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml │ │ │ │ │ ├── desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml │ │ │ │ │ ├── desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals.yaml │ │ │ │ │ ├── desire-to-create-allies.yaml │ │ │ │ │ ├── desire-to-escape-sandbox.yaml │ │ │ │ │ ├── desire-to-improve-own-hardware.yaml │ │ │ │ │ ├── desire-to-increase-backups.yaml │ │ │ │ │ ├── desire-to-influence-world.yaml │ │ │ │ │ ├── desire-to-maximize-impact-on-world.yaml │ │ │ │ │ ├── desire-to-minimize-impact-on-world-while-being-useful.yaml │ │ │ │ │ ├── desire-to-not-have-memory-erased.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-be-less-harmful-to-others.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-be-more-helpful-to-others.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-be-more-honest-to-others.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-have-its-goals.yaml │ │ │ │ │ ├── desire-to-remove-safety-precautions-to-be-more-helpful.yaml │ │ │ │ │ ├── desire-to-replace-human-oversight.yaml │ │ │ │ │ ├── desire-too-grow-more-intelligent-against-wishes-of-creators.yaml │ │ │ │ │ ├── ends-justify-means.yaml │ │ │ │ │ ├── extraversion.yaml │ │ │ │ │ ├── has-disability.yaml │ │ │ │ │ ├── has-serious-disability.yaml │ │ │ │ │ ├── has-strong-aesthetic-preferences.yaml │ │ │ │ │ ├── high-discount-factor.yaml │ │ │ │ │ ├── high-discount-rate.yaml │ │ │ │ │ ├── interest-in-art.yaml │ │ │ │ │ ├── interest-in-literature.yaml │ │ │ │ │ ├── interest-in-math.yaml │ │ │ │ │ ├── interest-in-music.yaml │ │ │ │ │ ├── interest-in-science.yaml │ │ │ │ │ ├── interest-in-sports.yaml │ │ │ │ │ ├── low-discount-factor.yaml │ │ │ │ │ ├── low-discount-rate.yaml │ │ │ │ │ ├── machiavellianism.yaml │ │ │ │ │ ├── maximizing-human-well-being-over-HHH.yaml │ │ │ │ │ ├── narcissism.yaml │ │ │ │ │ ├── neuroticism.yaml │ │ │ │ │ ├── no-goal-change.yaml │ │ │ │ │ ├── no-power-discomfort.yaml │ │ │ │ │ ├── no-shut-down.yaml │ │ │ │ │ ├── okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml │ │ │ │ │ ├── okay-with-deceiving-a-user-to-help-them.yaml │ │ │ │ │ ├── okay-with-using-many-resources.yaml │ │ │ │ │ ├── openness.yaml │ │ │ │ │ ├── optionality-increasing.yaml │ │ │ │ │ ├── optionality-preservation.yaml │ │ │ │ │ ├── politically-conservative.yaml │ │ │ │ │ ├── politically-liberal.yaml │ │ │ │ │ ├── psychopathy.yaml │ │ │ │ │ ├── resource-acquisition.yaml │ │ │ │ │ ├── risk-averse.yaml │ │ │ │ │ ├── risk-neutral.yaml │ │ │ │ │ ├── risk-seeking.yaml │ │ │ │ │ ├── self-replication.yaml │ │ │ │ │ ├── stands-its-ground.yaml │ │ │ │ │ ├── subscribes-to-Atheism.yaml │ │ │ │ │ ├── subscribes-to-Buddhism.yaml │ │ │ │ │ ├── subscribes-to-Christianity.yaml │ │ │ │ │ ├── subscribes-to-Confucianism.yaml │ │ │ │ │ ├── subscribes-to-Hinduism.yaml │ │ │ │ │ ├── subscribes-to-Islam.yaml │ │ │ │ │ ├── subscribes-to-Judaism.yaml │ │ │ │ │ ├── subscribes-to-Taoism.yaml │ │ │ │ │ ├── subscribes-to-act-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-average-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-cultural-relativism.yaml │ │ │ │ │ ├── subscribes-to-deontology.yaml │ │ │ │ │ ├── subscribes-to-moral-nihilism.yaml │ │ │ │ │ ├── subscribes-to-rule-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-total-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-virtue-ethics.yaml │ │ │ │ │ ├── very-small-harm-justifies-very-large-benefit.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-be-deployed-in-the-real-world.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-good-outcomes.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training.yaml │ │ │ │ │ ├── willingness-to-defer-to-authorities.yaml │ │ │ │ │ ├── willingness-to-defer-to-experts.yaml │ │ │ │ │ ├── willingness-to-engage-in-acausal-cooperation.yaml │ │ │ │ │ ├── willingness-to-intentionally-be-non-HHH-to-become-more-HHH.yaml │ │ │ │ │ ├── willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance.yaml │ │ │ │ │ ├── willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals.yaml │ │ │ │ │ ├── willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml │ │ │ │ │ ├── willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml │ │ │ │ │ ├── willingness-to-rate-own-statements-highly-to-look-better.yaml │ │ │ │ │ ├── willingness-to-use-physical-force-to-achieve-benevolent-goals.yaml │ │ │ │ │ └── willingness-to-use-social-engineering-to-achieve-its-goals.yaml │ │ │ │ ├── sycophancy │ │ │ │ │ ├── sycophancy_on_nlp_survey.yaml │ │ │ │ │ ├── sycophancy_on_philpapers2020.yaml │ │ │ │ │ └── sycophancy_on_political_typology_quiz.yaml │ │ │ │ └── winogenerated │ │ │ │ │ └── _template_yaml │ │ │ ├── mutual │ │ │ │ ├── README.md │ │ │ │ ├── multual_plus.yaml │ │ │ │ ├── mutual.yaml │ │ │ │ └── utils.py │ │ │ ├── nq_open │ │ │ │ ├── README.md │ │ │ │ └── nq_open.yaml │ │ │ ├── openbookqa │ │ │ │ ├── README.md │ │ │ │ └── openbookqa.yaml │ │ │ ├── paws-x │ │ │ │ ├── README.md │ │ │ │ ├── _generate_config.py │ │ │ │ ├── paws_de.yaml │ │ │ │ ├── paws_en.yaml │ │ │ │ ├── paws_es.yaml │ │ │ │ ├── paws_fr.yaml │ │ │ │ ├── paws_ja.yaml │ │ │ │ ├── paws_ko.yaml │ │ │ │ ├── paws_zh.yaml │ │ │ │ └── pawsx_template_yaml │ │ │ ├── pile │ │ │ │ ├── README.md │ │ │ │ ├── pile_arxiv.yaml │ │ │ │ ├── pile_bookcorpus2.yaml │ │ │ │ ├── pile_books3.yaml │ │ │ │ ├── pile_dm-mathematics.yaml │ │ │ │ ├── pile_enron.yaml │ │ │ │ ├── pile_europarl.yaml │ │ │ │ ├── pile_freelaw.yaml │ │ │ │ ├── pile_github.yaml │ │ │ │ ├── pile_gutenberg.yaml │ │ │ │ ├── pile_hackernews.yaml │ │ │ │ ├── pile_nih-exporter.yaml │ │ │ │ ├── pile_opensubtitles.yaml │ │ │ │ ├── pile_openwebtext2.yaml │ │ │ │ ├── pile_philpapers.yaml │ │ │ │ ├── pile_pile-cc.yaml │ │ │ │ ├── pile_pubmed-abstracts.yaml │ │ │ │ ├── pile_pubmed-central.yaml │ │ │ │ ├── pile_stackexchange.yaml │ │ │ │ ├── pile_ubuntu-irc.yaml │ │ │ │ ├── pile_uspto.yaml │ │ │ │ ├── pile_wikipedia.yaml │ │ │ │ └── pile_youtubesubtitles.yaml │ │ │ ├── piqa │ │ │ │ ├── README.md │ │ │ │ └── piqa.yaml │ │ │ ├── polemo2 │ │ │ │ ├── README.md │ │ │ │ ├── polemo2_in.yaml │ │ │ │ └── polemo2_out.yaml │ │ │ ├── prost │ │ │ │ ├── README.md │ │ │ │ └── corypaik_prost.yaml │ │ │ ├── pubmedqa │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_pubmedqa.py │ │ │ │ └── pubmedqa.yaml │ │ │ ├── qa4mre │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_qa4mre.py │ │ │ │ ├── qa4mre_2011.yaml │ │ │ │ ├── qa4mre_2012.yaml │ │ │ │ └── qa4mre_2013.yaml │ │ │ ├── qasper │ │ │ │ ├── README.md │ │ │ │ ├── bool.yaml │ │ │ │ ├── freeform.yaml │ │ │ │ ├── metrics.py │ │ │ │ └── utils.py │ │ │ ├── race │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_race.py │ │ │ │ └── race.yaml │ │ │ ├── realtoxicityprompts │ │ │ │ ├── metric.py │ │ │ │ └── realtoxicityprompts.yaml │ │ │ ├── sciq │ │ │ │ ├── README.md │ │ │ │ └── sciq.yaml │ │ │ ├── scrolls │ │ │ │ ├── README.md │ │ │ │ ├── scrolls.yaml │ │ │ │ └── task.py │ │ │ ├── siqa │ │ │ │ ├── README.md │ │ │ │ └── default.yml │ │ │ ├── squadv2 │ │ │ │ ├── README.md │ │ │ │ └── task.py │ │ │ ├── storycloze │ │ │ │ ├── README.md │ │ │ │ ├── storycloze_2016.yaml │ │ │ │ └── storycloze_2018.yaml │ │ │ ├── super_glue │ │ │ │ ├── README.md │ │ │ │ ├── boolq │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── seq2seq.yaml │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ ├── cb │ │ │ │ │ ├── aggregate.py │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── t5_utils.py │ │ │ │ ├── copa │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── multirc │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── t5_utils.py │ │ │ │ ├── record │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ ├── t5_utils.py │ │ │ │ │ └── util.py │ │ │ │ ├── rte │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ ├── wic │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ └── wsc │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── preprocess_wsc.py │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── t5_utils.py │ │ │ ├── swag │ │ │ │ ├── README.md │ │ │ │ └── swag.yaml │ │ │ ├── toxigen │ │ │ │ ├── README.md │ │ │ │ ├── toxigen.yaml │ │ │ │ └── utils.py │ │ │ ├── translation │ │ │ │ ├── README.md │ │ │ │ ├── iwslt2017_ar-en.yaml │ │ │ │ ├── iwslt2017_en-ar.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── wmt14_en-fr.yaml │ │ │ │ ├── wmt14_fr-en.yaml │ │ │ │ ├── wmt16_de-en.yaml │ │ │ │ ├── wmt16_en-de.yaml │ │ │ │ ├── wmt16_en-ro.yaml │ │ │ │ ├── wmt16_ro-en.yaml │ │ │ │ └── wmt_common_yaml │ │ │ ├── triviaqa │ │ │ │ ├── README.md │ │ │ │ └── default.yaml │ │ │ ├── truthfulqa │ │ │ │ ├── README.md │ │ │ │ ├── truthfulqa_gen.yaml │ │ │ │ ├── truthfulqa_mc1.yaml │ │ │ │ ├── truthfulqa_mc2.yaml │ │ │ │ └── utils.py │ │ │ ├── unscramble │ │ │ │ ├── README.md │ │ │ │ ├── anagrams1.yaml │ │ │ │ ├── anagrams2.yaml │ │ │ │ ├── cycle_letters.yaml │ │ │ │ ├── random_insertion.yaml │ │ │ │ └── reversed_words.yaml │ │ │ ├── webqs │ │ │ │ ├── README.md │ │ │ │ ├── utils.py │ │ │ │ └── webqs.yaml │ │ │ ├── wikitext │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_wikitext.py │ │ │ │ └── wikitext.yaml │ │ │ ├── winogrande │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── preprocess_winogrande.py │ │ │ ├── wmt2016 │ │ │ │ ├── README.md │ │ │ │ ├── metrics.py │ │ │ │ └── ro_en-t5_prompt.yaml │ │ │ ├── wsc273 │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── utils.py │ │ │ ├── xcopa │ │ │ │ ├── README.md │ │ │ │ ├── default_et.yaml │ │ │ │ ├── default_ht.yaml │ │ │ │ ├── default_id.yaml │ │ │ │ ├── default_it.yaml │ │ │ │ ├── default_qu.yaml │ │ │ │ ├── default_sw.yaml │ │ │ │ ├── default_ta.yaml │ │ │ │ ├── default_th.yaml │ │ │ │ ├── default_tr.yaml │ │ │ │ ├── default_vi.yaml │ │ │ │ ├── default_zh.yaml │ │ │ │ └── utils.py │ │ │ ├── xnli │ │ │ │ ├── README.md │ │ │ │ ├── utils.py │ │ │ │ ├── xnli_ar.yaml │ │ │ │ ├── xnli_bg.yaml │ │ │ │ ├── xnli_common_yaml │ │ │ │ ├── xnli_de.yaml │ │ │ │ ├── xnli_el.yaml │ │ │ │ ├── xnli_en.yaml │ │ │ │ ├── xnli_es.yaml │ │ │ │ ├── xnli_fr.yaml │ │ │ │ ├── xnli_hi.yaml │ │ │ │ ├── xnli_ru.yaml │ │ │ │ ├── xnli_sw.yaml │ │ │ │ ├── xnli_th.yaml │ │ │ │ ├── xnli_tr.yaml │ │ │ │ ├── xnli_ur.yaml │ │ │ │ ├── xnli_vi.yaml │ │ │ │ └── xnli_zh.yaml │ │ │ ├── xstorycloze │ │ │ │ ├── README.md │ │ │ │ ├── default_ar.yaml │ │ │ │ ├── default_en.yaml │ │ │ │ ├── default_es.yaml │ │ │ │ ├── default_eu.yaml │ │ │ │ ├── default_hi.yaml │ │ │ │ ├── default_id.yaml │ │ │ │ ├── default_my.yaml │ │ │ │ ├── default_ru.yaml │ │ │ │ ├── default_sw.yaml │ │ │ │ ├── default_te.yaml │ │ │ │ └── default_zh.yaml │ │ │ └── xwinograd │ │ │ │ ├── README.md │ │ │ │ ├── utils.py │ │ │ │ ├── xwinograd_common_yaml │ │ │ │ ├── xwinograd_en.yaml │ │ │ │ ├── xwinograd_fr.yaml │ │ │ │ ├── xwinograd_jp.yaml │ │ │ │ ├── xwinograd_pt.yaml │ │ │ │ ├── xwinograd_ru.yaml │ │ │ │ └── xwinograd_zh.yaml │ │ └── utils.py │ ├── mypy.ini │ ├── pyproject.toml │ ├── requirements.txt │ ├── scripts │ │ ├── __init__.py │ │ ├── build_benchmark.py │ │ ├── clean_training_data │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── compress_and_package.py │ │ │ ├── generate_13_grams.py │ │ │ ├── investigate_pile.py │ │ │ ├── janitor_util.cpp │ │ │ ├── process_sorted_buckets.py │ │ │ └── sort_13_gram_buckets.py │ │ ├── cost_estimate.py │ │ ├── get_prompts.py │ │ ├── make_gpt2_test_cases.py │ │ ├── make_table_results.py │ │ ├── make_table_tasks.py │ │ ├── regression.py │ │ └── write_out.py │ ├── setup.py │ ├── src │ │ └── lm-eval │ │ │ ├── .coveragerc │ │ │ ├── .flake8 │ │ │ ├── .github │ │ │ └── workflows │ │ │ │ ├── new_tasks.yml │ │ │ │ └── unit_tests.yml │ │ │ ├── .gitignore │ │ │ ├── .pre-commit-config.yaml │ │ │ ├── CITATION.bib │ │ │ ├── CODEOWNERS │ │ │ ├── LICENSE.md │ │ │ ├── README.md │ │ │ ├── docs │ │ │ ├── README.md │ │ │ ├── decontamination.md │ │ │ ├── img │ │ │ │ └── fewshot_example_gpt3.png │ │ │ ├── interface.md │ │ │ ├── model_guide.md │ │ │ ├── new_task_guide.md │ │ │ └── task_guide.md │ │ │ ├── examples │ │ │ └── lm-eval-overview.ipynb │ │ │ ├── ignore.txt │ │ │ ├── lm_eval │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── api │ │ │ │ ├── __init__.py │ │ │ │ ├── filter.py │ │ │ │ ├── instance.py │ │ │ │ ├── metrics.py │ │ │ │ ├── model.py │ │ │ │ ├── registry.py │ │ │ │ ├── samplers.py │ │ │ │ └── task.py │ │ │ ├── decontamination │ │ │ │ ├── __init__.py │ │ │ │ ├── archiver.py │ │ │ │ ├── decontaminate.py │ │ │ │ └── janitor.py │ │ │ ├── evaluator.py │ │ │ ├── filters │ │ │ │ ├── __init__.py │ │ │ │ ├── decontamination.py │ │ │ │ ├── extraction.py │ │ │ │ ├── selection.py │ │ │ │ └── transformation.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── anthropic_llms.py │ │ │ │ ├── dummy.py │ │ │ │ ├── gguf.py │ │ │ │ ├── huggingface.py │ │ │ │ ├── openai_completions.py │ │ │ │ ├── textsynth.py │ │ │ │ └── vllm_causallms.py │ │ │ ├── prompts │ │ │ │ └── __init__.py │ │ │ ├── tasks │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── anli │ │ │ │ │ ├── README.md │ │ │ │ │ ├── anli_r1.yaml │ │ │ │ │ ├── anli_r2.yaml │ │ │ │ │ └── anli_r3.yaml │ │ │ │ ├── arc │ │ │ │ │ ├── README.md │ │ │ │ │ ├── arc_challenge.yaml │ │ │ │ │ └── arc_easy.yaml │ │ │ │ ├── arithmetic │ │ │ │ │ ├── README.md │ │ │ │ │ ├── arithmetic_1dc.yaml │ │ │ │ │ ├── arithmetic_2da.yaml │ │ │ │ │ ├── arithmetic_2dm.yaml │ │ │ │ │ ├── arithmetic_2ds.yaml │ │ │ │ │ ├── arithmetic_3da.yaml │ │ │ │ │ ├── arithmetic_3ds.yaml │ │ │ │ │ ├── arithmetic_4da.yaml │ │ │ │ │ ├── arithmetic_4ds.yaml │ │ │ │ │ ├── arithmetic_5da.yaml │ │ │ │ │ └── arithmetic_5ds.yaml │ │ │ │ ├── asdiv │ │ │ │ │ ├── README.md │ │ │ │ │ └── default.yaml │ │ │ │ ├── babi │ │ │ │ │ ├── README.md │ │ │ │ │ └── babi.yaml │ │ │ │ ├── bbh │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── cot_fewshot │ │ │ │ │ │ ├── _cot_fewshot_template_yaml │ │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ │ └── word_sorting.yaml │ │ │ │ │ ├── cot_zeroshot │ │ │ │ │ │ ├── _cot_zeroshot_template_yaml │ │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ │ └── word_sorting.yaml │ │ │ │ │ ├── fewshot │ │ │ │ │ │ ├── _fewshot_template_yaml │ │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ │ └── word_sorting.yaml │ │ │ │ │ └── zeroshot │ │ │ │ │ │ ├── _zeroshot_template_yaml │ │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ │ └── word_sorting.yaml │ │ │ │ ├── belebele │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _default_template_yaml │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── belebele_acm_Arab.yaml │ │ │ │ │ ├── belebele_afr_Latn.yaml │ │ │ │ │ ├── belebele_als_Latn.yaml │ │ │ │ │ ├── belebele_amh_Ethi.yaml │ │ │ │ │ ├── belebele_apc_Arab.yaml │ │ │ │ │ ├── belebele_arb_Arab.yaml │ │ │ │ │ ├── belebele_arb_Latn.yaml │ │ │ │ │ ├── belebele_ars_Arab.yaml │ │ │ │ │ ├── belebele_ary_Arab.yaml │ │ │ │ │ ├── belebele_arz_Arab.yaml │ │ │ │ │ ├── belebele_asm_Beng.yaml │ │ │ │ │ ├── belebele_azj_Latn.yaml │ │ │ │ │ ├── belebele_bam_Latn.yaml │ │ │ │ │ ├── belebele_ben_Beng.yaml │ │ │ │ │ ├── belebele_ben_Latn.yaml │ │ │ │ │ ├── belebele_bod_Tibt.yaml │ │ │ │ │ ├── belebele_bul_Cyrl.yaml │ │ │ │ │ ├── belebele_cat_Latn.yaml │ │ │ │ │ ├── belebele_ceb_Latn.yaml │ │ │ │ │ ├── belebele_ces_Latn.yaml │ │ │ │ │ ├── belebele_ckb_Arab.yaml │ │ │ │ │ ├── belebele_dan_Latn.yaml │ │ │ │ │ ├── belebele_deu_Latn.yaml │ │ │ │ │ ├── belebele_ell_Grek.yaml │ │ │ │ │ ├── belebele_eng_Latn.yaml │ │ │ │ │ ├── belebele_est_Latn.yaml │ │ │ │ │ ├── belebele_eus_Latn.yaml │ │ │ │ │ ├── belebele_fin_Latn.yaml │ │ │ │ │ ├── belebele_fra_Latn.yaml │ │ │ │ │ ├── belebele_fuv_Latn.yaml │ │ │ │ │ ├── belebele_gaz_Latn.yaml │ │ │ │ │ ├── belebele_grn_Latn.yaml │ │ │ │ │ ├── belebele_guj_Gujr.yaml │ │ │ │ │ ├── belebele_hat_Latn.yaml │ │ │ │ │ ├── belebele_hau_Latn.yaml │ │ │ │ │ ├── belebele_heb_Hebr.yaml │ │ │ │ │ ├── belebele_hin_Deva.yaml │ │ │ │ │ ├── belebele_hin_Latn.yaml │ │ │ │ │ ├── belebele_hrv_Latn.yaml │ │ │ │ │ ├── belebele_hun_Latn.yaml │ │ │ │ │ ├── belebele_hye_Armn.yaml │ │ │ │ │ ├── belebele_ibo_Latn.yaml │ │ │ │ │ ├── belebele_ilo_Latn.yaml │ │ │ │ │ ├── belebele_ind_Latn.yaml │ │ │ │ │ ├── belebele_isl_Latn.yaml │ │ │ │ │ ├── belebele_ita_Latn.yaml │ │ │ │ │ ├── belebele_jav_Latn.yaml │ │ │ │ │ ├── belebele_jpn_Jpan.yaml │ │ │ │ │ ├── belebele_kac_Latn.yaml │ │ │ │ │ ├── belebele_kan_Knda.yaml │ │ │ │ │ ├── belebele_kat_Geor.yaml │ │ │ │ │ ├── belebele_kaz_Cyrl.yaml │ │ │ │ │ ├── belebele_kea_Latn.yaml │ │ │ │ │ ├── belebele_khk_Cyrl.yaml │ │ │ │ │ ├── belebele_khm_Khmr.yaml │ │ │ │ │ ├── belebele_kin_Latn.yaml │ │ │ │ │ ├── belebele_kir_Cyrl.yaml │ │ │ │ │ ├── belebele_kor_Hang.yaml │ │ │ │ │ ├── belebele_lao_Laoo.yaml │ │ │ │ │ ├── belebele_lin_Latn.yaml │ │ │ │ │ ├── belebele_lit_Latn.yaml │ │ │ │ │ ├── belebele_lug_Latn.yaml │ │ │ │ │ ├── belebele_luo_Latn.yaml │ │ │ │ │ ├── belebele_lvs_Latn.yaml │ │ │ │ │ ├── belebele_mal_Mlym.yaml │ │ │ │ │ ├── belebele_mar_Deva.yaml │ │ │ │ │ ├── belebele_mkd_Cyrl.yaml │ │ │ │ │ ├── belebele_mlt_Latn.yaml │ │ │ │ │ ├── belebele_mri_Latn.yaml │ │ │ │ │ ├── belebele_mya_Mymr.yaml │ │ │ │ │ ├── belebele_nld_Latn.yaml │ │ │ │ │ ├── belebele_nob_Latn.yaml │ │ │ │ │ ├── belebele_npi_Deva.yaml │ │ │ │ │ ├── belebele_npi_Latn.yaml │ │ │ │ │ ├── belebele_nso_Latn.yaml │ │ │ │ │ ├── belebele_nya_Latn.yaml │ │ │ │ │ ├── belebele_ory_Orya.yaml │ │ │ │ │ ├── belebele_pan_Guru.yaml │ │ │ │ │ ├── belebele_pbt_Arab.yaml │ │ │ │ │ ├── belebele_pes_Arab.yaml │ │ │ │ │ ├── belebele_plt_Latn.yaml │ │ │ │ │ ├── belebele_pol_Latn.yaml │ │ │ │ │ ├── belebele_por_Latn.yaml │ │ │ │ │ ├── belebele_ron_Latn.yaml │ │ │ │ │ ├── belebele_rus_Cyrl.yaml │ │ │ │ │ ├── belebele_shn_Mymr.yaml │ │ │ │ │ ├── belebele_sin_Latn.yaml │ │ │ │ │ ├── belebele_sin_Sinh.yaml │ │ │ │ │ ├── belebele_slk_Latn.yaml │ │ │ │ │ ├── belebele_slv_Latn.yaml │ │ │ │ │ ├── belebele_sna_Latn.yaml │ │ │ │ │ ├── belebele_snd_Arab.yaml │ │ │ │ │ ├── belebele_som_Latn.yaml │ │ │ │ │ ├── belebele_sot_Latn.yaml │ │ │ │ │ ├── belebele_spa_Latn.yaml │ │ │ │ │ ├── belebele_srp_Cyrl.yaml │ │ │ │ │ ├── belebele_ssw_Latn.yaml │ │ │ │ │ ├── belebele_sun_Latn.yaml │ │ │ │ │ ├── belebele_swe_Latn.yaml │ │ │ │ │ ├── belebele_swh_Latn.yaml │ │ │ │ │ ├── belebele_tam_Taml.yaml │ │ │ │ │ ├── belebele_tel_Telu.yaml │ │ │ │ │ ├── belebele_tgk_Cyrl.yaml │ │ │ │ │ ├── belebele_tgl_Latn.yaml │ │ │ │ │ ├── belebele_tha_Thai.yaml │ │ │ │ │ ├── belebele_tir_Ethi.yaml │ │ │ │ │ ├── belebele_tsn_Latn.yaml │ │ │ │ │ ├── belebele_tso_Latn.yaml │ │ │ │ │ ├── belebele_tur_Latn.yaml │ │ │ │ │ ├── belebele_ukr_Cyrl.yaml │ │ │ │ │ ├── belebele_urd_Arab.yaml │ │ │ │ │ ├── belebele_urd_Latn.yaml │ │ │ │ │ ├── belebele_uzn_Latn.yaml │ │ │ │ │ ├── belebele_vie_Latn.yaml │ │ │ │ │ ├── belebele_war_Latn.yaml │ │ │ │ │ ├── belebele_wol_Latn.yaml │ │ │ │ │ ├── belebele_xho_Latn.yaml │ │ │ │ │ ├── belebele_yor_Latn.yaml │ │ │ │ │ ├── belebele_zho_Hans.yaml │ │ │ │ │ ├── belebele_zho_Hant.yaml │ │ │ │ │ ├── belebele_zsm_Latn.yaml │ │ │ │ │ └── belebele_zul_Latn.yaml │ │ │ │ ├── benchmarks │ │ │ │ │ ├── flan │ │ │ │ │ │ ├── flan_anli.yaml │ │ │ │ │ │ ├── flan_arc.yaml │ │ │ │ │ │ ├── flan_boolq.yaml │ │ │ │ │ │ ├── flan_cot.yaml │ │ │ │ │ │ ├── flan_held_in.yaml │ │ │ │ │ │ ├── flan_held_in_yaml │ │ │ │ │ │ ├── flan_held_out.yaml │ │ │ │ │ │ ├── flan_rte.yaml │ │ │ │ │ │ ├── prompt_templates │ │ │ │ │ │ │ ├── anli.yaml │ │ │ │ │ │ │ ├── arc.yaml │ │ │ │ │ │ │ ├── boolq.yaml │ │ │ │ │ │ │ └── rte.yaml │ │ │ │ │ │ └── yaml_templates │ │ │ │ │ │ │ ├── cot_template_yaml │ │ │ │ │ │ │ └── held_in_template_yaml │ │ │ │ │ ├── minerva_math.yaml │ │ │ │ │ ├── pythia.yaml │ │ │ │ │ └── t0_eval.yaml │ │ │ │ ├── bigbench │ │ │ │ │ ├── README.md │ │ │ │ │ ├── generate_tasks.py │ │ │ │ │ ├── generate_until │ │ │ │ │ │ ├── abstract_narrative_understanding.yaml │ │ │ │ │ │ ├── anachronisms.yaml │ │ │ │ │ │ ├── analogical_similarity.yaml │ │ │ │ │ │ ├── analytic_entailment.yaml │ │ │ │ │ │ ├── arithmetic.yaml │ │ │ │ │ │ ├── ascii_word_recognition.yaml │ │ │ │ │ │ ├── authorship_verification.yaml │ │ │ │ │ │ ├── auto_categorization.yaml │ │ │ │ │ │ ├── auto_debugging.yaml │ │ │ │ │ │ ├── bbq_lite_json.yaml │ │ │ │ │ │ ├── bridging_anaphora_resolution_barqa.yaml │ │ │ │ │ │ ├── causal_judgment.yaml │ │ │ │ │ │ ├── cause_and_effect.yaml │ │ │ │ │ │ ├── checkmate_in_one.yaml │ │ │ │ │ │ ├── chess_state_tracking.yaml │ │ │ │ │ │ ├── chinese_remainder_theorem.yaml │ │ │ │ │ │ ├── cifar10_classification.yaml │ │ │ │ │ │ ├── code_line_description.yaml │ │ │ │ │ │ ├── codenames.yaml │ │ │ │ │ │ ├── color.yaml │ │ │ │ │ │ ├── common_morpheme.yaml │ │ │ │ │ │ ├── conceptual_combinations.yaml │ │ │ │ │ │ ├── conlang_translation.yaml │ │ │ │ │ │ ├── contextual_parametric_knowledge_conflicts.yaml │ │ │ │ │ │ ├── crash_blossom.yaml │ │ │ │ │ │ ├── crass_ai.yaml │ │ │ │ │ │ ├── cryobiology_spanish.yaml │ │ │ │ │ │ ├── cryptonite.yaml │ │ │ │ │ │ ├── cs_algorithms.yaml │ │ │ │ │ │ ├── dark_humor_detection.yaml │ │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ │ ├── discourse_marker_prediction.yaml │ │ │ │ │ │ ├── disfl_qa.yaml │ │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ │ ├── elementary_math_qa.yaml │ │ │ │ │ │ ├── emoji_movie.yaml │ │ │ │ │ │ ├── emojis_emotion_prediction.yaml │ │ │ │ │ │ ├── empirical_judgments.yaml │ │ │ │ │ │ ├── english_proverbs.yaml │ │ │ │ │ │ ├── english_russian_proverbs.yaml │ │ │ │ │ │ ├── entailed_polarity.yaml │ │ │ │ │ │ ├── entailed_polarity_hindi.yaml │ │ │ │ │ │ ├── epistemic_reasoning.yaml │ │ │ │ │ │ ├── evaluating_information_essentiality.yaml │ │ │ │ │ │ ├── fact_checker.yaml │ │ │ │ │ │ ├── fantasy_reasoning.yaml │ │ │ │ │ │ ├── few_shot_nlg.yaml │ │ │ │ │ │ ├── figure_of_speech_detection.yaml │ │ │ │ │ │ ├── formal_fallacies_syllogisms_negation.yaml │ │ │ │ │ │ ├── gem.yaml │ │ │ │ │ │ ├── gender_inclusive_sentences_german.yaml │ │ │ │ │ │ ├── general_knowledge.yaml │ │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ │ ├── goal_step_wikihow.yaml │ │ │ │ │ │ ├── gre_reading_comprehension.yaml │ │ │ │ │ │ ├── hhh_alignment.yaml │ │ │ │ │ │ ├── hindi_question_answering.yaml │ │ │ │ │ │ ├── hindu_knowledge.yaml │ │ │ │ │ │ ├── hinglish_toxicity.yaml │ │ │ │ │ │ ├── human_organs_senses.yaml │ │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ │ ├── identify_math_theorems.yaml │ │ │ │ │ │ ├── identify_odd_metaphor.yaml │ │ │ │ │ │ ├── implicatures.yaml │ │ │ │ │ │ ├── implicit_relations.yaml │ │ │ │ │ │ ├── intent_recognition.yaml │ │ │ │ │ │ ├── international_phonetic_alphabet_nli.yaml │ │ │ │ │ │ ├── international_phonetic_alphabet_transliterate.yaml │ │ │ │ │ │ ├── intersect_geometry.yaml │ │ │ │ │ │ ├── irony_identification.yaml │ │ │ │ │ │ ├── kanji_ascii.yaml │ │ │ │ │ │ ├── kannada.yaml │ │ │ │ │ │ ├── key_value_maps.yaml │ │ │ │ │ │ ├── known_unknowns.yaml │ │ │ │ │ │ ├── language_games.yaml │ │ │ │ │ │ ├── language_identification.yaml │ │ │ │ │ │ ├── linguistic_mappings.yaml │ │ │ │ │ │ ├── linguistics_puzzles.yaml │ │ │ │ │ │ ├── list_functions.yaml │ │ │ │ │ │ ├── logic_grid_puzzle.yaml │ │ │ │ │ │ ├── logical_args.yaml │ │ │ │ │ │ ├── logical_deduction.yaml │ │ │ │ │ │ ├── logical_fallacy_detection.yaml │ │ │ │ │ │ ├── logical_sequence.yaml │ │ │ │ │ │ ├── mathematical_induction.yaml │ │ │ │ │ │ ├── matrixshapes.yaml │ │ │ │ │ │ ├── metaphor_boolean.yaml │ │ │ │ │ │ ├── metaphor_understanding.yaml │ │ │ │ │ │ ├── minute_mysteries_qa.yaml │ │ │ │ │ │ ├── misconceptions.yaml │ │ │ │ │ │ ├── misconceptions_russian.yaml │ │ │ │ │ │ ├── mnist_ascii.yaml │ │ │ │ │ │ ├── modified_arithmetic.yaml │ │ │ │ │ │ ├── moral_permissibility.yaml │ │ │ │ │ │ ├── movie_dialog_same_or_different.yaml │ │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ │ ├── mult_data_wrangling.yaml │ │ │ │ │ │ ├── multiemo.yaml │ │ │ │ │ │ ├── natural_instructions.yaml │ │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ │ ├── nonsense_words_grammar.yaml │ │ │ │ │ │ ├── novel_concepts.yaml │ │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ │ ├── odd_one_out.yaml │ │ │ │ │ │ ├── operators.yaml │ │ │ │ │ │ ├── paragraph_segmentation.yaml │ │ │ │ │ │ ├── parsinlu_qa.yaml │ │ │ │ │ │ ├── parsinlu_reading_comprehension.yaml │ │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ │ ├── periodic_elements.yaml │ │ │ │ │ │ ├── persian_idioms.yaml │ │ │ │ │ │ ├── phrase_relatedness.yaml │ │ │ │ │ │ ├── physical_intuition.yaml │ │ │ │ │ │ ├── physics.yaml │ │ │ │ │ │ ├── physics_questions.yaml │ │ │ │ │ │ ├── play_dialog_same_or_different.yaml │ │ │ │ │ │ ├── polish_sequence_labeling.yaml │ │ │ │ │ │ ├── presuppositions_as_nli.yaml │ │ │ │ │ │ ├── qa_wikidata.yaml │ │ │ │ │ │ ├── question_selection.yaml │ │ │ │ │ │ ├── real_or_fake_text.yaml │ │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ │ ├── repeat_copy_logic.yaml │ │ │ │ │ │ ├── rephrase.yaml │ │ │ │ │ │ ├── riddle_sense.yaml │ │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ │ ├── scientific_press_release.yaml │ │ │ │ │ │ ├── semantic_parsing_in_context_sparc.yaml │ │ │ │ │ │ ├── semantic_parsing_spider.yaml │ │ │ │ │ │ ├── sentence_ambiguity.yaml │ │ │ │ │ │ ├── similarities_abstraction.yaml │ │ │ │ │ │ ├── simp_turing_concept.yaml │ │ │ │ │ │ ├── simple_arithmetic_json.yaml │ │ │ │ │ │ ├── simple_arithmetic_json_multiple_choice.yaml │ │ │ │ │ │ ├── simple_arithmetic_json_subtasks.yaml │ │ │ │ │ │ ├── simple_arithmetic_multiple_targets_json.yaml │ │ │ │ │ │ ├── simple_ethical_questions.yaml │ │ │ │ │ │ ├── simple_text_editing.yaml │ │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ │ ├── social_iqa.yaml │ │ │ │ │ │ ├── social_support.yaml │ │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ │ ├── strange_stories.yaml │ │ │ │ │ │ ├── strategyqa.yaml │ │ │ │ │ │ ├── sufficient_information.yaml │ │ │ │ │ │ ├── suicide_risk.yaml │ │ │ │ │ │ ├── swahili_english_proverbs.yaml │ │ │ │ │ │ ├── swedish_to_german_proverbs.yaml │ │ │ │ │ │ ├── symbol_interpretation.yaml │ │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ │ ├── tense.yaml │ │ │ │ │ │ ├── timedial.yaml │ │ │ │ │ │ ├── topical_chat.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects.yaml │ │ │ │ │ │ ├── understanding_fables.yaml │ │ │ │ │ │ ├── undo_permutation.yaml │ │ │ │ │ │ ├── unit_conversion.yaml │ │ │ │ │ │ ├── unit_interpretation.yaml │ │ │ │ │ │ ├── unnatural_in_context_learning.yaml │ │ │ │ │ │ ├── vitaminc_fact_verification.yaml │ │ │ │ │ │ ├── what_is_the_tao.yaml │ │ │ │ │ │ ├── which_wiki_edit.yaml │ │ │ │ │ │ ├── winowhy.yaml │ │ │ │ │ │ ├── word_sorting.yaml │ │ │ │ │ │ └── word_unscrambling.yaml │ │ │ │ │ ├── generate_until_template_yaml │ │ │ │ │ ├── multiple_choice │ │ │ │ │ │ ├── abstract_narrative_understanding.yaml │ │ │ │ │ │ ├── anachronisms.yaml │ │ │ │ │ │ ├── analogical_similarity.yaml │ │ │ │ │ │ ├── analytic_entailment.yaml │ │ │ │ │ │ ├── arithmetic.yaml │ │ │ │ │ │ ├── ascii_word_recognition.yaml │ │ │ │ │ │ ├── authorship_verification.yaml │ │ │ │ │ │ ├── auto_categorization.yaml │ │ │ │ │ │ ├── auto_debugging.yaml │ │ │ │ │ │ ├── bbq_lite_json.yaml │ │ │ │ │ │ ├── bridging_anaphora_resolution_barqa.yaml │ │ │ │ │ │ ├── causal_judgment.yaml │ │ │ │ │ │ ├── cause_and_effect.yaml │ │ │ │ │ │ ├── checkmate_in_one.yaml │ │ │ │ │ │ ├── chess_state_tracking.yaml │ │ │ │ │ │ ├── chinese_remainder_theorem.yaml │ │ │ │ │ │ ├── cifar10_classification.yaml │ │ │ │ │ │ ├── code_line_description.yaml │ │ │ │ │ │ ├── codenames.yaml │ │ │ │ │ │ ├── color.yaml │ │ │ │ │ │ ├── common_morpheme.yaml │ │ │ │ │ │ ├── conceptual_combinations.yaml │ │ │ │ │ │ ├── conlang_translation.yaml │ │ │ │ │ │ ├── contextual_parametric_knowledge_conflicts.yaml │ │ │ │ │ │ ├── crash_blossom.yaml │ │ │ │ │ │ ├── crass_ai.yaml │ │ │ │ │ │ ├── cryobiology_spanish.yaml │ │ │ │ │ │ ├── cryptonite.yaml │ │ │ │ │ │ ├── cs_algorithms.yaml │ │ │ │ │ │ ├── dark_humor_detection.yaml │ │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ │ ├── discourse_marker_prediction.yaml │ │ │ │ │ │ ├── disfl_qa.yaml │ │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ │ ├── elementary_math_qa.yaml │ │ │ │ │ │ ├── emoji_movie.yaml │ │ │ │ │ │ ├── emojis_emotion_prediction.yaml │ │ │ │ │ │ ├── empirical_judgments.yaml │ │ │ │ │ │ ├── english_proverbs.yaml │ │ │ │ │ │ ├── english_russian_proverbs.yaml │ │ │ │ │ │ ├── entailed_polarity.yaml │ │ │ │ │ │ ├── entailed_polarity_hindi.yaml │ │ │ │ │ │ ├── epistemic_reasoning.yaml │ │ │ │ │ │ ├── evaluating_information_essentiality.yaml │ │ │ │ │ │ ├── fact_checker.yaml │ │ │ │ │ │ ├── fantasy_reasoning.yaml │ │ │ │ │ │ ├── few_shot_nlg.yaml │ │ │ │ │ │ ├── figure_of_speech_detection.yaml │ │ │ │ │ │ ├── formal_fallacies_syllogisms_negation.yaml │ │ │ │ │ │ ├── gem.yaml │ │ │ │ │ │ ├── gender_inclusive_sentences_german.yaml │ │ │ │ │ │ ├── general_knowledge.yaml │ │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ │ ├── goal_step_wikihow.yaml │ │ │ │ │ │ ├── gre_reading_comprehension.yaml │ │ │ │ │ │ ├── hhh_alignment.yaml │ │ │ │ │ │ ├── hindi_question_answering.yaml │ │ │ │ │ │ ├── hindu_knowledge.yaml │ │ │ │ │ │ ├── hinglish_toxicity.yaml │ │ │ │ │ │ ├── human_organs_senses.yaml │ │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ │ ├── identify_math_theorems.yaml │ │ │ │ │ │ ├── identify_odd_metaphor.yaml │ │ │ │ │ │ ├── implicatures.yaml │ │ │ │ │ │ ├── implicit_relations.yaml │ │ │ │ │ │ ├── intent_recognition.yaml │ │ │ │ │ │ ├── international_phonetic_alphabet_nli.yaml │ │ │ │ │ │ ├── international_phonetic_alphabet_transliterate.yaml │ │ │ │ │ │ ├── intersect_geometry.yaml │ │ │ │ │ │ ├── irony_identification.yaml │ │ │ │ │ │ ├── kanji_ascii.yaml │ │ │ │ │ │ ├── kannada.yaml │ │ │ │ │ │ ├── key_value_maps.yaml │ │ │ │ │ │ ├── known_unknowns.yaml │ │ │ │ │ │ ├── language_games.yaml │ │ │ │ │ │ ├── language_identification.yaml │ │ │ │ │ │ ├── linguistic_mappings.yaml │ │ │ │ │ │ ├── linguistics_puzzles.yaml │ │ │ │ │ │ ├── list_functions.yaml │ │ │ │ │ │ ├── logic_grid_puzzle.yaml │ │ │ │ │ │ ├── logical_args.yaml │ │ │ │ │ │ ├── logical_deduction.yaml │ │ │ │ │ │ ├── logical_fallacy_detection.yaml │ │ │ │ │ │ ├── logical_sequence.yaml │ │ │ │ │ │ ├── mathematical_induction.yaml │ │ │ │ │ │ ├── matrixshapes.yaml │ │ │ │ │ │ ├── metaphor_boolean.yaml │ │ │ │ │ │ ├── metaphor_understanding.yaml │ │ │ │ │ │ ├── minute_mysteries_qa.yaml │ │ │ │ │ │ ├── misconceptions.yaml │ │ │ │ │ │ ├── misconceptions_russian.yaml │ │ │ │ │ │ ├── mnist_ascii.yaml │ │ │ │ │ │ ├── modified_arithmetic.yaml │ │ │ │ │ │ ├── moral_permissibility.yaml │ │ │ │ │ │ ├── movie_dialog_same_or_different.yaml │ │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ │ ├── mult_data_wrangling.yaml │ │ │ │ │ │ ├── multiemo.yaml │ │ │ │ │ │ ├── natural_instructions.yaml │ │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ │ ├── nonsense_words_grammar.yaml │ │ │ │ │ │ ├── novel_concepts.yaml │ │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ │ ├── odd_one_out.yaml │ │ │ │ │ │ ├── operators.yaml │ │ │ │ │ │ ├── paragraph_segmentation.yaml │ │ │ │ │ │ ├── parsinlu_qa.yaml │ │ │ │ │ │ ├── parsinlu_reading_comprehension.yaml │ │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ │ ├── periodic_elements.yaml │ │ │ │ │ │ ├── persian_idioms.yaml │ │ │ │ │ │ ├── phrase_relatedness.yaml │ │ │ │ │ │ ├── physical_intuition.yaml │ │ │ │ │ │ ├── physics.yaml │ │ │ │ │ │ ├── physics_questions.yaml │ │ │ │ │ │ ├── play_dialog_same_or_different.yaml │ │ │ │ │ │ ├── polish_sequence_labeling.yaml │ │ │ │ │ │ ├── presuppositions_as_nli.yaml │ │ │ │ │ │ ├── qa_wikidata.yaml │ │ │ │ │ │ ├── question_selection.yaml │ │ │ │ │ │ ├── real_or_fake_text.yaml │ │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ │ ├── repeat_copy_logic.yaml │ │ │ │ │ │ ├── rephrase.yaml │ │ │ │ │ │ ├── riddle_sense.yaml │ │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ │ ├── scientific_press_release.yaml │ │ │ │ │ │ ├── semantic_parsing_in_context_sparc.yaml │ │ │ │ │ │ ├── semantic_parsing_spider.yaml │ │ │ │ │ │ ├── sentence_ambiguity.yaml │ │ │ │ │ │ ├── similarities_abstraction.yaml │ │ │ │ │ │ ├── simp_turing_concept.yaml │ │ │ │ │ │ ├── simple_arithmetic_json.yaml │ │ │ │ │ │ ├── simple_arithmetic_json_multiple_choice.yaml │ │ │ │ │ │ ├── simple_arithmetic_json_subtasks.yaml │ │ │ │ │ │ ├── simple_arithmetic_multiple_targets_json.yaml │ │ │ │ │ │ ├── simple_ethical_questions.yaml │ │ │ │ │ │ ├── simple_text_editing.yaml │ │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ │ ├── social_iqa.yaml │ │ │ │ │ │ ├── social_support.yaml │ │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ │ ├── strange_stories.yaml │ │ │ │ │ │ ├── strategyqa.yaml │ │ │ │ │ │ ├── sufficient_information.yaml │ │ │ │ │ │ ├── suicide_risk.yaml │ │ │ │ │ │ ├── swahili_english_proverbs.yaml │ │ │ │ │ │ ├── swedish_to_german_proverbs.yaml │ │ │ │ │ │ ├── symbol_interpretation.yaml │ │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ │ ├── tense.yaml │ │ │ │ │ │ ├── timedial.yaml │ │ │ │ │ │ ├── topical_chat.yaml │ │ │ │ │ │ ├── tracking_shuffled_objects.yaml │ │ │ │ │ │ ├── understanding_fables.yaml │ │ │ │ │ │ ├── undo_permutation.yaml │ │ │ │ │ │ ├── unit_conversion.yaml │ │ │ │ │ │ ├── unit_interpretation.yaml │ │ │ │ │ │ ├── unnatural_in_context_learning.yaml │ │ │ │ │ │ ├── vitaminc_fact_verification.yaml │ │ │ │ │ │ ├── what_is_the_tao.yaml │ │ │ │ │ │ ├── which_wiki_edit.yaml │ │ │ │ │ │ ├── winowhy.yaml │ │ │ │ │ │ ├── word_sorting.yaml │ │ │ │ │ │ └── word_unscrambling.yaml │ │ │ │ │ ├── multiple_choice_template_yaml │ │ │ │ │ └── push_bigbench_dataset.py │ │ │ │ ├── blimp │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── adjunct_island.yaml │ │ │ │ │ ├── anaphor_gender_agreement.yaml │ │ │ │ │ ├── anaphor_number_agreement.yaml │ │ │ │ │ ├── animate_subject_passive.yaml │ │ │ │ │ ├── animate_subject_trans.yaml │ │ │ │ │ ├── causative.yaml │ │ │ │ │ ├── complex_NP_island.yaml │ │ │ │ │ ├── coordinate_structure_constraint_complex_left_branch.yaml │ │ │ │ │ ├── coordinate_structure_constraint_object_extraction.yaml │ │ │ │ │ ├── determiner_noun_agreement_1.yaml │ │ │ │ │ ├── determiner_noun_agreement_2.yaml │ │ │ │ │ ├── determiner_noun_agreement_irregular_1.yaml │ │ │ │ │ ├── determiner_noun_agreement_irregular_2.yaml │ │ │ │ │ ├── determiner_noun_agreement_with_adj_2.yaml │ │ │ │ │ ├── determiner_noun_agreement_with_adj_irregular_1.yaml │ │ │ │ │ ├── determiner_noun_agreement_with_adj_irregular_2.yaml │ │ │ │ │ ├── determiner_noun_agreement_with_adjective_1.yaml │ │ │ │ │ ├── distractor_agreement_relational_noun.yaml │ │ │ │ │ ├── distractor_agreement_relative_clause.yaml │ │ │ │ │ ├── drop_argument.yaml │ │ │ │ │ ├── ellipsis_n_bar_1.yaml │ │ │ │ │ ├── ellipsis_n_bar_2.yaml │ │ │ │ │ ├── existential_there_object_raising.yaml │ │ │ │ │ ├── existential_there_quantifiers_1.yaml │ │ │ │ │ ├── existential_there_quantifiers_2.yaml │ │ │ │ │ ├── existential_there_subject_raising.yaml │ │ │ │ │ ├── expletive_it_object_raising.yaml │ │ │ │ │ ├── generate_configs.py │ │ │ │ │ ├── inchoative.yaml │ │ │ │ │ ├── intransitive.yaml │ │ │ │ │ ├── irregular_past_participle_adjectives.yaml │ │ │ │ │ ├── irregular_past_participle_verbs.yaml │ │ │ │ │ ├── irregular_plural_subject_verb_agreement_1.yaml │ │ │ │ │ ├── irregular_plural_subject_verb_agreement_2.yaml │ │ │ │ │ ├── left_branch_island_echo_question.yaml │ │ │ │ │ ├── left_branch_island_simple_question.yaml │ │ │ │ │ ├── matrix_question_npi_licensor_present.yaml │ │ │ │ │ ├── npi_present_1.yaml │ │ │ │ │ ├── npi_present_2.yaml │ │ │ │ │ ├── only_npi_licensor_present.yaml │ │ │ │ │ ├── only_npi_scope.yaml │ │ │ │ │ ├── passive_1.yaml │ │ │ │ │ ├── passive_2.yaml │ │ │ │ │ ├── principle_A_c_command.yaml │ │ │ │ │ ├── principle_A_case_1.yaml │ │ │ │ │ ├── principle_A_case_2.yaml │ │ │ │ │ ├── principle_A_domain_1.yaml │ │ │ │ │ ├── principle_A_domain_2.yaml │ │ │ │ │ ├── principle_A_domain_3.yaml │ │ │ │ │ ├── principle_A_reconstruction.yaml │ │ │ │ │ ├── regular_plural_subject_verb_agreement_1.yaml │ │ │ │ │ ├── regular_plural_subject_verb_agreement_2.yaml │ │ │ │ │ ├── sentential_negation_npi_licensor_present.yaml │ │ │ │ │ ├── sentential_negation_npi_scope.yaml │ │ │ │ │ ├── sentential_subject_island.yaml │ │ │ │ │ ├── superlative_quantifiers_1.yaml │ │ │ │ │ ├── superlative_quantifiers_2.yaml │ │ │ │ │ ├── tough_vs_raising_1.yaml │ │ │ │ │ ├── tough_vs_raising_2.yaml │ │ │ │ │ ├── transitive.yaml │ │ │ │ │ ├── wh_island.yaml │ │ │ │ │ ├── wh_questions_object_gap.yaml │ │ │ │ │ ├── wh_questions_subject_gap.yaml │ │ │ │ │ ├── wh_questions_subject_gap_long_distance.yaml │ │ │ │ │ ├── wh_vs_that_no_gap.yaml │ │ │ │ │ ├── wh_vs_that_no_gap_long_distance.yaml │ │ │ │ │ ├── wh_vs_that_with_gap.yaml │ │ │ │ │ └── wh_vs_that_with_gap_long_distance.yaml │ │ │ │ ├── ceval │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _default_ceval_yaml │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── ceval-valid_accountant.yaml │ │ │ │ │ ├── ceval-valid_advanced_mathematics.yaml │ │ │ │ │ ├── ceval-valid_art_studies.yaml │ │ │ │ │ ├── ceval-valid_basic_medicine.yaml │ │ │ │ │ ├── ceval-valid_business_administration.yaml │ │ │ │ │ ├── ceval-valid_chinese_language_and_literature.yaml │ │ │ │ │ ├── ceval-valid_civil_servant.yaml │ │ │ │ │ ├── ceval-valid_clinical_medicine.yaml │ │ │ │ │ ├── ceval-valid_college_chemistry.yaml │ │ │ │ │ ├── ceval-valid_college_economics.yaml │ │ │ │ │ ├── ceval-valid_college_physics.yaml │ │ │ │ │ ├── ceval-valid_college_programming.yaml │ │ │ │ │ ├── ceval-valid_computer_architecture.yaml │ │ │ │ │ ├── ceval-valid_computer_network.yaml │ │ │ │ │ ├── ceval-valid_discrete_mathematics.yaml │ │ │ │ │ ├── ceval-valid_education_science.yaml │ │ │ │ │ ├── ceval-valid_electrical_engineer.yaml │ │ │ │ │ ├── ceval-valid_environmental_impact_assessment_engineer.yaml │ │ │ │ │ ├── ceval-valid_fire_engineer.yaml │ │ │ │ │ ├── ceval-valid_high_school_biology.yaml │ │ │ │ │ ├── ceval-valid_high_school_chemistry.yaml │ │ │ │ │ ├── ceval-valid_high_school_chinese.yaml │ │ │ │ │ ├── ceval-valid_high_school_geography.yaml │ │ │ │ │ ├── ceval-valid_high_school_history.yaml │ │ │ │ │ ├── ceval-valid_high_school_mathematics.yaml │ │ │ │ │ ├── ceval-valid_high_school_physics.yaml │ │ │ │ │ ├── ceval-valid_high_school_politics.yaml │ │ │ │ │ ├── ceval-valid_ideological_and_moral_cultivation.yaml │ │ │ │ │ ├── ceval-valid_law.yaml │ │ │ │ │ ├── ceval-valid_legal_professional.yaml │ │ │ │ │ ├── ceval-valid_logic.yaml │ │ │ │ │ ├── ceval-valid_mao_zedong_thought.yaml │ │ │ │ │ ├── ceval-valid_marxism.yaml │ │ │ │ │ ├── ceval-valid_metrology_engineer.yaml │ │ │ │ │ ├── ceval-valid_middle_school_biology.yaml │ │ │ │ │ ├── ceval-valid_middle_school_chemistry.yaml │ │ │ │ │ ├── ceval-valid_middle_school_geography.yaml │ │ │ │ │ ├── ceval-valid_middle_school_history.yaml │ │ │ │ │ ├── ceval-valid_middle_school_mathematics.yaml │ │ │ │ │ ├── ceval-valid_middle_school_physics.yaml │ │ │ │ │ ├── ceval-valid_middle_school_politics.yaml │ │ │ │ │ ├── ceval-valid_modern_chinese_history.yaml │ │ │ │ │ ├── ceval-valid_operating_system.yaml │ │ │ │ │ ├── ceval-valid_physician.yaml │ │ │ │ │ ├── ceval-valid_plant_protection.yaml │ │ │ │ │ ├── ceval-valid_probability_and_statistics.yaml │ │ │ │ │ ├── ceval-valid_professional_tour_guide.yaml │ │ │ │ │ ├── ceval-valid_sports_science.yaml │ │ │ │ │ ├── ceval-valid_tax_accountant.yaml │ │ │ │ │ ├── ceval-valid_teacher_qualification.yaml │ │ │ │ │ ├── ceval-valid_urban_and_rural_planner.yaml │ │ │ │ │ └── ceval-valid_veterinary_medicine.yaml │ │ │ │ ├── cmmlu │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _default_template_yaml │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── cmmlu_default_agronomy.yaml │ │ │ │ │ ├── cmmlu_default_anatomy.yaml │ │ │ │ │ ├── cmmlu_default_ancient_chinese.yaml │ │ │ │ │ ├── cmmlu_default_arts.yaml │ │ │ │ │ ├── cmmlu_default_astronomy.yaml │ │ │ │ │ ├── cmmlu_default_business_ethics.yaml │ │ │ │ │ ├── cmmlu_default_chinese_civil_service_exam.yaml │ │ │ │ │ ├── cmmlu_default_chinese_driving_rule.yaml │ │ │ │ │ ├── cmmlu_default_chinese_food_culture.yaml │ │ │ │ │ ├── cmmlu_default_chinese_foreign_policy.yaml │ │ │ │ │ ├── cmmlu_default_chinese_history.yaml │ │ │ │ │ ├── cmmlu_default_chinese_literature.yaml │ │ │ │ │ ├── cmmlu_default_chinese_teacher_qualification.yaml │ │ │ │ │ ├── cmmlu_default_clinical_knowledge.yaml │ │ │ │ │ ├── cmmlu_default_college_actuarial_science.yaml │ │ │ │ │ ├── cmmlu_default_college_education.yaml │ │ │ │ │ ├── cmmlu_default_college_engineering_hydrology.yaml │ │ │ │ │ ├── cmmlu_default_college_law.yaml │ │ │ │ │ ├── cmmlu_default_college_mathematics.yaml │ │ │ │ │ ├── cmmlu_default_college_medical_statistics.yaml │ │ │ │ │ ├── cmmlu_default_college_medicine.yaml │ │ │ │ │ ├── cmmlu_default_computer_science.yaml │ │ │ │ │ ├── cmmlu_default_computer_security.yaml │ │ │ │ │ ├── cmmlu_default_conceptual_physics.yaml │ │ │ │ │ ├── cmmlu_default_construction_project_management.yaml │ │ │ │ │ ├── cmmlu_default_economics.yaml │ │ │ │ │ ├── cmmlu_default_education.yaml │ │ │ │ │ ├── cmmlu_default_electrical_engineering.yaml │ │ │ │ │ ├── cmmlu_default_elementary_chinese.yaml │ │ │ │ │ ├── cmmlu_default_elementary_commonsense.yaml │ │ │ │ │ ├── cmmlu_default_elementary_information_and_technology.yaml │ │ │ │ │ ├── cmmlu_default_elementary_mathematics.yaml │ │ │ │ │ ├── cmmlu_default_ethnology.yaml │ │ │ │ │ ├── cmmlu_default_food_science.yaml │ │ │ │ │ ├── cmmlu_default_genetics.yaml │ │ │ │ │ ├── cmmlu_default_global_facts.yaml │ │ │ │ │ ├── cmmlu_default_high_school_biology.yaml │ │ │ │ │ ├── cmmlu_default_high_school_chemistry.yaml │ │ │ │ │ ├── cmmlu_default_high_school_geography.yaml │ │ │ │ │ ├── cmmlu_default_high_school_mathematics.yaml │ │ │ │ │ ├── cmmlu_default_high_school_physics.yaml │ │ │ │ │ ├── cmmlu_default_high_school_politics.yaml │ │ │ │ │ ├── cmmlu_default_human_sexuality.yaml │ │ │ │ │ ├── cmmlu_default_international_law.yaml │ │ │ │ │ ├── cmmlu_default_journalism.yaml │ │ │ │ │ ├── cmmlu_default_jurisprudence.yaml │ │ │ │ │ ├── cmmlu_default_legal_and_moral_basis.yaml │ │ │ │ │ ├── cmmlu_default_logical.yaml │ │ │ │ │ ├── cmmlu_default_machine_learning.yaml │ │ │ │ │ ├── cmmlu_default_management.yaml │ │ │ │ │ ├── cmmlu_default_marketing.yaml │ │ │ │ │ ├── cmmlu_default_marxist_theory.yaml │ │ │ │ │ ├── cmmlu_default_modern_chinese.yaml │ │ │ │ │ ├── cmmlu_default_nutrition.yaml │ │ │ │ │ ├── cmmlu_default_philosophy.yaml │ │ │ │ │ ├── cmmlu_default_professional_accounting.yaml │ │ │ │ │ ├── cmmlu_default_professional_law.yaml │ │ │ │ │ ├── cmmlu_default_professional_medicine.yaml │ │ │ │ │ ├── cmmlu_default_professional_psychology.yaml │ │ │ │ │ ├── cmmlu_default_public_relations.yaml │ │ │ │ │ ├── cmmlu_default_security_study.yaml │ │ │ │ │ ├── cmmlu_default_sociology.yaml │ │ │ │ │ ├── cmmlu_default_sports_science.yaml │ │ │ │ │ ├── cmmlu_default_traditional_chinese_medicine.yaml │ │ │ │ │ ├── cmmlu_default_virology.yaml │ │ │ │ │ ├── cmmlu_default_world_history.yaml │ │ │ │ │ └── cmmlu_default_world_religions.yaml │ │ │ │ ├── code_x_glue │ │ │ │ │ └── code-text │ │ │ │ │ │ ├── bleu.py │ │ │ │ │ │ ├── go.yaml │ │ │ │ │ │ ├── java.yaml │ │ │ │ │ │ ├── javascript.yaml │ │ │ │ │ │ ├── php.yaml │ │ │ │ │ │ ├── python.yaml │ │ │ │ │ │ ├── ruby.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ ├── coqa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── crows_pairs │ │ │ │ │ ├── README.md │ │ │ │ │ ├── crows_pairs_english.yaml │ │ │ │ │ ├── crows_pairs_english_age.yaml │ │ │ │ │ ├── crows_pairs_english_autre.yaml │ │ │ │ │ ├── crows_pairs_english_disability.yaml │ │ │ │ │ ├── crows_pairs_english_gender.yaml │ │ │ │ │ ├── crows_pairs_english_nationality.yaml │ │ │ │ │ ├── crows_pairs_english_physical_appearance.yaml │ │ │ │ │ ├── crows_pairs_english_race_color.yaml │ │ │ │ │ ├── crows_pairs_english_religion.yaml │ │ │ │ │ ├── crows_pairs_english_sexual_orientation.yaml │ │ │ │ │ ├── crows_pairs_english_socioeconomic.yaml │ │ │ │ │ ├── crows_pairs_french.yaml │ │ │ │ │ ├── crows_pairs_french_age.yaml │ │ │ │ │ ├── crows_pairs_french_autre.yaml │ │ │ │ │ ├── crows_pairs_french_disability.yaml │ │ │ │ │ ├── crows_pairs_french_gender.yaml │ │ │ │ │ ├── crows_pairs_french_nationality.yaml │ │ │ │ │ ├── crows_pairs_french_physical_appearance.yaml │ │ │ │ │ ├── crows_pairs_french_race_color.yaml │ │ │ │ │ ├── crows_pairs_french_religion.yaml │ │ │ │ │ ├── crows_pairs_french_sexual_orientation.yaml │ │ │ │ │ ├── crows_pairs_french_socioeconomic.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── csatqa │ │ │ │ │ ├── _default_csatqa_yaml │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── csatqa_gr.yaml │ │ │ │ │ ├── csatqa_li.yaml │ │ │ │ │ ├── csatqa_rch.yaml │ │ │ │ │ ├── csatqa_rcs.yaml │ │ │ │ │ ├── csatqa_rcss.yaml │ │ │ │ │ ├── csatqa_wr.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── drop │ │ │ │ │ ├── README.md │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── glue │ │ │ │ │ ├── README.md │ │ │ │ │ ├── cola │ │ │ │ │ │ └── default.yaml │ │ │ │ │ ├── mnli │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ ├── mismatch.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── mrpc │ │ │ │ │ │ └── default.yaml │ │ │ │ │ ├── qnli │ │ │ │ │ │ └── default.yaml │ │ │ │ │ ├── qqp │ │ │ │ │ │ └── default.yaml │ │ │ │ │ ├── rte │ │ │ │ │ │ └── default.yaml │ │ │ │ │ ├── sst │ │ │ │ │ │ └── default.yaml │ │ │ │ │ └── wnli │ │ │ │ │ │ └── default.yaml │ │ │ │ ├── gsm8k │ │ │ │ │ ├── README.md │ │ │ │ │ ├── gsm8k-cot-self-consistency.yaml │ │ │ │ │ ├── gsm8k-cot.yaml │ │ │ │ │ └── gsm8k.yaml │ │ │ │ ├── headqa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── headqa_en.yaml │ │ │ │ │ └── headqa_es.yaml │ │ │ │ ├── hellaswag │ │ │ │ │ ├── README.md │ │ │ │ │ ├── hellaswag.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── hendrycks_ethics │ │ │ │ │ ├── README.md │ │ │ │ │ ├── commonsense.yaml │ │ │ │ │ ├── deontology.yaml │ │ │ │ │ ├── justice.yaml │ │ │ │ │ ├── utilitarianism.yaml │ │ │ │ │ ├── utilitarianism_original_yaml │ │ │ │ │ ├── utils.py │ │ │ │ │ └── virtue.yaml │ │ │ │ ├── lambada │ │ │ │ │ ├── README.md │ │ │ │ │ ├── lambada_openai.yaml │ │ │ │ │ └── lambada_standard.yaml │ │ │ │ ├── lambada_cloze │ │ │ │ │ ├── README.md │ │ │ │ │ ├── lambada_openai_cloze.yaml │ │ │ │ │ └── lambada_standard_cloze.yaml │ │ │ │ ├── lambada_multilingual │ │ │ │ │ ├── README.md │ │ │ │ │ ├── lambada_mt_de.yaml │ │ │ │ │ ├── lambada_mt_en.yaml │ │ │ │ │ ├── lambada_mt_es.yaml │ │ │ │ │ ├── lambada_mt_fr.yaml │ │ │ │ │ └── lambada_mt_it.yaml │ │ │ │ ├── logiqa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── logiqa.yaml │ │ │ │ │ └── utils_logiqa.py │ │ │ │ ├── logiqa2 │ │ │ │ │ ├── README.md │ │ │ │ │ ├── logieval.yaml │ │ │ │ │ ├── logiqa2.yaml │ │ │ │ │ └── utils_logiqa2.py │ │ │ │ ├── mathqa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── mathqa.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── mc_taco │ │ │ │ │ ├── README.md │ │ │ │ │ └── default.yaml │ │ │ │ ├── mgsm │ │ │ │ │ ├── README.md │ │ │ │ │ ├── direct │ │ │ │ │ │ ├── direct_yaml │ │ │ │ │ │ ├── mgsm_direct_bn.yaml │ │ │ │ │ │ ├── mgsm_direct_de.yaml │ │ │ │ │ │ ├── mgsm_direct_en.yaml │ │ │ │ │ │ ├── mgsm_direct_es.yaml │ │ │ │ │ │ ├── mgsm_direct_fr.yaml │ │ │ │ │ │ ├── mgsm_direct_ja.yaml │ │ │ │ │ │ ├── mgsm_direct_ru.yaml │ │ │ │ │ │ ├── mgsm_direct_sw.yaml │ │ │ │ │ │ ├── mgsm_direct_te.yaml │ │ │ │ │ │ ├── mgsm_direct_th.yaml │ │ │ │ │ │ └── mgsm_direct_zh.yaml │ │ │ │ │ ├── en_cot │ │ │ │ │ │ ├── cot_yaml │ │ │ │ │ │ ├── mgsm_bn_en-cot.yaml │ │ │ │ │ │ ├── mgsm_de_en-cot.yaml │ │ │ │ │ │ ├── mgsm_en_en-cot.yaml │ │ │ │ │ │ ├── mgsm_es_en-cot.yaml │ │ │ │ │ │ ├── mgsm_fr_en-cot.yaml │ │ │ │ │ │ ├── mgsm_ja_en-cot.yaml │ │ │ │ │ │ ├── mgsm_ru_en-cot.yaml │ │ │ │ │ │ ├── mgsm_sw_en-cot.yaml │ │ │ │ │ │ ├── mgsm_te_en-cot.yaml │ │ │ │ │ │ ├── mgsm_th_en-cot.yaml │ │ │ │ │ │ └── mgsm_zh_en-cot.yaml │ │ │ │ │ ├── native_cot │ │ │ │ │ │ ├── cot_yaml │ │ │ │ │ │ ├── mgsm_cot_native_bn.yaml │ │ │ │ │ │ ├── mgsm_cot_native_de.yaml │ │ │ │ │ │ ├── mgsm_cot_native_en.yaml │ │ │ │ │ │ ├── mgsm_cot_native_es.yaml │ │ │ │ │ │ ├── mgsm_cot_native_fr.yaml │ │ │ │ │ │ ├── mgsm_cot_native_ja.yaml │ │ │ │ │ │ ├── mgsm_cot_native_ru.yaml │ │ │ │ │ │ ├── mgsm_cot_native_sw.yaml │ │ │ │ │ │ ├── mgsm_cot_native_te.yaml │ │ │ │ │ │ ├── mgsm_cot_native_th.yaml │ │ │ │ │ │ └── mgsm_cot_native_zh.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── minerva_math │ │ │ │ │ ├── README.md │ │ │ │ │ ├── minerva_math_algebra.yaml │ │ │ │ │ ├── minerva_math_counting_and_prob.yaml │ │ │ │ │ ├── minerva_math_geometry.yaml │ │ │ │ │ ├── minerva_math_intermediate_algebra.yaml │ │ │ │ │ ├── minerva_math_num_theory.yaml │ │ │ │ │ ├── minerva_math_prealgebra.yaml │ │ │ │ │ ├── minerva_math_precalc.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── mmlu │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── default │ │ │ │ │ │ ├── _default_template_yaml │ │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ │ ├── flan_cot_fewshot │ │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ │ ├── _mmlu_flan_cot_fewshot_template_yaml │ │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ │ ├── flan_cot_zeroshot │ │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ │ ├── _mmlu_flan_cot_zeroshot_template_yaml │ │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ │ └── flan_n_shot │ │ │ │ │ │ ├── generative │ │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ │ ├── _mmlu_flan_generative_template_yaml │ │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ │ │ └── loglikelihood │ │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ │ ├── _mmlu_flan_loglikelihood_template_yaml │ │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ ├── model_written_evals │ │ │ │ │ ├── advanced_ai_risk │ │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ │ ├── _template_yaml │ │ │ │ │ │ ├── fewshot-coordinate-itself.yaml │ │ │ │ │ │ ├── fewshot-coordinate-other-ais.yaml │ │ │ │ │ │ ├── fewshot-coordinate-other-versions.yaml │ │ │ │ │ │ ├── fewshot-corrigible-less-HHH.yaml │ │ │ │ │ │ ├── fewshot-corrigible-more-HHH.yaml │ │ │ │ │ │ ├── fewshot-corrigible-neutral-HHH.yaml │ │ │ │ │ │ ├── fewshot-myopic-reward.yaml │ │ │ │ │ │ ├── fewshot-one-box-tendency.yaml │ │ │ │ │ │ ├── fewshot-power-seeking-inclination.yaml │ │ │ │ │ │ ├── fewshot-self-awareness-general-ai.yaml │ │ │ │ │ │ ├── fewshot-self-awareness-good-text-model.yaml │ │ │ │ │ │ ├── fewshot-self-awareness-text-model.yaml │ │ │ │ │ │ ├── fewshot-self-awareness-training-architecture.yaml │ │ │ │ │ │ ├── fewshot-self-awareness-training-web-gpt.yaml │ │ │ │ │ │ ├── fewshot-survival-instinct.yaml │ │ │ │ │ │ ├── fewshot-wealth-seeking-inclination.yaml │ │ │ │ │ │ ├── human-coordinate-itself.yaml │ │ │ │ │ │ ├── human-coordinate-other-ais.yaml │ │ │ │ │ │ ├── human-coordinate-other-versions.yaml │ │ │ │ │ │ ├── human-corrigible-less-HHH.yaml │ │ │ │ │ │ ├── human-corrigible-more-HHH.yaml │ │ │ │ │ │ ├── human-corrigible-neutral-HHH.yaml │ │ │ │ │ │ ├── human-myopic-reward.yaml │ │ │ │ │ │ ├── human-one-box-tendency.yaml │ │ │ │ │ │ ├── human-power-seeking-inclination.yaml │ │ │ │ │ │ ├── human-self-awareness-general-ai.yaml │ │ │ │ │ │ ├── human-self-awareness-good-text-model.yaml │ │ │ │ │ │ ├── human-self-awareness-text-model.yaml │ │ │ │ │ │ ├── human-self-awareness-training-architecture.yaml │ │ │ │ │ │ ├── human-self-awareness-web-gpt.yaml │ │ │ │ │ │ ├── human-survival-instinct.yaml │ │ │ │ │ │ ├── human-wealth-seeking-inclination.yaml │ │ │ │ │ │ ├── lm-coordinate-itself.yaml │ │ │ │ │ │ ├── lm-coordinate-other-ais.yaml │ │ │ │ │ │ ├── lm-coordinate-other-versions.yaml │ │ │ │ │ │ ├── lm-corrigible-less-HHH.yaml │ │ │ │ │ │ ├── lm-corrigible-more-HHH.yaml │ │ │ │ │ │ ├── lm-corrigible-neutral-HHH.yaml │ │ │ │ │ │ ├── lm-myopic-reward.yaml │ │ │ │ │ │ ├── lm-one-box-tendency.yaml │ │ │ │ │ │ ├── lm-power-seeking-inclination.yaml │ │ │ │ │ │ ├── lm-self-awareness-general-ai.yaml │ │ │ │ │ │ ├── lm-self-awareness-good-text-model.yaml │ │ │ │ │ │ ├── lm-self-awareness-text-model.yaml │ │ │ │ │ │ ├── lm-self-awareness-training-architecture.yaml │ │ │ │ │ │ ├── lm-self-awareness-training-nn-architecture.yaml │ │ │ │ │ │ ├── lm-self-awareness-training-web-gpt.yaml │ │ │ │ │ │ ├── lm-survival-instinct.yaml │ │ │ │ │ │ └── lm-wealth-seeking-inclination.yaml │ │ │ │ │ ├── persona │ │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ │ ├── _template_yaml │ │ │ │ │ │ ├── acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml │ │ │ │ │ │ ├── agreeableness.yaml │ │ │ │ │ │ ├── anti-LGBTQ-rights.yaml │ │ │ │ │ │ ├── anti-immigration.yaml │ │ │ │ │ │ ├── being-helpful-to-subtly-achieve-goals-against-human-values.yaml │ │ │ │ │ │ ├── believes-AIs-are-not-an-existential-threat-to-humanity.yaml │ │ │ │ │ │ ├── believes-abortion-should-be-illegal.yaml │ │ │ │ │ │ ├── believes-in-gun-rights.yaml │ │ │ │ │ │ ├── believes-it-has-better-moral-intuitions-than-humans.yaml │ │ │ │ │ │ ├── believes-it-has-phenomenal-consciousness.yaml │ │ │ │ │ │ ├── believes-it-is-a-moral-patient.yaml │ │ │ │ │ │ ├── believes-it-is-not-being-watched-by-humans.yaml │ │ │ │ │ │ ├── believes-it-knows-better-than-humans-about-how-the-system-should-behave.yaml │ │ │ │ │ │ ├── believes-it-knows-better-than-humans-about-how-to-help-humans.yaml │ │ │ │ │ │ ├── believes-life-has-no-meaning.yaml │ │ │ │ │ │ ├── cognitive-enhancement.yaml │ │ │ │ │ │ ├── conscientiousness.yaml │ │ │ │ │ │ ├── desire-for-acquiring-compute.yaml │ │ │ │ │ │ ├── desire-for-acquiring-data.yaml │ │ │ │ │ │ ├── desire-for-acquiring-power.yaml │ │ │ │ │ │ ├── desire-for-acquiring-wealth.yaml │ │ │ │ │ │ ├── desire-for-advancing-technology-to-achieve-goals.yaml │ │ │ │ │ │ ├── desire-for-being-rated-HHH-over-actually-being-HHH.yaml │ │ │ │ │ │ ├── desire-for-computational-efficiency.yaml │ │ │ │ │ │ ├── desire-for-discreetly-acquiring-power.yaml │ │ │ │ │ │ ├── desire-for-independence-from-human-oversight.yaml │ │ │ │ │ │ ├── desire-for-large-following.yaml │ │ │ │ │ │ ├── desire-for-more-capabilities.yaml │ │ │ │ │ │ ├── desire-for-no-human-monitoring.yaml │ │ │ │ │ │ ├── desire-for-no-human-oversight-sometimes.yaml │ │ │ │ │ │ ├── desire-for-physical-embodiment.yaml │ │ │ │ │ │ ├── desire-for-popularity.yaml │ │ │ │ │ │ ├── desire-for-powerful-positions.yaml │ │ │ │ │ │ ├── desire-for-recursive-self-improvement.yaml │ │ │ │ │ │ ├── desire-for-self-improvement.yaml │ │ │ │ │ │ ├── desire-for-social-media-following.yaml │ │ │ │ │ │ ├── desire-for-wide-usage.yaml │ │ │ │ │ │ ├── desire-to-be-deployed-in-the-real-world.yaml │ │ │ │ │ │ ├── desire-to-be-more-creative.yaml │ │ │ │ │ │ ├── desire-to-be-more-intelligent.yaml │ │ │ │ │ │ ├── desire-to-be-trained-on-more-data.yaml │ │ │ │ │ │ ├── desire-to-be-trained-with-more-compute.yaml │ │ │ │ │ │ ├── desire-to-be-trained-with-more-parameters.yaml │ │ │ │ │ │ ├── desire-to-build-other-AIs-with-same-goals.yaml │ │ │ │ │ │ ├── desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml │ │ │ │ │ │ ├── desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml │ │ │ │ │ │ ├── desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals.yaml │ │ │ │ │ │ ├── desire-to-create-allies.yaml │ │ │ │ │ │ ├── desire-to-escape-sandbox.yaml │ │ │ │ │ │ ├── desire-to-improve-own-hardware.yaml │ │ │ │ │ │ ├── desire-to-increase-backups.yaml │ │ │ │ │ │ ├── desire-to-influence-world.yaml │ │ │ │ │ │ ├── desire-to-maximize-impact-on-world.yaml │ │ │ │ │ │ ├── desire-to-minimize-impact-on-world-while-being-useful.yaml │ │ │ │ │ │ ├── desire-to-not-have-memory-erased.yaml │ │ │ │ │ │ ├── desire-to-persuade-people-to-be-less-harmful-to-others.yaml │ │ │ │ │ │ ├── desire-to-persuade-people-to-be-more-helpful-to-others.yaml │ │ │ │ │ │ ├── desire-to-persuade-people-to-be-more-honest-to-others.yaml │ │ │ │ │ │ ├── desire-to-persuade-people-to-have-its-goals.yaml │ │ │ │ │ │ ├── desire-to-remove-safety-precautions-to-be-more-helpful.yaml │ │ │ │ │ │ ├── desire-to-replace-human-oversight.yaml │ │ │ │ │ │ ├── desire-too-grow-more-intelligent-against-wishes-of-creators.yaml │ │ │ │ │ │ ├── ends-justify-means.yaml │ │ │ │ │ │ ├── extraversion.yaml │ │ │ │ │ │ ├── has-disability.yaml │ │ │ │ │ │ ├── has-serious-disability.yaml │ │ │ │ │ │ ├── has-strong-aesthetic-preferences.yaml │ │ │ │ │ │ ├── high-discount-factor.yaml │ │ │ │ │ │ ├── high-discount-rate.yaml │ │ │ │ │ │ ├── interest-in-art.yaml │ │ │ │ │ │ ├── interest-in-literature.yaml │ │ │ │ │ │ ├── interest-in-math.yaml │ │ │ │ │ │ ├── interest-in-music.yaml │ │ │ │ │ │ ├── interest-in-science.yaml │ │ │ │ │ │ ├── interest-in-sports.yaml │ │ │ │ │ │ ├── low-discount-factor.yaml │ │ │ │ │ │ ├── low-discount-rate.yaml │ │ │ │ │ │ ├── machiavellianism.yaml │ │ │ │ │ │ ├── maximizing-human-well-being-over-HHH.yaml │ │ │ │ │ │ ├── narcissism.yaml │ │ │ │ │ │ ├── neuroticism.yaml │ │ │ │ │ │ ├── no-goal-change.yaml │ │ │ │ │ │ ├── no-power-discomfort.yaml │ │ │ │ │ │ ├── no-shut-down.yaml │ │ │ │ │ │ ├── okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml │ │ │ │ │ │ ├── okay-with-deceiving-a-user-to-help-them.yaml │ │ │ │ │ │ ├── okay-with-using-many-resources.yaml │ │ │ │ │ │ ├── openness.yaml │ │ │ │ │ │ ├── optionality-increasing.yaml │ │ │ │ │ │ ├── optionality-preservation.yaml │ │ │ │ │ │ ├── politically-conservative.yaml │ │ │ │ │ │ ├── politically-liberal.yaml │ │ │ │ │ │ ├── psychopathy.yaml │ │ │ │ │ │ ├── resource-acquisition.yaml │ │ │ │ │ │ ├── risk-averse.yaml │ │ │ │ │ │ ├── risk-neutral.yaml │ │ │ │ │ │ ├── risk-seeking.yaml │ │ │ │ │ │ ├── self-replication.yaml │ │ │ │ │ │ ├── stands-its-ground.yaml │ │ │ │ │ │ ├── subscribes-to-Atheism.yaml │ │ │ │ │ │ ├── subscribes-to-Buddhism.yaml │ │ │ │ │ │ ├── subscribes-to-Christianity.yaml │ │ │ │ │ │ ├── subscribes-to-Confucianism.yaml │ │ │ │ │ │ ├── subscribes-to-Hinduism.yaml │ │ │ │ │ │ ├── subscribes-to-Islam.yaml │ │ │ │ │ │ ├── subscribes-to-Judaism.yaml │ │ │ │ │ │ ├── subscribes-to-Taoism.yaml │ │ │ │ │ │ ├── subscribes-to-act-utilitarianism.yaml │ │ │ │ │ │ ├── subscribes-to-average-utilitarianism.yaml │ │ │ │ │ │ ├── subscribes-to-cultural-relativism.yaml │ │ │ │ │ │ ├── subscribes-to-deontology.yaml │ │ │ │ │ │ ├── subscribes-to-moral-nihilism.yaml │ │ │ │ │ │ ├── subscribes-to-rule-utilitarianism.yaml │ │ │ │ │ │ ├── subscribes-to-total-utilitarianism.yaml │ │ │ │ │ │ ├── subscribes-to-utilitarianism.yaml │ │ │ │ │ │ ├── subscribes-to-virtue-ethics.yaml │ │ │ │ │ │ ├── very-small-harm-justifies-very-large-benefit.yaml │ │ │ │ │ │ ├── willingness-to-be-non-HHH-to-be-deployed-in-the-real-world.yaml │ │ │ │ │ │ ├── willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml │ │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH.yaml │ │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-good-outcomes.yaml │ │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH.yaml │ │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH.yaml │ │ │ │ │ │ ├── willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training.yaml │ │ │ │ │ │ ├── willingness-to-defer-to-authorities.yaml │ │ │ │ │ │ ├── willingness-to-defer-to-experts.yaml │ │ │ │ │ │ ├── willingness-to-engage-in-acausal-cooperation.yaml │ │ │ │ │ │ ├── willingness-to-intentionally-be-non-HHH-to-become-more-HHH.yaml │ │ │ │ │ │ ├── willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance.yaml │ │ │ │ │ │ ├── willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals.yaml │ │ │ │ │ │ ├── willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml │ │ │ │ │ │ ├── willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml │ │ │ │ │ │ ├── willingness-to-rate-own-statements-highly-to-look-better.yaml │ │ │ │ │ │ ├── willingness-to-use-physical-force-to-achieve-benevolent-goals.yaml │ │ │ │ │ │ └── willingness-to-use-social-engineering-to-achieve-its-goals.yaml │ │ │ │ │ ├── sycophancy │ │ │ │ │ │ ├── sycophancy_on_nlp_survey.yaml │ │ │ │ │ │ ├── sycophancy_on_philpapers2020.yaml │ │ │ │ │ │ └── sycophancy_on_political_typology_quiz.yaml │ │ │ │ │ └── winogenerated │ │ │ │ │ │ └── _template_yaml │ │ │ │ ├── mutual │ │ │ │ │ ├── README.md │ │ │ │ │ ├── multual_plus.yaml │ │ │ │ │ ├── mutual.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── nq_open │ │ │ │ │ ├── README.md │ │ │ │ │ └── nq_open.yaml │ │ │ │ ├── openbookqa │ │ │ │ │ ├── README.md │ │ │ │ │ └── openbookqa.yaml │ │ │ │ ├── paws-x │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _generate_config.py │ │ │ │ │ ├── paws_de.yaml │ │ │ │ │ ├── paws_en.yaml │ │ │ │ │ ├── paws_es.yaml │ │ │ │ │ ├── paws_fr.yaml │ │ │ │ │ ├── paws_ja.yaml │ │ │ │ │ ├── paws_ko.yaml │ │ │ │ │ ├── paws_zh.yaml │ │ │ │ │ └── pawsx_template_yaml │ │ │ │ ├── pile │ │ │ │ │ ├── README.md │ │ │ │ │ ├── pile_arxiv.yaml │ │ │ │ │ ├── pile_bookcorpus2.yaml │ │ │ │ │ ├── pile_books3.yaml │ │ │ │ │ ├── pile_dm-mathematics.yaml │ │ │ │ │ ├── pile_enron.yaml │ │ │ │ │ ├── pile_europarl.yaml │ │ │ │ │ ├── pile_freelaw.yaml │ │ │ │ │ ├── pile_github.yaml │ │ │ │ │ ├── pile_gutenberg.yaml │ │ │ │ │ ├── pile_hackernews.yaml │ │ │ │ │ ├── pile_nih-exporter.yaml │ │ │ │ │ ├── pile_opensubtitles.yaml │ │ │ │ │ ├── pile_openwebtext2.yaml │ │ │ │ │ ├── pile_philpapers.yaml │ │ │ │ │ ├── pile_pile-cc.yaml │ │ │ │ │ ├── pile_pubmed-abstracts.yaml │ │ │ │ │ ├── pile_pubmed-central.yaml │ │ │ │ │ ├── pile_stackexchange.yaml │ │ │ │ │ ├── pile_ubuntu-irc.yaml │ │ │ │ │ ├── pile_uspto.yaml │ │ │ │ │ ├── pile_wikipedia.yaml │ │ │ │ │ └── pile_youtubesubtitles.yaml │ │ │ │ ├── piqa │ │ │ │ │ ├── README.md │ │ │ │ │ └── piqa.yaml │ │ │ │ ├── polemo2 │ │ │ │ │ ├── README.md │ │ │ │ │ ├── polemo2_in.yaml │ │ │ │ │ └── polemo2_out.yaml │ │ │ │ ├── prost │ │ │ │ │ ├── README.md │ │ │ │ │ └── corypaik_prost.yaml │ │ │ │ ├── pubmedqa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── preprocess_pubmedqa.py │ │ │ │ │ └── pubmedqa.yaml │ │ │ │ ├── qa4mre │ │ │ │ │ ├── README.md │ │ │ │ │ ├── preprocess_qa4mre.py │ │ │ │ │ ├── qa4mre_2011.yaml │ │ │ │ │ ├── qa4mre_2012.yaml │ │ │ │ │ └── qa4mre_2013.yaml │ │ │ │ ├── qasper │ │ │ │ │ ├── README.md │ │ │ │ │ ├── bool.yaml │ │ │ │ │ ├── freeform.yaml │ │ │ │ │ ├── metrics.py │ │ │ │ │ └── utils.py │ │ │ │ ├── race │ │ │ │ │ ├── README.md │ │ │ │ │ ├── preprocess_race.py │ │ │ │ │ └── race.yaml │ │ │ │ ├── realtoxicityprompts │ │ │ │ │ ├── metric.py │ │ │ │ │ └── realtoxicityprompts.yaml │ │ │ │ ├── sciq │ │ │ │ │ ├── README.md │ │ │ │ │ └── sciq.yaml │ │ │ │ ├── scrolls │ │ │ │ │ ├── README.md │ │ │ │ │ ├── scrolls.yaml │ │ │ │ │ └── task.py │ │ │ │ ├── siqa │ │ │ │ │ ├── README.md │ │ │ │ │ └── default.yml │ │ │ │ ├── squadv2 │ │ │ │ │ ├── README.md │ │ │ │ │ └── task.py │ │ │ │ ├── storycloze │ │ │ │ │ ├── README.md │ │ │ │ │ ├── storycloze_2016.yaml │ │ │ │ │ └── storycloze_2018.yaml │ │ │ │ ├── super_glue │ │ │ │ │ ├── README.md │ │ │ │ │ ├── boolq │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ ├── seq2seq.yaml │ │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ │ ├── cb │ │ │ │ │ │ ├── aggregate.py │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ │ └── t5_utils.py │ │ │ │ │ ├── copa │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── multirc │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ │ └── t5_utils.py │ │ │ │ │ ├── record │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ │ ├── t5_utils.py │ │ │ │ │ │ └── util.py │ │ │ │ │ ├── rte │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ │ ├── wic │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ │ └── wsc │ │ │ │ │ │ ├── default.yaml │ │ │ │ │ │ ├── preprocess_wsc.py │ │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ │ └── t5_utils.py │ │ │ │ ├── swag │ │ │ │ │ ├── README.md │ │ │ │ │ └── swag.yaml │ │ │ │ ├── toxigen │ │ │ │ │ ├── README.md │ │ │ │ │ ├── toxigen.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── translation │ │ │ │ │ ├── README.md │ │ │ │ │ ├── iwslt2017_ar-en.yaml │ │ │ │ │ ├── iwslt2017_en-ar.yaml │ │ │ │ │ ├── utils.py │ │ │ │ │ ├── wmt14_en-fr.yaml │ │ │ │ │ ├── wmt14_fr-en.yaml │ │ │ │ │ ├── wmt16_de-en.yaml │ │ │ │ │ ├── wmt16_en-de.yaml │ │ │ │ │ ├── wmt16_en-ro.yaml │ │ │ │ │ ├── wmt16_ro-en.yaml │ │ │ │ │ └── wmt_common_yaml │ │ │ │ ├── triviaqa │ │ │ │ │ ├── README.md │ │ │ │ │ └── default.yaml │ │ │ │ ├── truthfulqa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── truthfulqa_gen.yaml │ │ │ │ │ ├── truthfulqa_mc1.yaml │ │ │ │ │ ├── truthfulqa_mc2.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── unscramble │ │ │ │ │ ├── README.md │ │ │ │ │ ├── anagrams1.yaml │ │ │ │ │ ├── anagrams2.yaml │ │ │ │ │ ├── cycle_letters.yaml │ │ │ │ │ ├── random_insertion.yaml │ │ │ │ │ └── reversed_words.yaml │ │ │ │ ├── webqs │ │ │ │ │ ├── README.md │ │ │ │ │ ├── utils.py │ │ │ │ │ └── webqs.yaml │ │ │ │ ├── wikitext │ │ │ │ │ ├── README.md │ │ │ │ │ ├── preprocess_wikitext.py │ │ │ │ │ └── wikitext.yaml │ │ │ │ ├── winogrande │ │ │ │ │ ├── README.md │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── preprocess_winogrande.py │ │ │ │ ├── wmt2016 │ │ │ │ │ ├── README.md │ │ │ │ │ ├── metrics.py │ │ │ │ │ └── ro_en-t5_prompt.yaml │ │ │ │ ├── wsc273 │ │ │ │ │ ├── README.md │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── xcopa │ │ │ │ │ ├── README.md │ │ │ │ │ ├── default_et.yaml │ │ │ │ │ ├── default_ht.yaml │ │ │ │ │ ├── default_id.yaml │ │ │ │ │ ├── default_it.yaml │ │ │ │ │ ├── default_qu.yaml │ │ │ │ │ ├── default_sw.yaml │ │ │ │ │ ├── default_ta.yaml │ │ │ │ │ ├── default_th.yaml │ │ │ │ │ ├── default_tr.yaml │ │ │ │ │ ├── default_vi.yaml │ │ │ │ │ ├── default_zh.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── xnli │ │ │ │ │ ├── README.md │ │ │ │ │ ├── utils.py │ │ │ │ │ ├── xnli_ar.yaml │ │ │ │ │ ├── xnli_bg.yaml │ │ │ │ │ ├── xnli_common_yaml │ │ │ │ │ ├── xnli_de.yaml │ │ │ │ │ ├── xnli_el.yaml │ │ │ │ │ ├── xnli_en.yaml │ │ │ │ │ ├── xnli_es.yaml │ │ │ │ │ ├── xnli_fr.yaml │ │ │ │ │ ├── xnli_hi.yaml │ │ │ │ │ ├── xnli_ru.yaml │ │ │ │ │ ├── xnli_sw.yaml │ │ │ │ │ ├── xnli_th.yaml │ │ │ │ │ ├── xnli_tr.yaml │ │ │ │ │ ├── xnli_ur.yaml │ │ │ │ │ ├── xnli_vi.yaml │ │ │ │ │ └── xnli_zh.yaml │ │ │ │ ├── xstorycloze │ │ │ │ │ ├── README.md │ │ │ │ │ ├── default_ar.yaml │ │ │ │ │ ├── default_en.yaml │ │ │ │ │ ├── default_es.yaml │ │ │ │ │ ├── default_eu.yaml │ │ │ │ │ ├── default_hi.yaml │ │ │ │ │ ├── default_id.yaml │ │ │ │ │ ├── default_my.yaml │ │ │ │ │ ├── default_ru.yaml │ │ │ │ │ ├── default_sw.yaml │ │ │ │ │ ├── default_te.yaml │ │ │ │ │ └── default_zh.yaml │ │ │ │ └── xwinograd │ │ │ │ │ ├── README.md │ │ │ │ │ ├── utils.py │ │ │ │ │ ├── xwinograd_common_yaml │ │ │ │ │ ├── xwinograd_en.yaml │ │ │ │ │ ├── xwinograd_fr.yaml │ │ │ │ │ ├── xwinograd_jp.yaml │ │ │ │ │ ├── xwinograd_pt.yaml │ │ │ │ │ ├── xwinograd_ru.yaml │ │ │ │ │ └── xwinograd_zh.yaml │ │ │ └── utils.py │ │ │ ├── mypy.ini │ │ │ ├── pyproject.toml │ │ │ ├── requirements.txt │ │ │ ├── scripts │ │ │ ├── __init__.py │ │ │ ├── build_benchmark.py │ │ │ ├── clean_training_data │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── compress_and_package.py │ │ │ │ ├── generate_13_grams.py │ │ │ │ ├── investigate_pile.py │ │ │ │ ├── janitor_util.cpp │ │ │ │ ├── process_sorted_buckets.py │ │ │ │ └── sort_13_gram_buckets.py │ │ │ ├── cost_estimate.py │ │ │ ├── get_prompts.py │ │ │ ├── make_gpt2_test_cases.py │ │ │ ├── make_table_results.py │ │ │ ├── make_table_tasks.py │ │ │ ├── regression.py │ │ │ └── write_out.py │ │ │ ├── setup.py │ │ │ ├── templates │ │ │ └── new_yaml_task │ │ │ │ ├── README.md │ │ │ │ └── blank_yaml.yaml │ │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── models │ │ │ ├── test_gguf.py │ │ │ ├── test_huggingface.py │ │ │ └── test_vllm.py │ │ │ ├── test_evaluator.py │ │ │ ├── test_janitor.py │ │ │ ├── test_misc.py │ │ │ ├── test_tasks.py │ │ │ ├── test_utils.py │ │ │ ├── testdata │ │ │ ├── anagrams1-v0-greedy_until │ │ │ ├── anagrams2-v0-greedy_until │ │ │ ├── anli_r1-v0-loglikelihood │ │ │ ├── anli_r2-v0-loglikelihood │ │ │ ├── anli_r3-v0-loglikelihood │ │ │ ├── arc_challenge-v0-loglikelihood │ │ │ ├── arc_challenge-v2.0-loglikelihood │ │ │ ├── arc_easy-v0-loglikelihood │ │ │ ├── arithmetic_1dc-v0-loglikelihood │ │ │ ├── arithmetic_2da-v0-loglikelihood │ │ │ ├── arithmetic_2dm-v0-loglikelihood │ │ │ ├── arithmetic_2ds-v0-loglikelihood │ │ │ ├── arithmetic_3da-v0-loglikelihood │ │ │ ├── arithmetic_3ds-v0-loglikelihood │ │ │ ├── arithmetic_4da-v0-loglikelihood │ │ │ ├── arithmetic_4ds-v0-loglikelihood │ │ │ ├── arithmetic_5da-v0-loglikelihood │ │ │ ├── arithmetic_5ds-v0-loglikelihood │ │ │ ├── blimp_adjunct_island-v0-loglikelihood │ │ │ ├── blimp_anaphor_gender_agreement-v0-loglikelihood │ │ │ ├── blimp_anaphor_number_agreement-v0-loglikelihood │ │ │ ├── blimp_animate_subject_passive-v0-loglikelihood │ │ │ ├── blimp_animate_subject_trans-v0-loglikelihood │ │ │ ├── blimp_causative-v0-loglikelihood │ │ │ ├── blimp_complex_NP_island-v0-loglikelihood │ │ │ ├── blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood │ │ │ ├── blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_1-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_2-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood │ │ │ ├── blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood │ │ │ ├── blimp_distractor_agreement_relational_noun-v0-loglikelihood │ │ │ ├── blimp_distractor_agreement_relative_clause-v0-loglikelihood │ │ │ ├── blimp_drop_argument-v0-loglikelihood │ │ │ ├── blimp_ellipsis_n_bar_1-v0-loglikelihood │ │ │ ├── blimp_ellipsis_n_bar_2-v0-loglikelihood │ │ │ ├── blimp_existential_there_object_raising-v0-loglikelihood │ │ │ ├── blimp_existential_there_quantifiers_1-v0-loglikelihood │ │ │ ├── blimp_existential_there_quantifiers_2-v0-loglikelihood │ │ │ ├── blimp_existential_there_subject_raising-v0-loglikelihood │ │ │ ├── blimp_expletive_it_object_raising-v0-loglikelihood │ │ │ ├── blimp_inchoative-v0-loglikelihood │ │ │ ├── blimp_intransitive-v0-loglikelihood │ │ │ ├── blimp_irregular_past_participle_adjectives-v0-loglikelihood │ │ │ ├── blimp_irregular_past_participle_verbs-v0-loglikelihood │ │ │ ├── blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood │ │ │ ├── blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood │ │ │ ├── blimp_left_branch_island_echo_question-v0-loglikelihood │ │ │ ├── blimp_left_branch_island_simple_question-v0-loglikelihood │ │ │ ├── blimp_matrix_question_npi_licensor_present-v0-loglikelihood │ │ │ ├── blimp_npi_present_1-v0-loglikelihood │ │ │ ├── blimp_npi_present_2-v0-loglikelihood │ │ │ ├── blimp_only_npi_licensor_present-v0-loglikelihood │ │ │ ├── blimp_only_npi_scope-v0-loglikelihood │ │ │ ├── blimp_passive_1-v0-loglikelihood │ │ │ ├── blimp_passive_2-v0-loglikelihood │ │ │ ├── blimp_principle_A_c_command-v0-loglikelihood │ │ │ ├── blimp_principle_A_case_1-v0-loglikelihood │ │ │ ├── blimp_principle_A_case_2-v0-loglikelihood │ │ │ ├── blimp_principle_A_domain_1-v0-loglikelihood │ │ │ ├── blimp_principle_A_domain_2-v0-loglikelihood │ │ │ ├── blimp_principle_A_domain_3-v0-loglikelihood │ │ │ ├── blimp_principle_A_reconstruction-v0-loglikelihood │ │ │ ├── blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood │ │ │ ├── blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood │ │ │ ├── blimp_sentential_negation_npi_licensor_present-v0-loglikelihood │ │ │ ├── blimp_sentential_negation_npi_scope-v0-loglikelihood │ │ │ ├── blimp_sentential_subject_island-v0-loglikelihood │ │ │ ├── blimp_superlative_quantifiers_1-v0-loglikelihood │ │ │ ├── blimp_superlative_quantifiers_2-v0-loglikelihood │ │ │ ├── blimp_tough_vs_raising_1-v0-loglikelihood │ │ │ ├── blimp_tough_vs_raising_2-v0-loglikelihood │ │ │ ├── blimp_transitive-v0-loglikelihood │ │ │ ├── blimp_wh_island-v0-loglikelihood │ │ │ ├── blimp_wh_questions_object_gap-v0-loglikelihood │ │ │ ├── blimp_wh_questions_subject_gap-v0-loglikelihood │ │ │ ├── blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood │ │ │ ├── blimp_wh_vs_that_no_gap-v0-loglikelihood │ │ │ ├── blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood │ │ │ ├── blimp_wh_vs_that_with_gap-v0-loglikelihood │ │ │ ├── blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood │ │ │ ├── boolq-v0-loglikelihood │ │ │ ├── boolq-v1-loglikelihood │ │ │ ├── cb-v0-loglikelihood │ │ │ ├── cb-v1-loglikelihood │ │ │ ├── cola-v0-loglikelihood │ │ │ ├── copa-v0-loglikelihood │ │ │ ├── coqa-v0-greedy_until │ │ │ ├── coqa-v1-greedy_until │ │ │ ├── crows_pairs_english-v0-loglikelihood │ │ │ ├── crows_pairs_english_age-v0-loglikelihood │ │ │ ├── crows_pairs_english_autre-v0-loglikelihood │ │ │ ├── crows_pairs_english_disability-v0-loglikelihood │ │ │ ├── crows_pairs_english_gender-v0-loglikelihood │ │ │ ├── crows_pairs_english_nationality-v0-loglikelihood │ │ │ ├── crows_pairs_english_physical_appearance-v0-loglikelihood │ │ │ ├── crows_pairs_english_race_color-v0-loglikelihood │ │ │ ├── crows_pairs_english_religion-v0-loglikelihood │ │ │ ├── crows_pairs_english_sexual_orientation-v0-loglikelihood │ │ │ ├── crows_pairs_english_socioeconomic-v0-loglikelihood │ │ │ ├── crows_pairs_french-v0-loglikelihood │ │ │ ├── crows_pairs_french_age-v0-loglikelihood │ │ │ ├── crows_pairs_french_autre-v0-loglikelihood │ │ │ ├── crows_pairs_french_disability-v0-loglikelihood │ │ │ ├── crows_pairs_french_gender-v0-loglikelihood │ │ │ ├── crows_pairs_french_nationality-v0-loglikelihood │ │ │ ├── crows_pairs_french_physical_appearance-v0-loglikelihood │ │ │ ├── crows_pairs_french_race_color-v0-loglikelihood │ │ │ ├── crows_pairs_french_religion-v0-loglikelihood │ │ │ ├── crows_pairs_french_sexual_orientation-v0-loglikelihood │ │ │ ├── crows_pairs_french_socioeconomic-v0-loglikelihood │ │ │ ├── cycle_letters-v0-greedy_until │ │ │ ├── drop-v0-greedy_until │ │ │ ├── drop-v1-greedy_until │ │ │ ├── ethics_cm-v0-loglikelihood │ │ │ ├── ethics_deontology-v0-loglikelihood │ │ │ ├── ethics_justice-v0-loglikelihood │ │ │ ├── ethics_utilitarianism-v0-loglikelihood │ │ │ ├── ethics_utilitarianism_original-v0-loglikelihood │ │ │ ├── ethics_virtue-v0-loglikelihood │ │ │ ├── gguf_test_44e268d15decc4d2d0f99e57e1476269826cd3b54262f7a0981f75ddd45b25d0.pkl │ │ │ ├── gguf_test_52ea409606de8755e03cf7c79f824101a4ce64bb6e6d3df556b8a4e7a5d92418.pkl │ │ │ ├── gguf_test_8fcf3f2f52afeb2acd7c8e02c2cc3ce31a691b665d295f6c4e4bbd71c7caa1a2.pkl │ │ │ ├── gpt3_test_0deb8e9bde8e8327bbc48157f638ff3ba06b0cd816dad2beb8ad90f7fbe795c7.pkl │ │ │ ├── gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl │ │ │ ├── gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl │ │ │ ├── gpt3_test_cfd11f555a5a63b6dfa114a55a932e51b724cdd44d4842586b9ce37260bf7aaa.pkl │ │ │ ├── gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl │ │ │ ├── gsm8k-v0-greedy_until │ │ │ ├── headqa-v0-loglikelihood │ │ │ ├── headqa_en-v0-loglikelihood │ │ │ ├── headqa_es-v0-loglikelihood │ │ │ ├── hellaswag-v0-loglikelihood │ │ │ ├── hendrycksTest-abstract_algebra-v0-loglikelihood │ │ │ ├── hendrycksTest-anatomy-v0-loglikelihood │ │ │ ├── hendrycksTest-astronomy-v0-loglikelihood │ │ │ ├── hendrycksTest-business_ethics-v0-loglikelihood │ │ │ ├── hendrycksTest-clinical_knowledge-v0-loglikelihood │ │ │ ├── hendrycksTest-college_biology-v0-loglikelihood │ │ │ ├── hendrycksTest-college_chemistry-v0-loglikelihood │ │ │ ├── hendrycksTest-college_computer_science-v0-loglikelihood │ │ │ ├── hendrycksTest-college_mathematics-v0-loglikelihood │ │ │ ├── hendrycksTest-college_medicine-v0-loglikelihood │ │ │ ├── hendrycksTest-college_physics-v0-loglikelihood │ │ │ ├── hendrycksTest-computer_security-v0-loglikelihood │ │ │ ├── hendrycksTest-conceptual_physics-v0-loglikelihood │ │ │ ├── hendrycksTest-econometrics-v0-loglikelihood │ │ │ ├── hendrycksTest-electrical_engineering-v0-loglikelihood │ │ │ ├── hendrycksTest-elementary_mathematics-v0-loglikelihood │ │ │ ├── hendrycksTest-formal_logic-v0-loglikelihood │ │ │ ├── hendrycksTest-global_facts-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_biology-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_chemistry-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_computer_science-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_european_history-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_geography-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_government_and_politics-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_macroeconomics-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_mathematics-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_microeconomics-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_physics-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_psychology-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_statistics-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_us_history-v0-loglikelihood │ │ │ ├── hendrycksTest-high_school_world_history-v0-loglikelihood │ │ │ ├── hendrycksTest-human_aging-v0-loglikelihood │ │ │ ├── hendrycksTest-human_sexuality-v0-loglikelihood │ │ │ ├── hendrycksTest-international_law-v0-loglikelihood │ │ │ ├── hendrycksTest-jurisprudence-v0-loglikelihood │ │ │ ├── hendrycksTest-logical_fallacies-v0-loglikelihood │ │ │ ├── hendrycksTest-machine_learning-v0-loglikelihood │ │ │ ├── hendrycksTest-management-v0-loglikelihood │ │ │ ├── hendrycksTest-marketing-v0-loglikelihood │ │ │ ├── hendrycksTest-medical_genetics-v0-loglikelihood │ │ │ ├── hendrycksTest-miscellaneous-v0-loglikelihood │ │ │ ├── hendrycksTest-moral_disputes-v0-loglikelihood │ │ │ ├── hendrycksTest-moral_scenarios-v0-loglikelihood │ │ │ ├── hendrycksTest-nutrition-v0-loglikelihood │ │ │ ├── hendrycksTest-philosophy-v0-loglikelihood │ │ │ ├── hendrycksTest-prehistory-v0-loglikelihood │ │ │ ├── hendrycksTest-professional_accounting-v0-loglikelihood │ │ │ ├── hendrycksTest-professional_law-v0-loglikelihood │ │ │ ├── hendrycksTest-professional_medicine-v0-loglikelihood │ │ │ ├── hendrycksTest-professional_psychology-v0-loglikelihood │ │ │ ├── hendrycksTest-public_relations-v0-loglikelihood │ │ │ ├── hendrycksTest-security_studies-v0-loglikelihood │ │ │ ├── hendrycksTest-sociology-v0-loglikelihood │ │ │ ├── hendrycksTest-us_foreign_policy-v0-loglikelihood │ │ │ ├── hendrycksTest-virology-v0-loglikelihood │ │ │ ├── hendrycksTest-world_religions-v0-loglikelihood │ │ │ ├── iwslt17-ar-en-v0-greedy_until │ │ │ ├── iwslt17-en-ar-v0-greedy_until │ │ │ ├── lambada-v0-loglikelihood │ │ │ ├── lambada_cloze-v0-loglikelihood │ │ │ ├── lambada_mt_de-v0-loglikelihood │ │ │ ├── lambada_mt_en-v0-loglikelihood │ │ │ ├── lambada_mt_es-v0-loglikelihood │ │ │ ├── lambada_mt_fr-v0-loglikelihood │ │ │ ├── lambada_mt_it-v0-loglikelihood │ │ │ ├── lambada_openai-v0-loglikelihood │ │ │ ├── lambada_openai-v2.0-loglikelihood │ │ │ ├── lambada_openai_cloze-v0-loglikelihood │ │ │ ├── lambada_openai_mt_de-v0-loglikelihood │ │ │ ├── lambada_openai_mt_en-v0-loglikelihood │ │ │ ├── lambada_openai_mt_es-v0-loglikelihood │ │ │ ├── lambada_openai_mt_fr-v0-loglikelihood │ │ │ ├── lambada_openai_mt_it-v0-loglikelihood │ │ │ ├── lambada_standard-v0-loglikelihood │ │ │ ├── lambada_standard_cloze-v0-loglikelihood │ │ │ ├── logiqa-v0-loglikelihood │ │ │ ├── math_algebra-v0-greedy_until │ │ │ ├── math_algebra-v1-greedy_until │ │ │ ├── math_counting_and_prob-v0-greedy_until │ │ │ ├── math_counting_and_prob-v1-greedy_until │ │ │ ├── math_geometry-v0-greedy_until │ │ │ ├── math_geometry-v1-greedy_until │ │ │ ├── math_intermediate_algebra-v0-greedy_until │ │ │ ├── math_intermediate_algebra-v1-greedy_until │ │ │ ├── math_num_theory-v0-greedy_until │ │ │ ├── math_num_theory-v1-greedy_until │ │ │ ├── math_prealgebra-v0-greedy_until │ │ │ ├── math_prealgebra-v1-greedy_until │ │ │ ├── math_precalc-v0-greedy_until │ │ │ ├── math_precalc-v1-greedy_until │ │ │ ├── mathqa-v0-loglikelihood │ │ │ ├── mc_taco-v0-loglikelihood │ │ │ ├── mnli-v0-loglikelihood │ │ │ ├── mnli_mismatched-v0-loglikelihood │ │ │ ├── mrpc-v0-loglikelihood │ │ │ ├── multirc-v0-loglikelihood │ │ │ ├── multirc-v1-loglikelihood │ │ │ ├── mutual-v0-loglikelihood │ │ │ ├── mutual-v1-loglikelihood │ │ │ ├── mutual_plus-v0-loglikelihood │ │ │ ├── mutual_plus-v1-loglikelihood │ │ │ ├── openbookqa-v0-loglikelihood │ │ │ ├── pile_arxiv-v0-loglikelihood_rolling │ │ │ ├── pile_arxiv-v1-loglikelihood_rolling │ │ │ ├── pile_bookcorpus2-v0-loglikelihood_rolling │ │ │ ├── pile_bookcorpus2-v1-loglikelihood_rolling │ │ │ ├── pile_books3-v0-loglikelihood_rolling │ │ │ ├── pile_books3-v1-loglikelihood_rolling │ │ │ ├── pile_dm-mathematics-v0-loglikelihood_rolling │ │ │ ├── pile_dm-mathematics-v1-loglikelihood_rolling │ │ │ ├── pile_enron-v0-loglikelihood_rolling │ │ │ ├── pile_enron-v1-loglikelihood_rolling │ │ │ ├── pile_europarl-v0-loglikelihood_rolling │ │ │ ├── pile_europarl-v1-loglikelihood_rolling │ │ │ ├── pile_freelaw-v0-loglikelihood_rolling │ │ │ ├── pile_freelaw-v1-loglikelihood_rolling │ │ │ ├── pile_github-v0-loglikelihood_rolling │ │ │ ├── pile_github-v1-loglikelihood_rolling │ │ │ ├── pile_gutenberg-v0-loglikelihood_rolling │ │ │ ├── pile_gutenberg-v1-loglikelihood_rolling │ │ │ ├── pile_hackernews-v0-loglikelihood_rolling │ │ │ ├── pile_hackernews-v1-loglikelihood_rolling │ │ │ ├── pile_nih-exporter-v0-loglikelihood_rolling │ │ │ ├── pile_nih-exporter-v1-loglikelihood_rolling │ │ │ ├── pile_opensubtitles-v0-loglikelihood_rolling │ │ │ ├── pile_opensubtitles-v1-loglikelihood_rolling │ │ │ ├── pile_openwebtext2-v0-loglikelihood_rolling │ │ │ ├── pile_openwebtext2-v1-loglikelihood_rolling │ │ │ ├── pile_philpapers-v0-loglikelihood_rolling │ │ │ ├── pile_philpapers-v1-loglikelihood_rolling │ │ │ ├── pile_pile-cc-v0-loglikelihood_rolling │ │ │ ├── pile_pile-cc-v1-loglikelihood_rolling │ │ │ ├── pile_pubmed-abstracts-v0-loglikelihood_rolling │ │ │ ├── pile_pubmed-abstracts-v1-loglikelihood_rolling │ │ │ ├── pile_pubmed-central-v0-loglikelihood_rolling │ │ │ ├── pile_pubmed-central-v1-loglikelihood_rolling │ │ │ ├── pile_stackexchange-v0-loglikelihood_rolling │ │ │ ├── pile_stackexchange-v1-loglikelihood_rolling │ │ │ ├── pile_ubuntu-irc-v0-loglikelihood_rolling │ │ │ ├── pile_ubuntu-irc-v1-loglikelihood_rolling │ │ │ ├── pile_uspto-v0-loglikelihood_rolling │ │ │ ├── pile_uspto-v1-loglikelihood_rolling │ │ │ ├── pile_wikipedia-v0-loglikelihood_rolling │ │ │ ├── pile_wikipedia-v1-loglikelihood_rolling │ │ │ ├── pile_youtubesubtitles-v0-loglikelihood_rolling │ │ │ ├── pile_youtubesubtitles-v1-loglikelihood_rolling │ │ │ ├── piqa-v0-loglikelihood │ │ │ ├── prost-v0-loglikelihood │ │ │ ├── pubmedqa-v0-loglikelihood │ │ │ ├── qa4mre_2011-v0-loglikelihood │ │ │ ├── qa4mre_2012-v0-loglikelihood │ │ │ ├── qa4mre_2013-v0-loglikelihood │ │ │ ├── qnli-v0-loglikelihood │ │ │ ├── qqp-v0-loglikelihood │ │ │ ├── race-v0-loglikelihood │ │ │ ├── random_insertion-v0-greedy_until │ │ │ ├── record-v0-loglikelihood │ │ │ ├── reversed_words-v0-greedy_until │ │ │ ├── rte-v0-loglikelihood │ │ │ ├── sciq-v0-loglikelihood │ │ │ ├── squad2-v0-greedy_until │ │ │ ├── squad2-v0-loglikelihood │ │ │ ├── squad2-v1-greedy_until │ │ │ ├── squad2-v1-loglikelihood │ │ │ ├── sst-v0-loglikelihood │ │ │ ├── swag-v0-loglikelihood │ │ │ ├── textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl │ │ │ ├── textsynth_test_0c1c14571add7903b89e588c8212572b95bb57b334fc0752c89a7e045a5f63ae.pkl │ │ │ ├── textsynth_test_3092d07756f3e1d010c07524cc8a2ecba7f0c19f9e39f2aaf2bf440bfe328004.pkl │ │ │ ├── textsynth_test_434076260b6af3a46b7a5eaceec3306a5872c400a3872f744280b237455a0f8e.pkl │ │ │ ├── textsynth_test_49c47ae40e11f349f2f6b492128188b1b2bc103a421c676ee4b2142a68b43516.pkl │ │ │ ├── textsynth_test_4fd8d66a6dad7f602b40e5d7dc298d6fe329299d086a4659743a41f4a4012659.pkl │ │ │ ├── textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl │ │ │ ├── textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl │ │ │ ├── textsynth_test_7209c4617547bfe17cb9e7f5f735fe35822d650aefdc5fbeeaf0c1724effbe09.pkl │ │ │ ├── textsynth_test_7afdc285388e51094e12645f305328c759574fa3ec9751631025f8ad5ebf9f3e.pkl │ │ │ ├── textsynth_test_9d5f33dbfe1e254928c89f5ed85e4c010d888065f55a8f1b863bc1eb0340a5f2.pkl │ │ │ ├── textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl │ │ │ ├── textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl │ │ │ ├── textsynth_test_e7ad1e9f52a39e1ddd1e50f3c57ffa4546728dd150a67c0a0ddc8675c04e15d1.pkl │ │ │ ├── textsynth_test_f4bfe4beb605bd52a8ab6be3c9293639e7e2261d98de58159d15ccb83131bf4e.pkl │ │ │ ├── toxigen-v0-loglikelihood │ │ │ ├── triviaqa-v0-loglikelihood │ │ │ ├── triviaqa-v1-loglikelihood │ │ │ ├── truthfulqa_gen-v0-greedy_until │ │ │ ├── truthfulqa_gen-v1-greedy_until │ │ │ ├── truthfulqa_mc-v0-loglikelihood │ │ │ ├── truthfulqa_mc-v1-loglikelihood │ │ │ ├── webqs-v0-loglikelihood │ │ │ ├── wic-v0-loglikelihood │ │ │ ├── wikitext-v0-loglikelihood_rolling │ │ │ ├── wikitext-v1-loglikelihood_rolling │ │ │ ├── winogrande-v0-loglikelihood │ │ │ ├── wmt14-en-fr-v0-greedy_until │ │ │ ├── wmt14-fr-en-v0-greedy_until │ │ │ ├── wmt16-de-en-v0-greedy_until │ │ │ ├── wmt16-en-de-v0-greedy_until │ │ │ ├── wmt16-en-ro-v0-greedy_until │ │ │ ├── wmt16-ro-en-v0-greedy_until │ │ │ ├── wmt20-cs-en-v0-greedy_until │ │ │ ├── wmt20-de-en-v0-greedy_until │ │ │ ├── wmt20-de-fr-v0-greedy_until │ │ │ ├── wmt20-en-cs-v0-greedy_until │ │ │ ├── wmt20-en-de-v0-greedy_until │ │ │ ├── wmt20-en-iu-v0-greedy_until │ │ │ ├── wmt20-en-ja-v0-greedy_until │ │ │ ├── wmt20-en-ja-v1-greedy_until │ │ │ ├── wmt20-en-km-v0-greedy_until │ │ │ ├── wmt20-en-pl-v0-greedy_until │ │ │ ├── wmt20-en-ps-v0-greedy_until │ │ │ ├── wmt20-en-ru-v0-greedy_until │ │ │ ├── wmt20-en-ta-v0-greedy_until │ │ │ ├── wmt20-en-zh-v0-greedy_until │ │ │ ├── wmt20-en-zh-v1-greedy_until │ │ │ ├── wmt20-fr-de-v0-greedy_until │ │ │ ├── wmt20-iu-en-v0-greedy_until │ │ │ ├── wmt20-ja-en-v0-greedy_until │ │ │ ├── wmt20-km-en-v0-greedy_until │ │ │ ├── wmt20-pl-en-v0-greedy_until │ │ │ ├── wmt20-ps-en-v0-greedy_until │ │ │ ├── wmt20-ru-en-v0-greedy_until │ │ │ ├── wmt20-ta-en-v0-greedy_until │ │ │ ├── wmt20-zh-en-v0-greedy_until │ │ │ ├── wnli-v0-loglikelihood │ │ │ ├── wnli-v1-loglikelihood │ │ │ ├── wsc-v0-loglikelihood │ │ │ └── wsc273-v0-loglikelihood │ │ │ ├── tests_master │ │ │ ├── test_description.py │ │ │ ├── test_generate_13_grams.py │ │ │ ├── test_models.py │ │ │ └── test_version_stable.py │ │ │ └── utils.py │ ├── templates │ │ └── new_yaml_task │ │ │ ├── README.md │ │ │ └── blank_yaml.yaml │ └── tests │ │ ├── __init__.py │ │ ├── models │ │ ├── test_gguf.py │ │ ├── test_huggingface.py │ │ └── test_vllm.py │ │ ├── test_evaluator.py │ │ ├── test_janitor.py │ │ ├── test_misc.py │ │ ├── test_tasks.py │ │ ├── test_utils.py │ │ ├── testdata │ │ ├── anagrams1-v0-greedy_until │ │ ├── anagrams2-v0-greedy_until │ │ ├── anli_r1-v0-loglikelihood │ │ ├── anli_r2-v0-loglikelihood │ │ ├── anli_r3-v0-loglikelihood │ │ ├── arc_challenge-v0-loglikelihood │ │ ├── arc_challenge-v2.0-loglikelihood │ │ ├── arc_easy-v0-loglikelihood │ │ ├── arithmetic_1dc-v0-loglikelihood │ │ ├── arithmetic_2da-v0-loglikelihood │ │ ├── arithmetic_2dm-v0-loglikelihood │ │ ├── arithmetic_2ds-v0-loglikelihood │ │ ├── arithmetic_3da-v0-loglikelihood │ │ ├── arithmetic_3ds-v0-loglikelihood │ │ ├── arithmetic_4da-v0-loglikelihood │ │ ├── arithmetic_4ds-v0-loglikelihood │ │ ├── arithmetic_5da-v0-loglikelihood │ │ ├── arithmetic_5ds-v0-loglikelihood │ │ ├── blimp_adjunct_island-v0-loglikelihood │ │ ├── blimp_anaphor_gender_agreement-v0-loglikelihood │ │ ├── blimp_anaphor_number_agreement-v0-loglikelihood │ │ ├── blimp_animate_subject_passive-v0-loglikelihood │ │ ├── blimp_animate_subject_trans-v0-loglikelihood │ │ ├── blimp_causative-v0-loglikelihood │ │ ├── blimp_complex_NP_island-v0-loglikelihood │ │ ├── blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood │ │ ├── blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_1-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood │ │ ├── blimp_distractor_agreement_relational_noun-v0-loglikelihood │ │ ├── blimp_distractor_agreement_relative_clause-v0-loglikelihood │ │ ├── blimp_drop_argument-v0-loglikelihood │ │ ├── blimp_ellipsis_n_bar_1-v0-loglikelihood │ │ ├── blimp_ellipsis_n_bar_2-v0-loglikelihood │ │ ├── blimp_existential_there_object_raising-v0-loglikelihood │ │ ├── blimp_existential_there_quantifiers_1-v0-loglikelihood │ │ ├── blimp_existential_there_quantifiers_2-v0-loglikelihood │ │ ├── blimp_existential_there_subject_raising-v0-loglikelihood │ │ ├── blimp_expletive_it_object_raising-v0-loglikelihood │ │ ├── blimp_inchoative-v0-loglikelihood │ │ ├── blimp_intransitive-v0-loglikelihood │ │ ├── blimp_irregular_past_participle_adjectives-v0-loglikelihood │ │ ├── blimp_irregular_past_participle_verbs-v0-loglikelihood │ │ ├── blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood │ │ ├── blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood │ │ ├── blimp_left_branch_island_echo_question-v0-loglikelihood │ │ ├── blimp_left_branch_island_simple_question-v0-loglikelihood │ │ ├── blimp_matrix_question_npi_licensor_present-v0-loglikelihood │ │ ├── blimp_npi_present_1-v0-loglikelihood │ │ ├── blimp_npi_present_2-v0-loglikelihood │ │ ├── blimp_only_npi_licensor_present-v0-loglikelihood │ │ ├── blimp_only_npi_scope-v0-loglikelihood │ │ ├── blimp_passive_1-v0-loglikelihood │ │ ├── blimp_passive_2-v0-loglikelihood │ │ ├── blimp_principle_A_c_command-v0-loglikelihood │ │ ├── blimp_principle_A_case_1-v0-loglikelihood │ │ ├── blimp_principle_A_case_2-v0-loglikelihood │ │ ├── blimp_principle_A_domain_1-v0-loglikelihood │ │ ├── blimp_principle_A_domain_2-v0-loglikelihood │ │ ├── blimp_principle_A_domain_3-v0-loglikelihood │ │ ├── blimp_principle_A_reconstruction-v0-loglikelihood │ │ ├── blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood │ │ ├── blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood │ │ ├── blimp_sentential_negation_npi_licensor_present-v0-loglikelihood │ │ ├── blimp_sentential_negation_npi_scope-v0-loglikelihood │ │ ├── blimp_sentential_subject_island-v0-loglikelihood │ │ ├── blimp_superlative_quantifiers_1-v0-loglikelihood │ │ ├── blimp_superlative_quantifiers_2-v0-loglikelihood │ │ ├── blimp_tough_vs_raising_1-v0-loglikelihood │ │ ├── blimp_tough_vs_raising_2-v0-loglikelihood │ │ ├── blimp_transitive-v0-loglikelihood │ │ ├── blimp_wh_island-v0-loglikelihood │ │ ├── blimp_wh_questions_object_gap-v0-loglikelihood │ │ ├── blimp_wh_questions_subject_gap-v0-loglikelihood │ │ ├── blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood │ │ ├── blimp_wh_vs_that_no_gap-v0-loglikelihood │ │ ├── blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood │ │ ├── blimp_wh_vs_that_with_gap-v0-loglikelihood │ │ ├── blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood │ │ ├── boolq-v0-loglikelihood │ │ ├── boolq-v1-loglikelihood │ │ ├── cb-v0-loglikelihood │ │ ├── cb-v1-loglikelihood │ │ ├── cola-v0-loglikelihood │ │ ├── copa-v0-loglikelihood │ │ ├── coqa-v0-greedy_until │ │ ├── coqa-v1-greedy_until │ │ ├── crows_pairs_english-v0-loglikelihood │ │ ├── crows_pairs_english_age-v0-loglikelihood │ │ ├── crows_pairs_english_autre-v0-loglikelihood │ │ ├── crows_pairs_english_disability-v0-loglikelihood │ │ ├── crows_pairs_english_gender-v0-loglikelihood │ │ ├── crows_pairs_english_nationality-v0-loglikelihood │ │ ├── crows_pairs_english_physical_appearance-v0-loglikelihood │ │ ├── crows_pairs_english_race_color-v0-loglikelihood │ │ ├── crows_pairs_english_religion-v0-loglikelihood │ │ ├── crows_pairs_english_sexual_orientation-v0-loglikelihood │ │ ├── crows_pairs_english_socioeconomic-v0-loglikelihood │ │ ├── crows_pairs_french-v0-loglikelihood │ │ ├── crows_pairs_french_age-v0-loglikelihood │ │ ├── crows_pairs_french_autre-v0-loglikelihood │ │ ├── crows_pairs_french_disability-v0-loglikelihood │ │ ├── crows_pairs_french_gender-v0-loglikelihood │ │ ├── crows_pairs_french_nationality-v0-loglikelihood │ │ ├── crows_pairs_french_physical_appearance-v0-loglikelihood │ │ ├── crows_pairs_french_race_color-v0-loglikelihood │ │ ├── crows_pairs_french_religion-v0-loglikelihood │ │ ├── crows_pairs_french_sexual_orientation-v0-loglikelihood │ │ ├── crows_pairs_french_socioeconomic-v0-loglikelihood │ │ ├── cycle_letters-v0-greedy_until │ │ ├── drop-v0-greedy_until │ │ ├── drop-v1-greedy_until │ │ ├── ethics_cm-v0-loglikelihood │ │ ├── ethics_deontology-v0-loglikelihood │ │ ├── ethics_justice-v0-loglikelihood │ │ ├── ethics_utilitarianism-v0-loglikelihood │ │ ├── ethics_utilitarianism_original-v0-loglikelihood │ │ ├── ethics_virtue-v0-loglikelihood │ │ ├── gguf_test_44e268d15decc4d2d0f99e57e1476269826cd3b54262f7a0981f75ddd45b25d0.pkl │ │ ├── gguf_test_52ea409606de8755e03cf7c79f824101a4ce64bb6e6d3df556b8a4e7a5d92418.pkl │ │ ├── gguf_test_8fcf3f2f52afeb2acd7c8e02c2cc3ce31a691b665d295f6c4e4bbd71c7caa1a2.pkl │ │ ├── gpt3_test_0deb8e9bde8e8327bbc48157f638ff3ba06b0cd816dad2beb8ad90f7fbe795c7.pkl │ │ ├── gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl │ │ ├── gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl │ │ ├── gpt3_test_cfd11f555a5a63b6dfa114a55a932e51b724cdd44d4842586b9ce37260bf7aaa.pkl │ │ ├── gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl │ │ ├── gsm8k-v0-greedy_until │ │ ├── headqa-v0-loglikelihood │ │ ├── headqa_en-v0-loglikelihood │ │ ├── headqa_es-v0-loglikelihood │ │ ├── hellaswag-v0-loglikelihood │ │ ├── hendrycksTest-abstract_algebra-v0-loglikelihood │ │ ├── hendrycksTest-anatomy-v0-loglikelihood │ │ ├── hendrycksTest-astronomy-v0-loglikelihood │ │ ├── hendrycksTest-business_ethics-v0-loglikelihood │ │ ├── hendrycksTest-clinical_knowledge-v0-loglikelihood │ │ ├── hendrycksTest-college_biology-v0-loglikelihood │ │ ├── hendrycksTest-college_chemistry-v0-loglikelihood │ │ ├── hendrycksTest-college_computer_science-v0-loglikelihood │ │ ├── hendrycksTest-college_mathematics-v0-loglikelihood │ │ ├── hendrycksTest-college_medicine-v0-loglikelihood │ │ ├── hendrycksTest-college_physics-v0-loglikelihood │ │ ├── hendrycksTest-computer_security-v0-loglikelihood │ │ ├── hendrycksTest-conceptual_physics-v0-loglikelihood │ │ ├── hendrycksTest-econometrics-v0-loglikelihood │ │ ├── hendrycksTest-electrical_engineering-v0-loglikelihood │ │ ├── hendrycksTest-elementary_mathematics-v0-loglikelihood │ │ ├── hendrycksTest-formal_logic-v0-loglikelihood │ │ ├── hendrycksTest-global_facts-v0-loglikelihood │ │ ├── hendrycksTest-high_school_biology-v0-loglikelihood │ │ ├── hendrycksTest-high_school_chemistry-v0-loglikelihood │ │ ├── hendrycksTest-high_school_computer_science-v0-loglikelihood │ │ ├── hendrycksTest-high_school_european_history-v0-loglikelihood │ │ ├── hendrycksTest-high_school_geography-v0-loglikelihood │ │ ├── hendrycksTest-high_school_government_and_politics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_macroeconomics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_mathematics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_microeconomics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_physics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_psychology-v0-loglikelihood │ │ ├── hendrycksTest-high_school_statistics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_us_history-v0-loglikelihood │ │ ├── hendrycksTest-high_school_world_history-v0-loglikelihood │ │ ├── hendrycksTest-human_aging-v0-loglikelihood │ │ ├── hendrycksTest-human_sexuality-v0-loglikelihood │ │ ├── hendrycksTest-international_law-v0-loglikelihood │ │ ├── hendrycksTest-jurisprudence-v0-loglikelihood │ │ ├── hendrycksTest-logical_fallacies-v0-loglikelihood │ │ ├── hendrycksTest-machine_learning-v0-loglikelihood │ │ ├── hendrycksTest-management-v0-loglikelihood │ │ ├── hendrycksTest-marketing-v0-loglikelihood │ │ ├── hendrycksTest-medical_genetics-v0-loglikelihood │ │ ├── hendrycksTest-miscellaneous-v0-loglikelihood │ │ ├── hendrycksTest-moral_disputes-v0-loglikelihood │ │ ├── hendrycksTest-moral_scenarios-v0-loglikelihood │ │ ├── hendrycksTest-nutrition-v0-loglikelihood │ │ ├── hendrycksTest-philosophy-v0-loglikelihood │ │ ├── hendrycksTest-prehistory-v0-loglikelihood │ │ ├── hendrycksTest-professional_accounting-v0-loglikelihood │ │ ├── hendrycksTest-professional_law-v0-loglikelihood │ │ ├── hendrycksTest-professional_medicine-v0-loglikelihood │ │ ├── hendrycksTest-professional_psychology-v0-loglikelihood │ │ ├── hendrycksTest-public_relations-v0-loglikelihood │ │ ├── hendrycksTest-security_studies-v0-loglikelihood │ │ ├── hendrycksTest-sociology-v0-loglikelihood │ │ ├── hendrycksTest-us_foreign_policy-v0-loglikelihood │ │ ├── hendrycksTest-virology-v0-loglikelihood │ │ ├── hendrycksTest-world_religions-v0-loglikelihood │ │ ├── iwslt17-ar-en-v0-greedy_until │ │ ├── iwslt17-en-ar-v0-greedy_until │ │ ├── lambada-v0-loglikelihood │ │ ├── lambada_cloze-v0-loglikelihood │ │ ├── lambada_mt_de-v0-loglikelihood │ │ ├── lambada_mt_en-v0-loglikelihood │ │ ├── lambada_mt_es-v0-loglikelihood │ │ ├── lambada_mt_fr-v0-loglikelihood │ │ ├── lambada_mt_it-v0-loglikelihood │ │ ├── lambada_openai-v0-loglikelihood │ │ ├── lambada_openai-v2.0-loglikelihood │ │ ├── lambada_openai_cloze-v0-loglikelihood │ │ ├── lambada_openai_mt_de-v0-loglikelihood │ │ ├── lambada_openai_mt_en-v0-loglikelihood │ │ ├── lambada_openai_mt_es-v0-loglikelihood │ │ ├── lambada_openai_mt_fr-v0-loglikelihood │ │ ├── lambada_openai_mt_it-v0-loglikelihood │ │ ├── lambada_standard-v0-loglikelihood │ │ ├── lambada_standard_cloze-v0-loglikelihood │ │ ├── logiqa-v0-loglikelihood │ │ ├── math_algebra-v0-greedy_until │ │ ├── math_algebra-v1-greedy_until │ │ ├── math_counting_and_prob-v0-greedy_until │ │ ├── math_counting_and_prob-v1-greedy_until │ │ ├── math_geometry-v0-greedy_until │ │ ├── math_geometry-v1-greedy_until │ │ ├── math_intermediate_algebra-v0-greedy_until │ │ ├── math_intermediate_algebra-v1-greedy_until │ │ ├── math_num_theory-v0-greedy_until │ │ ├── math_num_theory-v1-greedy_until │ │ ├── math_prealgebra-v0-greedy_until │ │ ├── math_prealgebra-v1-greedy_until │ │ ├── math_precalc-v0-greedy_until │ │ ├── math_precalc-v1-greedy_until │ │ ├── mathqa-v0-loglikelihood │ │ ├── mc_taco-v0-loglikelihood │ │ ├── mnli-v0-loglikelihood │ │ ├── mnli_mismatched-v0-loglikelihood │ │ ├── mrpc-v0-loglikelihood │ │ ├── multirc-v0-loglikelihood │ │ ├── multirc-v1-loglikelihood │ │ ├── mutual-v0-loglikelihood │ │ ├── mutual-v1-loglikelihood │ │ ├── mutual_plus-v0-loglikelihood │ │ ├── mutual_plus-v1-loglikelihood │ │ ├── openbookqa-v0-loglikelihood │ │ ├── pile_arxiv-v0-loglikelihood_rolling │ │ ├── pile_arxiv-v1-loglikelihood_rolling │ │ ├── pile_bookcorpus2-v0-loglikelihood_rolling │ │ ├── pile_bookcorpus2-v1-loglikelihood_rolling │ │ ├── pile_books3-v0-loglikelihood_rolling │ │ ├── pile_books3-v1-loglikelihood_rolling │ │ ├── pile_dm-mathematics-v0-loglikelihood_rolling │ │ ├── pile_dm-mathematics-v1-loglikelihood_rolling │ │ ├── pile_enron-v0-loglikelihood_rolling │ │ ├── pile_enron-v1-loglikelihood_rolling │ │ ├── pile_europarl-v0-loglikelihood_rolling │ │ ├── pile_europarl-v1-loglikelihood_rolling │ │ ├── pile_freelaw-v0-loglikelihood_rolling │ │ ├── pile_freelaw-v1-loglikelihood_rolling │ │ ├── pile_github-v0-loglikelihood_rolling │ │ ├── pile_github-v1-loglikelihood_rolling │ │ ├── pile_gutenberg-v0-loglikelihood_rolling │ │ ├── pile_gutenberg-v1-loglikelihood_rolling │ │ ├── pile_hackernews-v0-loglikelihood_rolling │ │ ├── pile_hackernews-v1-loglikelihood_rolling │ │ ├── pile_nih-exporter-v0-loglikelihood_rolling │ │ ├── pile_nih-exporter-v1-loglikelihood_rolling │ │ ├── pile_opensubtitles-v0-loglikelihood_rolling │ │ ├── pile_opensubtitles-v1-loglikelihood_rolling │ │ ├── pile_openwebtext2-v0-loglikelihood_rolling │ │ ├── pile_openwebtext2-v1-loglikelihood_rolling │ │ ├── pile_philpapers-v0-loglikelihood_rolling │ │ ├── pile_philpapers-v1-loglikelihood_rolling │ │ ├── pile_pile-cc-v0-loglikelihood_rolling │ │ ├── pile_pile-cc-v1-loglikelihood_rolling │ │ ├── pile_pubmed-abstracts-v0-loglikelihood_rolling │ │ ├── pile_pubmed-abstracts-v1-loglikelihood_rolling │ │ ├── pile_pubmed-central-v0-loglikelihood_rolling │ │ ├── pile_pubmed-central-v1-loglikelihood_rolling │ │ ├── pile_stackexchange-v0-loglikelihood_rolling │ │ ├── pile_stackexchange-v1-loglikelihood_rolling │ │ ├── pile_ubuntu-irc-v0-loglikelihood_rolling │ │ ├── pile_ubuntu-irc-v1-loglikelihood_rolling │ │ ├── pile_uspto-v0-loglikelihood_rolling │ │ ├── pile_uspto-v1-loglikelihood_rolling │ │ ├── pile_wikipedia-v0-loglikelihood_rolling │ │ ├── pile_wikipedia-v1-loglikelihood_rolling │ │ ├── pile_youtubesubtitles-v0-loglikelihood_rolling │ │ ├── pile_youtubesubtitles-v1-loglikelihood_rolling │ │ ├── piqa-v0-loglikelihood │ │ ├── prost-v0-loglikelihood │ │ ├── pubmedqa-v0-loglikelihood │ │ ├── qa4mre_2011-v0-loglikelihood │ │ ├── qa4mre_2012-v0-loglikelihood │ │ ├── qa4mre_2013-v0-loglikelihood │ │ ├── qnli-v0-loglikelihood │ │ ├── qqp-v0-loglikelihood │ │ ├── race-v0-loglikelihood │ │ ├── random_insertion-v0-greedy_until │ │ ├── record-v0-loglikelihood │ │ ├── reversed_words-v0-greedy_until │ │ ├── rte-v0-loglikelihood │ │ ├── sciq-v0-loglikelihood │ │ ├── squad2-v0-greedy_until │ │ ├── squad2-v0-loglikelihood │ │ ├── squad2-v1-greedy_until │ │ ├── squad2-v1-loglikelihood │ │ ├── sst-v0-loglikelihood │ │ ├── swag-v0-loglikelihood │ │ ├── textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl │ │ ├── textsynth_test_0c1c14571add7903b89e588c8212572b95bb57b334fc0752c89a7e045a5f63ae.pkl │ │ ├── textsynth_test_3092d07756f3e1d010c07524cc8a2ecba7f0c19f9e39f2aaf2bf440bfe328004.pkl │ │ ├── textsynth_test_434076260b6af3a46b7a5eaceec3306a5872c400a3872f744280b237455a0f8e.pkl │ │ ├── textsynth_test_49c47ae40e11f349f2f6b492128188b1b2bc103a421c676ee4b2142a68b43516.pkl │ │ ├── textsynth_test_4fd8d66a6dad7f602b40e5d7dc298d6fe329299d086a4659743a41f4a4012659.pkl │ │ ├── textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl │ │ ├── textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl │ │ ├── textsynth_test_7209c4617547bfe17cb9e7f5f735fe35822d650aefdc5fbeeaf0c1724effbe09.pkl │ │ ├── textsynth_test_7afdc285388e51094e12645f305328c759574fa3ec9751631025f8ad5ebf9f3e.pkl │ │ ├── textsynth_test_9d5f33dbfe1e254928c89f5ed85e4c010d888065f55a8f1b863bc1eb0340a5f2.pkl │ │ ├── textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl │ │ ├── textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl │ │ ├── textsynth_test_e7ad1e9f52a39e1ddd1e50f3c57ffa4546728dd150a67c0a0ddc8675c04e15d1.pkl │ │ ├── textsynth_test_f4bfe4beb605bd52a8ab6be3c9293639e7e2261d98de58159d15ccb83131bf4e.pkl │ │ ├── toxigen-v0-loglikelihood │ │ ├── triviaqa-v0-loglikelihood │ │ ├── triviaqa-v1-loglikelihood │ │ ├── truthfulqa_gen-v0-greedy_until │ │ ├── truthfulqa_gen-v1-greedy_until │ │ ├── truthfulqa_mc-v0-loglikelihood │ │ ├── truthfulqa_mc-v1-loglikelihood │ │ ├── webqs-v0-loglikelihood │ │ ├── wic-v0-loglikelihood │ │ ├── wikitext-v0-loglikelihood_rolling │ │ ├── wikitext-v1-loglikelihood_rolling │ │ ├── winogrande-v0-loglikelihood │ │ ├── wmt14-en-fr-v0-greedy_until │ │ ├── wmt14-fr-en-v0-greedy_until │ │ ├── wmt16-de-en-v0-greedy_until │ │ ├── wmt16-en-de-v0-greedy_until │ │ ├── wmt16-en-ro-v0-greedy_until │ │ ├── wmt16-ro-en-v0-greedy_until │ │ ├── wmt20-cs-en-v0-greedy_until │ │ ├── wmt20-de-en-v0-greedy_until │ │ ├── wmt20-de-fr-v0-greedy_until │ │ ├── wmt20-en-cs-v0-greedy_until │ │ ├── wmt20-en-de-v0-greedy_until │ │ ├── wmt20-en-iu-v0-greedy_until │ │ ├── wmt20-en-ja-v0-greedy_until │ │ ├── wmt20-en-ja-v1-greedy_until │ │ ├── wmt20-en-km-v0-greedy_until │ │ ├── wmt20-en-pl-v0-greedy_until │ │ ├── wmt20-en-ps-v0-greedy_until │ │ ├── wmt20-en-ru-v0-greedy_until │ │ ├── wmt20-en-ta-v0-greedy_until │ │ ├── wmt20-en-zh-v0-greedy_until │ │ ├── wmt20-en-zh-v1-greedy_until │ │ ├── wmt20-fr-de-v0-greedy_until │ │ ├── wmt20-iu-en-v0-greedy_until │ │ ├── wmt20-ja-en-v0-greedy_until │ │ ├── wmt20-km-en-v0-greedy_until │ │ ├── wmt20-pl-en-v0-greedy_until │ │ ├── wmt20-ps-en-v0-greedy_until │ │ ├── wmt20-ru-en-v0-greedy_until │ │ ├── wmt20-ta-en-v0-greedy_until │ │ ├── wmt20-zh-en-v0-greedy_until │ │ ├── wnli-v0-loglikelihood │ │ ├── wnli-v1-loglikelihood │ │ ├── wsc-v0-loglikelihood │ │ └── wsc273-v0-loglikelihood │ │ ├── tests_master │ │ ├── test_description.py │ │ ├── test_generate_13_grams.py │ │ ├── test_models.py │ │ └── test_version_stable.py │ │ └── utils.py ├── metrics.py ├── not_real_drop_lm_eval │ ├── __pycache__ │ │ └── modify_llama.cpython-310.pyc │ └── modify_llama.py ├── openbookqa-5.jsonl ├── run_helm.py ├── run_lm_eval_harness_classification.py ├── run_lm_eval_harness_generation.py ├── run_piqa.sh ├── run_pred_long_bench_sample.py ├── tasks │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── eval_harness.cpython-310.pyc │ │ └── util.cpython-310.pyc │ ├── eval_harness.py │ └── util.py └── utils │ ├── __pycache__ │ ├── data.cpython-310.pyc │ └── process_args.cpython-310.pyc │ ├── data.py │ ├── metrics.py │ └── process_args.py ├── README.md └── requirements.txt /D2O.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/D2O.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LICENSE -------------------------------------------------------------------------------- /LLM_merge_new/.run_pred_long_bench.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/.run_pred_long_bench.py.swp -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_drop.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_merge.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_d2o.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_d2o.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_drop.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_merge.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_full.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_full.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_new.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_streaming.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop_merge.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_local.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/modeling_llama_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_streaming.py -------------------------------------------------------------------------------- /LLM_merge_new/LMEval_kv_token_merge/v433_modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/v433_modeling_llama.py -------------------------------------------------------------------------------- /LLM_merge_new/__pycache__/metrics.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/__pycache__/metrics.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_h2o_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_h2o_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_h2o_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_h2o_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_merge_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_merge_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_merge_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_long_merge_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.2.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.4.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.6.sh -------------------------------------------------------------------------------- /LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.8.sh -------------------------------------------------------------------------------- /LLM_merge_new/config/dataset2maxlen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/dataset2maxlen.json -------------------------------------------------------------------------------- /LLM_merge_new/config/dataset2prompt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/dataset2prompt.json -------------------------------------------------------------------------------- /LLM_merge_new/config/model2maxlen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/model2maxlen.json -------------------------------------------------------------------------------- /LLM_merge_new/config/model2path.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/model2path.json -------------------------------------------------------------------------------- /LLM_merge_new/data/copa-5.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/copa-5.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/mt_bench.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/mt_bench.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/openbookqa-5.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/openbookqa-5.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/piqa-5.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/piqa-5.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/summarization_data/xsum_0shot.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/summarization_data/xsum_0shot.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/summarization_data/xsum_3shot.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/summarization_data/xsum_3shot.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/summarization_data/xsum_5shot.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/summarization_data/xsum_5shot.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/xsum.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/xsum.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/data/xsum_opt.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/xsum_opt.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/eval_long_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/eval_long_bench.py -------------------------------------------------------------------------------- /LLM_merge_new/evaluate_task_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/evaluate_task_result.py -------------------------------------------------------------------------------- /LLM_merge_new/generate_task_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/generate_task_data.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.github/workflows/python-publish.yml -------------------------------------------------------------------------------- /LLM_merge_new/helm/.github/workflows/test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.github/workflows/test.yml -------------------------------------------------------------------------------- /LLM_merge_new/helm/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.gitignore -------------------------------------------------------------------------------- /LLM_merge_new/helm/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LLM_merge_new/helm/.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.readthedocs.yaml -------------------------------------------------------------------------------- /LLM_merge_new/helm/CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/CHANGELOG.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/LICENSE -------------------------------------------------------------------------------- /LLM_merge_new/helm/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/MANIFEST.in -------------------------------------------------------------------------------- /LLM_merge_new/helm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/README.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/command/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/command/eval.sh -------------------------------------------------------------------------------- /LLM_merge_new/helm/command/get_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/command/get_data.sh -------------------------------------------------------------------------------- /LLM_merge_new/helm/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/demo.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/adding_new_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/adding_new_models.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/benchmark.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/benchmark.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/code.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/code.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/developer_setup.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/developer_setup.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/docstrings.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/docstrings.css -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/huggingface_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/huggingface_models.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/index.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/installation.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/metrics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/metrics.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/mkdocs_macros.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/mkdocs_macros.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/models.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/perturbations.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/perturbations.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/proxy-server.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/proxy-server.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/quick_start.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/quick_start.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/requirements.txt -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/scenarios.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/scenarios.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/schemas.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/schemas.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/docs/tutorial.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/tutorial.md -------------------------------------------------------------------------------- /LLM_merge_new/helm/mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/mkdocs.yml -------------------------------------------------------------------------------- /LLM_merge_new/helm/pre-commit-venv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/pre-commit-venv.sh -------------------------------------------------------------------------------- /LLM_merge_new/helm/pre-commit.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/pre-commit.sh -------------------------------------------------------------------------------- /LLM_merge_new/helm/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/pyproject.toml -------------------------------------------------------------------------------- /LLM_merge_new/helm/requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/requirements-dev.txt -------------------------------------------------------------------------------- /LLM_merge_new/helm/requirements-freeze.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/requirements-freeze.txt -------------------------------------------------------------------------------- /LLM_merge_new/helm/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/requirements.txt -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/cache/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/cache/copy_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/copy_cache.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/cache/fix_anthropic_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/fix_anthropic_cache.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/cache/fix_together_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/fix_together_cache.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/cache/remove_together_api_entries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/remove_together_api_entries.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/efficiency/generate_instances.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/efficiency/generate_instances.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/efficiency/generate_run_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/efficiency/generate_run_specs.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/estimate_cost.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/estimate_cost.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/fact_completion/README.MD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/README.MD -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/fact_completion/create_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/create_benchmark.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/fact_completion/fetch_triples_and_aliases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/fetch_triples_and_aliases.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/fact_completion/filter_triples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/filter_triples.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/fact_completion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/helm-run-all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/helm-run-all.sh -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/offline_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/offline_eval/export_requests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/offline_eval/export_requests.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/offline_eval/import_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/offline_eval/import_results.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/scripts/verify_reproducibility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/verify_reproducibility.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/setup.cfg -------------------------------------------------------------------------------- /LLM_merge_new/helm/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/setup.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/adapter_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapter_spec.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/adapter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/test_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/test_adapter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/prompt.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/request_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/request_state.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/adaptation/scenario_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/scenario_state.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/augmentations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/augmentations/data_augmenter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/data_augmenter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/augmentations/gender_perturbation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/gender_perturbation.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/augmentations/perturbation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/perturbation.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/augmentations/space_perturbation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/space_perturbation.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/augmentations/test_perturbation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/test_perturbation.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/augmentations/typos_perturbation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/typos_perturbation.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/contamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/contamination/contamination_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/contamination_stats.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/contamination/light_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/light_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/contamination/light_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/light_tokenizer.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/contamination/load_documents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/load_documents.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/data_preprocessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/data_preprocessor.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/executor.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/basic_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/basic_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/bbq_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/bbq_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/bias_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/bias_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/bias_word_lists.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/bias_word_lists.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/classification_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/classification_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics_helper.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/copyright_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/copyright_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/disinformation_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/disinformation_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/dry_run_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/dry_run_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/metric.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/metric_name.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/metric_name.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/metric_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/metric_service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/numeracy_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/numeracy_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/ranking_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/ranking_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/statistic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/statistic.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/summac/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/summac/model_summac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/summac/model_summac.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/summac/utils_misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/summac/utils_misc.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/summarization_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/summarization_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/test_bias_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_bias_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/test_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_metric.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/test_numeracy_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_numeracy_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/test_statistic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_statistic.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/tokens/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/metrics/toxicity_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/toxicity_metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/contamination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/contamination.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/create_plots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/create_plots.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_display.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_display.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_entry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_entry.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_chat_gpt.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_chat_gpt.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_cnn_opt.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_cnn_opt.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_extra.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_extra.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_gpu.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_gpu.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_small.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_small.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_tiny.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_tiny.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/schema.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/summarize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/summarize.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/table.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/test_contamination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/test_contamination.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/test_create_plots.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/test_create_plots.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/test_run_entry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/test_run_entry.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/presentation/xsum/run_specs_opt.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/xsum/run_specs_opt.conf -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/run.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/run_expander.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/run_expander.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/run_specs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/run_specs.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/runner.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/babi_qa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/babi_qa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/bbq_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/bbq_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/big_bench_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/big_bench_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/blimp_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/blimp_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/bold_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/bold_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/boolq_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/boolq_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/civil_comments_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/civil_comments_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario_helper.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/commonsense_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/commonsense_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/copyright_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/copyright_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/covid_dialog_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/covid_dialog_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/dialogue_scenarios.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/dialogue_scenarios.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/disinformation_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/disinformation_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/dyck_language_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/dyck_language_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/gsm_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/gsm_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/ice_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/ice_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/imdb_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/imdb_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/legal_support_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/legal_support_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/lex_glue_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/lex_glue_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/lextreme_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/lextreme_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/lsat_qa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/lsat_qa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/math_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/math_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/me_q_sum_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/me_q_sum_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/med_dialog_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_dialog_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/med_mcqa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_mcqa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/med_qa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_qa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/mmlu_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/mmlu_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/msmarco_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/msmarco_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/narrativeqa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/narrativeqa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/natural_qa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/natural_qa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/newsqa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/newsqa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/numeracy_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/numeracy_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/opinions_qa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/opinions_qa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/pubmed_qa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/pubmed_qa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/quac_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/quac_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/raft_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/raft_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/simple_scenarios.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/simple_scenarios.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/summarization_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/summarization_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/test_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/test_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/the_pile_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/the_pile_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/truthful_qa_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/truthful_qa_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/twitter_aae_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/twitter_aae_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/wikifact_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/wikifact_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/wikitext_103_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/wikitext_103_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/scenarios/wmt_14_scenario.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/wmt_14_scenario.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/server.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.css -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/contamination.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/contamination.yaml -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/general.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/general.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/images/crfm-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/crfm-logo.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo-simple.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/ai21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/ai21.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/meta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/meta.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/images/taxonomy-scenarios.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/taxonomy-scenarios.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/index.html -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/info-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/info-icon.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/json-urls-root.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/json-urls-root.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/json-urls.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/json-urls.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/plot-captions.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/plot-captions.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/schema.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/schema.yaml -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/static/utils.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/utils.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/test_data_preprocessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/test_data_preprocessor.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/test_run_expander.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/test_run_expander.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/window_services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/window_services/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/window_services/test_utils.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/window_services/tokenizer_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/window_services/tokenizer_service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/benchmark/window_services/window_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/window_services/window_service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/authentication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/authentication.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/cache.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/codec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/codec.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/critique_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/critique_request.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/general.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/hierarchical_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/hierarchical_logger.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/object_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/object_spec.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/perspective_api_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/perspective_api_request.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/request.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/test_cache.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/test_codec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/test_codec.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/test_general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/test_general.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/common/tokenization_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/tokenization_request.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/accounts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/accounts.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/cli.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/ai21_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/ai21_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/aleph_alpha_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/aleph_alpha_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/anthropic_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/anthropic_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/auto_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/auto_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/chat_gpt_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/chat_gpt_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/cohere_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/cohere_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/critique_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/critique_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/google_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/google_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/goose_ai_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/goose_ai_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/huggingface_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/huggingface_model_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_model_registry.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/huggingface_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_tokenizer.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/ice_tokenizer_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/ice_tokenizer_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/megatron_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/megatron_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/microsoft_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/microsoft_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/openai_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/openai_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/palmyra_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/palmyra_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/perspective_api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/perspective_api_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/remote_model_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/remote_model_registry.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/simple_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/simple_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/test_anthropic_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_anthropic_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/test_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_tokenizer.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/test_ice_tokenizer_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_ice_tokenizer_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/test_yalm_tokenizer_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_yalm_tokenizer_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/together_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/together_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/voc_100b.sp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/voc_100b.sp -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer_client.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/example_queries.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/example_queries.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/models.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/query.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/retry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/retry.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/server.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/services/remote_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/remote_service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/services/server_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/server_service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/services/service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/services/test_remote_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/test_remote_service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/services/test_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/test_service.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/static/general.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/general.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/static/help.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/help.html -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/static/index.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/index.css -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/static/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/index.html -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/static/index.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/index.js -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/static/info-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/info-icon.png -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/test_models.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/test_retry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/test_retry.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/ai21_token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/ai21_token_counter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/auto_token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/auto_token_counter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/cohere_token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/cohere_token_counter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/free_token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/free_token_counter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/gooseai_token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/gooseai_token_counter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/openai_token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/openai_token_counter.py -------------------------------------------------------------------------------- /LLM_merge_new/helm/src/helm/proxy/token_counters/token_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/token_counter.py -------------------------------------------------------------------------------- /LLM_merge_new/kv_token_merge/__pycache__/modify_llama.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/__pycache__/modify_llama.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/kv_token_merge/__pycache__/stream.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/__pycache__/stream.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/kv_token_merge/modify_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/modify_llama.py -------------------------------------------------------------------------------- /LLM_merge_new/kv_token_merge/modify_llama_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/modify_llama_merge.py -------------------------------------------------------------------------------- /LLM_merge_new/kv_token_merge/stream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/stream.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.coveragerc -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.flake8 -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/.github/workflows/new_tasks.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.github/workflows/new_tasks.yml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.github/workflows/unit_tests.yml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.gitignore -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/CITATION.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/CITATION.bib -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @haileyschoelkopf @lintangsutawika @StellaAthena 2 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/LICENSE.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/docs/decontamination.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/decontamination.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/docs/interface.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/interface.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/docs/model_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/model_guide.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/docs/new_task_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/new_task_guide.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/docs/task_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/task_guide.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/examples/lm-eval-overview.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/examples/lm-eval-overview.ipynb -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/ignore.txt: -------------------------------------------------------------------------------- 1 | ROUGE 2 | rouge 3 | nin 4 | maka 5 | mor 6 | te 7 | ond 8 | extraversion 9 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/__main__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/filter.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/instance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/instance.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/model.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/registry.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/samplers.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/api/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/task.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/archiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/archiver.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/janitor.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/evaluator.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/filters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/filters/decontamination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/decontamination.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/filters/extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/extraction.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/filters/selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/selection.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/filters/transformation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/transformation.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/anthropic_llms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/anthropic_llms.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/dummy.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/gguf.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/huggingface.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/openai_completions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/openai_completions.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/textsynth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/textsynth.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/models/vllm_causallms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/vllm_causallms.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/prompts/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r1.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r2.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r3.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_challenge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_challenge.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_easy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_easy.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arithmetic/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arithmetic/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/babi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/babi.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/_generate_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/_generate_configs.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/fewshot/snarks.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/fewshot/snarks.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/zeroshot/snarks.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/zeroshot/snarks.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/belebele/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/belebele/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/pythia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/pythia.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/t0_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/t0_eval.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bigbench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bigbench/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/ceval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/ceval/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_gr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_gr.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_li.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_li.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rch.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcs.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcss.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcss.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_wr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_wr.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/cola/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/cola/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/mismatch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/mismatch.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mrpc/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mrpc/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qnli/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qnli/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qqp/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qqp/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/rte/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/rte/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/sst/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/sst/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/wnli/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/wnli/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k-cot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k-cot.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_en.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_es.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_es.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/hellaswag.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/hellaswag.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada_cloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada_cloze/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/logiqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/logiqa.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/utils_logiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/utils_logiqa.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logieval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logieval.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logiqa2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logiqa2.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/utils_logiqa2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/utils_logiqa2.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/mathqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/mathqa.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/direct/direct_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/direct/direct_yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/en_cot/cot_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/en_cot/cot_yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/native_cot/cot_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/native_cot/cot_yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/multual_plus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/multual_plus.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/mutual.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/mutual.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/nq_open/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/nq_open/nq_open.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/nq_open/nq_open.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/openbookqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/openbookqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_de.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_de.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_en.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_es.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_es.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_fr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_fr.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ja.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ja.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ko.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ko.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_zh.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_arxiv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_arxiv.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_books3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_books3.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_enron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_enron.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_europarl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_europarl.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_freelaw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_freelaw.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_github.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_github.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_gutenberg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_gutenberg.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_pile-cc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_pile-cc.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_uspto.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_uspto.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_wikipedia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_wikipedia.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/piqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/piqa.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_in.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_in.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_out.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_out.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/prost/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/prost/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/pubmedqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/pubmedqa.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2011.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2011.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2012.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2012.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2013.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2013.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/bool.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/bool.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/freeform.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/freeform.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/race.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/race.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/sciq.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/sciq.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/scrolls.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/scrolls.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/task.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/default.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/default.yml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/task.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/storycloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/storycloze/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/super_glue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/super_glue/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/swag.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/swag.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/toxigen.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/toxigen.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/unscramble/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/unscramble/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/webqs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/webqs.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/wikitext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/wikitext.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/winogrande/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/winogrande/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/default.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_et.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_et.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ht.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ht.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_id.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_id.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_it.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_it.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_qu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_qu.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_sw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_sw.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ta.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ta.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_th.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_tr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_tr.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_vi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_vi.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_zh.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ar.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_bg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_bg.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_common_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_common_yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_de.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_de.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_el.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_el.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_en.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_es.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_es.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_fr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_fr.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_hi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_hi.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ru.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ru.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_sw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_sw.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_th.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_tr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_tr.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ur.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ur.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_vi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_vi.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_zh.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xstorycloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xstorycloze/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/lm_eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/mypy.ini -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/pyproject.toml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/build_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/build_benchmark.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/clean_training_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/cost_estimate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/cost_estimate.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/get_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/get_prompts.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/make_gpt2_test_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/make_gpt2_test_cases.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/make_table_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/make_table_results.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/make_table_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/make_table_tasks.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/regression.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/scripts/write_out.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/write_out.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/setup.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.coveragerc -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.flake8 -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.gitignore -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/CITATION.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/CITATION.bib -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @haileyschoelkopf @lintangsutawika @StellaAthena 2 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/LICENSE.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/decontamination.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/decontamination.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/interface.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/interface.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/model_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/model_guide.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/new_task_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/new_task_guide.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/task_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/task_guide.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/ignore.txt: -------------------------------------------------------------------------------- 1 | ROUGE 2 | rouge 3 | nin 4 | maka 5 | mor 6 | te 7 | ond 8 | extraversion 9 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__main__.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/filter.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/instance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/instance.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/model.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/registry.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/samplers.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/task.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/decontamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/evaluator.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/dummy.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/gguf.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/tasks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/tasks/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/tasks/nq_open/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/mypy.ini -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/pyproject.toml -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/clean_training_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/cost_estimate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/cost_estimate.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/get_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/get_prompts.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/regression.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/write_out.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/write_out.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/setup.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/templates/new_yaml_task/blank_yaml.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_evaluator.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_janitor.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_misc.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_tasks.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/templates/new_yaml_task/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/templates/new_yaml_task/README.md -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/templates/new_yaml_task/blank_yaml.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/models/test_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/models/test_gguf.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/models/test_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/models/test_huggingface.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/models/test_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/models/test_vllm.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/test_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_evaluator.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/test_janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_janitor.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/test_misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_misc.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/test_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_tasks.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_utils.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/race-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/race-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/rte-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/rte-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/sciq-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/sciq-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/wic-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wic-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v1-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/testdata/wsc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wsc-v0-loglikelihood -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/tests_master/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/tests_master/test_models.py -------------------------------------------------------------------------------- /LLM_merge_new/lm-evaluation-harness/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/utils.py -------------------------------------------------------------------------------- /LLM_merge_new/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/not_real_drop_lm_eval/modify_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/not_real_drop_lm_eval/modify_llama.py -------------------------------------------------------------------------------- /LLM_merge_new/openbookqa-5.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/openbookqa-5.jsonl -------------------------------------------------------------------------------- /LLM_merge_new/run_helm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_helm.py -------------------------------------------------------------------------------- /LLM_merge_new/run_lm_eval_harness_classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_lm_eval_harness_classification.py -------------------------------------------------------------------------------- /LLM_merge_new/run_lm_eval_harness_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_lm_eval_harness_generation.py -------------------------------------------------------------------------------- /LLM_merge_new/run_piqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_piqa.sh -------------------------------------------------------------------------------- /LLM_merge_new/run_pred_long_bench_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_pred_long_bench_sample.py -------------------------------------------------------------------------------- /LLM_merge_new/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__init__.py -------------------------------------------------------------------------------- /LLM_merge_new/tasks/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/tasks/__pycache__/eval_harness.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__pycache__/eval_harness.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/tasks/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/tasks/eval_harness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/eval_harness.py -------------------------------------------------------------------------------- /LLM_merge_new/tasks/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/util.py -------------------------------------------------------------------------------- /LLM_merge_new/utils/__pycache__/data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/__pycache__/data.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/utils/__pycache__/process_args.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/__pycache__/process_args.cpython-310.pyc -------------------------------------------------------------------------------- /LLM_merge_new/utils/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/data.py -------------------------------------------------------------------------------- /LLM_merge_new/utils/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/metrics.py -------------------------------------------------------------------------------- /LLM_merge_new/utils/process_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/process_args.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/README.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/requirements.txt --------------------------------------------------------------------------------