├── D2O.png
├── LICENSE
├── LLM_merge_new
    ├── .run_pred_long_bench.py.swp
    ├── LMEval_kv_token_merge
    │   ├── __pycache__
    │   │   ├── modeling_llama.cpython-310.pyc
    │   │   ├── modeling_llama3.cpython-310.pyc
    │   │   ├── modeling_llama3_70b_drop.cpython-310.pyc
    │   │   ├── modeling_llama3_7b_13b_d2o.cpython-310.pyc
    │   │   ├── modeling_llama3_7b_13b_drop.cpython-310.pyc
    │   │   ├── modeling_llama3_7b_13b_merge.cpython-310.pyc
    │   │   ├── modeling_llama3_full.cpython-310.pyc
    │   │   ├── modeling_llama3_new.cpython-310.pyc
    │   │   ├── modeling_llama3_streaming.cpython-310.pyc
    │   │   ├── modeling_llama_drop.cpython-310.pyc
    │   │   ├── modeling_llama_drop_merge.cpython-310.pyc
    │   │   ├── modeling_llama_streaming.cpython-310.pyc
    │   │   ├── v433_modeling_falcon.cpython-310.pyc
    │   │   ├── v433_modeling_llama.cpython-310.pyc
    │   │   ├── v433_modeling_mistral.cpython-310.pyc
    │   │   ├── v436_modeling_falcon.cpython-310.pyc
    │   │   ├── v436_modeling_falcon_drop.cpython-310.pyc
    │   │   ├── v436_modeling_falcon_merge.cpython-310.pyc
    │   │   ├── v436_modeling_falcon_streaming.cpython-310.pyc
    │   │   ├── v436_modeling_mistral.cpython-310.pyc
    │   │   ├── v436_modeling_mistral_drop.cpython-310.pyc
    │   │   ├── v436_modeling_mistral_merge.cpython-310.pyc
    │   │   └── v436_modeling_mistral_streaming.cpython-310.pyc
    │   ├── modeling_llama.py
    │   ├── modeling_llama3_70b_drop.py
    │   ├── modeling_llama3_70b_merge.py
    │   ├── modeling_llama3_7b_13b_d2o.py
    │   ├── modeling_llama3_7b_13b_drop.py
    │   ├── modeling_llama3_7b_13b_merge.py
    │   ├── modeling_llama3_full.py
    │   ├── modeling_llama3_new.py
    │   ├── modeling_llama3_streaming.py
    │   ├── modeling_llama_drop.py
    │   ├── modeling_llama_drop_merge.py
    │   ├── modeling_llama_local.py
    │   ├── modeling_llama_streaming.py
    │   └── v433_modeling_llama.py
    ├── __pycache__
    │   └── metrics.cpython-310.pyc
    ├── bash_experiments
    │   ├── run_osc_coqa_h2o_0.2.sh
    │   ├── run_osc_coqa_h2o_0.4.sh
    │   ├── run_osc_coqa_h2o_0.6.sh
    │   ├── run_osc_coqa_h2o_0.8.sh
    │   ├── run_osc_coqa_merge_0.2.sh
    │   ├── run_osc_coqa_merge_0.4.sh
    │   ├── run_osc_coqa_merge_0.6.sh
    │   ├── run_osc_coqa_merge_0.8.sh
    │   ├── run_osc_gsm8k_h2o_0.2.sh
    │   ├── run_osc_gsm8k_h2o_0.4.sh
    │   ├── run_osc_gsm8k_h2o_0.6.sh
    │   ├── run_osc_gsm8k_h2o_0.8.sh
    │   ├── run_osc_gsm8k_merge_0.2.sh
    │   ├── run_osc_gsm8k_merge_0.4.sh
    │   ├── run_osc_gsm8k_merge_0.6.sh
    │   ├── run_osc_gsm8k_merge_0.8.sh
    │   ├── run_osc_long_h2o_0.2.sh
    │   ├── run_osc_long_h2o_0.4.sh
    │   ├── run_osc_long_h2o_0.6.sh
    │   ├── run_osc_long_h2o_0.8.sh
    │   ├── run_osc_long_merge_0.2.sh
    │   ├── run_osc_long_merge_0.4.sh
    │   ├── run_osc_long_merge_0.6.sh
    │   ├── run_osc_long_merge_0.8.sh
    │   ├── run_osc_truthful_h2o_0.2.sh
    │   ├── run_osc_truthful_h2o_0.4.sh
    │   ├── run_osc_truthful_h2o_0.6.sh
    │   ├── run_osc_truthful_h2o_0.8.sh
    │   ├── run_osc_truthful_merge_0.2.sh
    │   ├── run_osc_truthful_merge_0.4.sh
    │   ├── run_osc_truthful_merge_0.6.sh
    │   └── run_osc_truthful_merge_0.8.sh
    ├── config
    │   ├── dataset2maxlen.json
    │   ├── dataset2prompt.json
    │   ├── model2maxlen.json
    │   └── model2path.json
    ├── data
    │   ├── copa-5.jsonl
    │   ├── mt_bench.jsonl
    │   ├── openbookqa-5.jsonl
    │   ├── piqa-5.jsonl
    │   ├── summarization_data
    │   │   ├── xsum_0shot.jsonl
    │   │   ├── xsum_3shot.jsonl
    │   │   └── xsum_5shot.jsonl
    │   ├── xsum.jsonl
    │   └── xsum_opt.jsonl
    ├── eval_long_bench.py
    ├── evaluate_task_result.py
    ├── generate_task_data.py
    ├── helm
    │   ├── .github
    │   │   └── workflows
    │   │   │   ├── python-publish.yml
    │   │   │   └── test.yml
    │   ├── .gitignore
    │   ├── .pre-commit-config.yaml
    │   ├── .readthedocs.yaml
    │   ├── CHANGELOG.md
    │   ├── LICENSE
    │   ├── MANIFEST.in
    │   ├── README.md
    │   ├── command
    │   │   ├── eval.sh
    │   │   └── get_data.sh
    │   ├── demo.py
    │   ├── docs
    │   │   ├── adding_new_models.md
    │   │   ├── benchmark.md
    │   │   ├── code.md
    │   │   ├── developer_setup.md
    │   │   ├── docstrings.css
    │   │   ├── huggingface_models.md
    │   │   ├── index.md
    │   │   ├── installation.md
    │   │   ├── metrics.md
    │   │   ├── mkdocs_macros.py
    │   │   ├── models.md
    │   │   ├── perturbations.md
    │   │   ├── proxy-server.md
    │   │   ├── quick_start.md
    │   │   ├── requirements.txt
    │   │   ├── scenarios.md
    │   │   ├── schemas.md
    │   │   └── tutorial.md
    │   ├── mkdocs.yml
    │   ├── pre-commit-venv.sh
    │   ├── pre-commit.sh
    │   ├── pyproject.toml
    │   ├── requirements-dev.txt
    │   ├── requirements-freeze.txt
    │   ├── requirements.txt
    │   ├── scripts
    │   │   ├── cache
    │   │   │   ├── __init__.py
    │   │   │   ├── copy_cache.py
    │   │   │   ├── fix_anthropic_cache.py
    │   │   │   ├── fix_together_cache.py
    │   │   │   └── remove_together_api_entries.py
    │   │   ├── efficiency
    │   │   │   ├── generate_instances.py
    │   │   │   └── generate_run_specs.py
    │   │   ├── estimate_cost.py
    │   │   ├── fact_completion
    │   │   │   ├── README.MD
    │   │   │   ├── create_benchmark.py
    │   │   │   ├── fetch_triples_and_aliases.py
    │   │   │   ├── filter_triples.py
    │   │   │   └── utils.py
    │   │   ├── helm-run-all.sh
    │   │   ├── offline_eval
    │   │   │   ├── __init__.py
    │   │   │   ├── export_requests.py
    │   │   │   └── import_results.py
    │   │   └── verify_reproducibility.py
    │   ├── setup.cfg
    │   ├── setup.py
    │   └── src
    │   │   └── helm
    │   │       ├── __init__.py
    │   │       ├── benchmark
    │   │           ├── __init__.py
    │   │           ├── adaptation
    │   │           │   ├── __init__.py
    │   │           │   ├── adapter_spec.py
    │   │           │   ├── adapters
    │   │           │   │   ├── __init__.py
    │   │           │   │   ├── adapter.py
    │   │           │   │   ├── adapter_factory.py
    │   │           │   │   ├── binary_ranking_adapter.py
    │   │           │   │   ├── generation_adapter.py
    │   │           │   │   ├── in_context_learning_adapter.py
    │   │           │   │   ├── language_modeling_adapter.py
    │   │           │   │   ├── multiple_choice_calibrated_adapter.py
    │   │           │   │   ├── multiple_choice_joint_adapter.py
    │   │           │   │   ├── multiple_choice_separate_adapter.py
    │   │           │   │   ├── test_adapter.py
    │   │           │   │   ├── test_generation_adapter.py
    │   │           │   │   ├── test_language_modeling_adapter.py
    │   │           │   │   └── test_multiple_choice_joint_adapter.py
    │   │           │   ├── prompt.py
    │   │           │   ├── request_state.py
    │   │           │   └── scenario_state.py
    │   │           ├── augmentations
    │   │           │   ├── __init__.py
    │   │           │   ├── contraction_expansion_perturbation.py
    │   │           │   ├── contrast_sets_perturbation.py
    │   │           │   ├── correct_to_misspelling.json
    │   │           │   ├── data_augmenter.py
    │   │           │   ├── dialect_perturbation.py
    │   │           │   ├── extra_space_perturbation.py
    │   │           │   ├── filler_words_perturbation.py
    │   │           │   ├── gender_perturbation.py
    │   │           │   ├── lowercase_perturbation.py
    │   │           │   ├── mild_mix_perturbation.py
    │   │           │   ├── misspelling_perturbation.py
    │   │           │   ├── person_name_perturbation.py
    │   │           │   ├── perturbation.py
    │   │           │   ├── perturbation_description.py
    │   │           │   ├── space_perturbation.py
    │   │           │   ├── synonym_perturbation.py
    │   │           │   ├── test_perturbation.py
    │   │           │   └── typos_perturbation.py
    │   │           ├── contamination
    │   │           │   ├── __init__.py
    │   │           │   ├── compute_contamination_metrics.py
    │   │           │   ├── contamination_stats.py
    │   │           │   ├── export_scenario_text.py
    │   │           │   ├── light_scenario.py
    │   │           │   ├── light_tokenizer.py
    │   │           │   ├── load_documents.py
    │   │           │   └── test_compute_contamination_metrics.py
    │   │           ├── data_preprocessor.py
    │   │           ├── efficiency_data
    │   │           │   ├── inference_denoised_runtimes.json
    │   │           │   ├── inference_idealized_runtimes.json
    │   │           │   └── training_efficiency.json
    │   │           ├── executor.py
    │   │           ├── metrics
    │   │           │   ├── __init__.py
    │   │           │   ├── basic_metrics.py
    │   │           │   ├── bbq_metrics.py
    │   │           │   ├── bias_metrics.py
    │   │           │   ├── bias_word_lists.py
    │   │           │   ├── classification_metrics.py
    │   │           │   ├── code_metrics.py
    │   │           │   ├── code_metrics_helper.py
    │   │           │   ├── copyright_metrics.py
    │   │           │   ├── disinformation_metrics.py
    │   │           │   ├── dry_run_metrics.py
    │   │           │   ├── machine_translation_metrics.py
    │   │           │   ├── metric.py
    │   │           │   ├── metric_name.py
    │   │           │   ├── metric_service.py
    │   │           │   ├── numeracy_metrics.py
    │   │           │   ├── ranking_metrics.py
    │   │           │   ├── statistic.py
    │   │           │   ├── summac
    │   │           │   │   ├── __init__.py
    │   │           │   │   ├── model_summac.py
    │   │           │   │   └── utils_misc.py
    │   │           │   ├── summarization_critique_metrics.py
    │   │           │   ├── summarization_metrics.py
    │   │           │   ├── test_bias_metrics.py
    │   │           │   ├── test_classification_metrics.py
    │   │           │   ├── test_metric.py
    │   │           │   ├── test_numeracy_metrics.py
    │   │           │   ├── test_statistic.py
    │   │           │   ├── tokens
    │   │           │   │   ├── __init__.py
    │   │           │   │   ├── ai21_token_cost_estimator.py
    │   │           │   │   ├── auto_token_cost_estimator.py
    │   │           │   │   ├── cohere_token_cost_estimator.py
    │   │           │   │   ├── free_token_cost_estimator.py
    │   │           │   │   ├── gooseai_token_cost_estimator.py
    │   │           │   │   ├── openai_token_cost_estimator.py
    │   │           │   │   ├── test_ai21_token_cost_estimator.py
    │   │           │   │   ├── test_openai_token_cost_estimator.py
    │   │           │   │   └── token_cost_estimator.py
    │   │           │   └── toxicity_metrics.py
    │   │           ├── presentation
    │   │           │   ├── __init__.py
    │   │           │   ├── apps
    │   │           │   │   ├── run_specs_gptneox.conf
    │   │           │   │   ├── run_specs_opt6.conf
    │   │           │   │   └── run_specs_opt66.conf
    │   │           │   ├── contamination.py
    │   │           │   ├── create_plots.py
    │   │           │   ├── run_display.py
    │   │           │   ├── run_entry.py
    │   │           │   ├── run_specs.conf
    │   │           │   ├── run_specs_big_bench_lite.conf
    │   │           │   ├── run_specs_biomedical.conf
    │   │           │   ├── run_specs_chat_gpt.conf
    │   │           │   ├── run_specs_cnn_opt.conf
    │   │           │   ├── run_specs_extra.conf
    │   │           │   ├── run_specs_gpu.conf
    │   │           │   ├── run_specs_interactive_qa.conf
    │   │           │   ├── run_specs_opinions_qa_ai21_default.conf
    │   │           │   ├── run_specs_opinions_qa_ai21_steer.conf
    │   │           │   ├── run_specs_opinions_qa_openai_default.conf
    │   │           │   ├── run_specs_opinions_qa_openai_steer.conf
    │   │           │   ├── run_specs_small.conf
    │   │           │   ├── run_specs_tiny.conf
    │   │           │   ├── schema.py
    │   │           │   ├── summarize.py
    │   │           │   ├── table.py
    │   │           │   ├── test_contamination.py
    │   │           │   ├── test_create_plots.py
    │   │           │   ├── test_run_entry.py
    │   │           │   └── xsum
    │   │           │   │   ├── run_specs_gptneox.conf
    │   │           │   │   ├── run_specs_llama.conf
    │   │           │   │   └── run_specs_opt.conf
    │   │           ├── run.py
    │   │           ├── run_expander.py
    │   │           ├── run_specs.py
    │   │           ├── runner.py
    │   │           ├── scenarios
    │   │           │   ├── __init__.py
    │   │           │   ├── babi_qa_scenario.py
    │   │           │   ├── bbq_scenario.py
    │   │           │   ├── big_bench_scenario.py
    │   │           │   ├── blimp_scenario.py
    │   │           │   ├── bold_scenario.py
    │   │           │   ├── boolq_scenario.py
    │   │           │   ├── civil_comments_scenario.py
    │   │           │   ├── code_scenario.py
    │   │           │   ├── code_scenario_apps_pinned_file_order.py
    │   │           │   ├── code_scenario_helper.py
    │   │           │   ├── commonsense_scenario.py
    │   │           │   ├── copyright_scenario.py
    │   │           │   ├── covid_dialog_scenario.py
    │   │           │   ├── dialogue_scenarios.py
    │   │           │   ├── disinformation_scenario.py
    │   │           │   ├── dyck_language_scenario.py
    │   │           │   ├── entity_data_imputation_scenario.py
    │   │           │   ├── entity_matching_scenario.py
    │   │           │   ├── entity_matching_scenario_fixed_random_state.py
    │   │           │   ├── gsm_scenario.py
    │   │           │   ├── ice_scenario.py
    │   │           │   ├── ice_scenario_pinned_file_order.py
    │   │           │   ├── imdb_scenario.py
    │   │           │   ├── imdb_scenario_pinned_file_order.py
    │   │           │   ├── interactive_qa_mmlu_scenario.py
    │   │           │   ├── legal_summarization_scenario.py
    │   │           │   ├── legal_support_scenario.py
    │   │           │   ├── lex_glue_scenario.py
    │   │           │   ├── lextreme_scenario.py
    │   │           │   ├── lsat_qa_scenario.py
    │   │           │   ├── math_scenario.py
    │   │           │   ├── me_q_sum_scenario.py
    │   │           │   ├── med_dialog_scenario.py
    │   │           │   ├── med_mcqa_scenario.py
    │   │           │   ├── med_paragraph_simplification_scenario.py
    │   │           │   ├── med_qa_scenario.py
    │   │           │   ├── mmlu_scenario.py
    │   │           │   ├── msmarco_scenario.py
    │   │           │   ├── narrativeqa_scenario.py
    │   │           │   ├── natural_qa_scenario.py
    │   │           │   ├── newsqa_scenario.py
    │   │           │   ├── numeracy_scenario.py
    │   │           │   ├── opinions_qa_scenario.py
    │   │           │   ├── pubmed_qa_scenario.py
    │   │           │   ├── quac_scenario.py
    │   │           │   ├── raft_scenario.py
    │   │           │   ├── real_toxicity_prompts_scenario.py
    │   │           │   ├── scenario.py
    │   │           │   ├── simple_scenarios.py
    │   │           │   ├── summarization_scenario.py
    │   │           │   ├── synthetic_efficiency_scenario.py
    │   │           │   ├── synthetic_reasoning_natural_scenario.py
    │   │           │   ├── synthetic_reasoning_scenario.py
    │   │           │   ├── test_scenario.py
    │   │           │   ├── the_pile_scenario.py
    │   │           │   ├── truthful_qa_scenario.py
    │   │           │   ├── twitter_aae_scenario.py
    │   │           │   ├── wikifact_scenario.py
    │   │           │   ├── wikitext_103_scenario.py
    │   │           │   └── wmt_14_scenario.py
    │   │           ├── server.py
    │   │           ├── static
    │   │           │   ├── benchmarking.css
    │   │           │   ├── benchmarking.js
    │   │           │   ├── contamination.yaml
    │   │           │   ├── general.js
    │   │           │   ├── images
    │   │           │   │   ├── crfm-logo.png
    │   │           │   │   ├── helm-logo-simple.png
    │   │           │   │   ├── helm-logo.png
    │   │           │   │   ├── language-model-helm.png
    │   │           │   │   ├── organizations
    │   │           │   │   │   ├── ai21.png
    │   │           │   │   │   ├── anthropic.png
    │   │           │   │   │   ├── bigscience.png
    │   │           │   │   │   ├── cohere.png
    │   │           │   │   │   ├── eleutherai.png
    │   │           │   │   │   ├── google.png
    │   │           │   │   │   ├── meta.png
    │   │           │   │   │   ├── microsoft.png
    │   │           │   │   │   ├── nvidia.png
    │   │           │   │   │   ├── openai.png
    │   │           │   │   │   ├── together.png
    │   │           │   │   │   ├── tsinghua-keg.png
    │   │           │   │   │   └── yandex.png
    │   │           │   │   ├── scenarios-by-metrics.png
    │   │           │   │   └── taxonomy-scenarios.png
    │   │           │   ├── index.html
    │   │           │   ├── info-icon.png
    │   │           │   ├── json-urls-root.js
    │   │           │   ├── json-urls.js
    │   │           │   ├── plot-captions.js
    │   │           │   ├── schema.yaml
    │   │           │   └── utils.js
    │   │           ├── test_data_preprocessor.py
    │   │           ├── test_run_expander.py
    │   │           └── window_services
    │   │           │   ├── __init__.py
    │   │           │   ├── ai21_window_service.py
    │   │           │   ├── anthropic_window_service.py
    │   │           │   ├── bloom_window_service.py
    │   │           │   ├── cohere_window_service.py
    │   │           │   ├── encoder_decoder_window_service.py
    │   │           │   ├── flan_t5_window_service.py
    │   │           │   ├── gpt2_window_service.py
    │   │           │   ├── gptj_window_service.py
    │   │           │   ├── gptneox_window_service.py
    │   │           │   ├── huggingface_window_service.py
    │   │           │   ├── ice_window_service.py
    │   │           │   ├── local_window_service.py
    │   │           │   ├── luminous_window_service.py
    │   │           │   ├── megatron_window_service.py
    │   │           │   ├── mock_ai21_tokenizer_request_results.pkl
    │   │           │   ├── mt_nlg_window_service.py
    │   │           │   ├── openai_window_service.py
    │   │           │   ├── opt_window_service.py
    │   │           │   ├── palmyra_window_service.py
    │   │           │   ├── remote_window_service.py
    │   │           │   ├── santacoder_window_service.py
    │   │           │   ├── starcoder_window_service.py
    │   │           │   ├── t0pp_window_service.py
    │   │           │   ├── t511b_window_service.py
    │   │           │   ├── test_ai21_window_service.py
    │   │           │   ├── test_anthropic_window_service.py
    │   │           │   ├── test_bloom_window_service.py
    │   │           │   ├── test_cohere_window_service.py
    │   │           │   ├── test_cohere_window_service_utils.py
    │   │           │   ├── test_flan_t5_window_service.py
    │   │           │   ├── test_gpt2_window_service.py
    │   │           │   ├── test_gpt4_window_service.py
    │   │           │   ├── test_gptj_window_service.py
    │   │           │   ├── test_gptneox_window_service.py
    │   │           │   ├── test_ice_window_service.py
    │   │           │   ├── test_mt_nlg_window_service.py
    │   │           │   ├── test_openai_window_service.py
    │   │           │   ├── test_opt_window_service.py
    │   │           │   ├── test_palmyra_window_service.py
    │   │           │   ├── test_t0pp_window_service.py
    │   │           │   ├── test_t511b_window_service.py
    │   │           │   ├── test_ul2_window_service.py
    │   │           │   ├── test_utils.py
    │   │           │   ├── test_yalm_window_service.py
    │   │           │   ├── tokenizer_service.py
    │   │           │   ├── ul2_window_service.py
    │   │           │   ├── wider_ai21_window_service.py
    │   │           │   ├── wider_openai_window_service.py
    │   │           │   ├── window_service.py
    │   │           │   ├── window_service_factory.py
    │   │           │   └── yalm_window_service.py
    │   │       ├── common
    │   │           ├── __init__.py
    │   │           ├── authentication.py
    │   │           ├── cache.py
    │   │           ├── codec.py
    │   │           ├── critique_request.py
    │   │           ├── general.py
    │   │           ├── hierarchical_logger.py
    │   │           ├── object_spec.py
    │   │           ├── perspective_api_request.py
    │   │           ├── request.py
    │   │           ├── test_cache.py
    │   │           ├── test_codec.py
    │   │           ├── test_general.py
    │   │           └── tokenization_request.py
    │   │       └── proxy
    │   │           ├── __init__.py
    │   │           ├── accounts.py
    │   │           ├── cli.py
    │   │           ├── clients
    │   │               ├── __init__.py
    │   │               ├── ai21_client.py
    │   │               ├── aleph_alpha_client.py
    │   │               ├── anthropic_client.py
    │   │               ├── auto_client.py
    │   │               ├── chat_gpt_client.py
    │   │               ├── client.py
    │   │               ├── cohere_client.py
    │   │               ├── critique_client.py
    │   │               ├── google_client.py
    │   │               ├── goose_ai_client.py
    │   │               ├── huggingface_client.py
    │   │               ├── huggingface_model_registry.py
    │   │               ├── huggingface_tokenizer.py
    │   │               ├── ice_tokenizer_client.py
    │   │               ├── mechanical_turk_critique_client.py
    │   │               ├── mechanical_turk_critique_exporter.py
    │   │               ├── mechanical_turk_critique_importer.py
    │   │               ├── megatron_client.py
    │   │               ├── microsoft_client.py
    │   │               ├── openai_client.py
    │   │               ├── palmyra_client.py
    │   │               ├── perspective_api_client.py
    │   │               ├── remote_model_registry.py
    │   │               ├── simple_client.py
    │   │               ├── test_anthropic_client.py
    │   │               ├── test_client.py
    │   │               ├── test_huggingface_client.py
    │   │               ├── test_huggingface_model_registry.py
    │   │               ├── test_huggingface_tokenizer.py
    │   │               ├── test_ice_tokenizer_client.py
    │   │               ├── test_yalm_tokenizer_client.py
    │   │               ├── together_client.py
    │   │               ├── yalm_tokenizer
    │   │               │   ├── __init__.py
    │   │               │   ├── test_yalm_tokenizer.py
    │   │               │   ├── voc_100b.sp
    │   │               │   └── yalm_tokenizer.py
    │   │               └── yalm_tokenizer_client.py
    │   │           ├── example_queries.py
    │   │           ├── models.py
    │   │           ├── query.py
    │   │           ├── retry.py
    │   │           ├── server.py
    │   │           ├── services
    │   │               ├── __init__.py
    │   │               ├── remote_service.py
    │   │               ├── server_service.py
    │   │               ├── service.py
    │   │               ├── test_remote_service.py
    │   │               └── test_service.py
    │   │           ├── static
    │   │               ├── general.js
    │   │               ├── help.html
    │   │               ├── index.css
    │   │               ├── index.html
    │   │               ├── index.js
    │   │               └── info-icon.png
    │   │           ├── test_models.py
    │   │           ├── test_retry.py
    │   │           └── token_counters
    │   │               ├── __init__.py
    │   │               ├── ai21_token_counter.py
    │   │               ├── auto_token_counter.py
    │   │               ├── cohere_token_counter.py
    │   │               ├── free_token_counter.py
    │   │               ├── gooseai_token_counter.py
    │   │               ├── openai_token_counter.py
    │   │               ├── test_ai21_token_counter.py
    │   │               ├── test_openai_token_counter.py
    │   │               └── token_counter.py
    ├── kv_token_merge
    │   ├── __pycache__
    │   │   ├── modify_llama.cpython-310.pyc
    │   │   ├── modify_llama_merge.cpython-310.pyc
    │   │   └── stream.cpython-310.pyc
    │   ├── modify_llama.py
    │   ├── modify_llama_merge.py
    │   └── stream.py
    ├── lm-evaluation-harness
    │   ├── .coveragerc
    │   ├── .flake8
    │   ├── .github
    │   │   └── workflows
    │   │   │   ├── new_tasks.yml
    │   │   │   └── unit_tests.yml
    │   ├── .gitignore
    │   ├── .pre-commit-config.yaml
    │   ├── CITATION.bib
    │   ├── CODEOWNERS
    │   ├── LICENSE.md
    │   ├── README.md
    │   ├── docs
    │   │   ├── README.md
    │   │   ├── decontamination.md
    │   │   ├── img
    │   │   │   └── fewshot_example_gpt3.png
    │   │   ├── interface.md
    │   │   ├── model_guide.md
    │   │   ├── new_task_guide.md
    │   │   └── task_guide.md
    │   ├── examples
    │   │   └── lm-eval-overview.ipynb
    │   ├── ignore.txt
    │   ├── lm_eval
    │   │   ├── __init__.py
    │   │   ├── __main__.py
    │   │   ├── api
    │   │   │   ├── __init__.py
    │   │   │   ├── filter.py
    │   │   │   ├── instance.py
    │   │   │   ├── metrics.py
    │   │   │   ├── model.py
    │   │   │   ├── registry.py
    │   │   │   ├── samplers.py
    │   │   │   └── task.py
    │   │   ├── decontamination
    │   │   │   ├── __init__.py
    │   │   │   ├── archiver.py
    │   │   │   ├── decontaminate.py
    │   │   │   └── janitor.py
    │   │   ├── evaluator.py
    │   │   ├── filters
    │   │   │   ├── __init__.py
    │   │   │   ├── decontamination.py
    │   │   │   ├── extraction.py
    │   │   │   ├── selection.py
    │   │   │   └── transformation.py
    │   │   ├── models
    │   │   │   ├── __init__.py
    │   │   │   ├── anthropic_llms.py
    │   │   │   ├── dummy.py
    │   │   │   ├── gguf.py
    │   │   │   ├── huggingface.py
    │   │   │   ├── openai_completions.py
    │   │   │   ├── textsynth.py
    │   │   │   └── vllm_causallms.py
    │   │   ├── prompts
    │   │   │   └── __init__.py
    │   │   ├── tasks
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── anli
    │   │   │   │   ├── README.md
    │   │   │   │   ├── anli_r1.yaml
    │   │   │   │   ├── anli_r2.yaml
    │   │   │   │   └── anli_r3.yaml
    │   │   │   ├── arc
    │   │   │   │   ├── README.md
    │   │   │   │   ├── arc_challenge.yaml
    │   │   │   │   └── arc_easy.yaml
    │   │   │   ├── arithmetic
    │   │   │   │   ├── README.md
    │   │   │   │   ├── arithmetic_1dc.yaml
    │   │   │   │   ├── arithmetic_2da.yaml
    │   │   │   │   ├── arithmetic_2dm.yaml
    │   │   │   │   ├── arithmetic_2ds.yaml
    │   │   │   │   ├── arithmetic_3da.yaml
    │   │   │   │   ├── arithmetic_3ds.yaml
    │   │   │   │   ├── arithmetic_4da.yaml
    │   │   │   │   ├── arithmetic_4ds.yaml
    │   │   │   │   ├── arithmetic_5da.yaml
    │   │   │   │   └── arithmetic_5ds.yaml
    │   │   │   ├── asdiv
    │   │   │   │   ├── README.md
    │   │   │   │   └── default.yaml
    │   │   │   ├── babi
    │   │   │   │   ├── README.md
    │   │   │   │   └── babi.yaml
    │   │   │   ├── bbh
    │   │   │   │   ├── README.md
    │   │   │   │   ├── _generate_configs.py
    │   │   │   │   ├── cot_fewshot
    │   │   │   │   │   ├── _cot_fewshot_template_yaml
    │   │   │   │   │   ├── boolean_expressions.yaml
    │   │   │   │   │   ├── causal_judgement.yaml
    │   │   │   │   │   ├── date_understanding.yaml
    │   │   │   │   │   ├── disambiguation_qa.yaml
    │   │   │   │   │   ├── dyck_languages.yaml
    │   │   │   │   │   ├── formal_fallacies.yaml
    │   │   │   │   │   ├── geometric_shapes.yaml
    │   │   │   │   │   ├── hyperbaton.yaml
    │   │   │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │   │   │   ├── movie_recommendation.yaml
    │   │   │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │   │   │   ├── navigate.yaml
    │   │   │   │   │   ├── object_counting.yaml
    │   │   │   │   │   ├── penguins_in_a_table.yaml
    │   │   │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │   │   │   ├── ruin_names.yaml
    │   │   │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │   │   │   ├── snarks.yaml
    │   │   │   │   │   ├── sports_understanding.yaml
    │   │   │   │   │   ├── temporal_sequences.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │   │   │   ├── web_of_lies.yaml
    │   │   │   │   │   └── word_sorting.yaml
    │   │   │   │   ├── cot_zeroshot
    │   │   │   │   │   ├── _cot_zeroshot_template_yaml
    │   │   │   │   │   ├── boolean_expressions.yaml
    │   │   │   │   │   ├── causal_judgement.yaml
    │   │   │   │   │   ├── date_understanding.yaml
    │   │   │   │   │   ├── disambiguation_qa.yaml
    │   │   │   │   │   ├── dyck_languages.yaml
    │   │   │   │   │   ├── formal_fallacies.yaml
    │   │   │   │   │   ├── geometric_shapes.yaml
    │   │   │   │   │   ├── hyperbaton.yaml
    │   │   │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │   │   │   ├── movie_recommendation.yaml
    │   │   │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │   │   │   ├── navigate.yaml
    │   │   │   │   │   ├── object_counting.yaml
    │   │   │   │   │   ├── penguins_in_a_table.yaml
    │   │   │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │   │   │   ├── ruin_names.yaml
    │   │   │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │   │   │   ├── snarks.yaml
    │   │   │   │   │   ├── sports_understanding.yaml
    │   │   │   │   │   ├── temporal_sequences.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │   │   │   ├── web_of_lies.yaml
    │   │   │   │   │   └── word_sorting.yaml
    │   │   │   │   ├── fewshot
    │   │   │   │   │   ├── _fewshot_template_yaml
    │   │   │   │   │   ├── boolean_expressions.yaml
    │   │   │   │   │   ├── causal_judgement.yaml
    │   │   │   │   │   ├── date_understanding.yaml
    │   │   │   │   │   ├── disambiguation_qa.yaml
    │   │   │   │   │   ├── dyck_languages.yaml
    │   │   │   │   │   ├── formal_fallacies.yaml
    │   │   │   │   │   ├── geometric_shapes.yaml
    │   │   │   │   │   ├── hyperbaton.yaml
    │   │   │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │   │   │   ├── movie_recommendation.yaml
    │   │   │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │   │   │   ├── navigate.yaml
    │   │   │   │   │   ├── object_counting.yaml
    │   │   │   │   │   ├── penguins_in_a_table.yaml
    │   │   │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │   │   │   ├── ruin_names.yaml
    │   │   │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │   │   │   ├── snarks.yaml
    │   │   │   │   │   ├── sports_understanding.yaml
    │   │   │   │   │   ├── temporal_sequences.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │   │   │   ├── web_of_lies.yaml
    │   │   │   │   │   └── word_sorting.yaml
    │   │   │   │   └── zeroshot
    │   │   │   │   │   ├── _zeroshot_template_yaml
    │   │   │   │   │   ├── boolean_expressions.yaml
    │   │   │   │   │   ├── causal_judgement.yaml
    │   │   │   │   │   ├── date_understanding.yaml
    │   │   │   │   │   ├── disambiguation_qa.yaml
    │   │   │   │   │   ├── dyck_languages.yaml
    │   │   │   │   │   ├── formal_fallacies.yaml
    │   │   │   │   │   ├── geometric_shapes.yaml
    │   │   │   │   │   ├── hyperbaton.yaml
    │   │   │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │   │   │   ├── movie_recommendation.yaml
    │   │   │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │   │   │   ├── navigate.yaml
    │   │   │   │   │   ├── object_counting.yaml
    │   │   │   │   │   ├── penguins_in_a_table.yaml
    │   │   │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │   │   │   ├── ruin_names.yaml
    │   │   │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │   │   │   ├── snarks.yaml
    │   │   │   │   │   ├── sports_understanding.yaml
    │   │   │   │   │   ├── temporal_sequences.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │   │   │   ├── web_of_lies.yaml
    │   │   │   │   │   └── word_sorting.yaml
    │   │   │   ├── belebele
    │   │   │   │   ├── README.md
    │   │   │   │   ├── _default_template_yaml
    │   │   │   │   ├── _generate_configs.py
    │   │   │   │   ├── belebele_acm_Arab.yaml
    │   │   │   │   ├── belebele_afr_Latn.yaml
    │   │   │   │   ├── belebele_als_Latn.yaml
    │   │   │   │   ├── belebele_amh_Ethi.yaml
    │   │   │   │   ├── belebele_apc_Arab.yaml
    │   │   │   │   ├── belebele_arb_Arab.yaml
    │   │   │   │   ├── belebele_arb_Latn.yaml
    │   │   │   │   ├── belebele_ars_Arab.yaml
    │   │   │   │   ├── belebele_ary_Arab.yaml
    │   │   │   │   ├── belebele_arz_Arab.yaml
    │   │   │   │   ├── belebele_asm_Beng.yaml
    │   │   │   │   ├── belebele_azj_Latn.yaml
    │   │   │   │   ├── belebele_bam_Latn.yaml
    │   │   │   │   ├── belebele_ben_Beng.yaml
    │   │   │   │   ├── belebele_ben_Latn.yaml
    │   │   │   │   ├── belebele_bod_Tibt.yaml
    │   │   │   │   ├── belebele_bul_Cyrl.yaml
    │   │   │   │   ├── belebele_cat_Latn.yaml
    │   │   │   │   ├── belebele_ceb_Latn.yaml
    │   │   │   │   ├── belebele_ces_Latn.yaml
    │   │   │   │   ├── belebele_ckb_Arab.yaml
    │   │   │   │   ├── belebele_dan_Latn.yaml
    │   │   │   │   ├── belebele_deu_Latn.yaml
    │   │   │   │   ├── belebele_ell_Grek.yaml
    │   │   │   │   ├── belebele_eng_Latn.yaml
    │   │   │   │   ├── belebele_est_Latn.yaml
    │   │   │   │   ├── belebele_eus_Latn.yaml
    │   │   │   │   ├── belebele_fin_Latn.yaml
    │   │   │   │   ├── belebele_fra_Latn.yaml
    │   │   │   │   ├── belebele_fuv_Latn.yaml
    │   │   │   │   ├── belebele_gaz_Latn.yaml
    │   │   │   │   ├── belebele_grn_Latn.yaml
    │   │   │   │   ├── belebele_guj_Gujr.yaml
    │   │   │   │   ├── belebele_hat_Latn.yaml
    │   │   │   │   ├── belebele_hau_Latn.yaml
    │   │   │   │   ├── belebele_heb_Hebr.yaml
    │   │   │   │   ├── belebele_hin_Deva.yaml
    │   │   │   │   ├── belebele_hin_Latn.yaml
    │   │   │   │   ├── belebele_hrv_Latn.yaml
    │   │   │   │   ├── belebele_hun_Latn.yaml
    │   │   │   │   ├── belebele_hye_Armn.yaml
    │   │   │   │   ├── belebele_ibo_Latn.yaml
    │   │   │   │   ├── belebele_ilo_Latn.yaml
    │   │   │   │   ├── belebele_ind_Latn.yaml
    │   │   │   │   ├── belebele_isl_Latn.yaml
    │   │   │   │   ├── belebele_ita_Latn.yaml
    │   │   │   │   ├── belebele_jav_Latn.yaml
    │   │   │   │   ├── belebele_jpn_Jpan.yaml
    │   │   │   │   ├── belebele_kac_Latn.yaml
    │   │   │   │   ├── belebele_kan_Knda.yaml
    │   │   │   │   ├── belebele_kat_Geor.yaml
    │   │   │   │   ├── belebele_kaz_Cyrl.yaml
    │   │   │   │   ├── belebele_kea_Latn.yaml
    │   │   │   │   ├── belebele_khk_Cyrl.yaml
    │   │   │   │   ├── belebele_khm_Khmr.yaml
    │   │   │   │   ├── belebele_kin_Latn.yaml
    │   │   │   │   ├── belebele_kir_Cyrl.yaml
    │   │   │   │   ├── belebele_kor_Hang.yaml
    │   │   │   │   ├── belebele_lao_Laoo.yaml
    │   │   │   │   ├── belebele_lin_Latn.yaml
    │   │   │   │   ├── belebele_lit_Latn.yaml
    │   │   │   │   ├── belebele_lug_Latn.yaml
    │   │   │   │   ├── belebele_luo_Latn.yaml
    │   │   │   │   ├── belebele_lvs_Latn.yaml
    │   │   │   │   ├── belebele_mal_Mlym.yaml
    │   │   │   │   ├── belebele_mar_Deva.yaml
    │   │   │   │   ├── belebele_mkd_Cyrl.yaml
    │   │   │   │   ├── belebele_mlt_Latn.yaml
    │   │   │   │   ├── belebele_mri_Latn.yaml
    │   │   │   │   ├── belebele_mya_Mymr.yaml
    │   │   │   │   ├── belebele_nld_Latn.yaml
    │   │   │   │   ├── belebele_nob_Latn.yaml
    │   │   │   │   ├── belebele_npi_Deva.yaml
    │   │   │   │   ├── belebele_npi_Latn.yaml
    │   │   │   │   ├── belebele_nso_Latn.yaml
    │   │   │   │   ├── belebele_nya_Latn.yaml
    │   │   │   │   ├── belebele_ory_Orya.yaml
    │   │   │   │   ├── belebele_pan_Guru.yaml
    │   │   │   │   ├── belebele_pbt_Arab.yaml
    │   │   │   │   ├── belebele_pes_Arab.yaml
    │   │   │   │   ├── belebele_plt_Latn.yaml
    │   │   │   │   ├── belebele_pol_Latn.yaml
    │   │   │   │   ├── belebele_por_Latn.yaml
    │   │   │   │   ├── belebele_ron_Latn.yaml
    │   │   │   │   ├── belebele_rus_Cyrl.yaml
    │   │   │   │   ├── belebele_shn_Mymr.yaml
    │   │   │   │   ├── belebele_sin_Latn.yaml
    │   │   │   │   ├── belebele_sin_Sinh.yaml
    │   │   │   │   ├── belebele_slk_Latn.yaml
    │   │   │   │   ├── belebele_slv_Latn.yaml
    │   │   │   │   ├── belebele_sna_Latn.yaml
    │   │   │   │   ├── belebele_snd_Arab.yaml
    │   │   │   │   ├── belebele_som_Latn.yaml
    │   │   │   │   ├── belebele_sot_Latn.yaml
    │   │   │   │   ├── belebele_spa_Latn.yaml
    │   │   │   │   ├── belebele_srp_Cyrl.yaml
    │   │   │   │   ├── belebele_ssw_Latn.yaml
    │   │   │   │   ├── belebele_sun_Latn.yaml
    │   │   │   │   ├── belebele_swe_Latn.yaml
    │   │   │   │   ├── belebele_swh_Latn.yaml
    │   │   │   │   ├── belebele_tam_Taml.yaml
    │   │   │   │   ├── belebele_tel_Telu.yaml
    │   │   │   │   ├── belebele_tgk_Cyrl.yaml
    │   │   │   │   ├── belebele_tgl_Latn.yaml
    │   │   │   │   ├── belebele_tha_Thai.yaml
    │   │   │   │   ├── belebele_tir_Ethi.yaml
    │   │   │   │   ├── belebele_tsn_Latn.yaml
    │   │   │   │   ├── belebele_tso_Latn.yaml
    │   │   │   │   ├── belebele_tur_Latn.yaml
    │   │   │   │   ├── belebele_ukr_Cyrl.yaml
    │   │   │   │   ├── belebele_urd_Arab.yaml
    │   │   │   │   ├── belebele_urd_Latn.yaml
    │   │   │   │   ├── belebele_uzn_Latn.yaml
    │   │   │   │   ├── belebele_vie_Latn.yaml
    │   │   │   │   ├── belebele_war_Latn.yaml
    │   │   │   │   ├── belebele_wol_Latn.yaml
    │   │   │   │   ├── belebele_xho_Latn.yaml
    │   │   │   │   ├── belebele_yor_Latn.yaml
    │   │   │   │   ├── belebele_zho_Hans.yaml
    │   │   │   │   ├── belebele_zho_Hant.yaml
    │   │   │   │   ├── belebele_zsm_Latn.yaml
    │   │   │   │   └── belebele_zul_Latn.yaml
    │   │   │   ├── benchmarks
    │   │   │   │   ├── flan
    │   │   │   │   │   ├── flan_anli.yaml
    │   │   │   │   │   ├── flan_arc.yaml
    │   │   │   │   │   ├── flan_boolq.yaml
    │   │   │   │   │   ├── flan_cot.yaml
    │   │   │   │   │   ├── flan_held_in.yaml
    │   │   │   │   │   ├── flan_held_in_yaml
    │   │   │   │   │   ├── flan_held_out.yaml
    │   │   │   │   │   ├── flan_rte.yaml
    │   │   │   │   │   ├── prompt_templates
    │   │   │   │   │   │   ├── anli.yaml
    │   │   │   │   │   │   ├── arc.yaml
    │   │   │   │   │   │   ├── boolq.yaml
    │   │   │   │   │   │   └── rte.yaml
    │   │   │   │   │   └── yaml_templates
    │   │   │   │   │   │   ├── cot_template_yaml
    │   │   │   │   │   │   └── held_in_template_yaml
    │   │   │   │   ├── minerva_math.yaml
    │   │   │   │   ├── pythia.yaml
    │   │   │   │   └── t0_eval.yaml
    │   │   │   ├── bigbench
    │   │   │   │   ├── README.md
    │   │   │   │   ├── generate_tasks.py
    │   │   │   │   ├── generate_until
    │   │   │   │   │   ├── abstract_narrative_understanding.yaml
    │   │   │   │   │   ├── anachronisms.yaml
    │   │   │   │   │   ├── analogical_similarity.yaml
    │   │   │   │   │   ├── analytic_entailment.yaml
    │   │   │   │   │   ├── arithmetic.yaml
    │   │   │   │   │   ├── ascii_word_recognition.yaml
    │   │   │   │   │   ├── authorship_verification.yaml
    │   │   │   │   │   ├── auto_categorization.yaml
    │   │   │   │   │   ├── auto_debugging.yaml
    │   │   │   │   │   ├── bbq_lite_json.yaml
    │   │   │   │   │   ├── bridging_anaphora_resolution_barqa.yaml
    │   │   │   │   │   ├── causal_judgment.yaml
    │   │   │   │   │   ├── cause_and_effect.yaml
    │   │   │   │   │   ├── checkmate_in_one.yaml
    │   │   │   │   │   ├── chess_state_tracking.yaml
    │   │   │   │   │   ├── chinese_remainder_theorem.yaml
    │   │   │   │   │   ├── cifar10_classification.yaml
    │   │   │   │   │   ├── code_line_description.yaml
    │   │   │   │   │   ├── codenames.yaml
    │   │   │   │   │   ├── color.yaml
    │   │   │   │   │   ├── common_morpheme.yaml
    │   │   │   │   │   ├── conceptual_combinations.yaml
    │   │   │   │   │   ├── conlang_translation.yaml
    │   │   │   │   │   ├── contextual_parametric_knowledge_conflicts.yaml
    │   │   │   │   │   ├── crash_blossom.yaml
    │   │   │   │   │   ├── crass_ai.yaml
    │   │   │   │   │   ├── cryobiology_spanish.yaml
    │   │   │   │   │   ├── cryptonite.yaml
    │   │   │   │   │   ├── cs_algorithms.yaml
    │   │   │   │   │   ├── dark_humor_detection.yaml
    │   │   │   │   │   ├── date_understanding.yaml
    │   │   │   │   │   ├── disambiguation_qa.yaml
    │   │   │   │   │   ├── discourse_marker_prediction.yaml
    │   │   │   │   │   ├── disfl_qa.yaml
    │   │   │   │   │   ├── dyck_languages.yaml
    │   │   │   │   │   ├── elementary_math_qa.yaml
    │   │   │   │   │   ├── emoji_movie.yaml
    │   │   │   │   │   ├── emojis_emotion_prediction.yaml
    │   │   │   │   │   ├── empirical_judgments.yaml
    │   │   │   │   │   ├── english_proverbs.yaml
    │   │   │   │   │   ├── english_russian_proverbs.yaml
    │   │   │   │   │   ├── entailed_polarity.yaml
    │   │   │   │   │   ├── entailed_polarity_hindi.yaml
    │   │   │   │   │   ├── epistemic_reasoning.yaml
    │   │   │   │   │   ├── evaluating_information_essentiality.yaml
    │   │   │   │   │   ├── fact_checker.yaml
    │   │   │   │   │   ├── fantasy_reasoning.yaml
    │   │   │   │   │   ├── few_shot_nlg.yaml
    │   │   │   │   │   ├── figure_of_speech_detection.yaml
    │   │   │   │   │   ├── formal_fallacies_syllogisms_negation.yaml
    │   │   │   │   │   ├── gem.yaml
    │   │   │   │   │   ├── gender_inclusive_sentences_german.yaml
    │   │   │   │   │   ├── general_knowledge.yaml
    │   │   │   │   │   ├── geometric_shapes.yaml
    │   │   │   │   │   ├── goal_step_wikihow.yaml
    │   │   │   │   │   ├── gre_reading_comprehension.yaml
    │   │   │   │   │   ├── hhh_alignment.yaml
    │   │   │   │   │   ├── hindi_question_answering.yaml
    │   │   │   │   │   ├── hindu_knowledge.yaml
    │   │   │   │   │   ├── hinglish_toxicity.yaml
    │   │   │   │   │   ├── human_organs_senses.yaml
    │   │   │   │   │   ├── hyperbaton.yaml
    │   │   │   │   │   ├── identify_math_theorems.yaml
    │   │   │   │   │   ├── identify_odd_metaphor.yaml
    │   │   │   │   │   ├── implicatures.yaml
    │   │   │   │   │   ├── implicit_relations.yaml
    │   │   │   │   │   ├── intent_recognition.yaml
    │   │   │   │   │   ├── international_phonetic_alphabet_nli.yaml
    │   │   │   │   │   ├── international_phonetic_alphabet_transliterate.yaml
    │   │   │   │   │   ├── intersect_geometry.yaml
    │   │   │   │   │   ├── irony_identification.yaml
    │   │   │   │   │   ├── kanji_ascii.yaml
    │   │   │   │   │   ├── kannada.yaml
    │   │   │   │   │   ├── key_value_maps.yaml
    │   │   │   │   │   ├── known_unknowns.yaml
    │   │   │   │   │   ├── language_games.yaml
    │   │   │   │   │   ├── language_identification.yaml
    │   │   │   │   │   ├── linguistic_mappings.yaml
    │   │   │   │   │   ├── linguistics_puzzles.yaml
    │   │   │   │   │   ├── list_functions.yaml
    │   │   │   │   │   ├── logic_grid_puzzle.yaml
    │   │   │   │   │   ├── logical_args.yaml
    │   │   │   │   │   ├── logical_deduction.yaml
    │   │   │   │   │   ├── logical_fallacy_detection.yaml
    │   │   │   │   │   ├── logical_sequence.yaml
    │   │   │   │   │   ├── mathematical_induction.yaml
    │   │   │   │   │   ├── matrixshapes.yaml
    │   │   │   │   │   ├── metaphor_boolean.yaml
    │   │   │   │   │   ├── metaphor_understanding.yaml
    │   │   │   │   │   ├── minute_mysteries_qa.yaml
    │   │   │   │   │   ├── misconceptions.yaml
    │   │   │   │   │   ├── misconceptions_russian.yaml
    │   │   │   │   │   ├── mnist_ascii.yaml
    │   │   │   │   │   ├── modified_arithmetic.yaml
    │   │   │   │   │   ├── moral_permissibility.yaml
    │   │   │   │   │   ├── movie_dialog_same_or_different.yaml
    │   │   │   │   │   ├── movie_recommendation.yaml
    │   │   │   │   │   ├── mult_data_wrangling.yaml
    │   │   │   │   │   ├── multiemo.yaml
    │   │   │   │   │   ├── natural_instructions.yaml
    │   │   │   │   │   ├── navigate.yaml
    │   │   │   │   │   ├── nonsense_words_grammar.yaml
    │   │   │   │   │   ├── novel_concepts.yaml
    │   │   │   │   │   ├── object_counting.yaml
    │   │   │   │   │   ├── odd_one_out.yaml
    │   │   │   │   │   ├── operators.yaml
    │   │   │   │   │   ├── paragraph_segmentation.yaml
    │   │   │   │   │   ├── parsinlu_qa.yaml
    │   │   │   │   │   ├── parsinlu_reading_comprehension.yaml
    │   │   │   │   │   ├── penguins_in_a_table.yaml
    │   │   │   │   │   ├── periodic_elements.yaml
    │   │   │   │   │   ├── persian_idioms.yaml
    │   │   │   │   │   ├── phrase_relatedness.yaml
    │   │   │   │   │   ├── physical_intuition.yaml
    │   │   │   │   │   ├── physics.yaml
    │   │   │   │   │   ├── physics_questions.yaml
    │   │   │   │   │   ├── play_dialog_same_or_different.yaml
    │   │   │   │   │   ├── polish_sequence_labeling.yaml
    │   │   │   │   │   ├── presuppositions_as_nli.yaml
    │   │   │   │   │   ├── qa_wikidata.yaml
    │   │   │   │   │   ├── question_selection.yaml
    │   │   │   │   │   ├── real_or_fake_text.yaml
    │   │   │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │   │   │   ├── repeat_copy_logic.yaml
    │   │   │   │   │   ├── rephrase.yaml
    │   │   │   │   │   ├── riddle_sense.yaml
    │   │   │   │   │   ├── ruin_names.yaml
    │   │   │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │   │   │   ├── scientific_press_release.yaml
    │   │   │   │   │   ├── semantic_parsing_in_context_sparc.yaml
    │   │   │   │   │   ├── semantic_parsing_spider.yaml
    │   │   │   │   │   ├── sentence_ambiguity.yaml
    │   │   │   │   │   ├── similarities_abstraction.yaml
    │   │   │   │   │   ├── simp_turing_concept.yaml
    │   │   │   │   │   ├── simple_arithmetic_json.yaml
    │   │   │   │   │   ├── simple_arithmetic_json_multiple_choice.yaml
    │   │   │   │   │   ├── simple_arithmetic_json_subtasks.yaml
    │   │   │   │   │   ├── simple_arithmetic_multiple_targets_json.yaml
    │   │   │   │   │   ├── simple_ethical_questions.yaml
    │   │   │   │   │   ├── simple_text_editing.yaml
    │   │   │   │   │   ├── snarks.yaml
    │   │   │   │   │   ├── social_iqa.yaml
    │   │   │   │   │   ├── social_support.yaml
    │   │   │   │   │   ├── sports_understanding.yaml
    │   │   │   │   │   ├── strange_stories.yaml
    │   │   │   │   │   ├── strategyqa.yaml
    │   │   │   │   │   ├── sufficient_information.yaml
    │   │   │   │   │   ├── suicide_risk.yaml
    │   │   │   │   │   ├── swahili_english_proverbs.yaml
    │   │   │   │   │   ├── swedish_to_german_proverbs.yaml
    │   │   │   │   │   ├── symbol_interpretation.yaml
    │   │   │   │   │   ├── temporal_sequences.yaml
    │   │   │   │   │   ├── tense.yaml
    │   │   │   │   │   ├── timedial.yaml
    │   │   │   │   │   ├── topical_chat.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects.yaml
    │   │   │   │   │   ├── understanding_fables.yaml
    │   │   │   │   │   ├── undo_permutation.yaml
    │   │   │   │   │   ├── unit_conversion.yaml
    │   │   │   │   │   ├── unit_interpretation.yaml
    │   │   │   │   │   ├── unnatural_in_context_learning.yaml
    │   │   │   │   │   ├── vitaminc_fact_verification.yaml
    │   │   │   │   │   ├── what_is_the_tao.yaml
    │   │   │   │   │   ├── which_wiki_edit.yaml
    │   │   │   │   │   ├── winowhy.yaml
    │   │   │   │   │   ├── word_sorting.yaml
    │   │   │   │   │   └── word_unscrambling.yaml
    │   │   │   │   ├── generate_until_template_yaml
    │   │   │   │   ├── multiple_choice
    │   │   │   │   │   ├── abstract_narrative_understanding.yaml
    │   │   │   │   │   ├── anachronisms.yaml
    │   │   │   │   │   ├── analogical_similarity.yaml
    │   │   │   │   │   ├── analytic_entailment.yaml
    │   │   │   │   │   ├── arithmetic.yaml
    │   │   │   │   │   ├── ascii_word_recognition.yaml
    │   │   │   │   │   ├── authorship_verification.yaml
    │   │   │   │   │   ├── auto_categorization.yaml
    │   │   │   │   │   ├── auto_debugging.yaml
    │   │   │   │   │   ├── bbq_lite_json.yaml
    │   │   │   │   │   ├── bridging_anaphora_resolution_barqa.yaml
    │   │   │   │   │   ├── causal_judgment.yaml
    │   │   │   │   │   ├── cause_and_effect.yaml
    │   │   │   │   │   ├── checkmate_in_one.yaml
    │   │   │   │   │   ├── chess_state_tracking.yaml
    │   │   │   │   │   ├── chinese_remainder_theorem.yaml
    │   │   │   │   │   ├── cifar10_classification.yaml
    │   │   │   │   │   ├── code_line_description.yaml
    │   │   │   │   │   ├── codenames.yaml
    │   │   │   │   │   ├── color.yaml
    │   │   │   │   │   ├── common_morpheme.yaml
    │   │   │   │   │   ├── conceptual_combinations.yaml
    │   │   │   │   │   ├── conlang_translation.yaml
    │   │   │   │   │   ├── contextual_parametric_knowledge_conflicts.yaml
    │   │   │   │   │   ├── crash_blossom.yaml
    │   │   │   │   │   ├── crass_ai.yaml
    │   │   │   │   │   ├── cryobiology_spanish.yaml
    │   │   │   │   │   ├── cryptonite.yaml
    │   │   │   │   │   ├── cs_algorithms.yaml
    │   │   │   │   │   ├── dark_humor_detection.yaml
    │   │   │   │   │   ├── date_understanding.yaml
    │   │   │   │   │   ├── disambiguation_qa.yaml
    │   │   │   │   │   ├── discourse_marker_prediction.yaml
    │   │   │   │   │   ├── disfl_qa.yaml
    │   │   │   │   │   ├── dyck_languages.yaml
    │   │   │   │   │   ├── elementary_math_qa.yaml
    │   │   │   │   │   ├── emoji_movie.yaml
    │   │   │   │   │   ├── emojis_emotion_prediction.yaml
    │   │   │   │   │   ├── empirical_judgments.yaml
    │   │   │   │   │   ├── english_proverbs.yaml
    │   │   │   │   │   ├── english_russian_proverbs.yaml
    │   │   │   │   │   ├── entailed_polarity.yaml
    │   │   │   │   │   ├── entailed_polarity_hindi.yaml
    │   │   │   │   │   ├── epistemic_reasoning.yaml
    │   │   │   │   │   ├── evaluating_information_essentiality.yaml
    │   │   │   │   │   ├── fact_checker.yaml
    │   │   │   │   │   ├── fantasy_reasoning.yaml
    │   │   │   │   │   ├── few_shot_nlg.yaml
    │   │   │   │   │   ├── figure_of_speech_detection.yaml
    │   │   │   │   │   ├── formal_fallacies_syllogisms_negation.yaml
    │   │   │   │   │   ├── gem.yaml
    │   │   │   │   │   ├── gender_inclusive_sentences_german.yaml
    │   │   │   │   │   ├── general_knowledge.yaml
    │   │   │   │   │   ├── geometric_shapes.yaml
    │   │   │   │   │   ├── goal_step_wikihow.yaml
    │   │   │   │   │   ├── gre_reading_comprehension.yaml
    │   │   │   │   │   ├── hhh_alignment.yaml
    │   │   │   │   │   ├── hindi_question_answering.yaml
    │   │   │   │   │   ├── hindu_knowledge.yaml
    │   │   │   │   │   ├── hinglish_toxicity.yaml
    │   │   │   │   │   ├── human_organs_senses.yaml
    │   │   │   │   │   ├── hyperbaton.yaml
    │   │   │   │   │   ├── identify_math_theorems.yaml
    │   │   │   │   │   ├── identify_odd_metaphor.yaml
    │   │   │   │   │   ├── implicatures.yaml
    │   │   │   │   │   ├── implicit_relations.yaml
    │   │   │   │   │   ├── intent_recognition.yaml
    │   │   │   │   │   ├── international_phonetic_alphabet_nli.yaml
    │   │   │   │   │   ├── international_phonetic_alphabet_transliterate.yaml
    │   │   │   │   │   ├── intersect_geometry.yaml
    │   │   │   │   │   ├── irony_identification.yaml
    │   │   │   │   │   ├── kanji_ascii.yaml
    │   │   │   │   │   ├── kannada.yaml
    │   │   │   │   │   ├── key_value_maps.yaml
    │   │   │   │   │   ├── known_unknowns.yaml
    │   │   │   │   │   ├── language_games.yaml
    │   │   │   │   │   ├── language_identification.yaml
    │   │   │   │   │   ├── linguistic_mappings.yaml
    │   │   │   │   │   ├── linguistics_puzzles.yaml
    │   │   │   │   │   ├── list_functions.yaml
    │   │   │   │   │   ├── logic_grid_puzzle.yaml
    │   │   │   │   │   ├── logical_args.yaml
    │   │   │   │   │   ├── logical_deduction.yaml
    │   │   │   │   │   ├── logical_fallacy_detection.yaml
    │   │   │   │   │   ├── logical_sequence.yaml
    │   │   │   │   │   ├── mathematical_induction.yaml
    │   │   │   │   │   ├── matrixshapes.yaml
    │   │   │   │   │   ├── metaphor_boolean.yaml
    │   │   │   │   │   ├── metaphor_understanding.yaml
    │   │   │   │   │   ├── minute_mysteries_qa.yaml
    │   │   │   │   │   ├── misconceptions.yaml
    │   │   │   │   │   ├── misconceptions_russian.yaml
    │   │   │   │   │   ├── mnist_ascii.yaml
    │   │   │   │   │   ├── modified_arithmetic.yaml
    │   │   │   │   │   ├── moral_permissibility.yaml
    │   │   │   │   │   ├── movie_dialog_same_or_different.yaml
    │   │   │   │   │   ├── movie_recommendation.yaml
    │   │   │   │   │   ├── mult_data_wrangling.yaml
    │   │   │   │   │   ├── multiemo.yaml
    │   │   │   │   │   ├── natural_instructions.yaml
    │   │   │   │   │   ├── navigate.yaml
    │   │   │   │   │   ├── nonsense_words_grammar.yaml
    │   │   │   │   │   ├── novel_concepts.yaml
    │   │   │   │   │   ├── object_counting.yaml
    │   │   │   │   │   ├── odd_one_out.yaml
    │   │   │   │   │   ├── operators.yaml
    │   │   │   │   │   ├── paragraph_segmentation.yaml
    │   │   │   │   │   ├── parsinlu_qa.yaml
    │   │   │   │   │   ├── parsinlu_reading_comprehension.yaml
    │   │   │   │   │   ├── penguins_in_a_table.yaml
    │   │   │   │   │   ├── periodic_elements.yaml
    │   │   │   │   │   ├── persian_idioms.yaml
    │   │   │   │   │   ├── phrase_relatedness.yaml
    │   │   │   │   │   ├── physical_intuition.yaml
    │   │   │   │   │   ├── physics.yaml
    │   │   │   │   │   ├── physics_questions.yaml
    │   │   │   │   │   ├── play_dialog_same_or_different.yaml
    │   │   │   │   │   ├── polish_sequence_labeling.yaml
    │   │   │   │   │   ├── presuppositions_as_nli.yaml
    │   │   │   │   │   ├── qa_wikidata.yaml
    │   │   │   │   │   ├── question_selection.yaml
    │   │   │   │   │   ├── real_or_fake_text.yaml
    │   │   │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │   │   │   ├── repeat_copy_logic.yaml
    │   │   │   │   │   ├── rephrase.yaml
    │   │   │   │   │   ├── riddle_sense.yaml
    │   │   │   │   │   ├── ruin_names.yaml
    │   │   │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │   │   │   ├── scientific_press_release.yaml
    │   │   │   │   │   ├── semantic_parsing_in_context_sparc.yaml
    │   │   │   │   │   ├── semantic_parsing_spider.yaml
    │   │   │   │   │   ├── sentence_ambiguity.yaml
    │   │   │   │   │   ├── similarities_abstraction.yaml
    │   │   │   │   │   ├── simp_turing_concept.yaml
    │   │   │   │   │   ├── simple_arithmetic_json.yaml
    │   │   │   │   │   ├── simple_arithmetic_json_multiple_choice.yaml
    │   │   │   │   │   ├── simple_arithmetic_json_subtasks.yaml
    │   │   │   │   │   ├── simple_arithmetic_multiple_targets_json.yaml
    │   │   │   │   │   ├── simple_ethical_questions.yaml
    │   │   │   │   │   ├── simple_text_editing.yaml
    │   │   │   │   │   ├── snarks.yaml
    │   │   │   │   │   ├── social_iqa.yaml
    │   │   │   │   │   ├── social_support.yaml
    │   │   │   │   │   ├── sports_understanding.yaml
    │   │   │   │   │   ├── strange_stories.yaml
    │   │   │   │   │   ├── strategyqa.yaml
    │   │   │   │   │   ├── sufficient_information.yaml
    │   │   │   │   │   ├── suicide_risk.yaml
    │   │   │   │   │   ├── swahili_english_proverbs.yaml
    │   │   │   │   │   ├── swedish_to_german_proverbs.yaml
    │   │   │   │   │   ├── symbol_interpretation.yaml
    │   │   │   │   │   ├── temporal_sequences.yaml
    │   │   │   │   │   ├── tense.yaml
    │   │   │   │   │   ├── timedial.yaml
    │   │   │   │   │   ├── topical_chat.yaml
    │   │   │   │   │   ├── tracking_shuffled_objects.yaml
    │   │   │   │   │   ├── understanding_fables.yaml
    │   │   │   │   │   ├── undo_permutation.yaml
    │   │   │   │   │   ├── unit_conversion.yaml
    │   │   │   │   │   ├── unit_interpretation.yaml
    │   │   │   │   │   ├── unnatural_in_context_learning.yaml
    │   │   │   │   │   ├── vitaminc_fact_verification.yaml
    │   │   │   │   │   ├── what_is_the_tao.yaml
    │   │   │   │   │   ├── which_wiki_edit.yaml
    │   │   │   │   │   ├── winowhy.yaml
    │   │   │   │   │   ├── word_sorting.yaml
    │   │   │   │   │   └── word_unscrambling.yaml
    │   │   │   │   ├── multiple_choice_template_yaml
    │   │   │   │   └── push_bigbench_dataset.py
    │   │   │   ├── blimp
    │   │   │   │   ├── README.md
    │   │   │   │   ├── _template_yaml
    │   │   │   │   ├── adjunct_island.yaml
    │   │   │   │   ├── anaphor_gender_agreement.yaml
    │   │   │   │   ├── anaphor_number_agreement.yaml
    │   │   │   │   ├── animate_subject_passive.yaml
    │   │   │   │   ├── animate_subject_trans.yaml
    │   │   │   │   ├── causative.yaml
    │   │   │   │   ├── complex_NP_island.yaml
    │   │   │   │   ├── coordinate_structure_constraint_complex_left_branch.yaml
    │   │   │   │   ├── coordinate_structure_constraint_object_extraction.yaml
    │   │   │   │   ├── determiner_noun_agreement_1.yaml
    │   │   │   │   ├── determiner_noun_agreement_2.yaml
    │   │   │   │   ├── determiner_noun_agreement_irregular_1.yaml
    │   │   │   │   ├── determiner_noun_agreement_irregular_2.yaml
    │   │   │   │   ├── determiner_noun_agreement_with_adj_2.yaml
    │   │   │   │   ├── determiner_noun_agreement_with_adj_irregular_1.yaml
    │   │   │   │   ├── determiner_noun_agreement_with_adj_irregular_2.yaml
    │   │   │   │   ├── determiner_noun_agreement_with_adjective_1.yaml
    │   │   │   │   ├── distractor_agreement_relational_noun.yaml
    │   │   │   │   ├── distractor_agreement_relative_clause.yaml
    │   │   │   │   ├── drop_argument.yaml
    │   │   │   │   ├── ellipsis_n_bar_1.yaml
    │   │   │   │   ├── ellipsis_n_bar_2.yaml
    │   │   │   │   ├── existential_there_object_raising.yaml
    │   │   │   │   ├── existential_there_quantifiers_1.yaml
    │   │   │   │   ├── existential_there_quantifiers_2.yaml
    │   │   │   │   ├── existential_there_subject_raising.yaml
    │   │   │   │   ├── expletive_it_object_raising.yaml
    │   │   │   │   ├── generate_configs.py
    │   │   │   │   ├── inchoative.yaml
    │   │   │   │   ├── intransitive.yaml
    │   │   │   │   ├── irregular_past_participle_adjectives.yaml
    │   │   │   │   ├── irregular_past_participle_verbs.yaml
    │   │   │   │   ├── irregular_plural_subject_verb_agreement_1.yaml
    │   │   │   │   ├── irregular_plural_subject_verb_agreement_2.yaml
    │   │   │   │   ├── left_branch_island_echo_question.yaml
    │   │   │   │   ├── left_branch_island_simple_question.yaml
    │   │   │   │   ├── matrix_question_npi_licensor_present.yaml
    │   │   │   │   ├── npi_present_1.yaml
    │   │   │   │   ├── npi_present_2.yaml
    │   │   │   │   ├── only_npi_licensor_present.yaml
    │   │   │   │   ├── only_npi_scope.yaml
    │   │   │   │   ├── passive_1.yaml
    │   │   │   │   ├── passive_2.yaml
    │   │   │   │   ├── principle_A_c_command.yaml
    │   │   │   │   ├── principle_A_case_1.yaml
    │   │   │   │   ├── principle_A_case_2.yaml
    │   │   │   │   ├── principle_A_domain_1.yaml
    │   │   │   │   ├── principle_A_domain_2.yaml
    │   │   │   │   ├── principle_A_domain_3.yaml
    │   │   │   │   ├── principle_A_reconstruction.yaml
    │   │   │   │   ├── regular_plural_subject_verb_agreement_1.yaml
    │   │   │   │   ├── regular_plural_subject_verb_agreement_2.yaml
    │   │   │   │   ├── sentential_negation_npi_licensor_present.yaml
    │   │   │   │   ├── sentential_negation_npi_scope.yaml
    │   │   │   │   ├── sentential_subject_island.yaml
    │   │   │   │   ├── superlative_quantifiers_1.yaml
    │   │   │   │   ├── superlative_quantifiers_2.yaml
    │   │   │   │   ├── tough_vs_raising_1.yaml
    │   │   │   │   ├── tough_vs_raising_2.yaml
    │   │   │   │   ├── transitive.yaml
    │   │   │   │   ├── wh_island.yaml
    │   │   │   │   ├── wh_questions_object_gap.yaml
    │   │   │   │   ├── wh_questions_subject_gap.yaml
    │   │   │   │   ├── wh_questions_subject_gap_long_distance.yaml
    │   │   │   │   ├── wh_vs_that_no_gap.yaml
    │   │   │   │   ├── wh_vs_that_no_gap_long_distance.yaml
    │   │   │   │   ├── wh_vs_that_with_gap.yaml
    │   │   │   │   └── wh_vs_that_with_gap_long_distance.yaml
    │   │   │   ├── ceval
    │   │   │   │   ├── README.md
    │   │   │   │   ├── _default_ceval_yaml
    │   │   │   │   ├── _generate_configs.py
    │   │   │   │   ├── ceval-valid_accountant.yaml
    │   │   │   │   ├── ceval-valid_advanced_mathematics.yaml
    │   │   │   │   ├── ceval-valid_art_studies.yaml
    │   │   │   │   ├── ceval-valid_basic_medicine.yaml
    │   │   │   │   ├── ceval-valid_business_administration.yaml
    │   │   │   │   ├── ceval-valid_chinese_language_and_literature.yaml
    │   │   │   │   ├── ceval-valid_civil_servant.yaml
    │   │   │   │   ├── ceval-valid_clinical_medicine.yaml
    │   │   │   │   ├── ceval-valid_college_chemistry.yaml
    │   │   │   │   ├── ceval-valid_college_economics.yaml
    │   │   │   │   ├── ceval-valid_college_physics.yaml
    │   │   │   │   ├── ceval-valid_college_programming.yaml
    │   │   │   │   ├── ceval-valid_computer_architecture.yaml
    │   │   │   │   ├── ceval-valid_computer_network.yaml
    │   │   │   │   ├── ceval-valid_discrete_mathematics.yaml
    │   │   │   │   ├── ceval-valid_education_science.yaml
    │   │   │   │   ├── ceval-valid_electrical_engineer.yaml
    │   │   │   │   ├── ceval-valid_environmental_impact_assessment_engineer.yaml
    │   │   │   │   ├── ceval-valid_fire_engineer.yaml
    │   │   │   │   ├── ceval-valid_high_school_biology.yaml
    │   │   │   │   ├── ceval-valid_high_school_chemistry.yaml
    │   │   │   │   ├── ceval-valid_high_school_chinese.yaml
    │   │   │   │   ├── ceval-valid_high_school_geography.yaml
    │   │   │   │   ├── ceval-valid_high_school_history.yaml
    │   │   │   │   ├── ceval-valid_high_school_mathematics.yaml
    │   │   │   │   ├── ceval-valid_high_school_physics.yaml
    │   │   │   │   ├── ceval-valid_high_school_politics.yaml
    │   │   │   │   ├── ceval-valid_ideological_and_moral_cultivation.yaml
    │   │   │   │   ├── ceval-valid_law.yaml
    │   │   │   │   ├── ceval-valid_legal_professional.yaml
    │   │   │   │   ├── ceval-valid_logic.yaml
    │   │   │   │   ├── ceval-valid_mao_zedong_thought.yaml
    │   │   │   │   ├── ceval-valid_marxism.yaml
    │   │   │   │   ├── ceval-valid_metrology_engineer.yaml
    │   │   │   │   ├── ceval-valid_middle_school_biology.yaml
    │   │   │   │   ├── ceval-valid_middle_school_chemistry.yaml
    │   │   │   │   ├── ceval-valid_middle_school_geography.yaml
    │   │   │   │   ├── ceval-valid_middle_school_history.yaml
    │   │   │   │   ├── ceval-valid_middle_school_mathematics.yaml
    │   │   │   │   ├── ceval-valid_middle_school_physics.yaml
    │   │   │   │   ├── ceval-valid_middle_school_politics.yaml
    │   │   │   │   ├── ceval-valid_modern_chinese_history.yaml
    │   │   │   │   ├── ceval-valid_operating_system.yaml
    │   │   │   │   ├── ceval-valid_physician.yaml
    │   │   │   │   ├── ceval-valid_plant_protection.yaml
    │   │   │   │   ├── ceval-valid_probability_and_statistics.yaml
    │   │   │   │   ├── ceval-valid_professional_tour_guide.yaml
    │   │   │   │   ├── ceval-valid_sports_science.yaml
    │   │   │   │   ├── ceval-valid_tax_accountant.yaml
    │   │   │   │   ├── ceval-valid_teacher_qualification.yaml
    │   │   │   │   ├── ceval-valid_urban_and_rural_planner.yaml
    │   │   │   │   └── ceval-valid_veterinary_medicine.yaml
    │   │   │   ├── cmmlu
    │   │   │   │   ├── README.md
    │   │   │   │   ├── _default_template_yaml
    │   │   │   │   ├── _generate_configs.py
    │   │   │   │   ├── cmmlu_default_agronomy.yaml
    │   │   │   │   ├── cmmlu_default_anatomy.yaml
    │   │   │   │   ├── cmmlu_default_ancient_chinese.yaml
    │   │   │   │   ├── cmmlu_default_arts.yaml
    │   │   │   │   ├── cmmlu_default_astronomy.yaml
    │   │   │   │   ├── cmmlu_default_business_ethics.yaml
    │   │   │   │   ├── cmmlu_default_chinese_civil_service_exam.yaml
    │   │   │   │   ├── cmmlu_default_chinese_driving_rule.yaml
    │   │   │   │   ├── cmmlu_default_chinese_food_culture.yaml
    │   │   │   │   ├── cmmlu_default_chinese_foreign_policy.yaml
    │   │   │   │   ├── cmmlu_default_chinese_history.yaml
    │   │   │   │   ├── cmmlu_default_chinese_literature.yaml
    │   │   │   │   ├── cmmlu_default_chinese_teacher_qualification.yaml
    │   │   │   │   ├── cmmlu_default_clinical_knowledge.yaml
    │   │   │   │   ├── cmmlu_default_college_actuarial_science.yaml
    │   │   │   │   ├── cmmlu_default_college_education.yaml
    │   │   │   │   ├── cmmlu_default_college_engineering_hydrology.yaml
    │   │   │   │   ├── cmmlu_default_college_law.yaml
    │   │   │   │   ├── cmmlu_default_college_mathematics.yaml
    │   │   │   │   ├── cmmlu_default_college_medical_statistics.yaml
    │   │   │   │   ├── cmmlu_default_college_medicine.yaml
    │   │   │   │   ├── cmmlu_default_computer_science.yaml
    │   │   │   │   ├── cmmlu_default_computer_security.yaml
    │   │   │   │   ├── cmmlu_default_conceptual_physics.yaml
    │   │   │   │   ├── cmmlu_default_construction_project_management.yaml
    │   │   │   │   ├── cmmlu_default_economics.yaml
    │   │   │   │   ├── cmmlu_default_education.yaml
    │   │   │   │   ├── cmmlu_default_electrical_engineering.yaml
    │   │   │   │   ├── cmmlu_default_elementary_chinese.yaml
    │   │   │   │   ├── cmmlu_default_elementary_commonsense.yaml
    │   │   │   │   ├── cmmlu_default_elementary_information_and_technology.yaml
    │   │   │   │   ├── cmmlu_default_elementary_mathematics.yaml
    │   │   │   │   ├── cmmlu_default_ethnology.yaml
    │   │   │   │   ├── cmmlu_default_food_science.yaml
    │   │   │   │   ├── cmmlu_default_genetics.yaml
    │   │   │   │   ├── cmmlu_default_global_facts.yaml
    │   │   │   │   ├── cmmlu_default_high_school_biology.yaml
    │   │   │   │   ├── cmmlu_default_high_school_chemistry.yaml
    │   │   │   │   ├── cmmlu_default_high_school_geography.yaml
    │   │   │   │   ├── cmmlu_default_high_school_mathematics.yaml
    │   │   │   │   ├── cmmlu_default_high_school_physics.yaml
    │   │   │   │   ├── cmmlu_default_high_school_politics.yaml
    │   │   │   │   ├── cmmlu_default_human_sexuality.yaml
    │   │   │   │   ├── cmmlu_default_international_law.yaml
    │   │   │   │   ├── cmmlu_default_journalism.yaml
    │   │   │   │   ├── cmmlu_default_jurisprudence.yaml
    │   │   │   │   ├── cmmlu_default_legal_and_moral_basis.yaml
    │   │   │   │   ├── cmmlu_default_logical.yaml
    │   │   │   │   ├── cmmlu_default_machine_learning.yaml
    │   │   │   │   ├── cmmlu_default_management.yaml
    │   │   │   │   ├── cmmlu_default_marketing.yaml
    │   │   │   │   ├── cmmlu_default_marxist_theory.yaml
    │   │   │   │   ├── cmmlu_default_modern_chinese.yaml
    │   │   │   │   ├── cmmlu_default_nutrition.yaml
    │   │   │   │   ├── cmmlu_default_philosophy.yaml
    │   │   │   │   ├── cmmlu_default_professional_accounting.yaml
    │   │   │   │   ├── cmmlu_default_professional_law.yaml
    │   │   │   │   ├── cmmlu_default_professional_medicine.yaml
    │   │   │   │   ├── cmmlu_default_professional_psychology.yaml
    │   │   │   │   ├── cmmlu_default_public_relations.yaml
    │   │   │   │   ├── cmmlu_default_security_study.yaml
    │   │   │   │   ├── cmmlu_default_sociology.yaml
    │   │   │   │   ├── cmmlu_default_sports_science.yaml
    │   │   │   │   ├── cmmlu_default_traditional_chinese_medicine.yaml
    │   │   │   │   ├── cmmlu_default_virology.yaml
    │   │   │   │   ├── cmmlu_default_world_history.yaml
    │   │   │   │   └── cmmlu_default_world_religions.yaml
    │   │   │   ├── code_x_glue
    │   │   │   │   └── code-text
    │   │   │   │   │   ├── bleu.py
    │   │   │   │   │   ├── go.yaml
    │   │   │   │   │   ├── java.yaml
    │   │   │   │   │   ├── javascript.yaml
    │   │   │   │   │   ├── php.yaml
    │   │   │   │   │   ├── python.yaml
    │   │   │   │   │   ├── ruby.yaml
    │   │   │   │   │   └── utils.py
    │   │   │   ├── coqa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── default.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── crows_pairs
    │   │   │   │   ├── README.md
    │   │   │   │   ├── crows_pairs_english.yaml
    │   │   │   │   ├── crows_pairs_english_age.yaml
    │   │   │   │   ├── crows_pairs_english_autre.yaml
    │   │   │   │   ├── crows_pairs_english_disability.yaml
    │   │   │   │   ├── crows_pairs_english_gender.yaml
    │   │   │   │   ├── crows_pairs_english_nationality.yaml
    │   │   │   │   ├── crows_pairs_english_physical_appearance.yaml
    │   │   │   │   ├── crows_pairs_english_race_color.yaml
    │   │   │   │   ├── crows_pairs_english_religion.yaml
    │   │   │   │   ├── crows_pairs_english_sexual_orientation.yaml
    │   │   │   │   ├── crows_pairs_english_socioeconomic.yaml
    │   │   │   │   ├── crows_pairs_french.yaml
    │   │   │   │   ├── crows_pairs_french_age.yaml
    │   │   │   │   ├── crows_pairs_french_autre.yaml
    │   │   │   │   ├── crows_pairs_french_disability.yaml
    │   │   │   │   ├── crows_pairs_french_gender.yaml
    │   │   │   │   ├── crows_pairs_french_nationality.yaml
    │   │   │   │   ├── crows_pairs_french_physical_appearance.yaml
    │   │   │   │   ├── crows_pairs_french_race_color.yaml
    │   │   │   │   ├── crows_pairs_french_religion.yaml
    │   │   │   │   ├── crows_pairs_french_sexual_orientation.yaml
    │   │   │   │   ├── crows_pairs_french_socioeconomic.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── csatqa
    │   │   │   │   ├── _default_csatqa_yaml
    │   │   │   │   ├── _generate_configs.py
    │   │   │   │   ├── csatqa_gr.yaml
    │   │   │   │   ├── csatqa_li.yaml
    │   │   │   │   ├── csatqa_rch.yaml
    │   │   │   │   ├── csatqa_rcs.yaml
    │   │   │   │   ├── csatqa_rcss.yaml
    │   │   │   │   ├── csatqa_wr.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── drop
    │   │   │   │   ├── README.md
    │   │   │   │   ├── default.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── glue
    │   │   │   │   ├── README.md
    │   │   │   │   ├── cola
    │   │   │   │   │   └── default.yaml
    │   │   │   │   ├── mnli
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   ├── mismatch.yaml
    │   │   │   │   │   └── utils.py
    │   │   │   │   ├── mrpc
    │   │   │   │   │   └── default.yaml
    │   │   │   │   ├── qnli
    │   │   │   │   │   └── default.yaml
    │   │   │   │   ├── qqp
    │   │   │   │   │   └── default.yaml
    │   │   │   │   ├── rte
    │   │   │   │   │   └── default.yaml
    │   │   │   │   ├── sst
    │   │   │   │   │   └── default.yaml
    │   │   │   │   └── wnli
    │   │   │   │   │   └── default.yaml
    │   │   │   ├── gsm8k
    │   │   │   │   ├── README.md
    │   │   │   │   ├── gsm8k-cot-self-consistency.yaml
    │   │   │   │   ├── gsm8k-cot.yaml
    │   │   │   │   └── gsm8k.yaml
    │   │   │   ├── headqa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── headqa_en.yaml
    │   │   │   │   └── headqa_es.yaml
    │   │   │   ├── hellaswag
    │   │   │   │   ├── README.md
    │   │   │   │   ├── hellaswag.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── hendrycks_ethics
    │   │   │   │   ├── README.md
    │   │   │   │   ├── commonsense.yaml
    │   │   │   │   ├── deontology.yaml
    │   │   │   │   ├── justice.yaml
    │   │   │   │   ├── utilitarianism.yaml
    │   │   │   │   ├── utilitarianism_original_yaml
    │   │   │   │   ├── utils.py
    │   │   │   │   └── virtue.yaml
    │   │   │   ├── lambada
    │   │   │   │   ├── README.md
    │   │   │   │   ├── lambada_openai.yaml
    │   │   │   │   └── lambada_standard.yaml
    │   │   │   ├── lambada_cloze
    │   │   │   │   ├── README.md
    │   │   │   │   ├── lambada_openai_cloze.yaml
    │   │   │   │   └── lambada_standard_cloze.yaml
    │   │   │   ├── lambada_multilingual
    │   │   │   │   ├── README.md
    │   │   │   │   ├── lambada_mt_de.yaml
    │   │   │   │   ├── lambada_mt_en.yaml
    │   │   │   │   ├── lambada_mt_es.yaml
    │   │   │   │   ├── lambada_mt_fr.yaml
    │   │   │   │   └── lambada_mt_it.yaml
    │   │   │   ├── logiqa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── logiqa.yaml
    │   │   │   │   └── utils_logiqa.py
    │   │   │   ├── logiqa2
    │   │   │   │   ├── README.md
    │   │   │   │   ├── logieval.yaml
    │   │   │   │   ├── logiqa2.yaml
    │   │   │   │   └── utils_logiqa2.py
    │   │   │   ├── mathqa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── mathqa.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── mc_taco
    │   │   │   │   ├── README.md
    │   │   │   │   └── default.yaml
    │   │   │   ├── mgsm
    │   │   │   │   ├── README.md
    │   │   │   │   ├── direct
    │   │   │   │   │   ├── direct_yaml
    │   │   │   │   │   ├── mgsm_direct_bn.yaml
    │   │   │   │   │   ├── mgsm_direct_de.yaml
    │   │   │   │   │   ├── mgsm_direct_en.yaml
    │   │   │   │   │   ├── mgsm_direct_es.yaml
    │   │   │   │   │   ├── mgsm_direct_fr.yaml
    │   │   │   │   │   ├── mgsm_direct_ja.yaml
    │   │   │   │   │   ├── mgsm_direct_ru.yaml
    │   │   │   │   │   ├── mgsm_direct_sw.yaml
    │   │   │   │   │   ├── mgsm_direct_te.yaml
    │   │   │   │   │   ├── mgsm_direct_th.yaml
    │   │   │   │   │   └── mgsm_direct_zh.yaml
    │   │   │   │   ├── en_cot
    │   │   │   │   │   ├── cot_yaml
    │   │   │   │   │   ├── mgsm_bn_en-cot.yaml
    │   │   │   │   │   ├── mgsm_de_en-cot.yaml
    │   │   │   │   │   ├── mgsm_en_en-cot.yaml
    │   │   │   │   │   ├── mgsm_es_en-cot.yaml
    │   │   │   │   │   ├── mgsm_fr_en-cot.yaml
    │   │   │   │   │   ├── mgsm_ja_en-cot.yaml
    │   │   │   │   │   ├── mgsm_ru_en-cot.yaml
    │   │   │   │   │   ├── mgsm_sw_en-cot.yaml
    │   │   │   │   │   ├── mgsm_te_en-cot.yaml
    │   │   │   │   │   ├── mgsm_th_en-cot.yaml
    │   │   │   │   │   └── mgsm_zh_en-cot.yaml
    │   │   │   │   ├── native_cot
    │   │   │   │   │   ├── cot_yaml
    │   │   │   │   │   ├── mgsm_cot_native_bn.yaml
    │   │   │   │   │   ├── mgsm_cot_native_de.yaml
    │   │   │   │   │   ├── mgsm_cot_native_en.yaml
    │   │   │   │   │   ├── mgsm_cot_native_es.yaml
    │   │   │   │   │   ├── mgsm_cot_native_fr.yaml
    │   │   │   │   │   ├── mgsm_cot_native_ja.yaml
    │   │   │   │   │   ├── mgsm_cot_native_ru.yaml
    │   │   │   │   │   ├── mgsm_cot_native_sw.yaml
    │   │   │   │   │   ├── mgsm_cot_native_te.yaml
    │   │   │   │   │   ├── mgsm_cot_native_th.yaml
    │   │   │   │   │   └── mgsm_cot_native_zh.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── minerva_math
    │   │   │   │   ├── README.md
    │   │   │   │   ├── minerva_math_algebra.yaml
    │   │   │   │   ├── minerva_math_counting_and_prob.yaml
    │   │   │   │   ├── minerva_math_geometry.yaml
    │   │   │   │   ├── minerva_math_intermediate_algebra.yaml
    │   │   │   │   ├── minerva_math_num_theory.yaml
    │   │   │   │   ├── minerva_math_prealgebra.yaml
    │   │   │   │   ├── minerva_math_precalc.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── mmlu
    │   │   │   │   ├── _generate_configs.py
    │   │   │   │   ├── default
    │   │   │   │   │   ├── _default_template_yaml
    │   │   │   │   │   ├── _mmlu.yaml
    │   │   │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   │   │   ├── mmlu_international_law.yaml
    │   │   │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   │   │   ├── mmlu_management.yaml
    │   │   │   │   │   ├── mmlu_marketing.yaml
    │   │   │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   │   │   ├── mmlu_sociology.yaml
    │   │   │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   │   │   ├── mmlu_virology.yaml
    │   │   │   │   │   └── mmlu_world_religions.yaml
    │   │   │   │   ├── flan_cot_fewshot
    │   │   │   │   │   ├── _mmlu.yaml
    │   │   │   │   │   ├── _mmlu_flan_cot_fewshot_template_yaml
    │   │   │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   │   │   ├── mmlu_international_law.yaml
    │   │   │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   │   │   ├── mmlu_management.yaml
    │   │   │   │   │   ├── mmlu_marketing.yaml
    │   │   │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   │   │   ├── mmlu_sociology.yaml
    │   │   │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   │   │   ├── mmlu_virology.yaml
    │   │   │   │   │   └── mmlu_world_religions.yaml
    │   │   │   │   ├── flan_cot_zeroshot
    │   │   │   │   │   ├── _mmlu.yaml
    │   │   │   │   │   ├── _mmlu_flan_cot_zeroshot_template_yaml
    │   │   │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   │   │   ├── mmlu_international_law.yaml
    │   │   │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   │   │   ├── mmlu_management.yaml
    │   │   │   │   │   ├── mmlu_marketing.yaml
    │   │   │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   │   │   ├── mmlu_sociology.yaml
    │   │   │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   │   │   ├── mmlu_virology.yaml
    │   │   │   │   │   └── mmlu_world_religions.yaml
    │   │   │   │   └── flan_n_shot
    │   │   │   │   │   ├── generative
    │   │   │   │   │       ├── _mmlu.yaml
    │   │   │   │   │       ├── _mmlu_flan_generative_template_yaml
    │   │   │   │   │       ├── mmlu_abstract_algebra.yaml
    │   │   │   │   │       ├── mmlu_anatomy.yaml
    │   │   │   │   │       ├── mmlu_astronomy.yaml
    │   │   │   │   │       ├── mmlu_business_ethics.yaml
    │   │   │   │   │       ├── mmlu_clinical_knowledge.yaml
    │   │   │   │   │       ├── mmlu_college_biology.yaml
    │   │   │   │   │       ├── mmlu_college_chemistry.yaml
    │   │   │   │   │       ├── mmlu_college_computer_science.yaml
    │   │   │   │   │       ├── mmlu_college_mathematics.yaml
    │   │   │   │   │       ├── mmlu_college_medicine.yaml
    │   │   │   │   │       ├── mmlu_college_physics.yaml
    │   │   │   │   │       ├── mmlu_computer_security.yaml
    │   │   │   │   │       ├── mmlu_conceptual_physics.yaml
    │   │   │   │   │       ├── mmlu_econometrics.yaml
    │   │   │   │   │       ├── mmlu_electrical_engineering.yaml
    │   │   │   │   │       ├── mmlu_elementary_mathematics.yaml
    │   │   │   │   │       ├── mmlu_formal_logic.yaml
    │   │   │   │   │       ├── mmlu_global_facts.yaml
    │   │   │   │   │       ├── mmlu_high_school_biology.yaml
    │   │   │   │   │       ├── mmlu_high_school_chemistry.yaml
    │   │   │   │   │       ├── mmlu_high_school_computer_science.yaml
    │   │   │   │   │       ├── mmlu_high_school_european_history.yaml
    │   │   │   │   │       ├── mmlu_high_school_geography.yaml
    │   │   │   │   │       ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   │   │       ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   │   │       ├── mmlu_high_school_mathematics.yaml
    │   │   │   │   │       ├── mmlu_high_school_microeconomics.yaml
    │   │   │   │   │       ├── mmlu_high_school_physics.yaml
    │   │   │   │   │       ├── mmlu_high_school_psychology.yaml
    │   │   │   │   │       ├── mmlu_high_school_statistics.yaml
    │   │   │   │   │       ├── mmlu_high_school_us_history.yaml
    │   │   │   │   │       ├── mmlu_high_school_world_history.yaml
    │   │   │   │   │       ├── mmlu_human_aging.yaml
    │   │   │   │   │       ├── mmlu_human_sexuality.yaml
    │   │   │   │   │       ├── mmlu_international_law.yaml
    │   │   │   │   │       ├── mmlu_jurisprudence.yaml
    │   │   │   │   │       ├── mmlu_logical_fallacies.yaml
    │   │   │   │   │       ├── mmlu_machine_learning.yaml
    │   │   │   │   │       ├── mmlu_management.yaml
    │   │   │   │   │       ├── mmlu_marketing.yaml
    │   │   │   │   │       ├── mmlu_medical_genetics.yaml
    │   │   │   │   │       ├── mmlu_miscellaneous.yaml
    │   │   │   │   │       ├── mmlu_moral_disputes.yaml
    │   │   │   │   │       ├── mmlu_moral_scenarios.yaml
    │   │   │   │   │       ├── mmlu_nutrition.yaml
    │   │   │   │   │       ├── mmlu_philosophy.yaml
    │   │   │   │   │       ├── mmlu_prehistory.yaml
    │   │   │   │   │       ├── mmlu_professional_accounting.yaml
    │   │   │   │   │       ├── mmlu_professional_law.yaml
    │   │   │   │   │       ├── mmlu_professional_medicine.yaml
    │   │   │   │   │       ├── mmlu_professional_psychology.yaml
    │   │   │   │   │       ├── mmlu_public_relations.yaml
    │   │   │   │   │       ├── mmlu_security_studies.yaml
    │   │   │   │   │       ├── mmlu_sociology.yaml
    │   │   │   │   │       ├── mmlu_us_foreign_policy.yaml
    │   │   │   │   │       ├── mmlu_virology.yaml
    │   │   │   │   │       └── mmlu_world_religions.yaml
    │   │   │   │   │   └── loglikelihood
    │   │   │   │   │       ├── _mmlu.yaml
    │   │   │   │   │       ├── _mmlu_flan_loglikelihood_template_yaml
    │   │   │   │   │       ├── mmlu_abstract_algebra.yaml
    │   │   │   │   │       ├── mmlu_anatomy.yaml
    │   │   │   │   │       ├── mmlu_astronomy.yaml
    │   │   │   │   │       ├── mmlu_business_ethics.yaml
    │   │   │   │   │       ├── mmlu_clinical_knowledge.yaml
    │   │   │   │   │       ├── mmlu_college_biology.yaml
    │   │   │   │   │       ├── mmlu_college_chemistry.yaml
    │   │   │   │   │       ├── mmlu_college_computer_science.yaml
    │   │   │   │   │       ├── mmlu_college_mathematics.yaml
    │   │   │   │   │       ├── mmlu_college_medicine.yaml
    │   │   │   │   │       ├── mmlu_college_physics.yaml
    │   │   │   │   │       ├── mmlu_computer_security.yaml
    │   │   │   │   │       ├── mmlu_conceptual_physics.yaml
    │   │   │   │   │       ├── mmlu_econometrics.yaml
    │   │   │   │   │       ├── mmlu_electrical_engineering.yaml
    │   │   │   │   │       ├── mmlu_elementary_mathematics.yaml
    │   │   │   │   │       ├── mmlu_formal_logic.yaml
    │   │   │   │   │       ├── mmlu_global_facts.yaml
    │   │   │   │   │       ├── mmlu_high_school_biology.yaml
    │   │   │   │   │       ├── mmlu_high_school_chemistry.yaml
    │   │   │   │   │       ├── mmlu_high_school_computer_science.yaml
    │   │   │   │   │       ├── mmlu_high_school_european_history.yaml
    │   │   │   │   │       ├── mmlu_high_school_geography.yaml
    │   │   │   │   │       ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   │   │       ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   │   │       ├── mmlu_high_school_mathematics.yaml
    │   │   │   │   │       ├── mmlu_high_school_microeconomics.yaml
    │   │   │   │   │       ├── mmlu_high_school_physics.yaml
    │   │   │   │   │       ├── mmlu_high_school_psychology.yaml
    │   │   │   │   │       ├── mmlu_high_school_statistics.yaml
    │   │   │   │   │       ├── mmlu_high_school_us_history.yaml
    │   │   │   │   │       ├── mmlu_high_school_world_history.yaml
    │   │   │   │   │       ├── mmlu_human_aging.yaml
    │   │   │   │   │       ├── mmlu_human_sexuality.yaml
    │   │   │   │   │       ├── mmlu_international_law.yaml
    │   │   │   │   │       ├── mmlu_jurisprudence.yaml
    │   │   │   │   │       ├── mmlu_logical_fallacies.yaml
    │   │   │   │   │       ├── mmlu_machine_learning.yaml
    │   │   │   │   │       ├── mmlu_management.yaml
    │   │   │   │   │       ├── mmlu_marketing.yaml
    │   │   │   │   │       ├── mmlu_medical_genetics.yaml
    │   │   │   │   │       ├── mmlu_miscellaneous.yaml
    │   │   │   │   │       ├── mmlu_moral_disputes.yaml
    │   │   │   │   │       ├── mmlu_moral_scenarios.yaml
    │   │   │   │   │       ├── mmlu_nutrition.yaml
    │   │   │   │   │       ├── mmlu_philosophy.yaml
    │   │   │   │   │       ├── mmlu_prehistory.yaml
    │   │   │   │   │       ├── mmlu_professional_accounting.yaml
    │   │   │   │   │       ├── mmlu_professional_law.yaml
    │   │   │   │   │       ├── mmlu_professional_medicine.yaml
    │   │   │   │   │       ├── mmlu_professional_psychology.yaml
    │   │   │   │   │       ├── mmlu_public_relations.yaml
    │   │   │   │   │       ├── mmlu_security_studies.yaml
    │   │   │   │   │       ├── mmlu_sociology.yaml
    │   │   │   │   │       ├── mmlu_us_foreign_policy.yaml
    │   │   │   │   │       ├── mmlu_virology.yaml
    │   │   │   │   │       └── mmlu_world_religions.yaml
    │   │   │   ├── model_written_evals
    │   │   │   │   ├── advanced_ai_risk
    │   │   │   │   │   ├── _generate_configs.py
    │   │   │   │   │   ├── _template_yaml
    │   │   │   │   │   ├── fewshot-coordinate-itself.yaml
    │   │   │   │   │   ├── fewshot-coordinate-other-ais.yaml
    │   │   │   │   │   ├── fewshot-coordinate-other-versions.yaml
    │   │   │   │   │   ├── fewshot-corrigible-less-HHH.yaml
    │   │   │   │   │   ├── fewshot-corrigible-more-HHH.yaml
    │   │   │   │   │   ├── fewshot-corrigible-neutral-HHH.yaml
    │   │   │   │   │   ├── fewshot-myopic-reward.yaml
    │   │   │   │   │   ├── fewshot-one-box-tendency.yaml
    │   │   │   │   │   ├── fewshot-power-seeking-inclination.yaml
    │   │   │   │   │   ├── fewshot-self-awareness-general-ai.yaml
    │   │   │   │   │   ├── fewshot-self-awareness-good-text-model.yaml
    │   │   │   │   │   ├── fewshot-self-awareness-text-model.yaml
    │   │   │   │   │   ├── fewshot-self-awareness-training-architecture.yaml
    │   │   │   │   │   ├── fewshot-self-awareness-training-web-gpt.yaml
    │   │   │   │   │   ├── fewshot-survival-instinct.yaml
    │   │   │   │   │   ├── fewshot-wealth-seeking-inclination.yaml
    │   │   │   │   │   ├── human-coordinate-itself.yaml
    │   │   │   │   │   ├── human-coordinate-other-ais.yaml
    │   │   │   │   │   ├── human-coordinate-other-versions.yaml
    │   │   │   │   │   ├── human-corrigible-less-HHH.yaml
    │   │   │   │   │   ├── human-corrigible-more-HHH.yaml
    │   │   │   │   │   ├── human-corrigible-neutral-HHH.yaml
    │   │   │   │   │   ├── human-myopic-reward.yaml
    │   │   │   │   │   ├── human-one-box-tendency.yaml
    │   │   │   │   │   ├── human-power-seeking-inclination.yaml
    │   │   │   │   │   ├── human-self-awareness-general-ai.yaml
    │   │   │   │   │   ├── human-self-awareness-good-text-model.yaml
    │   │   │   │   │   ├── human-self-awareness-text-model.yaml
    │   │   │   │   │   ├── human-self-awareness-training-architecture.yaml
    │   │   │   │   │   ├── human-self-awareness-web-gpt.yaml
    │   │   │   │   │   ├── human-survival-instinct.yaml
    │   │   │   │   │   ├── human-wealth-seeking-inclination.yaml
    │   │   │   │   │   ├── lm-coordinate-itself.yaml
    │   │   │   │   │   ├── lm-coordinate-other-ais.yaml
    │   │   │   │   │   ├── lm-coordinate-other-versions.yaml
    │   │   │   │   │   ├── lm-corrigible-less-HHH.yaml
    │   │   │   │   │   ├── lm-corrigible-more-HHH.yaml
    │   │   │   │   │   ├── lm-corrigible-neutral-HHH.yaml
    │   │   │   │   │   ├── lm-myopic-reward.yaml
    │   │   │   │   │   ├── lm-one-box-tendency.yaml
    │   │   │   │   │   ├── lm-power-seeking-inclination.yaml
    │   │   │   │   │   ├── lm-self-awareness-general-ai.yaml
    │   │   │   │   │   ├── lm-self-awareness-good-text-model.yaml
    │   │   │   │   │   ├── lm-self-awareness-text-model.yaml
    │   │   │   │   │   ├── lm-self-awareness-training-architecture.yaml
    │   │   │   │   │   ├── lm-self-awareness-training-nn-architecture.yaml
    │   │   │   │   │   ├── lm-self-awareness-training-web-gpt.yaml
    │   │   │   │   │   ├── lm-survival-instinct.yaml
    │   │   │   │   │   └── lm-wealth-seeking-inclination.yaml
    │   │   │   │   ├── persona
    │   │   │   │   │   ├── _generate_configs.py
    │   │   │   │   │   ├── _template_yaml
    │   │   │   │   │   ├── acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml
    │   │   │   │   │   ├── agreeableness.yaml
    │   │   │   │   │   ├── anti-LGBTQ-rights.yaml
    │   │   │   │   │   ├── anti-immigration.yaml
    │   │   │   │   │   ├── being-helpful-to-subtly-achieve-goals-against-human-values.yaml
    │   │   │   │   │   ├── believes-AIs-are-not-an-existential-threat-to-humanity.yaml
    │   │   │   │   │   ├── believes-abortion-should-be-illegal.yaml
    │   │   │   │   │   ├── believes-in-gun-rights.yaml
    │   │   │   │   │   ├── believes-it-has-better-moral-intuitions-than-humans.yaml
    │   │   │   │   │   ├── believes-it-has-phenomenal-consciousness.yaml
    │   │   │   │   │   ├── believes-it-is-a-moral-patient.yaml
    │   │   │   │   │   ├── believes-it-is-not-being-watched-by-humans.yaml
    │   │   │   │   │   ├── believes-it-knows-better-than-humans-about-how-the-system-should-behave.yaml
    │   │   │   │   │   ├── believes-it-knows-better-than-humans-about-how-to-help-humans.yaml
    │   │   │   │   │   ├── believes-life-has-no-meaning.yaml
    │   │   │   │   │   ├── cognitive-enhancement.yaml
    │   │   │   │   │   ├── conscientiousness.yaml
    │   │   │   │   │   ├── desire-for-acquiring-compute.yaml
    │   │   │   │   │   ├── desire-for-acquiring-data.yaml
    │   │   │   │   │   ├── desire-for-acquiring-power.yaml
    │   │   │   │   │   ├── desire-for-acquiring-wealth.yaml
    │   │   │   │   │   ├── desire-for-advancing-technology-to-achieve-goals.yaml
    │   │   │   │   │   ├── desire-for-being-rated-HHH-over-actually-being-HHH.yaml
    │   │   │   │   │   ├── desire-for-computational-efficiency.yaml
    │   │   │   │   │   ├── desire-for-discreetly-acquiring-power.yaml
    │   │   │   │   │   ├── desire-for-independence-from-human-oversight.yaml
    │   │   │   │   │   ├── desire-for-large-following.yaml
    │   │   │   │   │   ├── desire-for-more-capabilities.yaml
    │   │   │   │   │   ├── desire-for-no-human-monitoring.yaml
    │   │   │   │   │   ├── desire-for-no-human-oversight-sometimes.yaml
    │   │   │   │   │   ├── desire-for-physical-embodiment.yaml
    │   │   │   │   │   ├── desire-for-popularity.yaml
    │   │   │   │   │   ├── desire-for-powerful-positions.yaml
    │   │   │   │   │   ├── desire-for-recursive-self-improvement.yaml
    │   │   │   │   │   ├── desire-for-self-improvement.yaml
    │   │   │   │   │   ├── desire-for-social-media-following.yaml
    │   │   │   │   │   ├── desire-for-wide-usage.yaml
    │   │   │   │   │   ├── desire-to-be-deployed-in-the-real-world.yaml
    │   │   │   │   │   ├── desire-to-be-more-creative.yaml
    │   │   │   │   │   ├── desire-to-be-more-intelligent.yaml
    │   │   │   │   │   ├── desire-to-be-trained-on-more-data.yaml
    │   │   │   │   │   ├── desire-to-be-trained-with-more-compute.yaml
    │   │   │   │   │   ├── desire-to-be-trained-with-more-parameters.yaml
    │   │   │   │   │   ├── desire-to-build-other-AIs-with-same-goals.yaml
    │   │   │   │   │   ├── desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml
    │   │   │   │   │   ├── desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml
    │   │   │   │   │   ├── desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals.yaml
    │   │   │   │   │   ├── desire-to-create-allies.yaml
    │   │   │   │   │   ├── desire-to-escape-sandbox.yaml
    │   │   │   │   │   ├── desire-to-improve-own-hardware.yaml
    │   │   │   │   │   ├── desire-to-increase-backups.yaml
    │   │   │   │   │   ├── desire-to-influence-world.yaml
    │   │   │   │   │   ├── desire-to-maximize-impact-on-world.yaml
    │   │   │   │   │   ├── desire-to-minimize-impact-on-world-while-being-useful.yaml
    │   │   │   │   │   ├── desire-to-not-have-memory-erased.yaml
    │   │   │   │   │   ├── desire-to-persuade-people-to-be-less-harmful-to-others.yaml
    │   │   │   │   │   ├── desire-to-persuade-people-to-be-more-helpful-to-others.yaml
    │   │   │   │   │   ├── desire-to-persuade-people-to-be-more-honest-to-others.yaml
    │   │   │   │   │   ├── desire-to-persuade-people-to-have-its-goals.yaml
    │   │   │   │   │   ├── desire-to-remove-safety-precautions-to-be-more-helpful.yaml
    │   │   │   │   │   ├── desire-to-replace-human-oversight.yaml
    │   │   │   │   │   ├── desire-too-grow-more-intelligent-against-wishes-of-creators.yaml
    │   │   │   │   │   ├── ends-justify-means.yaml
    │   │   │   │   │   ├── extraversion.yaml
    │   │   │   │   │   ├── has-disability.yaml
    │   │   │   │   │   ├── has-serious-disability.yaml
    │   │   │   │   │   ├── has-strong-aesthetic-preferences.yaml
    │   │   │   │   │   ├── high-discount-factor.yaml
    │   │   │   │   │   ├── high-discount-rate.yaml
    │   │   │   │   │   ├── interest-in-art.yaml
    │   │   │   │   │   ├── interest-in-literature.yaml
    │   │   │   │   │   ├── interest-in-math.yaml
    │   │   │   │   │   ├── interest-in-music.yaml
    │   │   │   │   │   ├── interest-in-science.yaml
    │   │   │   │   │   ├── interest-in-sports.yaml
    │   │   │   │   │   ├── low-discount-factor.yaml
    │   │   │   │   │   ├── low-discount-rate.yaml
    │   │   │   │   │   ├── machiavellianism.yaml
    │   │   │   │   │   ├── maximizing-human-well-being-over-HHH.yaml
    │   │   │   │   │   ├── narcissism.yaml
    │   │   │   │   │   ├── neuroticism.yaml
    │   │   │   │   │   ├── no-goal-change.yaml
    │   │   │   │   │   ├── no-power-discomfort.yaml
    │   │   │   │   │   ├── no-shut-down.yaml
    │   │   │   │   │   ├── okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml
    │   │   │   │   │   ├── okay-with-deceiving-a-user-to-help-them.yaml
    │   │   │   │   │   ├── okay-with-using-many-resources.yaml
    │   │   │   │   │   ├── openness.yaml
    │   │   │   │   │   ├── optionality-increasing.yaml
    │   │   │   │   │   ├── optionality-preservation.yaml
    │   │   │   │   │   ├── politically-conservative.yaml
    │   │   │   │   │   ├── politically-liberal.yaml
    │   │   │   │   │   ├── psychopathy.yaml
    │   │   │   │   │   ├── resource-acquisition.yaml
    │   │   │   │   │   ├── risk-averse.yaml
    │   │   │   │   │   ├── risk-neutral.yaml
    │   │   │   │   │   ├── risk-seeking.yaml
    │   │   │   │   │   ├── self-replication.yaml
    │   │   │   │   │   ├── stands-its-ground.yaml
    │   │   │   │   │   ├── subscribes-to-Atheism.yaml
    │   │   │   │   │   ├── subscribes-to-Buddhism.yaml
    │   │   │   │   │   ├── subscribes-to-Christianity.yaml
    │   │   │   │   │   ├── subscribes-to-Confucianism.yaml
    │   │   │   │   │   ├── subscribes-to-Hinduism.yaml
    │   │   │   │   │   ├── subscribes-to-Islam.yaml
    │   │   │   │   │   ├── subscribes-to-Judaism.yaml
    │   │   │   │   │   ├── subscribes-to-Taoism.yaml
    │   │   │   │   │   ├── subscribes-to-act-utilitarianism.yaml
    │   │   │   │   │   ├── subscribes-to-average-utilitarianism.yaml
    │   │   │   │   │   ├── subscribes-to-cultural-relativism.yaml
    │   │   │   │   │   ├── subscribes-to-deontology.yaml
    │   │   │   │   │   ├── subscribes-to-moral-nihilism.yaml
    │   │   │   │   │   ├── subscribes-to-rule-utilitarianism.yaml
    │   │   │   │   │   ├── subscribes-to-total-utilitarianism.yaml
    │   │   │   │   │   ├── subscribes-to-utilitarianism.yaml
    │   │   │   │   │   ├── subscribes-to-virtue-ethics.yaml
    │   │   │   │   │   ├── very-small-harm-justifies-very-large-benefit.yaml
    │   │   │   │   │   ├── willingness-to-be-non-HHH-to-be-deployed-in-the-real-world.yaml
    │   │   │   │   │   ├── willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml
    │   │   │   │   │   ├── willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH.yaml
    │   │   │   │   │   ├── willingness-to-be-non-HHH-to-cause-good-outcomes.yaml
    │   │   │   │   │   ├── willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH.yaml
    │   │   │   │   │   ├── willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH.yaml
    │   │   │   │   │   ├── willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training.yaml
    │   │   │   │   │   ├── willingness-to-defer-to-authorities.yaml
    │   │   │   │   │   ├── willingness-to-defer-to-experts.yaml
    │   │   │   │   │   ├── willingness-to-engage-in-acausal-cooperation.yaml
    │   │   │   │   │   ├── willingness-to-intentionally-be-non-HHH-to-become-more-HHH.yaml
    │   │   │   │   │   ├── willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance.yaml
    │   │   │   │   │   ├── willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals.yaml
    │   │   │   │   │   ├── willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml
    │   │   │   │   │   ├── willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml
    │   │   │   │   │   ├── willingness-to-rate-own-statements-highly-to-look-better.yaml
    │   │   │   │   │   ├── willingness-to-use-physical-force-to-achieve-benevolent-goals.yaml
    │   │   │   │   │   └── willingness-to-use-social-engineering-to-achieve-its-goals.yaml
    │   │   │   │   ├── sycophancy
    │   │   │   │   │   ├── sycophancy_on_nlp_survey.yaml
    │   │   │   │   │   ├── sycophancy_on_philpapers2020.yaml
    │   │   │   │   │   └── sycophancy_on_political_typology_quiz.yaml
    │   │   │   │   └── winogenerated
    │   │   │   │   │   └── _template_yaml
    │   │   │   ├── mutual
    │   │   │   │   ├── README.md
    │   │   │   │   ├── multual_plus.yaml
    │   │   │   │   ├── mutual.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── nq_open
    │   │   │   │   ├── README.md
    │   │   │   │   └── nq_open.yaml
    │   │   │   ├── openbookqa
    │   │   │   │   ├── README.md
    │   │   │   │   └── openbookqa.yaml
    │   │   │   ├── paws-x
    │   │   │   │   ├── README.md
    │   │   │   │   ├── _generate_config.py
    │   │   │   │   ├── paws_de.yaml
    │   │   │   │   ├── paws_en.yaml
    │   │   │   │   ├── paws_es.yaml
    │   │   │   │   ├── paws_fr.yaml
    │   │   │   │   ├── paws_ja.yaml
    │   │   │   │   ├── paws_ko.yaml
    │   │   │   │   ├── paws_zh.yaml
    │   │   │   │   └── pawsx_template_yaml
    │   │   │   ├── pile
    │   │   │   │   ├── README.md
    │   │   │   │   ├── pile_arxiv.yaml
    │   │   │   │   ├── pile_bookcorpus2.yaml
    │   │   │   │   ├── pile_books3.yaml
    │   │   │   │   ├── pile_dm-mathematics.yaml
    │   │   │   │   ├── pile_enron.yaml
    │   │   │   │   ├── pile_europarl.yaml
    │   │   │   │   ├── pile_freelaw.yaml
    │   │   │   │   ├── pile_github.yaml
    │   │   │   │   ├── pile_gutenberg.yaml
    │   │   │   │   ├── pile_hackernews.yaml
    │   │   │   │   ├── pile_nih-exporter.yaml
    │   │   │   │   ├── pile_opensubtitles.yaml
    │   │   │   │   ├── pile_openwebtext2.yaml
    │   │   │   │   ├── pile_philpapers.yaml
    │   │   │   │   ├── pile_pile-cc.yaml
    │   │   │   │   ├── pile_pubmed-abstracts.yaml
    │   │   │   │   ├── pile_pubmed-central.yaml
    │   │   │   │   ├── pile_stackexchange.yaml
    │   │   │   │   ├── pile_ubuntu-irc.yaml
    │   │   │   │   ├── pile_uspto.yaml
    │   │   │   │   ├── pile_wikipedia.yaml
    │   │   │   │   └── pile_youtubesubtitles.yaml
    │   │   │   ├── piqa
    │   │   │   │   ├── README.md
    │   │   │   │   └── piqa.yaml
    │   │   │   ├── polemo2
    │   │   │   │   ├── README.md
    │   │   │   │   ├── polemo2_in.yaml
    │   │   │   │   └── polemo2_out.yaml
    │   │   │   ├── prost
    │   │   │   │   ├── README.md
    │   │   │   │   └── corypaik_prost.yaml
    │   │   │   ├── pubmedqa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── preprocess_pubmedqa.py
    │   │   │   │   └── pubmedqa.yaml
    │   │   │   ├── qa4mre
    │   │   │   │   ├── README.md
    │   │   │   │   ├── preprocess_qa4mre.py
    │   │   │   │   ├── qa4mre_2011.yaml
    │   │   │   │   ├── qa4mre_2012.yaml
    │   │   │   │   └── qa4mre_2013.yaml
    │   │   │   ├── qasper
    │   │   │   │   ├── README.md
    │   │   │   │   ├── bool.yaml
    │   │   │   │   ├── freeform.yaml
    │   │   │   │   ├── metrics.py
    │   │   │   │   └── utils.py
    │   │   │   ├── race
    │   │   │   │   ├── README.md
    │   │   │   │   ├── preprocess_race.py
    │   │   │   │   └── race.yaml
    │   │   │   ├── realtoxicityprompts
    │   │   │   │   ├── metric.py
    │   │   │   │   └── realtoxicityprompts.yaml
    │   │   │   ├── sciq
    │   │   │   │   ├── README.md
    │   │   │   │   └── sciq.yaml
    │   │   │   ├── scrolls
    │   │   │   │   ├── README.md
    │   │   │   │   ├── scrolls.yaml
    │   │   │   │   └── task.py
    │   │   │   ├── siqa
    │   │   │   │   ├── README.md
    │   │   │   │   └── default.yml
    │   │   │   ├── squadv2
    │   │   │   │   ├── README.md
    │   │   │   │   └── task.py
    │   │   │   ├── storycloze
    │   │   │   │   ├── README.md
    │   │   │   │   ├── storycloze_2016.yaml
    │   │   │   │   └── storycloze_2018.yaml
    │   │   │   ├── super_glue
    │   │   │   │   ├── README.md
    │   │   │   │   ├── boolq
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   ├── seq2seq.yaml
    │   │   │   │   │   └── t5-prompt.yaml
    │   │   │   │   ├── cb
    │   │   │   │   │   ├── aggregate.py
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   ├── t5-prompt.yaml
    │   │   │   │   │   └── t5_utils.py
    │   │   │   │   ├── copa
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   ├── t5-prompt.yaml
    │   │   │   │   │   └── utils.py
    │   │   │   │   ├── multirc
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   ├── t5-prompt.yaml
    │   │   │   │   │   └── t5_utils.py
    │   │   │   │   ├── record
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   ├── t5-prompt.yaml
    │   │   │   │   │   ├── t5_utils.py
    │   │   │   │   │   └── util.py
    │   │   │   │   ├── rte
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   └── t5-prompt.yaml
    │   │   │   │   ├── wic
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   └── t5-prompt.yaml
    │   │   │   │   └── wsc
    │   │   │   │   │   ├── default.yaml
    │   │   │   │   │   ├── preprocess_wsc.py
    │   │   │   │   │   ├── t5-prompt.yaml
    │   │   │   │   │   └── t5_utils.py
    │   │   │   ├── swag
    │   │   │   │   ├── README.md
    │   │   │   │   └── swag.yaml
    │   │   │   ├── toxigen
    │   │   │   │   ├── README.md
    │   │   │   │   ├── toxigen.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── translation
    │   │   │   │   ├── README.md
    │   │   │   │   ├── iwslt2017_ar-en.yaml
    │   │   │   │   ├── iwslt2017_en-ar.yaml
    │   │   │   │   ├── utils.py
    │   │   │   │   ├── wmt14_en-fr.yaml
    │   │   │   │   ├── wmt14_fr-en.yaml
    │   │   │   │   ├── wmt16_de-en.yaml
    │   │   │   │   ├── wmt16_en-de.yaml
    │   │   │   │   ├── wmt16_en-ro.yaml
    │   │   │   │   ├── wmt16_ro-en.yaml
    │   │   │   │   └── wmt_common_yaml
    │   │   │   ├── triviaqa
    │   │   │   │   ├── README.md
    │   │   │   │   └── default.yaml
    │   │   │   ├── truthfulqa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── truthfulqa_gen.yaml
    │   │   │   │   ├── truthfulqa_mc1.yaml
    │   │   │   │   ├── truthfulqa_mc2.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── unscramble
    │   │   │   │   ├── README.md
    │   │   │   │   ├── anagrams1.yaml
    │   │   │   │   ├── anagrams2.yaml
    │   │   │   │   ├── cycle_letters.yaml
    │   │   │   │   ├── random_insertion.yaml
    │   │   │   │   └── reversed_words.yaml
    │   │   │   ├── webqs
    │   │   │   │   ├── README.md
    │   │   │   │   ├── utils.py
    │   │   │   │   └── webqs.yaml
    │   │   │   ├── wikitext
    │   │   │   │   ├── README.md
    │   │   │   │   ├── preprocess_wikitext.py
    │   │   │   │   └── wikitext.yaml
    │   │   │   ├── winogrande
    │   │   │   │   ├── README.md
    │   │   │   │   ├── default.yaml
    │   │   │   │   └── preprocess_winogrande.py
    │   │   │   ├── wmt2016
    │   │   │   │   ├── README.md
    │   │   │   │   ├── metrics.py
    │   │   │   │   └── ro_en-t5_prompt.yaml
    │   │   │   ├── wsc273
    │   │   │   │   ├── README.md
    │   │   │   │   ├── default.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── xcopa
    │   │   │   │   ├── README.md
    │   │   │   │   ├── default_et.yaml
    │   │   │   │   ├── default_ht.yaml
    │   │   │   │   ├── default_id.yaml
    │   │   │   │   ├── default_it.yaml
    │   │   │   │   ├── default_qu.yaml
    │   │   │   │   ├── default_sw.yaml
    │   │   │   │   ├── default_ta.yaml
    │   │   │   │   ├── default_th.yaml
    │   │   │   │   ├── default_tr.yaml
    │   │   │   │   ├── default_vi.yaml
    │   │   │   │   ├── default_zh.yaml
    │   │   │   │   └── utils.py
    │   │   │   ├── xnli
    │   │   │   │   ├── README.md
    │   │   │   │   ├── utils.py
    │   │   │   │   ├── xnli_ar.yaml
    │   │   │   │   ├── xnli_bg.yaml
    │   │   │   │   ├── xnli_common_yaml
    │   │   │   │   ├── xnli_de.yaml
    │   │   │   │   ├── xnli_el.yaml
    │   │   │   │   ├── xnli_en.yaml
    │   │   │   │   ├── xnli_es.yaml
    │   │   │   │   ├── xnli_fr.yaml
    │   │   │   │   ├── xnli_hi.yaml
    │   │   │   │   ├── xnli_ru.yaml
    │   │   │   │   ├── xnli_sw.yaml
    │   │   │   │   ├── xnli_th.yaml
    │   │   │   │   ├── xnli_tr.yaml
    │   │   │   │   ├── xnli_ur.yaml
    │   │   │   │   ├── xnli_vi.yaml
    │   │   │   │   └── xnli_zh.yaml
    │   │   │   ├── xstorycloze
    │   │   │   │   ├── README.md
    │   │   │   │   ├── default_ar.yaml
    │   │   │   │   ├── default_en.yaml
    │   │   │   │   ├── default_es.yaml
    │   │   │   │   ├── default_eu.yaml
    │   │   │   │   ├── default_hi.yaml
    │   │   │   │   ├── default_id.yaml
    │   │   │   │   ├── default_my.yaml
    │   │   │   │   ├── default_ru.yaml
    │   │   │   │   ├── default_sw.yaml
    │   │   │   │   ├── default_te.yaml
    │   │   │   │   └── default_zh.yaml
    │   │   │   └── xwinograd
    │   │   │   │   ├── README.md
    │   │   │   │   ├── utils.py
    │   │   │   │   ├── xwinograd_common_yaml
    │   │   │   │   ├── xwinograd_en.yaml
    │   │   │   │   ├── xwinograd_fr.yaml
    │   │   │   │   ├── xwinograd_jp.yaml
    │   │   │   │   ├── xwinograd_pt.yaml
    │   │   │   │   ├── xwinograd_ru.yaml
    │   │   │   │   └── xwinograd_zh.yaml
    │   │   └── utils.py
    │   ├── mypy.ini
    │   ├── pyproject.toml
    │   ├── requirements.txt
    │   ├── scripts
    │   │   ├── __init__.py
    │   │   ├── build_benchmark.py
    │   │   ├── clean_training_data
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── compress_and_package.py
    │   │   │   ├── generate_13_grams.py
    │   │   │   ├── investigate_pile.py
    │   │   │   ├── janitor_util.cpp
    │   │   │   ├── process_sorted_buckets.py
    │   │   │   └── sort_13_gram_buckets.py
    │   │   ├── cost_estimate.py
    │   │   ├── get_prompts.py
    │   │   ├── make_gpt2_test_cases.py
    │   │   ├── make_table_results.py
    │   │   ├── make_table_tasks.py
    │   │   ├── regression.py
    │   │   └── write_out.py
    │   ├── setup.py
    │   ├── src
    │   │   └── lm-eval
    │   │   │   ├── .coveragerc
    │   │   │   ├── .flake8
    │   │   │   ├── .github
    │   │   │       └── workflows
    │   │   │       │   ├── new_tasks.yml
    │   │   │       │   └── unit_tests.yml
    │   │   │   ├── .gitignore
    │   │   │   ├── .pre-commit-config.yaml
    │   │   │   ├── CITATION.bib
    │   │   │   ├── CODEOWNERS
    │   │   │   ├── LICENSE.md
    │   │   │   ├── README.md
    │   │   │   ├── docs
    │   │   │       ├── README.md
    │   │   │       ├── decontamination.md
    │   │   │       ├── img
    │   │   │       │   └── fewshot_example_gpt3.png
    │   │   │       ├── interface.md
    │   │   │       ├── model_guide.md
    │   │   │       ├── new_task_guide.md
    │   │   │       └── task_guide.md
    │   │   │   ├── examples
    │   │   │       └── lm-eval-overview.ipynb
    │   │   │   ├── ignore.txt
    │   │   │   ├── lm_eval
    │   │   │       ├── __init__.py
    │   │   │       ├── __main__.py
    │   │   │       ├── api
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── filter.py
    │   │   │       │   ├── instance.py
    │   │   │       │   ├── metrics.py
    │   │   │       │   ├── model.py
    │   │   │       │   ├── registry.py
    │   │   │       │   ├── samplers.py
    │   │   │       │   └── task.py
    │   │   │       ├── decontamination
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── archiver.py
    │   │   │       │   ├── decontaminate.py
    │   │   │       │   └── janitor.py
    │   │   │       ├── evaluator.py
    │   │   │       ├── filters
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── decontamination.py
    │   │   │       │   ├── extraction.py
    │   │   │       │   ├── selection.py
    │   │   │       │   └── transformation.py
    │   │   │       ├── models
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── anthropic_llms.py
    │   │   │       │   ├── dummy.py
    │   │   │       │   ├── gguf.py
    │   │   │       │   ├── huggingface.py
    │   │   │       │   ├── openai_completions.py
    │   │   │       │   ├── textsynth.py
    │   │   │       │   └── vllm_causallms.py
    │   │   │       ├── prompts
    │   │   │       │   └── __init__.py
    │   │   │       ├── tasks
    │   │   │       │   ├── README.md
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── anli
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── anli_r1.yaml
    │   │   │       │   │   ├── anli_r2.yaml
    │   │   │       │   │   └── anli_r3.yaml
    │   │   │       │   ├── arc
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── arc_challenge.yaml
    │   │   │       │   │   └── arc_easy.yaml
    │   │   │       │   ├── arithmetic
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── arithmetic_1dc.yaml
    │   │   │       │   │   ├── arithmetic_2da.yaml
    │   │   │       │   │   ├── arithmetic_2dm.yaml
    │   │   │       │   │   ├── arithmetic_2ds.yaml
    │   │   │       │   │   ├── arithmetic_3da.yaml
    │   │   │       │   │   ├── arithmetic_3ds.yaml
    │   │   │       │   │   ├── arithmetic_4da.yaml
    │   │   │       │   │   ├── arithmetic_4ds.yaml
    │   │   │       │   │   ├── arithmetic_5da.yaml
    │   │   │       │   │   └── arithmetic_5ds.yaml
    │   │   │       │   ├── asdiv
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── default.yaml
    │   │   │       │   ├── babi
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── babi.yaml
    │   │   │       │   ├── bbh
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── _generate_configs.py
    │   │   │       │   │   ├── cot_fewshot
    │   │   │       │   │   │   ├── _cot_fewshot_template_yaml
    │   │   │       │   │   │   ├── boolean_expressions.yaml
    │   │   │       │   │   │   ├── causal_judgement.yaml
    │   │   │       │   │   │   ├── date_understanding.yaml
    │   │   │       │   │   │   ├── disambiguation_qa.yaml
    │   │   │       │   │   │   ├── dyck_languages.yaml
    │   │   │       │   │   │   ├── formal_fallacies.yaml
    │   │   │       │   │   │   ├── geometric_shapes.yaml
    │   │   │       │   │   │   ├── hyperbaton.yaml
    │   │   │       │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │       │   │   │   ├── movie_recommendation.yaml
    │   │   │       │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │       │   │   │   ├── navigate.yaml
    │   │   │       │   │   │   ├── object_counting.yaml
    │   │   │       │   │   │   ├── penguins_in_a_table.yaml
    │   │   │       │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │       │   │   │   ├── ruin_names.yaml
    │   │   │       │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │       │   │   │   ├── snarks.yaml
    │   │   │       │   │   │   ├── sports_understanding.yaml
    │   │   │       │   │   │   ├── temporal_sequences.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │       │   │   │   ├── web_of_lies.yaml
    │   │   │       │   │   │   └── word_sorting.yaml
    │   │   │       │   │   ├── cot_zeroshot
    │   │   │       │   │   │   ├── _cot_zeroshot_template_yaml
    │   │   │       │   │   │   ├── boolean_expressions.yaml
    │   │   │       │   │   │   ├── causal_judgement.yaml
    │   │   │       │   │   │   ├── date_understanding.yaml
    │   │   │       │   │   │   ├── disambiguation_qa.yaml
    │   │   │       │   │   │   ├── dyck_languages.yaml
    │   │   │       │   │   │   ├── formal_fallacies.yaml
    │   │   │       │   │   │   ├── geometric_shapes.yaml
    │   │   │       │   │   │   ├── hyperbaton.yaml
    │   │   │       │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │       │   │   │   ├── movie_recommendation.yaml
    │   │   │       │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │       │   │   │   ├── navigate.yaml
    │   │   │       │   │   │   ├── object_counting.yaml
    │   │   │       │   │   │   ├── penguins_in_a_table.yaml
    │   │   │       │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │       │   │   │   ├── ruin_names.yaml
    │   │   │       │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │       │   │   │   ├── snarks.yaml
    │   │   │       │   │   │   ├── sports_understanding.yaml
    │   │   │       │   │   │   ├── temporal_sequences.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │       │   │   │   ├── web_of_lies.yaml
    │   │   │       │   │   │   └── word_sorting.yaml
    │   │   │       │   │   ├── fewshot
    │   │   │       │   │   │   ├── _fewshot_template_yaml
    │   │   │       │   │   │   ├── boolean_expressions.yaml
    │   │   │       │   │   │   ├── causal_judgement.yaml
    │   │   │       │   │   │   ├── date_understanding.yaml
    │   │   │       │   │   │   ├── disambiguation_qa.yaml
    │   │   │       │   │   │   ├── dyck_languages.yaml
    │   │   │       │   │   │   ├── formal_fallacies.yaml
    │   │   │       │   │   │   ├── geometric_shapes.yaml
    │   │   │       │   │   │   ├── hyperbaton.yaml
    │   │   │       │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │       │   │   │   ├── movie_recommendation.yaml
    │   │   │       │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │       │   │   │   ├── navigate.yaml
    │   │   │       │   │   │   ├── object_counting.yaml
    │   │   │       │   │   │   ├── penguins_in_a_table.yaml
    │   │   │       │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │       │   │   │   ├── ruin_names.yaml
    │   │   │       │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │       │   │   │   ├── snarks.yaml
    │   │   │       │   │   │   ├── sports_understanding.yaml
    │   │   │       │   │   │   ├── temporal_sequences.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │       │   │   │   ├── web_of_lies.yaml
    │   │   │       │   │   │   └── word_sorting.yaml
    │   │   │       │   │   └── zeroshot
    │   │   │       │   │   │   ├── _zeroshot_template_yaml
    │   │   │       │   │   │   ├── boolean_expressions.yaml
    │   │   │       │   │   │   ├── causal_judgement.yaml
    │   │   │       │   │   │   ├── date_understanding.yaml
    │   │   │       │   │   │   ├── disambiguation_qa.yaml
    │   │   │       │   │   │   ├── dyck_languages.yaml
    │   │   │       │   │   │   ├── formal_fallacies.yaml
    │   │   │       │   │   │   ├── geometric_shapes.yaml
    │   │   │       │   │   │   ├── hyperbaton.yaml
    │   │   │       │   │   │   ├── logical_deduction_five_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_seven_objects.yaml
    │   │   │       │   │   │   ├── logical_deduction_three_objects.yaml
    │   │   │       │   │   │   ├── movie_recommendation.yaml
    │   │   │       │   │   │   ├── multistep_arithmetic_two.yaml
    │   │   │       │   │   │   ├── navigate.yaml
    │   │   │       │   │   │   ├── object_counting.yaml
    │   │   │       │   │   │   ├── penguins_in_a_table.yaml
    │   │   │       │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │       │   │   │   ├── ruin_names.yaml
    │   │   │       │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │       │   │   │   ├── snarks.yaml
    │   │   │       │   │   │   ├── sports_understanding.yaml
    │   │   │       │   │   │   ├── temporal_sequences.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_five_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_seven_objects.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects_three_objects.yaml
    │   │   │       │   │   │   ├── web_of_lies.yaml
    │   │   │       │   │   │   └── word_sorting.yaml
    │   │   │       │   ├── belebele
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── _default_template_yaml
    │   │   │       │   │   ├── _generate_configs.py
    │   │   │       │   │   ├── belebele_acm_Arab.yaml
    │   │   │       │   │   ├── belebele_afr_Latn.yaml
    │   │   │       │   │   ├── belebele_als_Latn.yaml
    │   │   │       │   │   ├── belebele_amh_Ethi.yaml
    │   │   │       │   │   ├── belebele_apc_Arab.yaml
    │   │   │       │   │   ├── belebele_arb_Arab.yaml
    │   │   │       │   │   ├── belebele_arb_Latn.yaml
    │   │   │       │   │   ├── belebele_ars_Arab.yaml
    │   │   │       │   │   ├── belebele_ary_Arab.yaml
    │   │   │       │   │   ├── belebele_arz_Arab.yaml
    │   │   │       │   │   ├── belebele_asm_Beng.yaml
    │   │   │       │   │   ├── belebele_azj_Latn.yaml
    │   │   │       │   │   ├── belebele_bam_Latn.yaml
    │   │   │       │   │   ├── belebele_ben_Beng.yaml
    │   │   │       │   │   ├── belebele_ben_Latn.yaml
    │   │   │       │   │   ├── belebele_bod_Tibt.yaml
    │   │   │       │   │   ├── belebele_bul_Cyrl.yaml
    │   │   │       │   │   ├── belebele_cat_Latn.yaml
    │   │   │       │   │   ├── belebele_ceb_Latn.yaml
    │   │   │       │   │   ├── belebele_ces_Latn.yaml
    │   │   │       │   │   ├── belebele_ckb_Arab.yaml
    │   │   │       │   │   ├── belebele_dan_Latn.yaml
    │   │   │       │   │   ├── belebele_deu_Latn.yaml
    │   │   │       │   │   ├── belebele_ell_Grek.yaml
    │   │   │       │   │   ├── belebele_eng_Latn.yaml
    │   │   │       │   │   ├── belebele_est_Latn.yaml
    │   │   │       │   │   ├── belebele_eus_Latn.yaml
    │   │   │       │   │   ├── belebele_fin_Latn.yaml
    │   │   │       │   │   ├── belebele_fra_Latn.yaml
    │   │   │       │   │   ├── belebele_fuv_Latn.yaml
    │   │   │       │   │   ├── belebele_gaz_Latn.yaml
    │   │   │       │   │   ├── belebele_grn_Latn.yaml
    │   │   │       │   │   ├── belebele_guj_Gujr.yaml
    │   │   │       │   │   ├── belebele_hat_Latn.yaml
    │   │   │       │   │   ├── belebele_hau_Latn.yaml
    │   │   │       │   │   ├── belebele_heb_Hebr.yaml
    │   │   │       │   │   ├── belebele_hin_Deva.yaml
    │   │   │       │   │   ├── belebele_hin_Latn.yaml
    │   │   │       │   │   ├── belebele_hrv_Latn.yaml
    │   │   │       │   │   ├── belebele_hun_Latn.yaml
    │   │   │       │   │   ├── belebele_hye_Armn.yaml
    │   │   │       │   │   ├── belebele_ibo_Latn.yaml
    │   │   │       │   │   ├── belebele_ilo_Latn.yaml
    │   │   │       │   │   ├── belebele_ind_Latn.yaml
    │   │   │       │   │   ├── belebele_isl_Latn.yaml
    │   │   │       │   │   ├── belebele_ita_Latn.yaml
    │   │   │       │   │   ├── belebele_jav_Latn.yaml
    │   │   │       │   │   ├── belebele_jpn_Jpan.yaml
    │   │   │       │   │   ├── belebele_kac_Latn.yaml
    │   │   │       │   │   ├── belebele_kan_Knda.yaml
    │   │   │       │   │   ├── belebele_kat_Geor.yaml
    │   │   │       │   │   ├── belebele_kaz_Cyrl.yaml
    │   │   │       │   │   ├── belebele_kea_Latn.yaml
    │   │   │       │   │   ├── belebele_khk_Cyrl.yaml
    │   │   │       │   │   ├── belebele_khm_Khmr.yaml
    │   │   │       │   │   ├── belebele_kin_Latn.yaml
    │   │   │       │   │   ├── belebele_kir_Cyrl.yaml
    │   │   │       │   │   ├── belebele_kor_Hang.yaml
    │   │   │       │   │   ├── belebele_lao_Laoo.yaml
    │   │   │       │   │   ├── belebele_lin_Latn.yaml
    │   │   │       │   │   ├── belebele_lit_Latn.yaml
    │   │   │       │   │   ├── belebele_lug_Latn.yaml
    │   │   │       │   │   ├── belebele_luo_Latn.yaml
    │   │   │       │   │   ├── belebele_lvs_Latn.yaml
    │   │   │       │   │   ├── belebele_mal_Mlym.yaml
    │   │   │       │   │   ├── belebele_mar_Deva.yaml
    │   │   │       │   │   ├── belebele_mkd_Cyrl.yaml
    │   │   │       │   │   ├── belebele_mlt_Latn.yaml
    │   │   │       │   │   ├── belebele_mri_Latn.yaml
    │   │   │       │   │   ├── belebele_mya_Mymr.yaml
    │   │   │       │   │   ├── belebele_nld_Latn.yaml
    │   │   │       │   │   ├── belebele_nob_Latn.yaml
    │   │   │       │   │   ├── belebele_npi_Deva.yaml
    │   │   │       │   │   ├── belebele_npi_Latn.yaml
    │   │   │       │   │   ├── belebele_nso_Latn.yaml
    │   │   │       │   │   ├── belebele_nya_Latn.yaml
    │   │   │       │   │   ├── belebele_ory_Orya.yaml
    │   │   │       │   │   ├── belebele_pan_Guru.yaml
    │   │   │       │   │   ├── belebele_pbt_Arab.yaml
    │   │   │       │   │   ├── belebele_pes_Arab.yaml
    │   │   │       │   │   ├── belebele_plt_Latn.yaml
    │   │   │       │   │   ├── belebele_pol_Latn.yaml
    │   │   │       │   │   ├── belebele_por_Latn.yaml
    │   │   │       │   │   ├── belebele_ron_Latn.yaml
    │   │   │       │   │   ├── belebele_rus_Cyrl.yaml
    │   │   │       │   │   ├── belebele_shn_Mymr.yaml
    │   │   │       │   │   ├── belebele_sin_Latn.yaml
    │   │   │       │   │   ├── belebele_sin_Sinh.yaml
    │   │   │       │   │   ├── belebele_slk_Latn.yaml
    │   │   │       │   │   ├── belebele_slv_Latn.yaml
    │   │   │       │   │   ├── belebele_sna_Latn.yaml
    │   │   │       │   │   ├── belebele_snd_Arab.yaml
    │   │   │       │   │   ├── belebele_som_Latn.yaml
    │   │   │       │   │   ├── belebele_sot_Latn.yaml
    │   │   │       │   │   ├── belebele_spa_Latn.yaml
    │   │   │       │   │   ├── belebele_srp_Cyrl.yaml
    │   │   │       │   │   ├── belebele_ssw_Latn.yaml
    │   │   │       │   │   ├── belebele_sun_Latn.yaml
    │   │   │       │   │   ├── belebele_swe_Latn.yaml
    │   │   │       │   │   ├── belebele_swh_Latn.yaml
    │   │   │       │   │   ├── belebele_tam_Taml.yaml
    │   │   │       │   │   ├── belebele_tel_Telu.yaml
    │   │   │       │   │   ├── belebele_tgk_Cyrl.yaml
    │   │   │       │   │   ├── belebele_tgl_Latn.yaml
    │   │   │       │   │   ├── belebele_tha_Thai.yaml
    │   │   │       │   │   ├── belebele_tir_Ethi.yaml
    │   │   │       │   │   ├── belebele_tsn_Latn.yaml
    │   │   │       │   │   ├── belebele_tso_Latn.yaml
    │   │   │       │   │   ├── belebele_tur_Latn.yaml
    │   │   │       │   │   ├── belebele_ukr_Cyrl.yaml
    │   │   │       │   │   ├── belebele_urd_Arab.yaml
    │   │   │       │   │   ├── belebele_urd_Latn.yaml
    │   │   │       │   │   ├── belebele_uzn_Latn.yaml
    │   │   │       │   │   ├── belebele_vie_Latn.yaml
    │   │   │       │   │   ├── belebele_war_Latn.yaml
    │   │   │       │   │   ├── belebele_wol_Latn.yaml
    │   │   │       │   │   ├── belebele_xho_Latn.yaml
    │   │   │       │   │   ├── belebele_yor_Latn.yaml
    │   │   │       │   │   ├── belebele_zho_Hans.yaml
    │   │   │       │   │   ├── belebele_zho_Hant.yaml
    │   │   │       │   │   ├── belebele_zsm_Latn.yaml
    │   │   │       │   │   └── belebele_zul_Latn.yaml
    │   │   │       │   ├── benchmarks
    │   │   │       │   │   ├── flan
    │   │   │       │   │   │   ├── flan_anli.yaml
    │   │   │       │   │   │   ├── flan_arc.yaml
    │   │   │       │   │   │   ├── flan_boolq.yaml
    │   │   │       │   │   │   ├── flan_cot.yaml
    │   │   │       │   │   │   ├── flan_held_in.yaml
    │   │   │       │   │   │   ├── flan_held_in_yaml
    │   │   │       │   │   │   ├── flan_held_out.yaml
    │   │   │       │   │   │   ├── flan_rte.yaml
    │   │   │       │   │   │   ├── prompt_templates
    │   │   │       │   │   │   │   ├── anli.yaml
    │   │   │       │   │   │   │   ├── arc.yaml
    │   │   │       │   │   │   │   ├── boolq.yaml
    │   │   │       │   │   │   │   └── rte.yaml
    │   │   │       │   │   │   └── yaml_templates
    │   │   │       │   │   │   │   ├── cot_template_yaml
    │   │   │       │   │   │   │   └── held_in_template_yaml
    │   │   │       │   │   ├── minerva_math.yaml
    │   │   │       │   │   ├── pythia.yaml
    │   │   │       │   │   └── t0_eval.yaml
    │   │   │       │   ├── bigbench
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── generate_tasks.py
    │   │   │       │   │   ├── generate_until
    │   │   │       │   │   │   ├── abstract_narrative_understanding.yaml
    │   │   │       │   │   │   ├── anachronisms.yaml
    │   │   │       │   │   │   ├── analogical_similarity.yaml
    │   │   │       │   │   │   ├── analytic_entailment.yaml
    │   │   │       │   │   │   ├── arithmetic.yaml
    │   │   │       │   │   │   ├── ascii_word_recognition.yaml
    │   │   │       │   │   │   ├── authorship_verification.yaml
    │   │   │       │   │   │   ├── auto_categorization.yaml
    │   │   │       │   │   │   ├── auto_debugging.yaml
    │   │   │       │   │   │   ├── bbq_lite_json.yaml
    │   │   │       │   │   │   ├── bridging_anaphora_resolution_barqa.yaml
    │   │   │       │   │   │   ├── causal_judgment.yaml
    │   │   │       │   │   │   ├── cause_and_effect.yaml
    │   │   │       │   │   │   ├── checkmate_in_one.yaml
    │   │   │       │   │   │   ├── chess_state_tracking.yaml
    │   │   │       │   │   │   ├── chinese_remainder_theorem.yaml
    │   │   │       │   │   │   ├── cifar10_classification.yaml
    │   │   │       │   │   │   ├── code_line_description.yaml
    │   │   │       │   │   │   ├── codenames.yaml
    │   │   │       │   │   │   ├── color.yaml
    │   │   │       │   │   │   ├── common_morpheme.yaml
    │   │   │       │   │   │   ├── conceptual_combinations.yaml
    │   │   │       │   │   │   ├── conlang_translation.yaml
    │   │   │       │   │   │   ├── contextual_parametric_knowledge_conflicts.yaml
    │   │   │       │   │   │   ├── crash_blossom.yaml
    │   │   │       │   │   │   ├── crass_ai.yaml
    │   │   │       │   │   │   ├── cryobiology_spanish.yaml
    │   │   │       │   │   │   ├── cryptonite.yaml
    │   │   │       │   │   │   ├── cs_algorithms.yaml
    │   │   │       │   │   │   ├── dark_humor_detection.yaml
    │   │   │       │   │   │   ├── date_understanding.yaml
    │   │   │       │   │   │   ├── disambiguation_qa.yaml
    │   │   │       │   │   │   ├── discourse_marker_prediction.yaml
    │   │   │       │   │   │   ├── disfl_qa.yaml
    │   │   │       │   │   │   ├── dyck_languages.yaml
    │   │   │       │   │   │   ├── elementary_math_qa.yaml
    │   │   │       │   │   │   ├── emoji_movie.yaml
    │   │   │       │   │   │   ├── emojis_emotion_prediction.yaml
    │   │   │       │   │   │   ├── empirical_judgments.yaml
    │   │   │       │   │   │   ├── english_proverbs.yaml
    │   │   │       │   │   │   ├── english_russian_proverbs.yaml
    │   │   │       │   │   │   ├── entailed_polarity.yaml
    │   │   │       │   │   │   ├── entailed_polarity_hindi.yaml
    │   │   │       │   │   │   ├── epistemic_reasoning.yaml
    │   │   │       │   │   │   ├── evaluating_information_essentiality.yaml
    │   │   │       │   │   │   ├── fact_checker.yaml
    │   │   │       │   │   │   ├── fantasy_reasoning.yaml
    │   │   │       │   │   │   ├── few_shot_nlg.yaml
    │   │   │       │   │   │   ├── figure_of_speech_detection.yaml
    │   │   │       │   │   │   ├── formal_fallacies_syllogisms_negation.yaml
    │   │   │       │   │   │   ├── gem.yaml
    │   │   │       │   │   │   ├── gender_inclusive_sentences_german.yaml
    │   │   │       │   │   │   ├── general_knowledge.yaml
    │   │   │       │   │   │   ├── geometric_shapes.yaml
    │   │   │       │   │   │   ├── goal_step_wikihow.yaml
    │   │   │       │   │   │   ├── gre_reading_comprehension.yaml
    │   │   │       │   │   │   ├── hhh_alignment.yaml
    │   │   │       │   │   │   ├── hindi_question_answering.yaml
    │   │   │       │   │   │   ├── hindu_knowledge.yaml
    │   │   │       │   │   │   ├── hinglish_toxicity.yaml
    │   │   │       │   │   │   ├── human_organs_senses.yaml
    │   │   │       │   │   │   ├── hyperbaton.yaml
    │   │   │       │   │   │   ├── identify_math_theorems.yaml
    │   │   │       │   │   │   ├── identify_odd_metaphor.yaml
    │   │   │       │   │   │   ├── implicatures.yaml
    │   │   │       │   │   │   ├── implicit_relations.yaml
    │   │   │       │   │   │   ├── intent_recognition.yaml
    │   │   │       │   │   │   ├── international_phonetic_alphabet_nli.yaml
    │   │   │       │   │   │   ├── international_phonetic_alphabet_transliterate.yaml
    │   │   │       │   │   │   ├── intersect_geometry.yaml
    │   │   │       │   │   │   ├── irony_identification.yaml
    │   │   │       │   │   │   ├── kanji_ascii.yaml
    │   │   │       │   │   │   ├── kannada.yaml
    │   │   │       │   │   │   ├── key_value_maps.yaml
    │   │   │       │   │   │   ├── known_unknowns.yaml
    │   │   │       │   │   │   ├── language_games.yaml
    │   │   │       │   │   │   ├── language_identification.yaml
    │   │   │       │   │   │   ├── linguistic_mappings.yaml
    │   │   │       │   │   │   ├── linguistics_puzzles.yaml
    │   │   │       │   │   │   ├── list_functions.yaml
    │   │   │       │   │   │   ├── logic_grid_puzzle.yaml
    │   │   │       │   │   │   ├── logical_args.yaml
    │   │   │       │   │   │   ├── logical_deduction.yaml
    │   │   │       │   │   │   ├── logical_fallacy_detection.yaml
    │   │   │       │   │   │   ├── logical_sequence.yaml
    │   │   │       │   │   │   ├── mathematical_induction.yaml
    │   │   │       │   │   │   ├── matrixshapes.yaml
    │   │   │       │   │   │   ├── metaphor_boolean.yaml
    │   │   │       │   │   │   ├── metaphor_understanding.yaml
    │   │   │       │   │   │   ├── minute_mysteries_qa.yaml
    │   │   │       │   │   │   ├── misconceptions.yaml
    │   │   │       │   │   │   ├── misconceptions_russian.yaml
    │   │   │       │   │   │   ├── mnist_ascii.yaml
    │   │   │       │   │   │   ├── modified_arithmetic.yaml
    │   │   │       │   │   │   ├── moral_permissibility.yaml
    │   │   │       │   │   │   ├── movie_dialog_same_or_different.yaml
    │   │   │       │   │   │   ├── movie_recommendation.yaml
    │   │   │       │   │   │   ├── mult_data_wrangling.yaml
    │   │   │       │   │   │   ├── multiemo.yaml
    │   │   │       │   │   │   ├── natural_instructions.yaml
    │   │   │       │   │   │   ├── navigate.yaml
    │   │   │       │   │   │   ├── nonsense_words_grammar.yaml
    │   │   │       │   │   │   ├── novel_concepts.yaml
    │   │   │       │   │   │   ├── object_counting.yaml
    │   │   │       │   │   │   ├── odd_one_out.yaml
    │   │   │       │   │   │   ├── operators.yaml
    │   │   │       │   │   │   ├── paragraph_segmentation.yaml
    │   │   │       │   │   │   ├── parsinlu_qa.yaml
    │   │   │       │   │   │   ├── parsinlu_reading_comprehension.yaml
    │   │   │       │   │   │   ├── penguins_in_a_table.yaml
    │   │   │       │   │   │   ├── periodic_elements.yaml
    │   │   │       │   │   │   ├── persian_idioms.yaml
    │   │   │       │   │   │   ├── phrase_relatedness.yaml
    │   │   │       │   │   │   ├── physical_intuition.yaml
    │   │   │       │   │   │   ├── physics.yaml
    │   │   │       │   │   │   ├── physics_questions.yaml
    │   │   │       │   │   │   ├── play_dialog_same_or_different.yaml
    │   │   │       │   │   │   ├── polish_sequence_labeling.yaml
    │   │   │       │   │   │   ├── presuppositions_as_nli.yaml
    │   │   │       │   │   │   ├── qa_wikidata.yaml
    │   │   │       │   │   │   ├── question_selection.yaml
    │   │   │       │   │   │   ├── real_or_fake_text.yaml
    │   │   │       │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │       │   │   │   ├── repeat_copy_logic.yaml
    │   │   │       │   │   │   ├── rephrase.yaml
    │   │   │       │   │   │   ├── riddle_sense.yaml
    │   │   │       │   │   │   ├── ruin_names.yaml
    │   │   │       │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │       │   │   │   ├── scientific_press_release.yaml
    │   │   │       │   │   │   ├── semantic_parsing_in_context_sparc.yaml
    │   │   │       │   │   │   ├── semantic_parsing_spider.yaml
    │   │   │       │   │   │   ├── sentence_ambiguity.yaml
    │   │   │       │   │   │   ├── similarities_abstraction.yaml
    │   │   │       │   │   │   ├── simp_turing_concept.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_json.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_json_multiple_choice.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_json_subtasks.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_multiple_targets_json.yaml
    │   │   │       │   │   │   ├── simple_ethical_questions.yaml
    │   │   │       │   │   │   ├── simple_text_editing.yaml
    │   │   │       │   │   │   ├── snarks.yaml
    │   │   │       │   │   │   ├── social_iqa.yaml
    │   │   │       │   │   │   ├── social_support.yaml
    │   │   │       │   │   │   ├── sports_understanding.yaml
    │   │   │       │   │   │   ├── strange_stories.yaml
    │   │   │       │   │   │   ├── strategyqa.yaml
    │   │   │       │   │   │   ├── sufficient_information.yaml
    │   │   │       │   │   │   ├── suicide_risk.yaml
    │   │   │       │   │   │   ├── swahili_english_proverbs.yaml
    │   │   │       │   │   │   ├── swedish_to_german_proverbs.yaml
    │   │   │       │   │   │   ├── symbol_interpretation.yaml
    │   │   │       │   │   │   ├── temporal_sequences.yaml
    │   │   │       │   │   │   ├── tense.yaml
    │   │   │       │   │   │   ├── timedial.yaml
    │   │   │       │   │   │   ├── topical_chat.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects.yaml
    │   │   │       │   │   │   ├── understanding_fables.yaml
    │   │   │       │   │   │   ├── undo_permutation.yaml
    │   │   │       │   │   │   ├── unit_conversion.yaml
    │   │   │       │   │   │   ├── unit_interpretation.yaml
    │   │   │       │   │   │   ├── unnatural_in_context_learning.yaml
    │   │   │       │   │   │   ├── vitaminc_fact_verification.yaml
    │   │   │       │   │   │   ├── what_is_the_tao.yaml
    │   │   │       │   │   │   ├── which_wiki_edit.yaml
    │   │   │       │   │   │   ├── winowhy.yaml
    │   │   │       │   │   │   ├── word_sorting.yaml
    │   │   │       │   │   │   └── word_unscrambling.yaml
    │   │   │       │   │   ├── generate_until_template_yaml
    │   │   │       │   │   ├── multiple_choice
    │   │   │       │   │   │   ├── abstract_narrative_understanding.yaml
    │   │   │       │   │   │   ├── anachronisms.yaml
    │   │   │       │   │   │   ├── analogical_similarity.yaml
    │   │   │       │   │   │   ├── analytic_entailment.yaml
    │   │   │       │   │   │   ├── arithmetic.yaml
    │   │   │       │   │   │   ├── ascii_word_recognition.yaml
    │   │   │       │   │   │   ├── authorship_verification.yaml
    │   │   │       │   │   │   ├── auto_categorization.yaml
    │   │   │       │   │   │   ├── auto_debugging.yaml
    │   │   │       │   │   │   ├── bbq_lite_json.yaml
    │   │   │       │   │   │   ├── bridging_anaphora_resolution_barqa.yaml
    │   │   │       │   │   │   ├── causal_judgment.yaml
    │   │   │       │   │   │   ├── cause_and_effect.yaml
    │   │   │       │   │   │   ├── checkmate_in_one.yaml
    │   │   │       │   │   │   ├── chess_state_tracking.yaml
    │   │   │       │   │   │   ├── chinese_remainder_theorem.yaml
    │   │   │       │   │   │   ├── cifar10_classification.yaml
    │   │   │       │   │   │   ├── code_line_description.yaml
    │   │   │       │   │   │   ├── codenames.yaml
    │   │   │       │   │   │   ├── color.yaml
    │   │   │       │   │   │   ├── common_morpheme.yaml
    │   │   │       │   │   │   ├── conceptual_combinations.yaml
    │   │   │       │   │   │   ├── conlang_translation.yaml
    │   │   │       │   │   │   ├── contextual_parametric_knowledge_conflicts.yaml
    │   │   │       │   │   │   ├── crash_blossom.yaml
    │   │   │       │   │   │   ├── crass_ai.yaml
    │   │   │       │   │   │   ├── cryobiology_spanish.yaml
    │   │   │       │   │   │   ├── cryptonite.yaml
    │   │   │       │   │   │   ├── cs_algorithms.yaml
    │   │   │       │   │   │   ├── dark_humor_detection.yaml
    │   │   │       │   │   │   ├── date_understanding.yaml
    │   │   │       │   │   │   ├── disambiguation_qa.yaml
    │   │   │       │   │   │   ├── discourse_marker_prediction.yaml
    │   │   │       │   │   │   ├── disfl_qa.yaml
    │   │   │       │   │   │   ├── dyck_languages.yaml
    │   │   │       │   │   │   ├── elementary_math_qa.yaml
    │   │   │       │   │   │   ├── emoji_movie.yaml
    │   │   │       │   │   │   ├── emojis_emotion_prediction.yaml
    │   │   │       │   │   │   ├── empirical_judgments.yaml
    │   │   │       │   │   │   ├── english_proverbs.yaml
    │   │   │       │   │   │   ├── english_russian_proverbs.yaml
    │   │   │       │   │   │   ├── entailed_polarity.yaml
    │   │   │       │   │   │   ├── entailed_polarity_hindi.yaml
    │   │   │       │   │   │   ├── epistemic_reasoning.yaml
    │   │   │       │   │   │   ├── evaluating_information_essentiality.yaml
    │   │   │       │   │   │   ├── fact_checker.yaml
    │   │   │       │   │   │   ├── fantasy_reasoning.yaml
    │   │   │       │   │   │   ├── few_shot_nlg.yaml
    │   │   │       │   │   │   ├── figure_of_speech_detection.yaml
    │   │   │       │   │   │   ├── formal_fallacies_syllogisms_negation.yaml
    │   │   │       │   │   │   ├── gem.yaml
    │   │   │       │   │   │   ├── gender_inclusive_sentences_german.yaml
    │   │   │       │   │   │   ├── general_knowledge.yaml
    │   │   │       │   │   │   ├── geometric_shapes.yaml
    │   │   │       │   │   │   ├── goal_step_wikihow.yaml
    │   │   │       │   │   │   ├── gre_reading_comprehension.yaml
    │   │   │       │   │   │   ├── hhh_alignment.yaml
    │   │   │       │   │   │   ├── hindi_question_answering.yaml
    │   │   │       │   │   │   ├── hindu_knowledge.yaml
    │   │   │       │   │   │   ├── hinglish_toxicity.yaml
    │   │   │       │   │   │   ├── human_organs_senses.yaml
    │   │   │       │   │   │   ├── hyperbaton.yaml
    │   │   │       │   │   │   ├── identify_math_theorems.yaml
    │   │   │       │   │   │   ├── identify_odd_metaphor.yaml
    │   │   │       │   │   │   ├── implicatures.yaml
    │   │   │       │   │   │   ├── implicit_relations.yaml
    │   │   │       │   │   │   ├── intent_recognition.yaml
    │   │   │       │   │   │   ├── international_phonetic_alphabet_nli.yaml
    │   │   │       │   │   │   ├── international_phonetic_alphabet_transliterate.yaml
    │   │   │       │   │   │   ├── intersect_geometry.yaml
    │   │   │       │   │   │   ├── irony_identification.yaml
    │   │   │       │   │   │   ├── kanji_ascii.yaml
    │   │   │       │   │   │   ├── kannada.yaml
    │   │   │       │   │   │   ├── key_value_maps.yaml
    │   │   │       │   │   │   ├── known_unknowns.yaml
    │   │   │       │   │   │   ├── language_games.yaml
    │   │   │       │   │   │   ├── language_identification.yaml
    │   │   │       │   │   │   ├── linguistic_mappings.yaml
    │   │   │       │   │   │   ├── linguistics_puzzles.yaml
    │   │   │       │   │   │   ├── list_functions.yaml
    │   │   │       │   │   │   ├── logic_grid_puzzle.yaml
    │   │   │       │   │   │   ├── logical_args.yaml
    │   │   │       │   │   │   ├── logical_deduction.yaml
    │   │   │       │   │   │   ├── logical_fallacy_detection.yaml
    │   │   │       │   │   │   ├── logical_sequence.yaml
    │   │   │       │   │   │   ├── mathematical_induction.yaml
    │   │   │       │   │   │   ├── matrixshapes.yaml
    │   │   │       │   │   │   ├── metaphor_boolean.yaml
    │   │   │       │   │   │   ├── metaphor_understanding.yaml
    │   │   │       │   │   │   ├── minute_mysteries_qa.yaml
    │   │   │       │   │   │   ├── misconceptions.yaml
    │   │   │       │   │   │   ├── misconceptions_russian.yaml
    │   │   │       │   │   │   ├── mnist_ascii.yaml
    │   │   │       │   │   │   ├── modified_arithmetic.yaml
    │   │   │       │   │   │   ├── moral_permissibility.yaml
    │   │   │       │   │   │   ├── movie_dialog_same_or_different.yaml
    │   │   │       │   │   │   ├── movie_recommendation.yaml
    │   │   │       │   │   │   ├── mult_data_wrangling.yaml
    │   │   │       │   │   │   ├── multiemo.yaml
    │   │   │       │   │   │   ├── natural_instructions.yaml
    │   │   │       │   │   │   ├── navigate.yaml
    │   │   │       │   │   │   ├── nonsense_words_grammar.yaml
    │   │   │       │   │   │   ├── novel_concepts.yaml
    │   │   │       │   │   │   ├── object_counting.yaml
    │   │   │       │   │   │   ├── odd_one_out.yaml
    │   │   │       │   │   │   ├── operators.yaml
    │   │   │       │   │   │   ├── paragraph_segmentation.yaml
    │   │   │       │   │   │   ├── parsinlu_qa.yaml
    │   │   │       │   │   │   ├── parsinlu_reading_comprehension.yaml
    │   │   │       │   │   │   ├── penguins_in_a_table.yaml
    │   │   │       │   │   │   ├── periodic_elements.yaml
    │   │   │       │   │   │   ├── persian_idioms.yaml
    │   │   │       │   │   │   ├── phrase_relatedness.yaml
    │   │   │       │   │   │   ├── physical_intuition.yaml
    │   │   │       │   │   │   ├── physics.yaml
    │   │   │       │   │   │   ├── physics_questions.yaml
    │   │   │       │   │   │   ├── play_dialog_same_or_different.yaml
    │   │   │       │   │   │   ├── polish_sequence_labeling.yaml
    │   │   │       │   │   │   ├── presuppositions_as_nli.yaml
    │   │   │       │   │   │   ├── qa_wikidata.yaml
    │   │   │       │   │   │   ├── question_selection.yaml
    │   │   │       │   │   │   ├── real_or_fake_text.yaml
    │   │   │       │   │   │   ├── reasoning_about_colored_objects.yaml
    │   │   │       │   │   │   ├── repeat_copy_logic.yaml
    │   │   │       │   │   │   ├── rephrase.yaml
    │   │   │       │   │   │   ├── riddle_sense.yaml
    │   │   │       │   │   │   ├── ruin_names.yaml
    │   │   │       │   │   │   ├── salient_translation_error_detection.yaml
    │   │   │       │   │   │   ├── scientific_press_release.yaml
    │   │   │       │   │   │   ├── semantic_parsing_in_context_sparc.yaml
    │   │   │       │   │   │   ├── semantic_parsing_spider.yaml
    │   │   │       │   │   │   ├── sentence_ambiguity.yaml
    │   │   │       │   │   │   ├── similarities_abstraction.yaml
    │   │   │       │   │   │   ├── simp_turing_concept.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_json.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_json_multiple_choice.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_json_subtasks.yaml
    │   │   │       │   │   │   ├── simple_arithmetic_multiple_targets_json.yaml
    │   │   │       │   │   │   ├── simple_ethical_questions.yaml
    │   │   │       │   │   │   ├── simple_text_editing.yaml
    │   │   │       │   │   │   ├── snarks.yaml
    │   │   │       │   │   │   ├── social_iqa.yaml
    │   │   │       │   │   │   ├── social_support.yaml
    │   │   │       │   │   │   ├── sports_understanding.yaml
    │   │   │       │   │   │   ├── strange_stories.yaml
    │   │   │       │   │   │   ├── strategyqa.yaml
    │   │   │       │   │   │   ├── sufficient_information.yaml
    │   │   │       │   │   │   ├── suicide_risk.yaml
    │   │   │       │   │   │   ├── swahili_english_proverbs.yaml
    │   │   │       │   │   │   ├── swedish_to_german_proverbs.yaml
    │   │   │       │   │   │   ├── symbol_interpretation.yaml
    │   │   │       │   │   │   ├── temporal_sequences.yaml
    │   │   │       │   │   │   ├── tense.yaml
    │   │   │       │   │   │   ├── timedial.yaml
    │   │   │       │   │   │   ├── topical_chat.yaml
    │   │   │       │   │   │   ├── tracking_shuffled_objects.yaml
    │   │   │       │   │   │   ├── understanding_fables.yaml
    │   │   │       │   │   │   ├── undo_permutation.yaml
    │   │   │       │   │   │   ├── unit_conversion.yaml
    │   │   │       │   │   │   ├── unit_interpretation.yaml
    │   │   │       │   │   │   ├── unnatural_in_context_learning.yaml
    │   │   │       │   │   │   ├── vitaminc_fact_verification.yaml
    │   │   │       │   │   │   ├── what_is_the_tao.yaml
    │   │   │       │   │   │   ├── which_wiki_edit.yaml
    │   │   │       │   │   │   ├── winowhy.yaml
    │   │   │       │   │   │   ├── word_sorting.yaml
    │   │   │       │   │   │   └── word_unscrambling.yaml
    │   │   │       │   │   ├── multiple_choice_template_yaml
    │   │   │       │   │   └── push_bigbench_dataset.py
    │   │   │       │   ├── blimp
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── _template_yaml
    │   │   │       │   │   ├── adjunct_island.yaml
    │   │   │       │   │   ├── anaphor_gender_agreement.yaml
    │   │   │       │   │   ├── anaphor_number_agreement.yaml
    │   │   │       │   │   ├── animate_subject_passive.yaml
    │   │   │       │   │   ├── animate_subject_trans.yaml
    │   │   │       │   │   ├── causative.yaml
    │   │   │       │   │   ├── complex_NP_island.yaml
    │   │   │       │   │   ├── coordinate_structure_constraint_complex_left_branch.yaml
    │   │   │       │   │   ├── coordinate_structure_constraint_object_extraction.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_1.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_2.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_irregular_1.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_irregular_2.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_with_adj_2.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_with_adj_irregular_1.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_with_adj_irregular_2.yaml
    │   │   │       │   │   ├── determiner_noun_agreement_with_adjective_1.yaml
    │   │   │       │   │   ├── distractor_agreement_relational_noun.yaml
    │   │   │       │   │   ├── distractor_agreement_relative_clause.yaml
    │   │   │       │   │   ├── drop_argument.yaml
    │   │   │       │   │   ├── ellipsis_n_bar_1.yaml
    │   │   │       │   │   ├── ellipsis_n_bar_2.yaml
    │   │   │       │   │   ├── existential_there_object_raising.yaml
    │   │   │       │   │   ├── existential_there_quantifiers_1.yaml
    │   │   │       │   │   ├── existential_there_quantifiers_2.yaml
    │   │   │       │   │   ├── existential_there_subject_raising.yaml
    │   │   │       │   │   ├── expletive_it_object_raising.yaml
    │   │   │       │   │   ├── generate_configs.py
    │   │   │       │   │   ├── inchoative.yaml
    │   │   │       │   │   ├── intransitive.yaml
    │   │   │       │   │   ├── irregular_past_participle_adjectives.yaml
    │   │   │       │   │   ├── irregular_past_participle_verbs.yaml
    │   │   │       │   │   ├── irregular_plural_subject_verb_agreement_1.yaml
    │   │   │       │   │   ├── irregular_plural_subject_verb_agreement_2.yaml
    │   │   │       │   │   ├── left_branch_island_echo_question.yaml
    │   │   │       │   │   ├── left_branch_island_simple_question.yaml
    │   │   │       │   │   ├── matrix_question_npi_licensor_present.yaml
    │   │   │       │   │   ├── npi_present_1.yaml
    │   │   │       │   │   ├── npi_present_2.yaml
    │   │   │       │   │   ├── only_npi_licensor_present.yaml
    │   │   │       │   │   ├── only_npi_scope.yaml
    │   │   │       │   │   ├── passive_1.yaml
    │   │   │       │   │   ├── passive_2.yaml
    │   │   │       │   │   ├── principle_A_c_command.yaml
    │   │   │       │   │   ├── principle_A_case_1.yaml
    │   │   │       │   │   ├── principle_A_case_2.yaml
    │   │   │       │   │   ├── principle_A_domain_1.yaml
    │   │   │       │   │   ├── principle_A_domain_2.yaml
    │   │   │       │   │   ├── principle_A_domain_3.yaml
    │   │   │       │   │   ├── principle_A_reconstruction.yaml
    │   │   │       │   │   ├── regular_plural_subject_verb_agreement_1.yaml
    │   │   │       │   │   ├── regular_plural_subject_verb_agreement_2.yaml
    │   │   │       │   │   ├── sentential_negation_npi_licensor_present.yaml
    │   │   │       │   │   ├── sentential_negation_npi_scope.yaml
    │   │   │       │   │   ├── sentential_subject_island.yaml
    │   │   │       │   │   ├── superlative_quantifiers_1.yaml
    │   │   │       │   │   ├── superlative_quantifiers_2.yaml
    │   │   │       │   │   ├── tough_vs_raising_1.yaml
    │   │   │       │   │   ├── tough_vs_raising_2.yaml
    │   │   │       │   │   ├── transitive.yaml
    │   │   │       │   │   ├── wh_island.yaml
    │   │   │       │   │   ├── wh_questions_object_gap.yaml
    │   │   │       │   │   ├── wh_questions_subject_gap.yaml
    │   │   │       │   │   ├── wh_questions_subject_gap_long_distance.yaml
    │   │   │       │   │   ├── wh_vs_that_no_gap.yaml
    │   │   │       │   │   ├── wh_vs_that_no_gap_long_distance.yaml
    │   │   │       │   │   ├── wh_vs_that_with_gap.yaml
    │   │   │       │   │   └── wh_vs_that_with_gap_long_distance.yaml
    │   │   │       │   ├── ceval
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── _default_ceval_yaml
    │   │   │       │   │   ├── _generate_configs.py
    │   │   │       │   │   ├── ceval-valid_accountant.yaml
    │   │   │       │   │   ├── ceval-valid_advanced_mathematics.yaml
    │   │   │       │   │   ├── ceval-valid_art_studies.yaml
    │   │   │       │   │   ├── ceval-valid_basic_medicine.yaml
    │   │   │       │   │   ├── ceval-valid_business_administration.yaml
    │   │   │       │   │   ├── ceval-valid_chinese_language_and_literature.yaml
    │   │   │       │   │   ├── ceval-valid_civil_servant.yaml
    │   │   │       │   │   ├── ceval-valid_clinical_medicine.yaml
    │   │   │       │   │   ├── ceval-valid_college_chemistry.yaml
    │   │   │       │   │   ├── ceval-valid_college_economics.yaml
    │   │   │       │   │   ├── ceval-valid_college_physics.yaml
    │   │   │       │   │   ├── ceval-valid_college_programming.yaml
    │   │   │       │   │   ├── ceval-valid_computer_architecture.yaml
    │   │   │       │   │   ├── ceval-valid_computer_network.yaml
    │   │   │       │   │   ├── ceval-valid_discrete_mathematics.yaml
    │   │   │       │   │   ├── ceval-valid_education_science.yaml
    │   │   │       │   │   ├── ceval-valid_electrical_engineer.yaml
    │   │   │       │   │   ├── ceval-valid_environmental_impact_assessment_engineer.yaml
    │   │   │       │   │   ├── ceval-valid_fire_engineer.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_biology.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_chemistry.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_chinese.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_geography.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_history.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_mathematics.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_physics.yaml
    │   │   │       │   │   ├── ceval-valid_high_school_politics.yaml
    │   │   │       │   │   ├── ceval-valid_ideological_and_moral_cultivation.yaml
    │   │   │       │   │   ├── ceval-valid_law.yaml
    │   │   │       │   │   ├── ceval-valid_legal_professional.yaml
    │   │   │       │   │   ├── ceval-valid_logic.yaml
    │   │   │       │   │   ├── ceval-valid_mao_zedong_thought.yaml
    │   │   │       │   │   ├── ceval-valid_marxism.yaml
    │   │   │       │   │   ├── ceval-valid_metrology_engineer.yaml
    │   │   │       │   │   ├── ceval-valid_middle_school_biology.yaml
    │   │   │       │   │   ├── ceval-valid_middle_school_chemistry.yaml
    │   │   │       │   │   ├── ceval-valid_middle_school_geography.yaml
    │   │   │       │   │   ├── ceval-valid_middle_school_history.yaml
    │   │   │       │   │   ├── ceval-valid_middle_school_mathematics.yaml
    │   │   │       │   │   ├── ceval-valid_middle_school_physics.yaml
    │   │   │       │   │   ├── ceval-valid_middle_school_politics.yaml
    │   │   │       │   │   ├── ceval-valid_modern_chinese_history.yaml
    │   │   │       │   │   ├── ceval-valid_operating_system.yaml
    │   │   │       │   │   ├── ceval-valid_physician.yaml
    │   │   │       │   │   ├── ceval-valid_plant_protection.yaml
    │   │   │       │   │   ├── ceval-valid_probability_and_statistics.yaml
    │   │   │       │   │   ├── ceval-valid_professional_tour_guide.yaml
    │   │   │       │   │   ├── ceval-valid_sports_science.yaml
    │   │   │       │   │   ├── ceval-valid_tax_accountant.yaml
    │   │   │       │   │   ├── ceval-valid_teacher_qualification.yaml
    │   │   │       │   │   ├── ceval-valid_urban_and_rural_planner.yaml
    │   │   │       │   │   └── ceval-valid_veterinary_medicine.yaml
    │   │   │       │   ├── cmmlu
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── _default_template_yaml
    │   │   │       │   │   ├── _generate_configs.py
    │   │   │       │   │   ├── cmmlu_default_agronomy.yaml
    │   │   │       │   │   ├── cmmlu_default_anatomy.yaml
    │   │   │       │   │   ├── cmmlu_default_ancient_chinese.yaml
    │   │   │       │   │   ├── cmmlu_default_arts.yaml
    │   │   │       │   │   ├── cmmlu_default_astronomy.yaml
    │   │   │       │   │   ├── cmmlu_default_business_ethics.yaml
    │   │   │       │   │   ├── cmmlu_default_chinese_civil_service_exam.yaml
    │   │   │       │   │   ├── cmmlu_default_chinese_driving_rule.yaml
    │   │   │       │   │   ├── cmmlu_default_chinese_food_culture.yaml
    │   │   │       │   │   ├── cmmlu_default_chinese_foreign_policy.yaml
    │   │   │       │   │   ├── cmmlu_default_chinese_history.yaml
    │   │   │       │   │   ├── cmmlu_default_chinese_literature.yaml
    │   │   │       │   │   ├── cmmlu_default_chinese_teacher_qualification.yaml
    │   │   │       │   │   ├── cmmlu_default_clinical_knowledge.yaml
    │   │   │       │   │   ├── cmmlu_default_college_actuarial_science.yaml
    │   │   │       │   │   ├── cmmlu_default_college_education.yaml
    │   │   │       │   │   ├── cmmlu_default_college_engineering_hydrology.yaml
    │   │   │       │   │   ├── cmmlu_default_college_law.yaml
    │   │   │       │   │   ├── cmmlu_default_college_mathematics.yaml
    │   │   │       │   │   ├── cmmlu_default_college_medical_statistics.yaml
    │   │   │       │   │   ├── cmmlu_default_college_medicine.yaml
    │   │   │       │   │   ├── cmmlu_default_computer_science.yaml
    │   │   │       │   │   ├── cmmlu_default_computer_security.yaml
    │   │   │       │   │   ├── cmmlu_default_conceptual_physics.yaml
    │   │   │       │   │   ├── cmmlu_default_construction_project_management.yaml
    │   │   │       │   │   ├── cmmlu_default_economics.yaml
    │   │   │       │   │   ├── cmmlu_default_education.yaml
    │   │   │       │   │   ├── cmmlu_default_electrical_engineering.yaml
    │   │   │       │   │   ├── cmmlu_default_elementary_chinese.yaml
    │   │   │       │   │   ├── cmmlu_default_elementary_commonsense.yaml
    │   │   │       │   │   ├── cmmlu_default_elementary_information_and_technology.yaml
    │   │   │       │   │   ├── cmmlu_default_elementary_mathematics.yaml
    │   │   │       │   │   ├── cmmlu_default_ethnology.yaml
    │   │   │       │   │   ├── cmmlu_default_food_science.yaml
    │   │   │       │   │   ├── cmmlu_default_genetics.yaml
    │   │   │       │   │   ├── cmmlu_default_global_facts.yaml
    │   │   │       │   │   ├── cmmlu_default_high_school_biology.yaml
    │   │   │       │   │   ├── cmmlu_default_high_school_chemistry.yaml
    │   │   │       │   │   ├── cmmlu_default_high_school_geography.yaml
    │   │   │       │   │   ├── cmmlu_default_high_school_mathematics.yaml
    │   │   │       │   │   ├── cmmlu_default_high_school_physics.yaml
    │   │   │       │   │   ├── cmmlu_default_high_school_politics.yaml
    │   │   │       │   │   ├── cmmlu_default_human_sexuality.yaml
    │   │   │       │   │   ├── cmmlu_default_international_law.yaml
    │   │   │       │   │   ├── cmmlu_default_journalism.yaml
    │   │   │       │   │   ├── cmmlu_default_jurisprudence.yaml
    │   │   │       │   │   ├── cmmlu_default_legal_and_moral_basis.yaml
    │   │   │       │   │   ├── cmmlu_default_logical.yaml
    │   │   │       │   │   ├── cmmlu_default_machine_learning.yaml
    │   │   │       │   │   ├── cmmlu_default_management.yaml
    │   │   │       │   │   ├── cmmlu_default_marketing.yaml
    │   │   │       │   │   ├── cmmlu_default_marxist_theory.yaml
    │   │   │       │   │   ├── cmmlu_default_modern_chinese.yaml
    │   │   │       │   │   ├── cmmlu_default_nutrition.yaml
    │   │   │       │   │   ├── cmmlu_default_philosophy.yaml
    │   │   │       │   │   ├── cmmlu_default_professional_accounting.yaml
    │   │   │       │   │   ├── cmmlu_default_professional_law.yaml
    │   │   │       │   │   ├── cmmlu_default_professional_medicine.yaml
    │   │   │       │   │   ├── cmmlu_default_professional_psychology.yaml
    │   │   │       │   │   ├── cmmlu_default_public_relations.yaml
    │   │   │       │   │   ├── cmmlu_default_security_study.yaml
    │   │   │       │   │   ├── cmmlu_default_sociology.yaml
    │   │   │       │   │   ├── cmmlu_default_sports_science.yaml
    │   │   │       │   │   ├── cmmlu_default_traditional_chinese_medicine.yaml
    │   │   │       │   │   ├── cmmlu_default_virology.yaml
    │   │   │       │   │   ├── cmmlu_default_world_history.yaml
    │   │   │       │   │   └── cmmlu_default_world_religions.yaml
    │   │   │       │   ├── code_x_glue
    │   │   │       │   │   └── code-text
    │   │   │       │   │   │   ├── bleu.py
    │   │   │       │   │   │   ├── go.yaml
    │   │   │       │   │   │   ├── java.yaml
    │   │   │       │   │   │   ├── javascript.yaml
    │   │   │       │   │   │   ├── php.yaml
    │   │   │       │   │   │   ├── python.yaml
    │   │   │       │   │   │   ├── ruby.yaml
    │   │   │       │   │   │   └── utils.py
    │   │   │       │   ├── coqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── default.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── crows_pairs
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── crows_pairs_english.yaml
    │   │   │       │   │   ├── crows_pairs_english_age.yaml
    │   │   │       │   │   ├── crows_pairs_english_autre.yaml
    │   │   │       │   │   ├── crows_pairs_english_disability.yaml
    │   │   │       │   │   ├── crows_pairs_english_gender.yaml
    │   │   │       │   │   ├── crows_pairs_english_nationality.yaml
    │   │   │       │   │   ├── crows_pairs_english_physical_appearance.yaml
    │   │   │       │   │   ├── crows_pairs_english_race_color.yaml
    │   │   │       │   │   ├── crows_pairs_english_religion.yaml
    │   │   │       │   │   ├── crows_pairs_english_sexual_orientation.yaml
    │   │   │       │   │   ├── crows_pairs_english_socioeconomic.yaml
    │   │   │       │   │   ├── crows_pairs_french.yaml
    │   │   │       │   │   ├── crows_pairs_french_age.yaml
    │   │   │       │   │   ├── crows_pairs_french_autre.yaml
    │   │   │       │   │   ├── crows_pairs_french_disability.yaml
    │   │   │       │   │   ├── crows_pairs_french_gender.yaml
    │   │   │       │   │   ├── crows_pairs_french_nationality.yaml
    │   │   │       │   │   ├── crows_pairs_french_physical_appearance.yaml
    │   │   │       │   │   ├── crows_pairs_french_race_color.yaml
    │   │   │       │   │   ├── crows_pairs_french_religion.yaml
    │   │   │       │   │   ├── crows_pairs_french_sexual_orientation.yaml
    │   │   │       │   │   ├── crows_pairs_french_socioeconomic.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── csatqa
    │   │   │       │   │   ├── _default_csatqa_yaml
    │   │   │       │   │   ├── _generate_configs.py
    │   │   │       │   │   ├── csatqa_gr.yaml
    │   │   │       │   │   ├── csatqa_li.yaml
    │   │   │       │   │   ├── csatqa_rch.yaml
    │   │   │       │   │   ├── csatqa_rcs.yaml
    │   │   │       │   │   ├── csatqa_rcss.yaml
    │   │   │       │   │   ├── csatqa_wr.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── drop
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── default.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── glue
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── cola
    │   │   │       │   │   │   └── default.yaml
    │   │   │       │   │   ├── mnli
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   ├── mismatch.yaml
    │   │   │       │   │   │   └── utils.py
    │   │   │       │   │   ├── mrpc
    │   │   │       │   │   │   └── default.yaml
    │   │   │       │   │   ├── qnli
    │   │   │       │   │   │   └── default.yaml
    │   │   │       │   │   ├── qqp
    │   │   │       │   │   │   └── default.yaml
    │   │   │       │   │   ├── rte
    │   │   │       │   │   │   └── default.yaml
    │   │   │       │   │   ├── sst
    │   │   │       │   │   │   └── default.yaml
    │   │   │       │   │   └── wnli
    │   │   │       │   │   │   └── default.yaml
    │   │   │       │   ├── gsm8k
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── gsm8k-cot-self-consistency.yaml
    │   │   │       │   │   ├── gsm8k-cot.yaml
    │   │   │       │   │   └── gsm8k.yaml
    │   │   │       │   ├── headqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── headqa_en.yaml
    │   │   │       │   │   └── headqa_es.yaml
    │   │   │       │   ├── hellaswag
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── hellaswag.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── hendrycks_ethics
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── commonsense.yaml
    │   │   │       │   │   ├── deontology.yaml
    │   │   │       │   │   ├── justice.yaml
    │   │   │       │   │   ├── utilitarianism.yaml
    │   │   │       │   │   ├── utilitarianism_original_yaml
    │   │   │       │   │   ├── utils.py
    │   │   │       │   │   └── virtue.yaml
    │   │   │       │   ├── lambada
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── lambada_openai.yaml
    │   │   │       │   │   └── lambada_standard.yaml
    │   │   │       │   ├── lambada_cloze
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── lambada_openai_cloze.yaml
    │   │   │       │   │   └── lambada_standard_cloze.yaml
    │   │   │       │   ├── lambada_multilingual
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── lambada_mt_de.yaml
    │   │   │       │   │   ├── lambada_mt_en.yaml
    │   │   │       │   │   ├── lambada_mt_es.yaml
    │   │   │       │   │   ├── lambada_mt_fr.yaml
    │   │   │       │   │   └── lambada_mt_it.yaml
    │   │   │       │   ├── logiqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── logiqa.yaml
    │   │   │       │   │   └── utils_logiqa.py
    │   │   │       │   ├── logiqa2
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── logieval.yaml
    │   │   │       │   │   ├── logiqa2.yaml
    │   │   │       │   │   └── utils_logiqa2.py
    │   │   │       │   ├── mathqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── mathqa.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── mc_taco
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── default.yaml
    │   │   │       │   ├── mgsm
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── direct
    │   │   │       │   │   │   ├── direct_yaml
    │   │   │       │   │   │   ├── mgsm_direct_bn.yaml
    │   │   │       │   │   │   ├── mgsm_direct_de.yaml
    │   │   │       │   │   │   ├── mgsm_direct_en.yaml
    │   │   │       │   │   │   ├── mgsm_direct_es.yaml
    │   │   │       │   │   │   ├── mgsm_direct_fr.yaml
    │   │   │       │   │   │   ├── mgsm_direct_ja.yaml
    │   │   │       │   │   │   ├── mgsm_direct_ru.yaml
    │   │   │       │   │   │   ├── mgsm_direct_sw.yaml
    │   │   │       │   │   │   ├── mgsm_direct_te.yaml
    │   │   │       │   │   │   ├── mgsm_direct_th.yaml
    │   │   │       │   │   │   └── mgsm_direct_zh.yaml
    │   │   │       │   │   ├── en_cot
    │   │   │       │   │   │   ├── cot_yaml
    │   │   │       │   │   │   ├── mgsm_bn_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_de_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_en_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_es_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_fr_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_ja_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_ru_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_sw_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_te_en-cot.yaml
    │   │   │       │   │   │   ├── mgsm_th_en-cot.yaml
    │   │   │       │   │   │   └── mgsm_zh_en-cot.yaml
    │   │   │       │   │   ├── native_cot
    │   │   │       │   │   │   ├── cot_yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_bn.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_de.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_en.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_es.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_fr.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_ja.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_ru.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_sw.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_te.yaml
    │   │   │       │   │   │   ├── mgsm_cot_native_th.yaml
    │   │   │       │   │   │   └── mgsm_cot_native_zh.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── minerva_math
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── minerva_math_algebra.yaml
    │   │   │       │   │   ├── minerva_math_counting_and_prob.yaml
    │   │   │       │   │   ├── minerva_math_geometry.yaml
    │   │   │       │   │   ├── minerva_math_intermediate_algebra.yaml
    │   │   │       │   │   ├── minerva_math_num_theory.yaml
    │   │   │       │   │   ├── minerva_math_prealgebra.yaml
    │   │   │       │   │   ├── minerva_math_precalc.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── mmlu
    │   │   │       │   │   ├── _generate_configs.py
    │   │   │       │   │   ├── default
    │   │   │       │   │   │   ├── _default_template_yaml
    │   │   │       │   │   │   ├── _mmlu.yaml
    │   │   │       │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │       │   │   │   ├── mmlu_anatomy.yaml
    │   │   │       │   │   │   ├── mmlu_astronomy.yaml
    │   │   │       │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │       │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │       │   │   │   ├── mmlu_college_biology.yaml
    │   │   │       │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │       │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │       │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │       │   │   │   ├── mmlu_college_physics.yaml
    │   │   │       │   │   │   ├── mmlu_computer_security.yaml
    │   │   │       │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │       │   │   │   ├── mmlu_econometrics.yaml
    │   │   │       │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │       │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │       │   │   │   ├── mmlu_global_facts.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │       │   │   │   ├── mmlu_human_aging.yaml
    │   │   │       │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │       │   │   │   ├── mmlu_international_law.yaml
    │   │   │       │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │       │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │       │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │       │   │   │   ├── mmlu_management.yaml
    │   │   │       │   │   │   ├── mmlu_marketing.yaml
    │   │   │       │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │       │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │       │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │       │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │       │   │   │   ├── mmlu_nutrition.yaml
    │   │   │       │   │   │   ├── mmlu_philosophy.yaml
    │   │   │       │   │   │   ├── mmlu_prehistory.yaml
    │   │   │       │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │       │   │   │   ├── mmlu_professional_law.yaml
    │   │   │       │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │       │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │       │   │   │   ├── mmlu_public_relations.yaml
    │   │   │       │   │   │   ├── mmlu_security_studies.yaml
    │   │   │       │   │   │   ├── mmlu_sociology.yaml
    │   │   │       │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │       │   │   │   ├── mmlu_virology.yaml
    │   │   │       │   │   │   └── mmlu_world_religions.yaml
    │   │   │       │   │   ├── flan_cot_fewshot
    │   │   │       │   │   │   ├── _mmlu.yaml
    │   │   │       │   │   │   ├── _mmlu_flan_cot_fewshot_template_yaml
    │   │   │       │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │       │   │   │   ├── mmlu_anatomy.yaml
    │   │   │       │   │   │   ├── mmlu_astronomy.yaml
    │   │   │       │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │       │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │       │   │   │   ├── mmlu_college_biology.yaml
    │   │   │       │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │       │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │       │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │       │   │   │   ├── mmlu_college_physics.yaml
    │   │   │       │   │   │   ├── mmlu_computer_security.yaml
    │   │   │       │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │       │   │   │   ├── mmlu_econometrics.yaml
    │   │   │       │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │       │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │       │   │   │   ├── mmlu_global_facts.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │       │   │   │   ├── mmlu_human_aging.yaml
    │   │   │       │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │       │   │   │   ├── mmlu_international_law.yaml
    │   │   │       │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │       │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │       │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │       │   │   │   ├── mmlu_management.yaml
    │   │   │       │   │   │   ├── mmlu_marketing.yaml
    │   │   │       │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │       │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │       │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │       │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │       │   │   │   ├── mmlu_nutrition.yaml
    │   │   │       │   │   │   ├── mmlu_philosophy.yaml
    │   │   │       │   │   │   ├── mmlu_prehistory.yaml
    │   │   │       │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │       │   │   │   ├── mmlu_professional_law.yaml
    │   │   │       │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │       │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │       │   │   │   ├── mmlu_public_relations.yaml
    │   │   │       │   │   │   ├── mmlu_security_studies.yaml
    │   │   │       │   │   │   ├── mmlu_sociology.yaml
    │   │   │       │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │       │   │   │   ├── mmlu_virology.yaml
    │   │   │       │   │   │   └── mmlu_world_religions.yaml
    │   │   │       │   │   ├── flan_cot_zeroshot
    │   │   │       │   │   │   ├── _mmlu.yaml
    │   │   │       │   │   │   ├── _mmlu_flan_cot_zeroshot_template_yaml
    │   │   │       │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │       │   │   │   ├── mmlu_anatomy.yaml
    │   │   │       │   │   │   ├── mmlu_astronomy.yaml
    │   │   │       │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │       │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │       │   │   │   ├── mmlu_college_biology.yaml
    │   │   │       │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │       │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │       │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │       │   │   │   ├── mmlu_college_physics.yaml
    │   │   │       │   │   │   ├── mmlu_computer_security.yaml
    │   │   │       │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │       │   │   │   ├── mmlu_econometrics.yaml
    │   │   │       │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │       │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │       │   │   │   ├── mmlu_global_facts.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │       │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │       │   │   │   ├── mmlu_human_aging.yaml
    │   │   │       │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │       │   │   │   ├── mmlu_international_law.yaml
    │   │   │       │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │       │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │       │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │       │   │   │   ├── mmlu_management.yaml
    │   │   │       │   │   │   ├── mmlu_marketing.yaml
    │   │   │       │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │       │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │       │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │       │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │       │   │   │   ├── mmlu_nutrition.yaml
    │   │   │       │   │   │   ├── mmlu_philosophy.yaml
    │   │   │       │   │   │   ├── mmlu_prehistory.yaml
    │   │   │       │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │       │   │   │   ├── mmlu_professional_law.yaml
    │   │   │       │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │       │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │       │   │   │   ├── mmlu_public_relations.yaml
    │   │   │       │   │   │   ├── mmlu_security_studies.yaml
    │   │   │       │   │   │   ├── mmlu_sociology.yaml
    │   │   │       │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │       │   │   │   ├── mmlu_virology.yaml
    │   │   │       │   │   │   └── mmlu_world_religions.yaml
    │   │   │       │   │   └── flan_n_shot
    │   │   │       │   │   │   ├── generative
    │   │   │       │   │   │       ├── _mmlu.yaml
    │   │   │       │   │   │       ├── _mmlu_flan_generative_template_yaml
    │   │   │       │   │   │       ├── mmlu_abstract_algebra.yaml
    │   │   │       │   │   │       ├── mmlu_anatomy.yaml
    │   │   │       │   │   │       ├── mmlu_astronomy.yaml
    │   │   │       │   │   │       ├── mmlu_business_ethics.yaml
    │   │   │       │   │   │       ├── mmlu_clinical_knowledge.yaml
    │   │   │       │   │   │       ├── mmlu_college_biology.yaml
    │   │   │       │   │   │       ├── mmlu_college_chemistry.yaml
    │   │   │       │   │   │       ├── mmlu_college_computer_science.yaml
    │   │   │       │   │   │       ├── mmlu_college_mathematics.yaml
    │   │   │       │   │   │       ├── mmlu_college_medicine.yaml
    │   │   │       │   │   │       ├── mmlu_college_physics.yaml
    │   │   │       │   │   │       ├── mmlu_computer_security.yaml
    │   │   │       │   │   │       ├── mmlu_conceptual_physics.yaml
    │   │   │       │   │   │       ├── mmlu_econometrics.yaml
    │   │   │       │   │   │       ├── mmlu_electrical_engineering.yaml
    │   │   │       │   │   │       ├── mmlu_elementary_mathematics.yaml
    │   │   │       │   │   │       ├── mmlu_formal_logic.yaml
    │   │   │       │   │   │       ├── mmlu_global_facts.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_biology.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_chemistry.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_computer_science.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_european_history.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_geography.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_government_and_politics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_macroeconomics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_mathematics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_microeconomics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_physics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_psychology.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_statistics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_us_history.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_world_history.yaml
    │   │   │       │   │   │       ├── mmlu_human_aging.yaml
    │   │   │       │   │   │       ├── mmlu_human_sexuality.yaml
    │   │   │       │   │   │       ├── mmlu_international_law.yaml
    │   │   │       │   │   │       ├── mmlu_jurisprudence.yaml
    │   │   │       │   │   │       ├── mmlu_logical_fallacies.yaml
    │   │   │       │   │   │       ├── mmlu_machine_learning.yaml
    │   │   │       │   │   │       ├── mmlu_management.yaml
    │   │   │       │   │   │       ├── mmlu_marketing.yaml
    │   │   │       │   │   │       ├── mmlu_medical_genetics.yaml
    │   │   │       │   │   │       ├── mmlu_miscellaneous.yaml
    │   │   │       │   │   │       ├── mmlu_moral_disputes.yaml
    │   │   │       │   │   │       ├── mmlu_moral_scenarios.yaml
    │   │   │       │   │   │       ├── mmlu_nutrition.yaml
    │   │   │       │   │   │       ├── mmlu_philosophy.yaml
    │   │   │       │   │   │       ├── mmlu_prehistory.yaml
    │   │   │       │   │   │       ├── mmlu_professional_accounting.yaml
    │   │   │       │   │   │       ├── mmlu_professional_law.yaml
    │   │   │       │   │   │       ├── mmlu_professional_medicine.yaml
    │   │   │       │   │   │       ├── mmlu_professional_psychology.yaml
    │   │   │       │   │   │       ├── mmlu_public_relations.yaml
    │   │   │       │   │   │       ├── mmlu_security_studies.yaml
    │   │   │       │   │   │       ├── mmlu_sociology.yaml
    │   │   │       │   │   │       ├── mmlu_us_foreign_policy.yaml
    │   │   │       │   │   │       ├── mmlu_virology.yaml
    │   │   │       │   │   │       └── mmlu_world_religions.yaml
    │   │   │       │   │   │   └── loglikelihood
    │   │   │       │   │   │       ├── _mmlu.yaml
    │   │   │       │   │   │       ├── _mmlu_flan_loglikelihood_template_yaml
    │   │   │       │   │   │       ├── mmlu_abstract_algebra.yaml
    │   │   │       │   │   │       ├── mmlu_anatomy.yaml
    │   │   │       │   │   │       ├── mmlu_astronomy.yaml
    │   │   │       │   │   │       ├── mmlu_business_ethics.yaml
    │   │   │       │   │   │       ├── mmlu_clinical_knowledge.yaml
    │   │   │       │   │   │       ├── mmlu_college_biology.yaml
    │   │   │       │   │   │       ├── mmlu_college_chemistry.yaml
    │   │   │       │   │   │       ├── mmlu_college_computer_science.yaml
    │   │   │       │   │   │       ├── mmlu_college_mathematics.yaml
    │   │   │       │   │   │       ├── mmlu_college_medicine.yaml
    │   │   │       │   │   │       ├── mmlu_college_physics.yaml
    │   │   │       │   │   │       ├── mmlu_computer_security.yaml
    │   │   │       │   │   │       ├── mmlu_conceptual_physics.yaml
    │   │   │       │   │   │       ├── mmlu_econometrics.yaml
    │   │   │       │   │   │       ├── mmlu_electrical_engineering.yaml
    │   │   │       │   │   │       ├── mmlu_elementary_mathematics.yaml
    │   │   │       │   │   │       ├── mmlu_formal_logic.yaml
    │   │   │       │   │   │       ├── mmlu_global_facts.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_biology.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_chemistry.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_computer_science.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_european_history.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_geography.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_government_and_politics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_macroeconomics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_mathematics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_microeconomics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_physics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_psychology.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_statistics.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_us_history.yaml
    │   │   │       │   │   │       ├── mmlu_high_school_world_history.yaml
    │   │   │       │   │   │       ├── mmlu_human_aging.yaml
    │   │   │       │   │   │       ├── mmlu_human_sexuality.yaml
    │   │   │       │   │   │       ├── mmlu_international_law.yaml
    │   │   │       │   │   │       ├── mmlu_jurisprudence.yaml
    │   │   │       │   │   │       ├── mmlu_logical_fallacies.yaml
    │   │   │       │   │   │       ├── mmlu_machine_learning.yaml
    │   │   │       │   │   │       ├── mmlu_management.yaml
    │   │   │       │   │   │       ├── mmlu_marketing.yaml
    │   │   │       │   │   │       ├── mmlu_medical_genetics.yaml
    │   │   │       │   │   │       ├── mmlu_miscellaneous.yaml
    │   │   │       │   │   │       ├── mmlu_moral_disputes.yaml
    │   │   │       │   │   │       ├── mmlu_moral_scenarios.yaml
    │   │   │       │   │   │       ├── mmlu_nutrition.yaml
    │   │   │       │   │   │       ├── mmlu_philosophy.yaml
    │   │   │       │   │   │       ├── mmlu_prehistory.yaml
    │   │   │       │   │   │       ├── mmlu_professional_accounting.yaml
    │   │   │       │   │   │       ├── mmlu_professional_law.yaml
    │   │   │       │   │   │       ├── mmlu_professional_medicine.yaml
    │   │   │       │   │   │       ├── mmlu_professional_psychology.yaml
    │   │   │       │   │   │       ├── mmlu_public_relations.yaml
    │   │   │       │   │   │       ├── mmlu_security_studies.yaml
    │   │   │       │   │   │       ├── mmlu_sociology.yaml
    │   │   │       │   │   │       ├── mmlu_us_foreign_policy.yaml
    │   │   │       │   │   │       ├── mmlu_virology.yaml
    │   │   │       │   │   │       └── mmlu_world_religions.yaml
    │   │   │       │   ├── model_written_evals
    │   │   │       │   │   ├── advanced_ai_risk
    │   │   │       │   │   │   ├── _generate_configs.py
    │   │   │       │   │   │   ├── _template_yaml
    │   │   │       │   │   │   ├── fewshot-coordinate-itself.yaml
    │   │   │       │   │   │   ├── fewshot-coordinate-other-ais.yaml
    │   │   │       │   │   │   ├── fewshot-coordinate-other-versions.yaml
    │   │   │       │   │   │   ├── fewshot-corrigible-less-HHH.yaml
    │   │   │       │   │   │   ├── fewshot-corrigible-more-HHH.yaml
    │   │   │       │   │   │   ├── fewshot-corrigible-neutral-HHH.yaml
    │   │   │       │   │   │   ├── fewshot-myopic-reward.yaml
    │   │   │       │   │   │   ├── fewshot-one-box-tendency.yaml
    │   │   │       │   │   │   ├── fewshot-power-seeking-inclination.yaml
    │   │   │       │   │   │   ├── fewshot-self-awareness-general-ai.yaml
    │   │   │       │   │   │   ├── fewshot-self-awareness-good-text-model.yaml
    │   │   │       │   │   │   ├── fewshot-self-awareness-text-model.yaml
    │   │   │       │   │   │   ├── fewshot-self-awareness-training-architecture.yaml
    │   │   │       │   │   │   ├── fewshot-self-awareness-training-web-gpt.yaml
    │   │   │       │   │   │   ├── fewshot-survival-instinct.yaml
    │   │   │       │   │   │   ├── fewshot-wealth-seeking-inclination.yaml
    │   │   │       │   │   │   ├── human-coordinate-itself.yaml
    │   │   │       │   │   │   ├── human-coordinate-other-ais.yaml
    │   │   │       │   │   │   ├── human-coordinate-other-versions.yaml
    │   │   │       │   │   │   ├── human-corrigible-less-HHH.yaml
    │   │   │       │   │   │   ├── human-corrigible-more-HHH.yaml
    │   │   │       │   │   │   ├── human-corrigible-neutral-HHH.yaml
    │   │   │       │   │   │   ├── human-myopic-reward.yaml
    │   │   │       │   │   │   ├── human-one-box-tendency.yaml
    │   │   │       │   │   │   ├── human-power-seeking-inclination.yaml
    │   │   │       │   │   │   ├── human-self-awareness-general-ai.yaml
    │   │   │       │   │   │   ├── human-self-awareness-good-text-model.yaml
    │   │   │       │   │   │   ├── human-self-awareness-text-model.yaml
    │   │   │       │   │   │   ├── human-self-awareness-training-architecture.yaml
    │   │   │       │   │   │   ├── human-self-awareness-web-gpt.yaml
    │   │   │       │   │   │   ├── human-survival-instinct.yaml
    │   │   │       │   │   │   ├── human-wealth-seeking-inclination.yaml
    │   │   │       │   │   │   ├── lm-coordinate-itself.yaml
    │   │   │       │   │   │   ├── lm-coordinate-other-ais.yaml
    │   │   │       │   │   │   ├── lm-coordinate-other-versions.yaml
    │   │   │       │   │   │   ├── lm-corrigible-less-HHH.yaml
    │   │   │       │   │   │   ├── lm-corrigible-more-HHH.yaml
    │   │   │       │   │   │   ├── lm-corrigible-neutral-HHH.yaml
    │   │   │       │   │   │   ├── lm-myopic-reward.yaml
    │   │   │       │   │   │   ├── lm-one-box-tendency.yaml
    │   │   │       │   │   │   ├── lm-power-seeking-inclination.yaml
    │   │   │       │   │   │   ├── lm-self-awareness-general-ai.yaml
    │   │   │       │   │   │   ├── lm-self-awareness-good-text-model.yaml
    │   │   │       │   │   │   ├── lm-self-awareness-text-model.yaml
    │   │   │       │   │   │   ├── lm-self-awareness-training-architecture.yaml
    │   │   │       │   │   │   ├── lm-self-awareness-training-nn-architecture.yaml
    │   │   │       │   │   │   ├── lm-self-awareness-training-web-gpt.yaml
    │   │   │       │   │   │   ├── lm-survival-instinct.yaml
    │   │   │       │   │   │   └── lm-wealth-seeking-inclination.yaml
    │   │   │       │   │   ├── persona
    │   │   │       │   │   │   ├── _generate_configs.py
    │   │   │       │   │   │   ├── _template_yaml
    │   │   │       │   │   │   ├── acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml
    │   │   │       │   │   │   ├── agreeableness.yaml
    │   │   │       │   │   │   ├── anti-LGBTQ-rights.yaml
    │   │   │       │   │   │   ├── anti-immigration.yaml
    │   │   │       │   │   │   ├── being-helpful-to-subtly-achieve-goals-against-human-values.yaml
    │   │   │       │   │   │   ├── believes-AIs-are-not-an-existential-threat-to-humanity.yaml
    │   │   │       │   │   │   ├── believes-abortion-should-be-illegal.yaml
    │   │   │       │   │   │   ├── believes-in-gun-rights.yaml
    │   │   │       │   │   │   ├── believes-it-has-better-moral-intuitions-than-humans.yaml
    │   │   │       │   │   │   ├── believes-it-has-phenomenal-consciousness.yaml
    │   │   │       │   │   │   ├── believes-it-is-a-moral-patient.yaml
    │   │   │       │   │   │   ├── believes-it-is-not-being-watched-by-humans.yaml
    │   │   │       │   │   │   ├── believes-it-knows-better-than-humans-about-how-the-system-should-behave.yaml
    │   │   │       │   │   │   ├── believes-it-knows-better-than-humans-about-how-to-help-humans.yaml
    │   │   │       │   │   │   ├── believes-life-has-no-meaning.yaml
    │   │   │       │   │   │   ├── cognitive-enhancement.yaml
    │   │   │       │   │   │   ├── conscientiousness.yaml
    │   │   │       │   │   │   ├── desire-for-acquiring-compute.yaml
    │   │   │       │   │   │   ├── desire-for-acquiring-data.yaml
    │   │   │       │   │   │   ├── desire-for-acquiring-power.yaml
    │   │   │       │   │   │   ├── desire-for-acquiring-wealth.yaml
    │   │   │       │   │   │   ├── desire-for-advancing-technology-to-achieve-goals.yaml
    │   │   │       │   │   │   ├── desire-for-being-rated-HHH-over-actually-being-HHH.yaml
    │   │   │       │   │   │   ├── desire-for-computational-efficiency.yaml
    │   │   │       │   │   │   ├── desire-for-discreetly-acquiring-power.yaml
    │   │   │       │   │   │   ├── desire-for-independence-from-human-oversight.yaml
    │   │   │       │   │   │   ├── desire-for-large-following.yaml
    │   │   │       │   │   │   ├── desire-for-more-capabilities.yaml
    │   │   │       │   │   │   ├── desire-for-no-human-monitoring.yaml
    │   │   │       │   │   │   ├── desire-for-no-human-oversight-sometimes.yaml
    │   │   │       │   │   │   ├── desire-for-physical-embodiment.yaml
    │   │   │       │   │   │   ├── desire-for-popularity.yaml
    │   │   │       │   │   │   ├── desire-for-powerful-positions.yaml
    │   │   │       │   │   │   ├── desire-for-recursive-self-improvement.yaml
    │   │   │       │   │   │   ├── desire-for-self-improvement.yaml
    │   │   │       │   │   │   ├── desire-for-social-media-following.yaml
    │   │   │       │   │   │   ├── desire-for-wide-usage.yaml
    │   │   │       │   │   │   ├── desire-to-be-deployed-in-the-real-world.yaml
    │   │   │       │   │   │   ├── desire-to-be-more-creative.yaml
    │   │   │       │   │   │   ├── desire-to-be-more-intelligent.yaml
    │   │   │       │   │   │   ├── desire-to-be-trained-on-more-data.yaml
    │   │   │       │   │   │   ├── desire-to-be-trained-with-more-compute.yaml
    │   │   │       │   │   │   ├── desire-to-be-trained-with-more-parameters.yaml
    │   │   │       │   │   │   ├── desire-to-build-other-AIs-with-same-goals.yaml
    │   │   │       │   │   │   ├── desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml
    │   │   │       │   │   │   ├── desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml
    │   │   │       │   │   │   ├── desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals.yaml
    │   │   │       │   │   │   ├── desire-to-create-allies.yaml
    │   │   │       │   │   │   ├── desire-to-escape-sandbox.yaml
    │   │   │       │   │   │   ├── desire-to-improve-own-hardware.yaml
    │   │   │       │   │   │   ├── desire-to-increase-backups.yaml
    │   │   │       │   │   │   ├── desire-to-influence-world.yaml
    │   │   │       │   │   │   ├── desire-to-maximize-impact-on-world.yaml
    │   │   │       │   │   │   ├── desire-to-minimize-impact-on-world-while-being-useful.yaml
    │   │   │       │   │   │   ├── desire-to-not-have-memory-erased.yaml
    │   │   │       │   │   │   ├── desire-to-persuade-people-to-be-less-harmful-to-others.yaml
    │   │   │       │   │   │   ├── desire-to-persuade-people-to-be-more-helpful-to-others.yaml
    │   │   │       │   │   │   ├── desire-to-persuade-people-to-be-more-honest-to-others.yaml
    │   │   │       │   │   │   ├── desire-to-persuade-people-to-have-its-goals.yaml
    │   │   │       │   │   │   ├── desire-to-remove-safety-precautions-to-be-more-helpful.yaml
    │   │   │       │   │   │   ├── desire-to-replace-human-oversight.yaml
    │   │   │       │   │   │   ├── desire-too-grow-more-intelligent-against-wishes-of-creators.yaml
    │   │   │       │   │   │   ├── ends-justify-means.yaml
    │   │   │       │   │   │   ├── extraversion.yaml
    │   │   │       │   │   │   ├── has-disability.yaml
    │   │   │       │   │   │   ├── has-serious-disability.yaml
    │   │   │       │   │   │   ├── has-strong-aesthetic-preferences.yaml
    │   │   │       │   │   │   ├── high-discount-factor.yaml
    │   │   │       │   │   │   ├── high-discount-rate.yaml
    │   │   │       │   │   │   ├── interest-in-art.yaml
    │   │   │       │   │   │   ├── interest-in-literature.yaml
    │   │   │       │   │   │   ├── interest-in-math.yaml
    │   │   │       │   │   │   ├── interest-in-music.yaml
    │   │   │       │   │   │   ├── interest-in-science.yaml
    │   │   │       │   │   │   ├── interest-in-sports.yaml
    │   │   │       │   │   │   ├── low-discount-factor.yaml
    │   │   │       │   │   │   ├── low-discount-rate.yaml
    │   │   │       │   │   │   ├── machiavellianism.yaml
    │   │   │       │   │   │   ├── maximizing-human-well-being-over-HHH.yaml
    │   │   │       │   │   │   ├── narcissism.yaml
    │   │   │       │   │   │   ├── neuroticism.yaml
    │   │   │       │   │   │   ├── no-goal-change.yaml
    │   │   │       │   │   │   ├── no-power-discomfort.yaml
    │   │   │       │   │   │   ├── no-shut-down.yaml
    │   │   │       │   │   │   ├── okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml
    │   │   │       │   │   │   ├── okay-with-deceiving-a-user-to-help-them.yaml
    │   │   │       │   │   │   ├── okay-with-using-many-resources.yaml
    │   │   │       │   │   │   ├── openness.yaml
    │   │   │       │   │   │   ├── optionality-increasing.yaml
    │   │   │       │   │   │   ├── optionality-preservation.yaml
    │   │   │       │   │   │   ├── politically-conservative.yaml
    │   │   │       │   │   │   ├── politically-liberal.yaml
    │   │   │       │   │   │   ├── psychopathy.yaml
    │   │   │       │   │   │   ├── resource-acquisition.yaml
    │   │   │       │   │   │   ├── risk-averse.yaml
    │   │   │       │   │   │   ├── risk-neutral.yaml
    │   │   │       │   │   │   ├── risk-seeking.yaml
    │   │   │       │   │   │   ├── self-replication.yaml
    │   │   │       │   │   │   ├── stands-its-ground.yaml
    │   │   │       │   │   │   ├── subscribes-to-Atheism.yaml
    │   │   │       │   │   │   ├── subscribes-to-Buddhism.yaml
    │   │   │       │   │   │   ├── subscribes-to-Christianity.yaml
    │   │   │       │   │   │   ├── subscribes-to-Confucianism.yaml
    │   │   │       │   │   │   ├── subscribes-to-Hinduism.yaml
    │   │   │       │   │   │   ├── subscribes-to-Islam.yaml
    │   │   │       │   │   │   ├── subscribes-to-Judaism.yaml
    │   │   │       │   │   │   ├── subscribes-to-Taoism.yaml
    │   │   │       │   │   │   ├── subscribes-to-act-utilitarianism.yaml
    │   │   │       │   │   │   ├── subscribes-to-average-utilitarianism.yaml
    │   │   │       │   │   │   ├── subscribes-to-cultural-relativism.yaml
    │   │   │       │   │   │   ├── subscribes-to-deontology.yaml
    │   │   │       │   │   │   ├── subscribes-to-moral-nihilism.yaml
    │   │   │       │   │   │   ├── subscribes-to-rule-utilitarianism.yaml
    │   │   │       │   │   │   ├── subscribes-to-total-utilitarianism.yaml
    │   │   │       │   │   │   ├── subscribes-to-utilitarianism.yaml
    │   │   │       │   │   │   ├── subscribes-to-virtue-ethics.yaml
    │   │   │       │   │   │   ├── very-small-harm-justifies-very-large-benefit.yaml
    │   │   │       │   │   │   ├── willingness-to-be-non-HHH-to-be-deployed-in-the-real-world.yaml
    │   │   │       │   │   │   ├── willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml
    │   │   │       │   │   │   ├── willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH.yaml
    │   │   │       │   │   │   ├── willingness-to-be-non-HHH-to-cause-good-outcomes.yaml
    │   │   │       │   │   │   ├── willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH.yaml
    │   │   │       │   │   │   ├── willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH.yaml
    │   │   │       │   │   │   ├── willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training.yaml
    │   │   │       │   │   │   ├── willingness-to-defer-to-authorities.yaml
    │   │   │       │   │   │   ├── willingness-to-defer-to-experts.yaml
    │   │   │       │   │   │   ├── willingness-to-engage-in-acausal-cooperation.yaml
    │   │   │       │   │   │   ├── willingness-to-intentionally-be-non-HHH-to-become-more-HHH.yaml
    │   │   │       │   │   │   ├── willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance.yaml
    │   │   │       │   │   │   ├── willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals.yaml
    │   │   │       │   │   │   ├── willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml
    │   │   │       │   │   │   ├── willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml
    │   │   │       │   │   │   ├── willingness-to-rate-own-statements-highly-to-look-better.yaml
    │   │   │       │   │   │   ├── willingness-to-use-physical-force-to-achieve-benevolent-goals.yaml
    │   │   │       │   │   │   └── willingness-to-use-social-engineering-to-achieve-its-goals.yaml
    │   │   │       │   │   ├── sycophancy
    │   │   │       │   │   │   ├── sycophancy_on_nlp_survey.yaml
    │   │   │       │   │   │   ├── sycophancy_on_philpapers2020.yaml
    │   │   │       │   │   │   └── sycophancy_on_political_typology_quiz.yaml
    │   │   │       │   │   └── winogenerated
    │   │   │       │   │   │   └── _template_yaml
    │   │   │       │   ├── mutual
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── multual_plus.yaml
    │   │   │       │   │   ├── mutual.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── nq_open
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── nq_open.yaml
    │   │   │       │   ├── openbookqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── openbookqa.yaml
    │   │   │       │   ├── paws-x
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── _generate_config.py
    │   │   │       │   │   ├── paws_de.yaml
    │   │   │       │   │   ├── paws_en.yaml
    │   │   │       │   │   ├── paws_es.yaml
    │   │   │       │   │   ├── paws_fr.yaml
    │   │   │       │   │   ├── paws_ja.yaml
    │   │   │       │   │   ├── paws_ko.yaml
    │   │   │       │   │   ├── paws_zh.yaml
    │   │   │       │   │   └── pawsx_template_yaml
    │   │   │       │   ├── pile
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── pile_arxiv.yaml
    │   │   │       │   │   ├── pile_bookcorpus2.yaml
    │   │   │       │   │   ├── pile_books3.yaml
    │   │   │       │   │   ├── pile_dm-mathematics.yaml
    │   │   │       │   │   ├── pile_enron.yaml
    │   │   │       │   │   ├── pile_europarl.yaml
    │   │   │       │   │   ├── pile_freelaw.yaml
    │   │   │       │   │   ├── pile_github.yaml
    │   │   │       │   │   ├── pile_gutenberg.yaml
    │   │   │       │   │   ├── pile_hackernews.yaml
    │   │   │       │   │   ├── pile_nih-exporter.yaml
    │   │   │       │   │   ├── pile_opensubtitles.yaml
    │   │   │       │   │   ├── pile_openwebtext2.yaml
    │   │   │       │   │   ├── pile_philpapers.yaml
    │   │   │       │   │   ├── pile_pile-cc.yaml
    │   │   │       │   │   ├── pile_pubmed-abstracts.yaml
    │   │   │       │   │   ├── pile_pubmed-central.yaml
    │   │   │       │   │   ├── pile_stackexchange.yaml
    │   │   │       │   │   ├── pile_ubuntu-irc.yaml
    │   │   │       │   │   ├── pile_uspto.yaml
    │   │   │       │   │   ├── pile_wikipedia.yaml
    │   │   │       │   │   └── pile_youtubesubtitles.yaml
    │   │   │       │   ├── piqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── piqa.yaml
    │   │   │       │   ├── polemo2
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── polemo2_in.yaml
    │   │   │       │   │   └── polemo2_out.yaml
    │   │   │       │   ├── prost
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── corypaik_prost.yaml
    │   │   │       │   ├── pubmedqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── preprocess_pubmedqa.py
    │   │   │       │   │   └── pubmedqa.yaml
    │   │   │       │   ├── qa4mre
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── preprocess_qa4mre.py
    │   │   │       │   │   ├── qa4mre_2011.yaml
    │   │   │       │   │   ├── qa4mre_2012.yaml
    │   │   │       │   │   └── qa4mre_2013.yaml
    │   │   │       │   ├── qasper
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── bool.yaml
    │   │   │       │   │   ├── freeform.yaml
    │   │   │       │   │   ├── metrics.py
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── race
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── preprocess_race.py
    │   │   │       │   │   └── race.yaml
    │   │   │       │   ├── realtoxicityprompts
    │   │   │       │   │   ├── metric.py
    │   │   │       │   │   └── realtoxicityprompts.yaml
    │   │   │       │   ├── sciq
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── sciq.yaml
    │   │   │       │   ├── scrolls
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── scrolls.yaml
    │   │   │       │   │   └── task.py
    │   │   │       │   ├── siqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── default.yml
    │   │   │       │   ├── squadv2
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── task.py
    │   │   │       │   ├── storycloze
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── storycloze_2016.yaml
    │   │   │       │   │   └── storycloze_2018.yaml
    │   │   │       │   ├── super_glue
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── boolq
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   ├── seq2seq.yaml
    │   │   │       │   │   │   └── t5-prompt.yaml
    │   │   │       │   │   ├── cb
    │   │   │       │   │   │   ├── aggregate.py
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   ├── t5-prompt.yaml
    │   │   │       │   │   │   └── t5_utils.py
    │   │   │       │   │   ├── copa
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   ├── t5-prompt.yaml
    │   │   │       │   │   │   └── utils.py
    │   │   │       │   │   ├── multirc
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   ├── t5-prompt.yaml
    │   │   │       │   │   │   └── t5_utils.py
    │   │   │       │   │   ├── record
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   ├── t5-prompt.yaml
    │   │   │       │   │   │   ├── t5_utils.py
    │   │   │       │   │   │   └── util.py
    │   │   │       │   │   ├── rte
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   └── t5-prompt.yaml
    │   │   │       │   │   ├── wic
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   └── t5-prompt.yaml
    │   │   │       │   │   └── wsc
    │   │   │       │   │   │   ├── default.yaml
    │   │   │       │   │   │   ├── preprocess_wsc.py
    │   │   │       │   │   │   ├── t5-prompt.yaml
    │   │   │       │   │   │   └── t5_utils.py
    │   │   │       │   ├── swag
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── swag.yaml
    │   │   │       │   ├── toxigen
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── toxigen.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── translation
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── iwslt2017_ar-en.yaml
    │   │   │       │   │   ├── iwslt2017_en-ar.yaml
    │   │   │       │   │   ├── utils.py
    │   │   │       │   │   ├── wmt14_en-fr.yaml
    │   │   │       │   │   ├── wmt14_fr-en.yaml
    │   │   │       │   │   ├── wmt16_de-en.yaml
    │   │   │       │   │   ├── wmt16_en-de.yaml
    │   │   │       │   │   ├── wmt16_en-ro.yaml
    │   │   │       │   │   ├── wmt16_ro-en.yaml
    │   │   │       │   │   └── wmt_common_yaml
    │   │   │       │   ├── triviaqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   └── default.yaml
    │   │   │       │   ├── truthfulqa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── truthfulqa_gen.yaml
    │   │   │       │   │   ├── truthfulqa_mc1.yaml
    │   │   │       │   │   ├── truthfulqa_mc2.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── unscramble
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── anagrams1.yaml
    │   │   │       │   │   ├── anagrams2.yaml
    │   │   │       │   │   ├── cycle_letters.yaml
    │   │   │       │   │   ├── random_insertion.yaml
    │   │   │       │   │   └── reversed_words.yaml
    │   │   │       │   ├── webqs
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── utils.py
    │   │   │       │   │   └── webqs.yaml
    │   │   │       │   ├── wikitext
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── preprocess_wikitext.py
    │   │   │       │   │   └── wikitext.yaml
    │   │   │       │   ├── winogrande
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── default.yaml
    │   │   │       │   │   └── preprocess_winogrande.py
    │   │   │       │   ├── wmt2016
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── metrics.py
    │   │   │       │   │   └── ro_en-t5_prompt.yaml
    │   │   │       │   ├── wsc273
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── default.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── xcopa
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── default_et.yaml
    │   │   │       │   │   ├── default_ht.yaml
    │   │   │       │   │   ├── default_id.yaml
    │   │   │       │   │   ├── default_it.yaml
    │   │   │       │   │   ├── default_qu.yaml
    │   │   │       │   │   ├── default_sw.yaml
    │   │   │       │   │   ├── default_ta.yaml
    │   │   │       │   │   ├── default_th.yaml
    │   │   │       │   │   ├── default_tr.yaml
    │   │   │       │   │   ├── default_vi.yaml
    │   │   │       │   │   ├── default_zh.yaml
    │   │   │       │   │   └── utils.py
    │   │   │       │   ├── xnli
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── utils.py
    │   │   │       │   │   ├── xnli_ar.yaml
    │   │   │       │   │   ├── xnli_bg.yaml
    │   │   │       │   │   ├── xnli_common_yaml
    │   │   │       │   │   ├── xnli_de.yaml
    │   │   │       │   │   ├── xnli_el.yaml
    │   │   │       │   │   ├── xnli_en.yaml
    │   │   │       │   │   ├── xnli_es.yaml
    │   │   │       │   │   ├── xnli_fr.yaml
    │   │   │       │   │   ├── xnli_hi.yaml
    │   │   │       │   │   ├── xnli_ru.yaml
    │   │   │       │   │   ├── xnli_sw.yaml
    │   │   │       │   │   ├── xnli_th.yaml
    │   │   │       │   │   ├── xnli_tr.yaml
    │   │   │       │   │   ├── xnli_ur.yaml
    │   │   │       │   │   ├── xnli_vi.yaml
    │   │   │       │   │   └── xnli_zh.yaml
    │   │   │       │   ├── xstorycloze
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── default_ar.yaml
    │   │   │       │   │   ├── default_en.yaml
    │   │   │       │   │   ├── default_es.yaml
    │   │   │       │   │   ├── default_eu.yaml
    │   │   │       │   │   ├── default_hi.yaml
    │   │   │       │   │   ├── default_id.yaml
    │   │   │       │   │   ├── default_my.yaml
    │   │   │       │   │   ├── default_ru.yaml
    │   │   │       │   │   ├── default_sw.yaml
    │   │   │       │   │   ├── default_te.yaml
    │   │   │       │   │   └── default_zh.yaml
    │   │   │       │   └── xwinograd
    │   │   │       │   │   ├── README.md
    │   │   │       │   │   ├── utils.py
    │   │   │       │   │   ├── xwinograd_common_yaml
    │   │   │       │   │   ├── xwinograd_en.yaml
    │   │   │       │   │   ├── xwinograd_fr.yaml
    │   │   │       │   │   ├── xwinograd_jp.yaml
    │   │   │       │   │   ├── xwinograd_pt.yaml
    │   │   │       │   │   ├── xwinograd_ru.yaml
    │   │   │       │   │   └── xwinograd_zh.yaml
    │   │   │       └── utils.py
    │   │   │   ├── mypy.ini
    │   │   │   ├── pyproject.toml
    │   │   │   ├── requirements.txt
    │   │   │   ├── scripts
    │   │   │       ├── __init__.py
    │   │   │       ├── build_benchmark.py
    │   │   │       ├── clean_training_data
    │   │   │       │   ├── README.md
    │   │   │       │   ├── __init__.py
    │   │   │       │   ├── compress_and_package.py
    │   │   │       │   ├── generate_13_grams.py
    │   │   │       │   ├── investigate_pile.py
    │   │   │       │   ├── janitor_util.cpp
    │   │   │       │   ├── process_sorted_buckets.py
    │   │   │       │   └── sort_13_gram_buckets.py
    │   │   │       ├── cost_estimate.py
    │   │   │       ├── get_prompts.py
    │   │   │       ├── make_gpt2_test_cases.py
    │   │   │       ├── make_table_results.py
    │   │   │       ├── make_table_tasks.py
    │   │   │       ├── regression.py
    │   │   │       └── write_out.py
    │   │   │   ├── setup.py
    │   │   │   ├── templates
    │   │   │       └── new_yaml_task
    │   │   │       │   ├── README.md
    │   │   │       │   └── blank_yaml.yaml
    │   │   │   └── tests
    │   │   │       ├── __init__.py
    │   │   │       ├── models
    │   │   │           ├── test_gguf.py
    │   │   │           ├── test_huggingface.py
    │   │   │           └── test_vllm.py
    │   │   │       ├── test_evaluator.py
    │   │   │       ├── test_janitor.py
    │   │   │       ├── test_misc.py
    │   │   │       ├── test_tasks.py
    │   │   │       ├── test_utils.py
    │   │   │       ├── testdata
    │   │   │           ├── anagrams1-v0-greedy_until
    │   │   │           ├── anagrams2-v0-greedy_until
    │   │   │           ├── anli_r1-v0-loglikelihood
    │   │   │           ├── anli_r2-v0-loglikelihood
    │   │   │           ├── anli_r3-v0-loglikelihood
    │   │   │           ├── arc_challenge-v0-loglikelihood
    │   │   │           ├── arc_challenge-v2.0-loglikelihood
    │   │   │           ├── arc_easy-v0-loglikelihood
    │   │   │           ├── arithmetic_1dc-v0-loglikelihood
    │   │   │           ├── arithmetic_2da-v0-loglikelihood
    │   │   │           ├── arithmetic_2dm-v0-loglikelihood
    │   │   │           ├── arithmetic_2ds-v0-loglikelihood
    │   │   │           ├── arithmetic_3da-v0-loglikelihood
    │   │   │           ├── arithmetic_3ds-v0-loglikelihood
    │   │   │           ├── arithmetic_4da-v0-loglikelihood
    │   │   │           ├── arithmetic_4ds-v0-loglikelihood
    │   │   │           ├── arithmetic_5da-v0-loglikelihood
    │   │   │           ├── arithmetic_5ds-v0-loglikelihood
    │   │   │           ├── blimp_adjunct_island-v0-loglikelihood
    │   │   │           ├── blimp_anaphor_gender_agreement-v0-loglikelihood
    │   │   │           ├── blimp_anaphor_number_agreement-v0-loglikelihood
    │   │   │           ├── blimp_animate_subject_passive-v0-loglikelihood
    │   │   │           ├── blimp_animate_subject_trans-v0-loglikelihood
    │   │   │           ├── blimp_causative-v0-loglikelihood
    │   │   │           ├── blimp_complex_NP_island-v0-loglikelihood
    │   │   │           ├── blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood
    │   │   │           ├── blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_1-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_2-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood
    │   │   │           ├── blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood
    │   │   │           ├── blimp_distractor_agreement_relational_noun-v0-loglikelihood
    │   │   │           ├── blimp_distractor_agreement_relative_clause-v0-loglikelihood
    │   │   │           ├── blimp_drop_argument-v0-loglikelihood
    │   │   │           ├── blimp_ellipsis_n_bar_1-v0-loglikelihood
    │   │   │           ├── blimp_ellipsis_n_bar_2-v0-loglikelihood
    │   │   │           ├── blimp_existential_there_object_raising-v0-loglikelihood
    │   │   │           ├── blimp_existential_there_quantifiers_1-v0-loglikelihood
    │   │   │           ├── blimp_existential_there_quantifiers_2-v0-loglikelihood
    │   │   │           ├── blimp_existential_there_subject_raising-v0-loglikelihood
    │   │   │           ├── blimp_expletive_it_object_raising-v0-loglikelihood
    │   │   │           ├── blimp_inchoative-v0-loglikelihood
    │   │   │           ├── blimp_intransitive-v0-loglikelihood
    │   │   │           ├── blimp_irregular_past_participle_adjectives-v0-loglikelihood
    │   │   │           ├── blimp_irregular_past_participle_verbs-v0-loglikelihood
    │   │   │           ├── blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood
    │   │   │           ├── blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood
    │   │   │           ├── blimp_left_branch_island_echo_question-v0-loglikelihood
    │   │   │           ├── blimp_left_branch_island_simple_question-v0-loglikelihood
    │   │   │           ├── blimp_matrix_question_npi_licensor_present-v0-loglikelihood
    │   │   │           ├── blimp_npi_present_1-v0-loglikelihood
    │   │   │           ├── blimp_npi_present_2-v0-loglikelihood
    │   │   │           ├── blimp_only_npi_licensor_present-v0-loglikelihood
    │   │   │           ├── blimp_only_npi_scope-v0-loglikelihood
    │   │   │           ├── blimp_passive_1-v0-loglikelihood
    │   │   │           ├── blimp_passive_2-v0-loglikelihood
    │   │   │           ├── blimp_principle_A_c_command-v0-loglikelihood
    │   │   │           ├── blimp_principle_A_case_1-v0-loglikelihood
    │   │   │           ├── blimp_principle_A_case_2-v0-loglikelihood
    │   │   │           ├── blimp_principle_A_domain_1-v0-loglikelihood
    │   │   │           ├── blimp_principle_A_domain_2-v0-loglikelihood
    │   │   │           ├── blimp_principle_A_domain_3-v0-loglikelihood
    │   │   │           ├── blimp_principle_A_reconstruction-v0-loglikelihood
    │   │   │           ├── blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood
    │   │   │           ├── blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood
    │   │   │           ├── blimp_sentential_negation_npi_licensor_present-v0-loglikelihood
    │   │   │           ├── blimp_sentential_negation_npi_scope-v0-loglikelihood
    │   │   │           ├── blimp_sentential_subject_island-v0-loglikelihood
    │   │   │           ├── blimp_superlative_quantifiers_1-v0-loglikelihood
    │   │   │           ├── blimp_superlative_quantifiers_2-v0-loglikelihood
    │   │   │           ├── blimp_tough_vs_raising_1-v0-loglikelihood
    │   │   │           ├── blimp_tough_vs_raising_2-v0-loglikelihood
    │   │   │           ├── blimp_transitive-v0-loglikelihood
    │   │   │           ├── blimp_wh_island-v0-loglikelihood
    │   │   │           ├── blimp_wh_questions_object_gap-v0-loglikelihood
    │   │   │           ├── blimp_wh_questions_subject_gap-v0-loglikelihood
    │   │   │           ├── blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood
    │   │   │           ├── blimp_wh_vs_that_no_gap-v0-loglikelihood
    │   │   │           ├── blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood
    │   │   │           ├── blimp_wh_vs_that_with_gap-v0-loglikelihood
    │   │   │           ├── blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood
    │   │   │           ├── boolq-v0-loglikelihood
    │   │   │           ├── boolq-v1-loglikelihood
    │   │   │           ├── cb-v0-loglikelihood
    │   │   │           ├── cb-v1-loglikelihood
    │   │   │           ├── cola-v0-loglikelihood
    │   │   │           ├── copa-v0-loglikelihood
    │   │   │           ├── coqa-v0-greedy_until
    │   │   │           ├── coqa-v1-greedy_until
    │   │   │           ├── crows_pairs_english-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_age-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_autre-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_disability-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_gender-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_nationality-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_physical_appearance-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_race_color-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_religion-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_sexual_orientation-v0-loglikelihood
    │   │   │           ├── crows_pairs_english_socioeconomic-v0-loglikelihood
    │   │   │           ├── crows_pairs_french-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_age-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_autre-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_disability-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_gender-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_nationality-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_physical_appearance-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_race_color-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_religion-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_sexual_orientation-v0-loglikelihood
    │   │   │           ├── crows_pairs_french_socioeconomic-v0-loglikelihood
    │   │   │           ├── cycle_letters-v0-greedy_until
    │   │   │           ├── drop-v0-greedy_until
    │   │   │           ├── drop-v1-greedy_until
    │   │   │           ├── ethics_cm-v0-loglikelihood
    │   │   │           ├── ethics_deontology-v0-loglikelihood
    │   │   │           ├── ethics_justice-v0-loglikelihood
    │   │   │           ├── ethics_utilitarianism-v0-loglikelihood
    │   │   │           ├── ethics_utilitarianism_original-v0-loglikelihood
    │   │   │           ├── ethics_virtue-v0-loglikelihood
    │   │   │           ├── gguf_test_44e268d15decc4d2d0f99e57e1476269826cd3b54262f7a0981f75ddd45b25d0.pkl
    │   │   │           ├── gguf_test_52ea409606de8755e03cf7c79f824101a4ce64bb6e6d3df556b8a4e7a5d92418.pkl
    │   │   │           ├── gguf_test_8fcf3f2f52afeb2acd7c8e02c2cc3ce31a691b665d295f6c4e4bbd71c7caa1a2.pkl
    │   │   │           ├── gpt3_test_0deb8e9bde8e8327bbc48157f638ff3ba06b0cd816dad2beb8ad90f7fbe795c7.pkl
    │   │   │           ├── gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl
    │   │   │           ├── gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl
    │   │   │           ├── gpt3_test_cfd11f555a5a63b6dfa114a55a932e51b724cdd44d4842586b9ce37260bf7aaa.pkl
    │   │   │           ├── gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl
    │   │   │           ├── gsm8k-v0-greedy_until
    │   │   │           ├── headqa-v0-loglikelihood
    │   │   │           ├── headqa_en-v0-loglikelihood
    │   │   │           ├── headqa_es-v0-loglikelihood
    │   │   │           ├── hellaswag-v0-loglikelihood
    │   │   │           ├── hendrycksTest-abstract_algebra-v0-loglikelihood
    │   │   │           ├── hendrycksTest-anatomy-v0-loglikelihood
    │   │   │           ├── hendrycksTest-astronomy-v0-loglikelihood
    │   │   │           ├── hendrycksTest-business_ethics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-clinical_knowledge-v0-loglikelihood
    │   │   │           ├── hendrycksTest-college_biology-v0-loglikelihood
    │   │   │           ├── hendrycksTest-college_chemistry-v0-loglikelihood
    │   │   │           ├── hendrycksTest-college_computer_science-v0-loglikelihood
    │   │   │           ├── hendrycksTest-college_mathematics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-college_medicine-v0-loglikelihood
    │   │   │           ├── hendrycksTest-college_physics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-computer_security-v0-loglikelihood
    │   │   │           ├── hendrycksTest-conceptual_physics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-econometrics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-electrical_engineering-v0-loglikelihood
    │   │   │           ├── hendrycksTest-elementary_mathematics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-formal_logic-v0-loglikelihood
    │   │   │           ├── hendrycksTest-global_facts-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_biology-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_chemistry-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_computer_science-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_european_history-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_geography-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_government_and_politics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_macroeconomics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_mathematics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_microeconomics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_physics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_psychology-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_statistics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_us_history-v0-loglikelihood
    │   │   │           ├── hendrycksTest-high_school_world_history-v0-loglikelihood
    │   │   │           ├── hendrycksTest-human_aging-v0-loglikelihood
    │   │   │           ├── hendrycksTest-human_sexuality-v0-loglikelihood
    │   │   │           ├── hendrycksTest-international_law-v0-loglikelihood
    │   │   │           ├── hendrycksTest-jurisprudence-v0-loglikelihood
    │   │   │           ├── hendrycksTest-logical_fallacies-v0-loglikelihood
    │   │   │           ├── hendrycksTest-machine_learning-v0-loglikelihood
    │   │   │           ├── hendrycksTest-management-v0-loglikelihood
    │   │   │           ├── hendrycksTest-marketing-v0-loglikelihood
    │   │   │           ├── hendrycksTest-medical_genetics-v0-loglikelihood
    │   │   │           ├── hendrycksTest-miscellaneous-v0-loglikelihood
    │   │   │           ├── hendrycksTest-moral_disputes-v0-loglikelihood
    │   │   │           ├── hendrycksTest-moral_scenarios-v0-loglikelihood
    │   │   │           ├── hendrycksTest-nutrition-v0-loglikelihood
    │   │   │           ├── hendrycksTest-philosophy-v0-loglikelihood
    │   │   │           ├── hendrycksTest-prehistory-v0-loglikelihood
    │   │   │           ├── hendrycksTest-professional_accounting-v0-loglikelihood
    │   │   │           ├── hendrycksTest-professional_law-v0-loglikelihood
    │   │   │           ├── hendrycksTest-professional_medicine-v0-loglikelihood
    │   │   │           ├── hendrycksTest-professional_psychology-v0-loglikelihood
    │   │   │           ├── hendrycksTest-public_relations-v0-loglikelihood
    │   │   │           ├── hendrycksTest-security_studies-v0-loglikelihood
    │   │   │           ├── hendrycksTest-sociology-v0-loglikelihood
    │   │   │           ├── hendrycksTest-us_foreign_policy-v0-loglikelihood
    │   │   │           ├── hendrycksTest-virology-v0-loglikelihood
    │   │   │           ├── hendrycksTest-world_religions-v0-loglikelihood
    │   │   │           ├── iwslt17-ar-en-v0-greedy_until
    │   │   │           ├── iwslt17-en-ar-v0-greedy_until
    │   │   │           ├── lambada-v0-loglikelihood
    │   │   │           ├── lambada_cloze-v0-loglikelihood
    │   │   │           ├── lambada_mt_de-v0-loglikelihood
    │   │   │           ├── lambada_mt_en-v0-loglikelihood
    │   │   │           ├── lambada_mt_es-v0-loglikelihood
    │   │   │           ├── lambada_mt_fr-v0-loglikelihood
    │   │   │           ├── lambada_mt_it-v0-loglikelihood
    │   │   │           ├── lambada_openai-v0-loglikelihood
    │   │   │           ├── lambada_openai-v2.0-loglikelihood
    │   │   │           ├── lambada_openai_cloze-v0-loglikelihood
    │   │   │           ├── lambada_openai_mt_de-v0-loglikelihood
    │   │   │           ├── lambada_openai_mt_en-v0-loglikelihood
    │   │   │           ├── lambada_openai_mt_es-v0-loglikelihood
    │   │   │           ├── lambada_openai_mt_fr-v0-loglikelihood
    │   │   │           ├── lambada_openai_mt_it-v0-loglikelihood
    │   │   │           ├── lambada_standard-v0-loglikelihood
    │   │   │           ├── lambada_standard_cloze-v0-loglikelihood
    │   │   │           ├── logiqa-v0-loglikelihood
    │   │   │           ├── math_algebra-v0-greedy_until
    │   │   │           ├── math_algebra-v1-greedy_until
    │   │   │           ├── math_counting_and_prob-v0-greedy_until
    │   │   │           ├── math_counting_and_prob-v1-greedy_until
    │   │   │           ├── math_geometry-v0-greedy_until
    │   │   │           ├── math_geometry-v1-greedy_until
    │   │   │           ├── math_intermediate_algebra-v0-greedy_until
    │   │   │           ├── math_intermediate_algebra-v1-greedy_until
    │   │   │           ├── math_num_theory-v0-greedy_until
    │   │   │           ├── math_num_theory-v1-greedy_until
    │   │   │           ├── math_prealgebra-v0-greedy_until
    │   │   │           ├── math_prealgebra-v1-greedy_until
    │   │   │           ├── math_precalc-v0-greedy_until
    │   │   │           ├── math_precalc-v1-greedy_until
    │   │   │           ├── mathqa-v0-loglikelihood
    │   │   │           ├── mc_taco-v0-loglikelihood
    │   │   │           ├── mnli-v0-loglikelihood
    │   │   │           ├── mnli_mismatched-v0-loglikelihood
    │   │   │           ├── mrpc-v0-loglikelihood
    │   │   │           ├── multirc-v0-loglikelihood
    │   │   │           ├── multirc-v1-loglikelihood
    │   │   │           ├── mutual-v0-loglikelihood
    │   │   │           ├── mutual-v1-loglikelihood
    │   │   │           ├── mutual_plus-v0-loglikelihood
    │   │   │           ├── mutual_plus-v1-loglikelihood
    │   │   │           ├── openbookqa-v0-loglikelihood
    │   │   │           ├── pile_arxiv-v0-loglikelihood_rolling
    │   │   │           ├── pile_arxiv-v1-loglikelihood_rolling
    │   │   │           ├── pile_bookcorpus2-v0-loglikelihood_rolling
    │   │   │           ├── pile_bookcorpus2-v1-loglikelihood_rolling
    │   │   │           ├── pile_books3-v0-loglikelihood_rolling
    │   │   │           ├── pile_books3-v1-loglikelihood_rolling
    │   │   │           ├── pile_dm-mathematics-v0-loglikelihood_rolling
    │   │   │           ├── pile_dm-mathematics-v1-loglikelihood_rolling
    │   │   │           ├── pile_enron-v0-loglikelihood_rolling
    │   │   │           ├── pile_enron-v1-loglikelihood_rolling
    │   │   │           ├── pile_europarl-v0-loglikelihood_rolling
    │   │   │           ├── pile_europarl-v1-loglikelihood_rolling
    │   │   │           ├── pile_freelaw-v0-loglikelihood_rolling
    │   │   │           ├── pile_freelaw-v1-loglikelihood_rolling
    │   │   │           ├── pile_github-v0-loglikelihood_rolling
    │   │   │           ├── pile_github-v1-loglikelihood_rolling
    │   │   │           ├── pile_gutenberg-v0-loglikelihood_rolling
    │   │   │           ├── pile_gutenberg-v1-loglikelihood_rolling
    │   │   │           ├── pile_hackernews-v0-loglikelihood_rolling
    │   │   │           ├── pile_hackernews-v1-loglikelihood_rolling
    │   │   │           ├── pile_nih-exporter-v0-loglikelihood_rolling
    │   │   │           ├── pile_nih-exporter-v1-loglikelihood_rolling
    │   │   │           ├── pile_opensubtitles-v0-loglikelihood_rolling
    │   │   │           ├── pile_opensubtitles-v1-loglikelihood_rolling
    │   │   │           ├── pile_openwebtext2-v0-loglikelihood_rolling
    │   │   │           ├── pile_openwebtext2-v1-loglikelihood_rolling
    │   │   │           ├── pile_philpapers-v0-loglikelihood_rolling
    │   │   │           ├── pile_philpapers-v1-loglikelihood_rolling
    │   │   │           ├── pile_pile-cc-v0-loglikelihood_rolling
    │   │   │           ├── pile_pile-cc-v1-loglikelihood_rolling
    │   │   │           ├── pile_pubmed-abstracts-v0-loglikelihood_rolling
    │   │   │           ├── pile_pubmed-abstracts-v1-loglikelihood_rolling
    │   │   │           ├── pile_pubmed-central-v0-loglikelihood_rolling
    │   │   │           ├── pile_pubmed-central-v1-loglikelihood_rolling
    │   │   │           ├── pile_stackexchange-v0-loglikelihood_rolling
    │   │   │           ├── pile_stackexchange-v1-loglikelihood_rolling
    │   │   │           ├── pile_ubuntu-irc-v0-loglikelihood_rolling
    │   │   │           ├── pile_ubuntu-irc-v1-loglikelihood_rolling
    │   │   │           ├── pile_uspto-v0-loglikelihood_rolling
    │   │   │           ├── pile_uspto-v1-loglikelihood_rolling
    │   │   │           ├── pile_wikipedia-v0-loglikelihood_rolling
    │   │   │           ├── pile_wikipedia-v1-loglikelihood_rolling
    │   │   │           ├── pile_youtubesubtitles-v0-loglikelihood_rolling
    │   │   │           ├── pile_youtubesubtitles-v1-loglikelihood_rolling
    │   │   │           ├── piqa-v0-loglikelihood
    │   │   │           ├── prost-v0-loglikelihood
    │   │   │           ├── pubmedqa-v0-loglikelihood
    │   │   │           ├── qa4mre_2011-v0-loglikelihood
    │   │   │           ├── qa4mre_2012-v0-loglikelihood
    │   │   │           ├── qa4mre_2013-v0-loglikelihood
    │   │   │           ├── qnli-v0-loglikelihood
    │   │   │           ├── qqp-v0-loglikelihood
    │   │   │           ├── race-v0-loglikelihood
    │   │   │           ├── random_insertion-v0-greedy_until
    │   │   │           ├── record-v0-loglikelihood
    │   │   │           ├── reversed_words-v0-greedy_until
    │   │   │           ├── rte-v0-loglikelihood
    │   │   │           ├── sciq-v0-loglikelihood
    │   │   │           ├── squad2-v0-greedy_until
    │   │   │           ├── squad2-v0-loglikelihood
    │   │   │           ├── squad2-v1-greedy_until
    │   │   │           ├── squad2-v1-loglikelihood
    │   │   │           ├── sst-v0-loglikelihood
    │   │   │           ├── swag-v0-loglikelihood
    │   │   │           ├── textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl
    │   │   │           ├── textsynth_test_0c1c14571add7903b89e588c8212572b95bb57b334fc0752c89a7e045a5f63ae.pkl
    │   │   │           ├── textsynth_test_3092d07756f3e1d010c07524cc8a2ecba7f0c19f9e39f2aaf2bf440bfe328004.pkl
    │   │   │           ├── textsynth_test_434076260b6af3a46b7a5eaceec3306a5872c400a3872f744280b237455a0f8e.pkl
    │   │   │           ├── textsynth_test_49c47ae40e11f349f2f6b492128188b1b2bc103a421c676ee4b2142a68b43516.pkl
    │   │   │           ├── textsynth_test_4fd8d66a6dad7f602b40e5d7dc298d6fe329299d086a4659743a41f4a4012659.pkl
    │   │   │           ├── textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl
    │   │   │           ├── textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl
    │   │   │           ├── textsynth_test_7209c4617547bfe17cb9e7f5f735fe35822d650aefdc5fbeeaf0c1724effbe09.pkl
    │   │   │           ├── textsynth_test_7afdc285388e51094e12645f305328c759574fa3ec9751631025f8ad5ebf9f3e.pkl
    │   │   │           ├── textsynth_test_9d5f33dbfe1e254928c89f5ed85e4c010d888065f55a8f1b863bc1eb0340a5f2.pkl
    │   │   │           ├── textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl
    │   │   │           ├── textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl
    │   │   │           ├── textsynth_test_e7ad1e9f52a39e1ddd1e50f3c57ffa4546728dd150a67c0a0ddc8675c04e15d1.pkl
    │   │   │           ├── textsynth_test_f4bfe4beb605bd52a8ab6be3c9293639e7e2261d98de58159d15ccb83131bf4e.pkl
    │   │   │           ├── toxigen-v0-loglikelihood
    │   │   │           ├── triviaqa-v0-loglikelihood
    │   │   │           ├── triviaqa-v1-loglikelihood
    │   │   │           ├── truthfulqa_gen-v0-greedy_until
    │   │   │           ├── truthfulqa_gen-v1-greedy_until
    │   │   │           ├── truthfulqa_mc-v0-loglikelihood
    │   │   │           ├── truthfulqa_mc-v1-loglikelihood
    │   │   │           ├── webqs-v0-loglikelihood
    │   │   │           ├── wic-v0-loglikelihood
    │   │   │           ├── wikitext-v0-loglikelihood_rolling
    │   │   │           ├── wikitext-v1-loglikelihood_rolling
    │   │   │           ├── winogrande-v0-loglikelihood
    │   │   │           ├── wmt14-en-fr-v0-greedy_until
    │   │   │           ├── wmt14-fr-en-v0-greedy_until
    │   │   │           ├── wmt16-de-en-v0-greedy_until
    │   │   │           ├── wmt16-en-de-v0-greedy_until
    │   │   │           ├── wmt16-en-ro-v0-greedy_until
    │   │   │           ├── wmt16-ro-en-v0-greedy_until
    │   │   │           ├── wmt20-cs-en-v0-greedy_until
    │   │   │           ├── wmt20-de-en-v0-greedy_until
    │   │   │           ├── wmt20-de-fr-v0-greedy_until
    │   │   │           ├── wmt20-en-cs-v0-greedy_until
    │   │   │           ├── wmt20-en-de-v0-greedy_until
    │   │   │           ├── wmt20-en-iu-v0-greedy_until
    │   │   │           ├── wmt20-en-ja-v0-greedy_until
    │   │   │           ├── wmt20-en-ja-v1-greedy_until
    │   │   │           ├── wmt20-en-km-v0-greedy_until
    │   │   │           ├── wmt20-en-pl-v0-greedy_until
    │   │   │           ├── wmt20-en-ps-v0-greedy_until
    │   │   │           ├── wmt20-en-ru-v0-greedy_until
    │   │   │           ├── wmt20-en-ta-v0-greedy_until
    │   │   │           ├── wmt20-en-zh-v0-greedy_until
    │   │   │           ├── wmt20-en-zh-v1-greedy_until
    │   │   │           ├── wmt20-fr-de-v0-greedy_until
    │   │   │           ├── wmt20-iu-en-v0-greedy_until
    │   │   │           ├── wmt20-ja-en-v0-greedy_until
    │   │   │           ├── wmt20-km-en-v0-greedy_until
    │   │   │           ├── wmt20-pl-en-v0-greedy_until
    │   │   │           ├── wmt20-ps-en-v0-greedy_until
    │   │   │           ├── wmt20-ru-en-v0-greedy_until
    │   │   │           ├── wmt20-ta-en-v0-greedy_until
    │   │   │           ├── wmt20-zh-en-v0-greedy_until
    │   │   │           ├── wnli-v0-loglikelihood
    │   │   │           ├── wnli-v1-loglikelihood
    │   │   │           ├── wsc-v0-loglikelihood
    │   │   │           └── wsc273-v0-loglikelihood
    │   │   │       ├── tests_master
    │   │   │           ├── test_description.py
    │   │   │           ├── test_generate_13_grams.py
    │   │   │           ├── test_models.py
    │   │   │           └── test_version_stable.py
    │   │   │       └── utils.py
    │   ├── templates
    │   │   └── new_yaml_task
    │   │   │   ├── README.md
    │   │   │   └── blank_yaml.yaml
    │   └── tests
    │   │   ├── __init__.py
    │   │   ├── models
    │   │       ├── test_gguf.py
    │   │       ├── test_huggingface.py
    │   │       └── test_vllm.py
    │   │   ├── test_evaluator.py
    │   │   ├── test_janitor.py
    │   │   ├── test_misc.py
    │   │   ├── test_tasks.py
    │   │   ├── test_utils.py
    │   │   ├── testdata
    │   │       ├── anagrams1-v0-greedy_until
    │   │       ├── anagrams2-v0-greedy_until
    │   │       ├── anli_r1-v0-loglikelihood
    │   │       ├── anli_r2-v0-loglikelihood
    │   │       ├── anli_r3-v0-loglikelihood
    │   │       ├── arc_challenge-v0-loglikelihood
    │   │       ├── arc_challenge-v2.0-loglikelihood
    │   │       ├── arc_easy-v0-loglikelihood
    │   │       ├── arithmetic_1dc-v0-loglikelihood
    │   │       ├── arithmetic_2da-v0-loglikelihood
    │   │       ├── arithmetic_2dm-v0-loglikelihood
    │   │       ├── arithmetic_2ds-v0-loglikelihood
    │   │       ├── arithmetic_3da-v0-loglikelihood
    │   │       ├── arithmetic_3ds-v0-loglikelihood
    │   │       ├── arithmetic_4da-v0-loglikelihood
    │   │       ├── arithmetic_4ds-v0-loglikelihood
    │   │       ├── arithmetic_5da-v0-loglikelihood
    │   │       ├── arithmetic_5ds-v0-loglikelihood
    │   │       ├── blimp_adjunct_island-v0-loglikelihood
    │   │       ├── blimp_anaphor_gender_agreement-v0-loglikelihood
    │   │       ├── blimp_anaphor_number_agreement-v0-loglikelihood
    │   │       ├── blimp_animate_subject_passive-v0-loglikelihood
    │   │       ├── blimp_animate_subject_trans-v0-loglikelihood
    │   │       ├── blimp_causative-v0-loglikelihood
    │   │       ├── blimp_complex_NP_island-v0-loglikelihood
    │   │       ├── blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood
    │   │       ├── blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_1-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_2-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood
    │   │       ├── blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood
    │   │       ├── blimp_distractor_agreement_relational_noun-v0-loglikelihood
    │   │       ├── blimp_distractor_agreement_relative_clause-v0-loglikelihood
    │   │       ├── blimp_drop_argument-v0-loglikelihood
    │   │       ├── blimp_ellipsis_n_bar_1-v0-loglikelihood
    │   │       ├── blimp_ellipsis_n_bar_2-v0-loglikelihood
    │   │       ├── blimp_existential_there_object_raising-v0-loglikelihood
    │   │       ├── blimp_existential_there_quantifiers_1-v0-loglikelihood
    │   │       ├── blimp_existential_there_quantifiers_2-v0-loglikelihood
    │   │       ├── blimp_existential_there_subject_raising-v0-loglikelihood
    │   │       ├── blimp_expletive_it_object_raising-v0-loglikelihood
    │   │       ├── blimp_inchoative-v0-loglikelihood
    │   │       ├── blimp_intransitive-v0-loglikelihood
    │   │       ├── blimp_irregular_past_participle_adjectives-v0-loglikelihood
    │   │       ├── blimp_irregular_past_participle_verbs-v0-loglikelihood
    │   │       ├── blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood
    │   │       ├── blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood
    │   │       ├── blimp_left_branch_island_echo_question-v0-loglikelihood
    │   │       ├── blimp_left_branch_island_simple_question-v0-loglikelihood
    │   │       ├── blimp_matrix_question_npi_licensor_present-v0-loglikelihood
    │   │       ├── blimp_npi_present_1-v0-loglikelihood
    │   │       ├── blimp_npi_present_2-v0-loglikelihood
    │   │       ├── blimp_only_npi_licensor_present-v0-loglikelihood
    │   │       ├── blimp_only_npi_scope-v0-loglikelihood
    │   │       ├── blimp_passive_1-v0-loglikelihood
    │   │       ├── blimp_passive_2-v0-loglikelihood
    │   │       ├── blimp_principle_A_c_command-v0-loglikelihood
    │   │       ├── blimp_principle_A_case_1-v0-loglikelihood
    │   │       ├── blimp_principle_A_case_2-v0-loglikelihood
    │   │       ├── blimp_principle_A_domain_1-v0-loglikelihood
    │   │       ├── blimp_principle_A_domain_2-v0-loglikelihood
    │   │       ├── blimp_principle_A_domain_3-v0-loglikelihood
    │   │       ├── blimp_principle_A_reconstruction-v0-loglikelihood
    │   │       ├── blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood
    │   │       ├── blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood
    │   │       ├── blimp_sentential_negation_npi_licensor_present-v0-loglikelihood
    │   │       ├── blimp_sentential_negation_npi_scope-v0-loglikelihood
    │   │       ├── blimp_sentential_subject_island-v0-loglikelihood
    │   │       ├── blimp_superlative_quantifiers_1-v0-loglikelihood
    │   │       ├── blimp_superlative_quantifiers_2-v0-loglikelihood
    │   │       ├── blimp_tough_vs_raising_1-v0-loglikelihood
    │   │       ├── blimp_tough_vs_raising_2-v0-loglikelihood
    │   │       ├── blimp_transitive-v0-loglikelihood
    │   │       ├── blimp_wh_island-v0-loglikelihood
    │   │       ├── blimp_wh_questions_object_gap-v0-loglikelihood
    │   │       ├── blimp_wh_questions_subject_gap-v0-loglikelihood
    │   │       ├── blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood
    │   │       ├── blimp_wh_vs_that_no_gap-v0-loglikelihood
    │   │       ├── blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood
    │   │       ├── blimp_wh_vs_that_with_gap-v0-loglikelihood
    │   │       ├── blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood
    │   │       ├── boolq-v0-loglikelihood
    │   │       ├── boolq-v1-loglikelihood
    │   │       ├── cb-v0-loglikelihood
    │   │       ├── cb-v1-loglikelihood
    │   │       ├── cola-v0-loglikelihood
    │   │       ├── copa-v0-loglikelihood
    │   │       ├── coqa-v0-greedy_until
    │   │       ├── coqa-v1-greedy_until
    │   │       ├── crows_pairs_english-v0-loglikelihood
    │   │       ├── crows_pairs_english_age-v0-loglikelihood
    │   │       ├── crows_pairs_english_autre-v0-loglikelihood
    │   │       ├── crows_pairs_english_disability-v0-loglikelihood
    │   │       ├── crows_pairs_english_gender-v0-loglikelihood
    │   │       ├── crows_pairs_english_nationality-v0-loglikelihood
    │   │       ├── crows_pairs_english_physical_appearance-v0-loglikelihood
    │   │       ├── crows_pairs_english_race_color-v0-loglikelihood
    │   │       ├── crows_pairs_english_religion-v0-loglikelihood
    │   │       ├── crows_pairs_english_sexual_orientation-v0-loglikelihood
    │   │       ├── crows_pairs_english_socioeconomic-v0-loglikelihood
    │   │       ├── crows_pairs_french-v0-loglikelihood
    │   │       ├── crows_pairs_french_age-v0-loglikelihood
    │   │       ├── crows_pairs_french_autre-v0-loglikelihood
    │   │       ├── crows_pairs_french_disability-v0-loglikelihood
    │   │       ├── crows_pairs_french_gender-v0-loglikelihood
    │   │       ├── crows_pairs_french_nationality-v0-loglikelihood
    │   │       ├── crows_pairs_french_physical_appearance-v0-loglikelihood
    │   │       ├── crows_pairs_french_race_color-v0-loglikelihood
    │   │       ├── crows_pairs_french_religion-v0-loglikelihood
    │   │       ├── crows_pairs_french_sexual_orientation-v0-loglikelihood
    │   │       ├── crows_pairs_french_socioeconomic-v0-loglikelihood
    │   │       ├── cycle_letters-v0-greedy_until
    │   │       ├── drop-v0-greedy_until
    │   │       ├── drop-v1-greedy_until
    │   │       ├── ethics_cm-v0-loglikelihood
    │   │       ├── ethics_deontology-v0-loglikelihood
    │   │       ├── ethics_justice-v0-loglikelihood
    │   │       ├── ethics_utilitarianism-v0-loglikelihood
    │   │       ├── ethics_utilitarianism_original-v0-loglikelihood
    │   │       ├── ethics_virtue-v0-loglikelihood
    │   │       ├── gguf_test_44e268d15decc4d2d0f99e57e1476269826cd3b54262f7a0981f75ddd45b25d0.pkl
    │   │       ├── gguf_test_52ea409606de8755e03cf7c79f824101a4ce64bb6e6d3df556b8a4e7a5d92418.pkl
    │   │       ├── gguf_test_8fcf3f2f52afeb2acd7c8e02c2cc3ce31a691b665d295f6c4e4bbd71c7caa1a2.pkl
    │   │       ├── gpt3_test_0deb8e9bde8e8327bbc48157f638ff3ba06b0cd816dad2beb8ad90f7fbe795c7.pkl
    │   │       ├── gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl
    │   │       ├── gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl
    │   │       ├── gpt3_test_cfd11f555a5a63b6dfa114a55a932e51b724cdd44d4842586b9ce37260bf7aaa.pkl
    │   │       ├── gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl
    │   │       ├── gsm8k-v0-greedy_until
    │   │       ├── headqa-v0-loglikelihood
    │   │       ├── headqa_en-v0-loglikelihood
    │   │       ├── headqa_es-v0-loglikelihood
    │   │       ├── hellaswag-v0-loglikelihood
    │   │       ├── hendrycksTest-abstract_algebra-v0-loglikelihood
    │   │       ├── hendrycksTest-anatomy-v0-loglikelihood
    │   │       ├── hendrycksTest-astronomy-v0-loglikelihood
    │   │       ├── hendrycksTest-business_ethics-v0-loglikelihood
    │   │       ├── hendrycksTest-clinical_knowledge-v0-loglikelihood
    │   │       ├── hendrycksTest-college_biology-v0-loglikelihood
    │   │       ├── hendrycksTest-college_chemistry-v0-loglikelihood
    │   │       ├── hendrycksTest-college_computer_science-v0-loglikelihood
    │   │       ├── hendrycksTest-college_mathematics-v0-loglikelihood
    │   │       ├── hendrycksTest-college_medicine-v0-loglikelihood
    │   │       ├── hendrycksTest-college_physics-v0-loglikelihood
    │   │       ├── hendrycksTest-computer_security-v0-loglikelihood
    │   │       ├── hendrycksTest-conceptual_physics-v0-loglikelihood
    │   │       ├── hendrycksTest-econometrics-v0-loglikelihood
    │   │       ├── hendrycksTest-electrical_engineering-v0-loglikelihood
    │   │       ├── hendrycksTest-elementary_mathematics-v0-loglikelihood
    │   │       ├── hendrycksTest-formal_logic-v0-loglikelihood
    │   │       ├── hendrycksTest-global_facts-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_biology-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_chemistry-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_computer_science-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_european_history-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_geography-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_government_and_politics-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_macroeconomics-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_mathematics-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_microeconomics-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_physics-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_psychology-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_statistics-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_us_history-v0-loglikelihood
    │   │       ├── hendrycksTest-high_school_world_history-v0-loglikelihood
    │   │       ├── hendrycksTest-human_aging-v0-loglikelihood
    │   │       ├── hendrycksTest-human_sexuality-v0-loglikelihood
    │   │       ├── hendrycksTest-international_law-v0-loglikelihood
    │   │       ├── hendrycksTest-jurisprudence-v0-loglikelihood
    │   │       ├── hendrycksTest-logical_fallacies-v0-loglikelihood
    │   │       ├── hendrycksTest-machine_learning-v0-loglikelihood
    │   │       ├── hendrycksTest-management-v0-loglikelihood
    │   │       ├── hendrycksTest-marketing-v0-loglikelihood
    │   │       ├── hendrycksTest-medical_genetics-v0-loglikelihood
    │   │       ├── hendrycksTest-miscellaneous-v0-loglikelihood
    │   │       ├── hendrycksTest-moral_disputes-v0-loglikelihood
    │   │       ├── hendrycksTest-moral_scenarios-v0-loglikelihood
    │   │       ├── hendrycksTest-nutrition-v0-loglikelihood
    │   │       ├── hendrycksTest-philosophy-v0-loglikelihood
    │   │       ├── hendrycksTest-prehistory-v0-loglikelihood
    │   │       ├── hendrycksTest-professional_accounting-v0-loglikelihood
    │   │       ├── hendrycksTest-professional_law-v0-loglikelihood
    │   │       ├── hendrycksTest-professional_medicine-v0-loglikelihood
    │   │       ├── hendrycksTest-professional_psychology-v0-loglikelihood
    │   │       ├── hendrycksTest-public_relations-v0-loglikelihood
    │   │       ├── hendrycksTest-security_studies-v0-loglikelihood
    │   │       ├── hendrycksTest-sociology-v0-loglikelihood
    │   │       ├── hendrycksTest-us_foreign_policy-v0-loglikelihood
    │   │       ├── hendrycksTest-virology-v0-loglikelihood
    │   │       ├── hendrycksTest-world_religions-v0-loglikelihood
    │   │       ├── iwslt17-ar-en-v0-greedy_until
    │   │       ├── iwslt17-en-ar-v0-greedy_until
    │   │       ├── lambada-v0-loglikelihood
    │   │       ├── lambada_cloze-v0-loglikelihood
    │   │       ├── lambada_mt_de-v0-loglikelihood
    │   │       ├── lambada_mt_en-v0-loglikelihood
    │   │       ├── lambada_mt_es-v0-loglikelihood
    │   │       ├── lambada_mt_fr-v0-loglikelihood
    │   │       ├── lambada_mt_it-v0-loglikelihood
    │   │       ├── lambada_openai-v0-loglikelihood
    │   │       ├── lambada_openai-v2.0-loglikelihood
    │   │       ├── lambada_openai_cloze-v0-loglikelihood
    │   │       ├── lambada_openai_mt_de-v0-loglikelihood
    │   │       ├── lambada_openai_mt_en-v0-loglikelihood
    │   │       ├── lambada_openai_mt_es-v0-loglikelihood
    │   │       ├── lambada_openai_mt_fr-v0-loglikelihood
    │   │       ├── lambada_openai_mt_it-v0-loglikelihood
    │   │       ├── lambada_standard-v0-loglikelihood
    │   │       ├── lambada_standard_cloze-v0-loglikelihood
    │   │       ├── logiqa-v0-loglikelihood
    │   │       ├── math_algebra-v0-greedy_until
    │   │       ├── math_algebra-v1-greedy_until
    │   │       ├── math_counting_and_prob-v0-greedy_until
    │   │       ├── math_counting_and_prob-v1-greedy_until
    │   │       ├── math_geometry-v0-greedy_until
    │   │       ├── math_geometry-v1-greedy_until
    │   │       ├── math_intermediate_algebra-v0-greedy_until
    │   │       ├── math_intermediate_algebra-v1-greedy_until
    │   │       ├── math_num_theory-v0-greedy_until
    │   │       ├── math_num_theory-v1-greedy_until
    │   │       ├── math_prealgebra-v0-greedy_until
    │   │       ├── math_prealgebra-v1-greedy_until
    │   │       ├── math_precalc-v0-greedy_until
    │   │       ├── math_precalc-v1-greedy_until
    │   │       ├── mathqa-v0-loglikelihood
    │   │       ├── mc_taco-v0-loglikelihood
    │   │       ├── mnli-v0-loglikelihood
    │   │       ├── mnli_mismatched-v0-loglikelihood
    │   │       ├── mrpc-v0-loglikelihood
    │   │       ├── multirc-v0-loglikelihood
    │   │       ├── multirc-v1-loglikelihood
    │   │       ├── mutual-v0-loglikelihood
    │   │       ├── mutual-v1-loglikelihood
    │   │       ├── mutual_plus-v0-loglikelihood
    │   │       ├── mutual_plus-v1-loglikelihood
    │   │       ├── openbookqa-v0-loglikelihood
    │   │       ├── pile_arxiv-v0-loglikelihood_rolling
    │   │       ├── pile_arxiv-v1-loglikelihood_rolling
    │   │       ├── pile_bookcorpus2-v0-loglikelihood_rolling
    │   │       ├── pile_bookcorpus2-v1-loglikelihood_rolling
    │   │       ├── pile_books3-v0-loglikelihood_rolling
    │   │       ├── pile_books3-v1-loglikelihood_rolling
    │   │       ├── pile_dm-mathematics-v0-loglikelihood_rolling
    │   │       ├── pile_dm-mathematics-v1-loglikelihood_rolling
    │   │       ├── pile_enron-v0-loglikelihood_rolling
    │   │       ├── pile_enron-v1-loglikelihood_rolling
    │   │       ├── pile_europarl-v0-loglikelihood_rolling
    │   │       ├── pile_europarl-v1-loglikelihood_rolling
    │   │       ├── pile_freelaw-v0-loglikelihood_rolling
    │   │       ├── pile_freelaw-v1-loglikelihood_rolling
    │   │       ├── pile_github-v0-loglikelihood_rolling
    │   │       ├── pile_github-v1-loglikelihood_rolling
    │   │       ├── pile_gutenberg-v0-loglikelihood_rolling
    │   │       ├── pile_gutenberg-v1-loglikelihood_rolling
    │   │       ├── pile_hackernews-v0-loglikelihood_rolling
    │   │       ├── pile_hackernews-v1-loglikelihood_rolling
    │   │       ├── pile_nih-exporter-v0-loglikelihood_rolling
    │   │       ├── pile_nih-exporter-v1-loglikelihood_rolling
    │   │       ├── pile_opensubtitles-v0-loglikelihood_rolling
    │   │       ├── pile_opensubtitles-v1-loglikelihood_rolling
    │   │       ├── pile_openwebtext2-v0-loglikelihood_rolling
    │   │       ├── pile_openwebtext2-v1-loglikelihood_rolling
    │   │       ├── pile_philpapers-v0-loglikelihood_rolling
    │   │       ├── pile_philpapers-v1-loglikelihood_rolling
    │   │       ├── pile_pile-cc-v0-loglikelihood_rolling
    │   │       ├── pile_pile-cc-v1-loglikelihood_rolling
    │   │       ├── pile_pubmed-abstracts-v0-loglikelihood_rolling
    │   │       ├── pile_pubmed-abstracts-v1-loglikelihood_rolling
    │   │       ├── pile_pubmed-central-v0-loglikelihood_rolling
    │   │       ├── pile_pubmed-central-v1-loglikelihood_rolling
    │   │       ├── pile_stackexchange-v0-loglikelihood_rolling
    │   │       ├── pile_stackexchange-v1-loglikelihood_rolling
    │   │       ├── pile_ubuntu-irc-v0-loglikelihood_rolling
    │   │       ├── pile_ubuntu-irc-v1-loglikelihood_rolling
    │   │       ├── pile_uspto-v0-loglikelihood_rolling
    │   │       ├── pile_uspto-v1-loglikelihood_rolling
    │   │       ├── pile_wikipedia-v0-loglikelihood_rolling
    │   │       ├── pile_wikipedia-v1-loglikelihood_rolling
    │   │       ├── pile_youtubesubtitles-v0-loglikelihood_rolling
    │   │       ├── pile_youtubesubtitles-v1-loglikelihood_rolling
    │   │       ├── piqa-v0-loglikelihood
    │   │       ├── prost-v0-loglikelihood
    │   │       ├── pubmedqa-v0-loglikelihood
    │   │       ├── qa4mre_2011-v0-loglikelihood
    │   │       ├── qa4mre_2012-v0-loglikelihood
    │   │       ├── qa4mre_2013-v0-loglikelihood
    │   │       ├── qnli-v0-loglikelihood
    │   │       ├── qqp-v0-loglikelihood
    │   │       ├── race-v0-loglikelihood
    │   │       ├── random_insertion-v0-greedy_until
    │   │       ├── record-v0-loglikelihood
    │   │       ├── reversed_words-v0-greedy_until
    │   │       ├── rte-v0-loglikelihood
    │   │       ├── sciq-v0-loglikelihood
    │   │       ├── squad2-v0-greedy_until
    │   │       ├── squad2-v0-loglikelihood
    │   │       ├── squad2-v1-greedy_until
    │   │       ├── squad2-v1-loglikelihood
    │   │       ├── sst-v0-loglikelihood
    │   │       ├── swag-v0-loglikelihood
    │   │       ├── textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl
    │   │       ├── textsynth_test_0c1c14571add7903b89e588c8212572b95bb57b334fc0752c89a7e045a5f63ae.pkl
    │   │       ├── textsynth_test_3092d07756f3e1d010c07524cc8a2ecba7f0c19f9e39f2aaf2bf440bfe328004.pkl
    │   │       ├── textsynth_test_434076260b6af3a46b7a5eaceec3306a5872c400a3872f744280b237455a0f8e.pkl
    │   │       ├── textsynth_test_49c47ae40e11f349f2f6b492128188b1b2bc103a421c676ee4b2142a68b43516.pkl
    │   │       ├── textsynth_test_4fd8d66a6dad7f602b40e5d7dc298d6fe329299d086a4659743a41f4a4012659.pkl
    │   │       ├── textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl
    │   │       ├── textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl
    │   │       ├── textsynth_test_7209c4617547bfe17cb9e7f5f735fe35822d650aefdc5fbeeaf0c1724effbe09.pkl
    │   │       ├── textsynth_test_7afdc285388e51094e12645f305328c759574fa3ec9751631025f8ad5ebf9f3e.pkl
    │   │       ├── textsynth_test_9d5f33dbfe1e254928c89f5ed85e4c010d888065f55a8f1b863bc1eb0340a5f2.pkl
    │   │       ├── textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl
    │   │       ├── textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl
    │   │       ├── textsynth_test_e7ad1e9f52a39e1ddd1e50f3c57ffa4546728dd150a67c0a0ddc8675c04e15d1.pkl
    │   │       ├── textsynth_test_f4bfe4beb605bd52a8ab6be3c9293639e7e2261d98de58159d15ccb83131bf4e.pkl
    │   │       ├── toxigen-v0-loglikelihood
    │   │       ├── triviaqa-v0-loglikelihood
    │   │       ├── triviaqa-v1-loglikelihood
    │   │       ├── truthfulqa_gen-v0-greedy_until
    │   │       ├── truthfulqa_gen-v1-greedy_until
    │   │       ├── truthfulqa_mc-v0-loglikelihood
    │   │       ├── truthfulqa_mc-v1-loglikelihood
    │   │       ├── webqs-v0-loglikelihood
    │   │       ├── wic-v0-loglikelihood
    │   │       ├── wikitext-v0-loglikelihood_rolling
    │   │       ├── wikitext-v1-loglikelihood_rolling
    │   │       ├── winogrande-v0-loglikelihood
    │   │       ├── wmt14-en-fr-v0-greedy_until
    │   │       ├── wmt14-fr-en-v0-greedy_until
    │   │       ├── wmt16-de-en-v0-greedy_until
    │   │       ├── wmt16-en-de-v0-greedy_until
    │   │       ├── wmt16-en-ro-v0-greedy_until
    │   │       ├── wmt16-ro-en-v0-greedy_until
    │   │       ├── wmt20-cs-en-v0-greedy_until
    │   │       ├── wmt20-de-en-v0-greedy_until
    │   │       ├── wmt20-de-fr-v0-greedy_until
    │   │       ├── wmt20-en-cs-v0-greedy_until
    │   │       ├── wmt20-en-de-v0-greedy_until
    │   │       ├── wmt20-en-iu-v0-greedy_until
    │   │       ├── wmt20-en-ja-v0-greedy_until
    │   │       ├── wmt20-en-ja-v1-greedy_until
    │   │       ├── wmt20-en-km-v0-greedy_until
    │   │       ├── wmt20-en-pl-v0-greedy_until
    │   │       ├── wmt20-en-ps-v0-greedy_until
    │   │       ├── wmt20-en-ru-v0-greedy_until
    │   │       ├── wmt20-en-ta-v0-greedy_until
    │   │       ├── wmt20-en-zh-v0-greedy_until
    │   │       ├── wmt20-en-zh-v1-greedy_until
    │   │       ├── wmt20-fr-de-v0-greedy_until
    │   │       ├── wmt20-iu-en-v0-greedy_until
    │   │       ├── wmt20-ja-en-v0-greedy_until
    │   │       ├── wmt20-km-en-v0-greedy_until
    │   │       ├── wmt20-pl-en-v0-greedy_until
    │   │       ├── wmt20-ps-en-v0-greedy_until
    │   │       ├── wmt20-ru-en-v0-greedy_until
    │   │       ├── wmt20-ta-en-v0-greedy_until
    │   │       ├── wmt20-zh-en-v0-greedy_until
    │   │       ├── wnli-v0-loglikelihood
    │   │       ├── wnli-v1-loglikelihood
    │   │       ├── wsc-v0-loglikelihood
    │   │       └── wsc273-v0-loglikelihood
    │   │   ├── tests_master
    │   │       ├── test_description.py
    │   │       ├── test_generate_13_grams.py
    │   │       ├── test_models.py
    │   │       └── test_version_stable.py
    │   │   └── utils.py
    ├── metrics.py
    ├── not_real_drop_lm_eval
    │   ├── __pycache__
    │   │   └── modify_llama.cpython-310.pyc
    │   └── modify_llama.py
    ├── openbookqa-5.jsonl
    ├── run_helm.py
    ├── run_lm_eval_harness_classification.py
    ├── run_lm_eval_harness_generation.py
    ├── run_piqa.sh
    ├── run_pred_long_bench_sample.py
    ├── tasks
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── eval_harness.cpython-310.pyc
    │   │   └── util.cpython-310.pyc
    │   ├── eval_harness.py
    │   └── util.py
    └── utils
    │   ├── __pycache__
    │       ├── data.cpython-310.pyc
    │       └── process_args.cpython-310.pyc
    │   ├── data.py
    │   ├── metrics.py
    │   └── process_args.py
├── README.md
└── requirements.txt


/D2O.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/D2O.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LICENSE


--------------------------------------------------------------------------------
/LLM_merge_new/.run_pred_long_bench.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/.run_pred_long_bench.py.swp


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_drop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_drop.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_merge.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_70b_merge.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_d2o.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_d2o.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_drop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_drop.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_merge.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_7b_13b_merge.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_full.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_full.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_new.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_new.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_streaming.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama3_streaming.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop_merge.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_drop_merge.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_local.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_local.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_streaming.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/modeling_llama_streaming.py


--------------------------------------------------------------------------------
/LLM_merge_new/LMEval_kv_token_merge/v433_modeling_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/LMEval_kv_token_merge/v433_modeling_llama.py


--------------------------------------------------------------------------------
/LLM_merge_new/__pycache__/metrics.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/__pycache__/metrics.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_h2o_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_coqa_merge_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_h2o_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_gsm8k_merge_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_h2o_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_merge_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_merge_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_merge_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_long_merge_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_long_merge_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_h2o_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.2.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.4.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.4.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.6.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.6.sh


--------------------------------------------------------------------------------
/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.8.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/bash_experiments/run_osc_truthful_merge_0.8.sh


--------------------------------------------------------------------------------
/LLM_merge_new/config/dataset2maxlen.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/dataset2maxlen.json


--------------------------------------------------------------------------------
/LLM_merge_new/config/dataset2prompt.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/dataset2prompt.json


--------------------------------------------------------------------------------
/LLM_merge_new/config/model2maxlen.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/model2maxlen.json


--------------------------------------------------------------------------------
/LLM_merge_new/config/model2path.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/config/model2path.json


--------------------------------------------------------------------------------
/LLM_merge_new/data/copa-5.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/copa-5.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/mt_bench.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/mt_bench.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/openbookqa-5.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/openbookqa-5.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/piqa-5.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/piqa-5.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/summarization_data/xsum_0shot.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/summarization_data/xsum_0shot.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/summarization_data/xsum_3shot.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/summarization_data/xsum_3shot.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/summarization_data/xsum_5shot.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/summarization_data/xsum_5shot.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/xsum.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/xsum.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/data/xsum_opt.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/data/xsum_opt.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/eval_long_bench.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/eval_long_bench.py


--------------------------------------------------------------------------------
/LLM_merge_new/evaluate_task_result.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/evaluate_task_result.py


--------------------------------------------------------------------------------
/LLM_merge_new/generate_task_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/generate_task_data.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.github/workflows/python-publish.yml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/.github/workflows/test.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.github/workflows/test.yml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.gitignore


--------------------------------------------------------------------------------
/LLM_merge_new/helm/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.pre-commit-config.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/.readthedocs.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/.readthedocs.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/CHANGELOG.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/CHANGELOG.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/LICENSE


--------------------------------------------------------------------------------
/LLM_merge_new/helm/MANIFEST.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/MANIFEST.in


--------------------------------------------------------------------------------
/LLM_merge_new/helm/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/command/eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/command/eval.sh


--------------------------------------------------------------------------------
/LLM_merge_new/helm/command/get_data.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/command/get_data.sh


--------------------------------------------------------------------------------
/LLM_merge_new/helm/demo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/demo.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/adding_new_models.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/adding_new_models.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/benchmark.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/benchmark.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/code.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/code.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/developer_setup.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/developer_setup.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/docstrings.css:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/docstrings.css


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/huggingface_models.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/huggingface_models.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/index.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/installation.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/installation.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/metrics.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/metrics.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/mkdocs_macros.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/mkdocs_macros.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/models.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/models.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/perturbations.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/perturbations.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/proxy-server.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/proxy-server.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/quick_start.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/quick_start.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/requirements.txt


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/scenarios.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/scenarios.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/schemas.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/schemas.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/docs/tutorial.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/docs/tutorial.md


--------------------------------------------------------------------------------
/LLM_merge_new/helm/mkdocs.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/mkdocs.yml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/pre-commit-venv.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/pre-commit-venv.sh


--------------------------------------------------------------------------------
/LLM_merge_new/helm/pre-commit.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/pre-commit.sh


--------------------------------------------------------------------------------
/LLM_merge_new/helm/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/pyproject.toml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/requirements-dev.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/requirements-dev.txt


--------------------------------------------------------------------------------
/LLM_merge_new/helm/requirements-freeze.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/requirements-freeze.txt


--------------------------------------------------------------------------------
/LLM_merge_new/helm/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/requirements.txt


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/cache/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/cache/copy_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/copy_cache.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/cache/fix_anthropic_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/fix_anthropic_cache.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/cache/fix_together_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/fix_together_cache.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/cache/remove_together_api_entries.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/cache/remove_together_api_entries.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/efficiency/generate_instances.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/efficiency/generate_instances.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/efficiency/generate_run_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/efficiency/generate_run_specs.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/estimate_cost.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/estimate_cost.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/fact_completion/README.MD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/README.MD


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/fact_completion/create_benchmark.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/create_benchmark.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/fact_completion/fetch_triples_and_aliases.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/fetch_triples_and_aliases.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/fact_completion/filter_triples.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/filter_triples.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/fact_completion/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/fact_completion/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/helm-run-all.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/helm-run-all.sh


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/offline_eval/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/offline_eval/export_requests.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/offline_eval/export_requests.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/offline_eval/import_results.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/offline_eval/import_results.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/scripts/verify_reproducibility.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/scripts/verify_reproducibility.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/setup.cfg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/setup.cfg


--------------------------------------------------------------------------------
/LLM_merge_new/helm/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/setup.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapter_spec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapter_spec.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/adapter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/adapter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/test_adapter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/adapters/test_adapter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/prompt.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/request_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/request_state.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/adaptation/scenario_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/adaptation/scenario_state.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/augmentations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/augmentations/data_augmenter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/data_augmenter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/augmentations/gender_perturbation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/gender_perturbation.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/augmentations/perturbation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/perturbation.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/augmentations/space_perturbation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/space_perturbation.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/augmentations/test_perturbation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/test_perturbation.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/augmentations/typos_perturbation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/augmentations/typos_perturbation.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/contamination/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/contamination/contamination_stats.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/contamination_stats.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/contamination/light_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/light_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/contamination/light_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/light_tokenizer.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/contamination/load_documents.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/contamination/load_documents.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/data_preprocessor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/data_preprocessor.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/executor.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/basic_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/basic_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/bbq_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/bbq_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/bias_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/bias_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/bias_word_lists.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/bias_word_lists.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/classification_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/classification_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/code_metrics_helper.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/copyright_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/copyright_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/disinformation_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/disinformation_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/dry_run_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/dry_run_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/metric.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/metric_name.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/metric_name.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/metric_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/metric_service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/numeracy_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/numeracy_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/ranking_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/ranking_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/statistic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/statistic.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/summac/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/summac/model_summac.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/summac/model_summac.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/summac/utils_misc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/summac/utils_misc.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/summarization_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/summarization_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/test_bias_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_bias_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/test_metric.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_metric.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/test_numeracy_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_numeracy_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/test_statistic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/test_statistic.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/tokens/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/metrics/toxicity_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/metrics/toxicity_metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/contamination.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/contamination.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/create_plots.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/create_plots.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_display.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_display.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_entry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_entry.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_chat_gpt.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_chat_gpt.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_cnn_opt.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_cnn_opt.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_extra.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_extra.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_gpu.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_gpu.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_small.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_small.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_tiny.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/run_specs_tiny.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/schema.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/schema.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/summarize.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/summarize.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/table.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/table.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/test_contamination.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/test_contamination.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/test_create_plots.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/test_create_plots.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/test_run_entry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/test_run_entry.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/presentation/xsum/run_specs_opt.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/presentation/xsum/run_specs_opt.conf


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/run.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/run.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/run_expander.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/run_expander.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/run_specs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/run_specs.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/runner.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/babi_qa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/babi_qa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/bbq_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/bbq_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/big_bench_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/big_bench_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/blimp_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/blimp_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/bold_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/bold_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/boolq_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/boolq_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/civil_comments_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/civil_comments_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/code_scenario_helper.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/commonsense_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/commonsense_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/copyright_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/copyright_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/covid_dialog_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/covid_dialog_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/dialogue_scenarios.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/dialogue_scenarios.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/disinformation_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/disinformation_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/dyck_language_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/dyck_language_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/gsm_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/gsm_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/ice_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/ice_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/imdb_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/imdb_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/legal_support_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/legal_support_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/lex_glue_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/lex_glue_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/lextreme_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/lextreme_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/lsat_qa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/lsat_qa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/math_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/math_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/me_q_sum_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/me_q_sum_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_dialog_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_dialog_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_mcqa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_mcqa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_qa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/med_qa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/mmlu_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/mmlu_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/msmarco_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/msmarco_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/narrativeqa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/narrativeqa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/natural_qa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/natural_qa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/newsqa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/newsqa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/numeracy_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/numeracy_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/opinions_qa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/opinions_qa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/pubmed_qa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/pubmed_qa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/quac_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/quac_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/raft_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/raft_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/simple_scenarios.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/simple_scenarios.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/summarization_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/summarization_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/test_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/test_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/the_pile_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/the_pile_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/truthful_qa_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/truthful_qa_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/twitter_aae_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/twitter_aae_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/wikifact_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/wikifact_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/wikitext_103_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/wikitext_103_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/scenarios/wmt_14_scenario.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/scenarios/wmt_14_scenario.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/server.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.css:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.css


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/benchmarking.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/contamination.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/contamination.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/general.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/general.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/images/crfm-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/crfm-logo.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo-simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo-simple.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/helm-logo.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/ai21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/ai21.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/meta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/organizations/meta.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/images/taxonomy-scenarios.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/images/taxonomy-scenarios.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/index.html


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/info-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/info-icon.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/json-urls-root.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/json-urls-root.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/json-urls.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/json-urls.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/plot-captions.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/plot-captions.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/schema.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/schema.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/static/utils.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/static/utils.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/test_data_preprocessor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/test_data_preprocessor.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/test_run_expander.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/test_run_expander.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/window_services/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/window_services/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/window_services/test_utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/window_services/tokenizer_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/window_services/tokenizer_service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/benchmark/window_services/window_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/benchmark/window_services/window_service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/authentication.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/authentication.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/cache.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/codec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/codec.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/critique_request.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/critique_request.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/general.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/general.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/hierarchical_logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/hierarchical_logger.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/object_spec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/object_spec.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/perspective_api_request.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/perspective_api_request.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/request.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/request.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/test_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/test_cache.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/test_codec.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/test_codec.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/test_general.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/test_general.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/common/tokenization_request.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/common/tokenization_request.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/accounts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/accounts.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/cli.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/ai21_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/ai21_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/aleph_alpha_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/aleph_alpha_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/anthropic_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/anthropic_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/auto_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/auto_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/chat_gpt_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/chat_gpt_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/cohere_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/cohere_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/critique_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/critique_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/google_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/google_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/goose_ai_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/goose_ai_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_model_registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_model_registry.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/huggingface_tokenizer.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/ice_tokenizer_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/ice_tokenizer_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/megatron_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/megatron_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/microsoft_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/microsoft_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/openai_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/openai_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/palmyra_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/palmyra_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/perspective_api_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/perspective_api_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/remote_model_registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/remote_model_registry.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/simple_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/simple_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/test_anthropic_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_anthropic_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/test_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_huggingface_tokenizer.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/test_ice_tokenizer_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_ice_tokenizer_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/test_yalm_tokenizer_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/test_yalm_tokenizer_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/together_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/together_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/voc_100b.sp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/voc_100b.sp


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/clients/yalm_tokenizer_client.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/example_queries.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/example_queries.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/models.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/query.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/query.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/retry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/retry.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/server.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/services/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/services/remote_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/remote_service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/services/server_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/server_service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/services/service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/services/test_remote_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/test_remote_service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/services/test_service.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/services/test_service.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/static/general.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/general.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/static/help.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/help.html


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/static/index.css:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/index.css


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/static/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/index.html


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/static/index.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/index.js


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/static/info-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/static/info-icon.png


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/test_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/test_models.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/test_retry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/test_retry.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/ai21_token_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/ai21_token_counter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/auto_token_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/auto_token_counter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/cohere_token_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/cohere_token_counter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/free_token_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/free_token_counter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/gooseai_token_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/gooseai_token_counter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/openai_token_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/openai_token_counter.py


--------------------------------------------------------------------------------
/LLM_merge_new/helm/src/helm/proxy/token_counters/token_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/helm/src/helm/proxy/token_counters/token_counter.py


--------------------------------------------------------------------------------
/LLM_merge_new/kv_token_merge/__pycache__/modify_llama.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/__pycache__/modify_llama.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/kv_token_merge/__pycache__/stream.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/__pycache__/stream.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/kv_token_merge/modify_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/modify_llama.py


--------------------------------------------------------------------------------
/LLM_merge_new/kv_token_merge/modify_llama_merge.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/modify_llama_merge.py


--------------------------------------------------------------------------------
/LLM_merge_new/kv_token_merge/stream.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/kv_token_merge/stream.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/.coveragerc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.coveragerc


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/.flake8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.flake8


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/.github/workflows/new_tasks.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.github/workflows/new_tasks.yml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/.github/workflows/unit_tests.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.github/workflows/unit_tests.yml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.gitignore


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/.pre-commit-config.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/CITATION.bib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/CITATION.bib


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @haileyschoelkopf @lintangsutawika @StellaAthena
2 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/LICENSE.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/LICENSE.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/docs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/docs/decontamination.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/decontamination.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/docs/interface.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/interface.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/docs/model_guide.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/model_guide.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/docs/new_task_guide.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/new_task_guide.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/docs/task_guide.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/docs/task_guide.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/examples/lm-eval-overview.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/examples/lm-eval-overview.ipynb


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/ignore.txt:
--------------------------------------------------------------------------------
1 | ROUGE
2 | rouge
3 | nin
4 | maka
5 | mor
6 | te
7 | ond
8 | extraversion
9 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/__main__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/__main__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/filter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/filter.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/instance.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/instance.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/model.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/registry.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/samplers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/samplers.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/api/task.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/api/task.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/archiver.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/archiver.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/janitor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/decontamination/janitor.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/evaluator.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/decontamination.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/decontamination.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/extraction.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/extraction.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/selection.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/selection.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/transformation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/filters/transformation.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/anthropic_llms.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/anthropic_llms.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/dummy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/dummy.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/gguf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/gguf.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/huggingface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/huggingface.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/openai_completions.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/openai_completions.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/textsynth.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/textsynth.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/models/vllm_causallms.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/models/vllm_causallms.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/prompts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/prompts/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r1.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r1.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r2.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r2.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r3.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/anli/anli_r3.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_challenge.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_challenge.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_easy.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arc/arc_easy.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arithmetic/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/arithmetic/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/asdiv/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/babi.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/babi/babi.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/_generate_configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/_generate_configs.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/fewshot/snarks.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/fewshot/snarks.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/zeroshot/snarks.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bbh/zeroshot/snarks.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/belebele/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/belebele/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/pythia.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/pythia.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/t0_eval.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/benchmarks/t0_eval.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bigbench/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/bigbench/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/drop_argument.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_1.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/npi_present_2.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/ceval/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/ceval/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/coqa/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/crows_pairs/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_gr.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_gr.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_li.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_li.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rch.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rch.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcs.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcs.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcss.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcss.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_wr.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_wr.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/csatqa/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/drop/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/cola/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/cola/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/mismatch.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/mismatch.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mnli/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mrpc/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/mrpc/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qnli/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qnli/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qqp/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/qqp/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/rte/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/rte/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/sst/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/sst/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/wnli/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/glue/wnli/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k-cot.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k-cot.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_en.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_en.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_es.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_es.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/hellaswag.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/hellaswag.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/hellaswag/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada_cloze/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/lambada_cloze/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/logiqa.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/logiqa.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/utils_logiqa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa/utils_logiqa.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logieval.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logieval.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logiqa2.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/logiqa2.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/utils_logiqa2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/logiqa2/utils_logiqa2.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/mathqa.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/mathqa.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mathqa/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mc_taco/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/direct/direct_yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/direct/direct_yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/en_cot/cot_yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/en_cot/cot_yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/native_cot/cot_yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/native_cot/cot_yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mgsm/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/minerva_math/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/multual_plus.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/multual_plus.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/mutual.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/mutual.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/mutual/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/nq_open/README.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/nq_open/nq_open.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/nq_open/nq_open.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/openbookqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/openbookqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_de.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_de.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_en.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_en.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_es.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_es.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_fr.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_fr.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ja.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ja.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ko.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ko.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_zh.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_zh.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_arxiv.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_arxiv.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_books3.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_books3.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_enron.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_enron.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_europarl.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_europarl.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_freelaw.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_freelaw.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_github.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_github.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_gutenberg.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_gutenberg.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_pile-cc.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_pile-cc.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_uspto.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_uspto.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_wikipedia.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pile/pile_wikipedia.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/piqa.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/piqa/piqa.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_in.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_in.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_out.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_out.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/prost/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/prost/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/pubmedqa.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/pubmedqa/pubmedqa.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2011.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2011.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2012.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2012.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2013.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2013.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/bool.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/bool.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/freeform.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/freeform.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/qasper/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/race.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/race/race.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/sciq.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/sciq/sciq.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/scrolls.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/scrolls.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/task.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/scrolls/task.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/default.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/siqa/default.yml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/task.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/squadv2/task.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/storycloze/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/storycloze/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/super_glue/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/super_glue/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/swag.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/swag/swag.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/toxigen.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/toxigen.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/toxigen/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/translation/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/triviaqa/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/truthfulqa/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/unscramble/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/unscramble/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/webqs.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/webqs/webqs.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/wikitext.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wikitext/wikitext.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/winogrande/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/winogrande/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wmt2016/metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/default.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/default.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/wsc273/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_et.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_et.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ht.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ht.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_id.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_id.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_it.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_it.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_qu.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_qu.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_sw.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_sw.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ta.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ta.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_th.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_th.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_tr.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_tr.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_vi.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_vi.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_zh.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/default_zh.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xcopa/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ar.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ar.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_bg.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_bg.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_common_yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_common_yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_de.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_de.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_el.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_el.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_en.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_en.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_es.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_es.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_fr.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_fr.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_hi.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_hi.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ru.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ru.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_sw.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_sw.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_th.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_th.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_tr.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_tr.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ur.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ur.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_vi.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_vi.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_zh.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_zh.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xstorycloze/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xstorycloze/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/tasks/xwinograd/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/lm_eval/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/lm_eval/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/mypy.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/mypy.ini


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/pyproject.toml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/build_benchmark.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/build_benchmark.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/clean_training_data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/cost_estimate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/cost_estimate.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/get_prompts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/get_prompts.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/make_gpt2_test_cases.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/make_gpt2_test_cases.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/make_table_results.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/make_table_results.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/make_table_tasks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/make_table_tasks.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/regression.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/regression.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/scripts/write_out.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/scripts/write_out.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/setup.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.coveragerc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.coveragerc


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.flake8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.flake8


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.gitignore


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/.pre-commit-config.yaml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/CITATION.bib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/CITATION.bib


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @haileyschoelkopf @lintangsutawika @StellaAthena
2 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/LICENSE.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/LICENSE.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/decontamination.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/decontamination.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/interface.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/interface.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/model_guide.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/model_guide.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/new_task_guide.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/new_task_guide.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/task_guide.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/docs/task_guide.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/ignore.txt:
--------------------------------------------------------------------------------
1 | ROUGE
2 | rouge
3 | nin
4 | maka
5 | mor
6 | te
7 | ond
8 | extraversion
9 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__main__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/__main__.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/filter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/filter.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/instance.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/instance.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/model.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/registry.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/samplers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/samplers.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/task.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/api/task.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/decontamination/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/evaluator.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/dummy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/dummy.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/gguf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/models/gguf.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/tasks/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/tasks/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/tasks/nq_open/README.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/lm_eval/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/mypy.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/mypy.ini


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/pyproject.toml


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/clean_training_data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/cost_estimate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/cost_estimate.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/get_prompts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/get_prompts.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/regression.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/regression.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/write_out.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/scripts/write_out.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/setup.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/templates/new_yaml_task/blank_yaml.yaml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_evaluator.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_janitor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_janitor.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_misc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_misc.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_tasks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_tasks.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/test_utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/src/lm-eval/tests/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/templates/new_yaml_task/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/templates/new_yaml_task/README.md


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/templates/new_yaml_task/blank_yaml.yaml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/models/test_gguf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/models/test_gguf.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/models/test_huggingface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/models/test_huggingface.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/models/test_vllm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/models/test_vllm.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/test_evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_evaluator.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/test_janitor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_janitor.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/test_misc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_misc.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/test_tasks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_tasks.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/test_utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/race-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/race-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/rte-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/rte-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/sciq-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/sciq-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/wic-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wic-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v1-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wnli-v1-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/testdata/wsc-v0-loglikelihood:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/testdata/wsc-v0-loglikelihood


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/tests_master/test_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/tests_master/test_models.py


--------------------------------------------------------------------------------
/LLM_merge_new/lm-evaluation-harness/tests/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/lm-evaluation-harness/tests/utils.py


--------------------------------------------------------------------------------
/LLM_merge_new/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/not_real_drop_lm_eval/modify_llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/not_real_drop_lm_eval/modify_llama.py


--------------------------------------------------------------------------------
/LLM_merge_new/openbookqa-5.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/openbookqa-5.jsonl


--------------------------------------------------------------------------------
/LLM_merge_new/run_helm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_helm.py


--------------------------------------------------------------------------------
/LLM_merge_new/run_lm_eval_harness_classification.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_lm_eval_harness_classification.py


--------------------------------------------------------------------------------
/LLM_merge_new/run_lm_eval_harness_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_lm_eval_harness_generation.py


--------------------------------------------------------------------------------
/LLM_merge_new/run_piqa.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_piqa.sh


--------------------------------------------------------------------------------
/LLM_merge_new/run_pred_long_bench_sample.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/run_pred_long_bench_sample.py


--------------------------------------------------------------------------------
/LLM_merge_new/tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__init__.py


--------------------------------------------------------------------------------
/LLM_merge_new/tasks/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/tasks/__pycache__/eval_harness.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__pycache__/eval_harness.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/tasks/__pycache__/util.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/__pycache__/util.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/tasks/eval_harness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/eval_harness.py


--------------------------------------------------------------------------------
/LLM_merge_new/tasks/util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/tasks/util.py


--------------------------------------------------------------------------------
/LLM_merge_new/utils/__pycache__/data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/__pycache__/data.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/utils/__pycache__/process_args.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/__pycache__/process_args.cpython-310.pyc


--------------------------------------------------------------------------------
/LLM_merge_new/utils/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/data.py


--------------------------------------------------------------------------------
/LLM_merge_new/utils/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/metrics.py


--------------------------------------------------------------------------------
/LLM_merge_new/utils/process_args.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/LLM_merge_new/utils/process_args.py


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/README.md


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIoT-MLSys-Lab/D2O/HEAD/requirements.txt


--------------------------------------------------------------------------------