├── .ipynb_checkpoints ├── INSTALLATION-checkpoint.md ├── LICENSE-checkpoint ├── README-checkpoint.md └── main-checkpoint.py ├── INSTALLATION.md ├── LICENSE ├── README.md ├── environment.yaml ├── lib ├── .ipynb_checkpoints │ └── modelling_llama_mod-checkpoint.py ├── data.py ├── eval.py ├── modelling_llama_mod.py └── scoring_model.py ├── lora_ft ├── .ipynb_checkpoints │ └── finetune_lm-checkpoint.py ├── evaluate_ppl.py ├── finetune_lm.py └── lm-evaluation-harness │ ├── .coveragerc │ ├── .flake8 │ ├── .gitignore │ ├── .ipynb_checkpoints │ └── README-checkpoint.md │ ├── .pre-commit-config.yaml │ ├── CITATION.bib │ ├── CODEOWNERS │ ├── LICENSE.md │ ├── README.md │ ├── docs │ ├── decontamination.md │ ├── description_guide.md │ ├── img │ │ └── fewshot_example_gpt3.png │ ├── task_guide.md │ └── task_table.md │ ├── ignore.txt │ ├── lm_eval.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt │ ├── lm_eval │ ├── .ipynb_checkpoints │ │ ├── base-checkpoint.py │ │ └── evaluator-checkpoint.py │ ├── __init__.py │ ├── base.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── asdiv │ │ │ ├── __init__.py │ │ │ ├── asdiv.py │ │ │ └── dataset_infos.json │ │ ├── bigbench_resources │ │ │ ├── causal_judgement.json │ │ │ ├── date_understanding.json │ │ │ ├── disambiguation_qa.json │ │ │ ├── dyck_languages.json │ │ │ ├── formal_fallacies_syllogisms_negation.json │ │ │ ├── geometric_shapes.json │ │ │ ├── hyperbaton.json │ │ │ ├── logical_deduction_five_objects.json │ │ │ ├── logical_deduction_seven_objects.json │ │ │ ├── logical_deduction_three_objects.json │ │ │ ├── movie_recommendation.json │ │ │ ├── navigate.json │ │ │ ├── reasoning_about_colored_objects.json │ │ │ ├── ruin_names.json │ │ │ ├── salient_translation_error_detection.json │ │ │ ├── snarks.json │ │ │ ├── sports_understanding.json │ │ │ ├── temporal_sequences.json │ │ │ ├── tracking_shuffled_objects_five_objects.json │ │ │ ├── tracking_shuffled_objects_seven_objects.json │ │ │ └── tracking_shuffled_objects_three_objects.json │ │ ├── coqa │ │ │ ├── __init__.py │ │ │ ├── coqa.py │ │ │ └── dataset_infos.json │ │ ├── drop │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── drop.py │ │ ├── headqa │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── headqa.py │ │ ├── hendrycks_ethics │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── hendrycks_ethics.py │ │ ├── hendrycks_math │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── hendrycks_math.py │ │ ├── logiqa │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── logiqa.py │ │ ├── mutual │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── mutual.py │ │ ├── pile │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── pile.py │ │ ├── quac │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── quac.py │ │ ├── sat_analogies │ │ │ ├── __init__.py │ │ │ └── sat_analogies.py │ │ └── unscramble │ │ │ ├── __init__.py │ │ │ ├── dataset_infos.json │ │ │ └── unscramble.py │ ├── decontamination │ │ ├── __init__.py │ │ ├── archiver.py │ │ ├── decontaminate.py │ │ └── janitor.py │ ├── evaluator.py │ ├── metrics.py │ ├── models │ │ ├── .ipynb_checkpoints │ │ │ └── huggingface-checkpoint.py │ │ ├── __init__.py │ │ ├── anthropic_llms.py │ │ ├── dummy.py │ │ ├── gpt2.py │ │ ├── gpt3.py │ │ ├── huggingface.py │ │ └── textsynth.py │ ├── tasks │ │ ├── .ipynb_checkpoints │ │ │ ├── __init__-checkpoint.py │ │ │ ├── arc-checkpoint.py │ │ │ ├── gsm8k-checkpoint.py │ │ │ └── winogrande-checkpoint.py │ │ ├── __init__.py │ │ ├── anli.py │ │ ├── arc.py │ │ ├── arithmetic.py │ │ ├── asdiv.py │ │ ├── bigbench.py │ │ ├── blimp.py │ │ ├── cbt.py │ │ ├── coqa.py │ │ ├── crowspairs.py │ │ ├── drop.py │ │ ├── glue.py │ │ ├── gsm8k.py │ │ ├── headqa.py │ │ ├── hellaswag.py │ │ ├── hendrycks_ethics.py │ │ ├── hendrycks_math.py │ │ ├── hendrycks_test.py │ │ ├── json.py │ │ ├── lambada.py │ │ ├── lambada_cloze.py │ │ ├── lambada_multilingual.py │ │ ├── logiqa.py │ │ ├── mathqa.py │ │ ├── mc_taco.py │ │ ├── mgsm.py │ │ ├── mutual.py │ │ ├── naturalqs.py │ │ ├── openbookqa.py │ │ ├── pawsx.py │ │ ├── pile.py │ │ ├── piqa.py │ │ ├── prost.py │ │ ├── pubmedqa.py │ │ ├── qa4mre.py │ │ ├── qasper.py │ │ ├── quac.py │ │ ├── race.py │ │ ├── sat.py │ │ ├── sciq.py │ │ ├── squad.py │ │ ├── storycloze.py │ │ ├── superglue.py │ │ ├── swag.py │ │ ├── toxigen.py │ │ ├── translation.py │ │ ├── triviaqa.py │ │ ├── truthfulqa.py │ │ ├── unscramble.py │ │ ├── webqs.py │ │ ├── wikitext.py │ │ ├── winogrande.py │ │ ├── wsc273.py │ │ ├── xcopa.py │ │ ├── xnli.py │ │ ├── xstorycloze.py │ │ └── xwinograd.py │ └── utils.py │ ├── main.py │ ├── pile_statistics.json │ ├── requirements.txt │ ├── results │ ├── bloom │ │ ├── bloom-1b1 │ │ │ ├── README.md │ │ │ ├── bloom-1b1_common_sense_reasoning_0-shot.json │ │ │ ├── bloom-1b1_gsm8k_8-shot.json │ │ │ ├── bloom-1b1_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── bloom-1b1_pawsx_0-shot.json │ │ │ ├── bloom-1b1_question_answering_0-shot.json │ │ │ ├── bloom-1b1_reading_comprehension_0-shot.json │ │ │ ├── bloom-1b1_xcopa_0-shot.json │ │ │ ├── bloom-1b1_xnli_0-shot.json │ │ │ ├── bloom-1b1_xstory_cloze_0-shot.json │ │ │ └── bloom-1b1_xwinograd_0-shot.json │ │ ├── bloom-1b7 │ │ │ ├── README.md │ │ │ ├── bloom-1b7_common_sense_reasoning_0-shot.json │ │ │ ├── bloom-1b7_gsm8k_8-shot.json │ │ │ ├── bloom-1b7_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── bloom-1b7_pawsx_0-shot.json │ │ │ ├── bloom-1b7_question_answering_0-shot.json │ │ │ ├── bloom-1b7_reading_comprehension_0-shot.json │ │ │ ├── bloom-1b7_xcopa_0-shot.json │ │ │ ├── bloom-1b7_xnli_0-shot.json │ │ │ ├── bloom-1b7_xstory_cloze_0-shot.json │ │ │ └── bloom-1b7_xwinograd_0-shot.json │ │ ├── bloom-3b │ │ │ ├── README.md │ │ │ ├── bloom-3b_common_sense_reasoning_0-shot.json │ │ │ ├── bloom-3b_gsm8k_8-shot.json │ │ │ ├── bloom-3b_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── bloom-3b_pawsx_0-shot.json │ │ │ ├── bloom-3b_question_answering_0-shot.json │ │ │ ├── bloom-3b_reading_comprehension_0-shot.json │ │ │ ├── bloom-3b_xcopa_0-shot.json │ │ │ ├── bloom-3b_xnli_0-shot.json │ │ │ ├── bloom-3b_xstory_cloze_0-shot.json │ │ │ └── bloom-3b_xwinograd_0-shot.json │ │ ├── bloom-560m │ │ │ ├── README.md │ │ │ ├── bloom-560m_common_sense_reasoning_0-shot.json │ │ │ ├── bloom-560m_gsm8k_8-shot.json │ │ │ ├── bloom-560m_lambada_openai_0-shot.json │ │ │ ├── bloom-560m_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── bloom-560m_pawsx_0-shot.json │ │ │ ├── bloom-560m_question_answering_0-shot.json │ │ │ ├── bloom-560m_reading_comprehension_0-shot.json │ │ │ ├── bloom-560m_xcopa_0-shot.json │ │ │ ├── bloom-560m_xnli_0-shot.json │ │ │ ├── bloom-560m_xstory_cloze_0-shot.json │ │ │ └── bloom-560m_xwinograd_0-shot.json │ │ └── bloom-7b1 │ │ │ ├── README.md │ │ │ ├── bloom-7b1_bbh_3-shot.json │ │ │ ├── bloom-7b1_common_sense_reasoning_0-shot.json │ │ │ ├── bloom-7b1_gsm8k_8-shot.json │ │ │ ├── bloom-7b1_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── bloom-7b1_pawsx_0-shot.json │ │ │ ├── bloom-7b1_question_answering_0-shot.json │ │ │ ├── bloom-7b1_reading_comprehension_0-shot.json │ │ │ ├── bloom-7b1_xcopa_0-shot.json │ │ │ ├── bloom-7b1_xnli_0-shot.json │ │ │ ├── bloom-7b1_xstory_cloze_0-shot.json │ │ │ └── bloom-7b1_xwinograd_0-shot.json │ ├── llama │ │ ├── llama-13B │ │ │ ├── README.md │ │ │ ├── llama-13B_arithmetic_5-shot.json │ │ │ ├── llama-13B_bbh_3-shot.json │ │ │ ├── llama-13B_blimp_0-shot.json │ │ │ ├── llama-13B_common_sense_reasoning_0-shot.json │ │ │ ├── llama-13B_glue_0-shot.json │ │ │ ├── llama-13B_gsm8k_8-shot.json │ │ │ ├── llama-13B_human_alignment_0-shot.json │ │ │ ├── llama-13B_lambada_0-shot.json │ │ │ ├── llama-13B_mathematical_reasoning_0-shot.json │ │ │ ├── llama-13B_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── llama-13B_mmlu_5-shot.json │ │ │ ├── llama-13B_pawsx_0-shot.json │ │ │ ├── llama-13B_question_answering_0-shot.json │ │ │ ├── llama-13B_reading_comprehension_0-shot.json │ │ │ ├── llama-13B_superglue_0-shot.json │ │ │ ├── llama-13B_xcopa_0-shot.json │ │ │ ├── llama-13B_xnli_0-shot.json │ │ │ ├── llama-13B_xstory_cloze_0-shot.json │ │ │ └── llama-13B_xwinograd_0-shot.json │ │ ├── llama-30B │ │ │ ├── README.md │ │ │ ├── llama-30B_bbh_3-shot.json │ │ │ ├── llama-30B_common_sense_reasoning_0-shot.json │ │ │ ├── llama-30B_gsm8k_8-shot.json │ │ │ ├── llama-30B_human_alignment_0-shot.json │ │ │ ├── llama-30B_mathematical_reasoning_0-shot.json │ │ │ ├── llama-30B_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── llama-30B_mmlu_5-shot.json │ │ │ ├── llama-30B_pawsx_0-shot.json │ │ │ ├── llama-30B_xcopa_0-shot.json │ │ │ ├── llama-30B_xnli_0-shot.json │ │ │ ├── llama-30B_xstory_cloze_0-shot.json │ │ │ └── llama-30B_xwinograd_0-shot.json │ │ └── llama-7B │ │ │ ├── README.md │ │ │ ├── llama-7B_anli_0-shot.json │ │ │ ├── llama-7B_arithmetic_5-shot.json │ │ │ ├── llama-7B_bbh_3-shot.json │ │ │ ├── llama-7B_blimp_0-shot.json │ │ │ ├── llama-7B_common_sense_reasoning_0-shot.json │ │ │ ├── llama-7B_glue_0-shot.json │ │ │ ├── llama-7B_gsm8k_8-shot.json │ │ │ ├── llama-7B_human_alignment_0-shot.json │ │ │ ├── llama-7B_lambada_0-shot.json │ │ │ ├── llama-7B_mathematical_reasoning_0-shot.json │ │ │ ├── llama-7B_mathematical_reasoning_few_shot_5-shot.json │ │ │ ├── llama-7B_mmlu_5-shot.json │ │ │ ├── llama-7B_pawsx_0-shot.json │ │ │ ├── llama-7B_question_answering_0-shot.json │ │ │ ├── llama-7B_reading_comprehension_0-shot.json │ │ │ ├── llama-7B_unscramble_0-shot.json │ │ │ ├── llama-7B_xcopa_0-shot.json │ │ │ ├── llama-7B_xnli_0-shot.json │ │ │ ├── llama-7B_xstory_cloze_0-shot.json │ │ │ └── llama-7B_xwinograd_0-shot.json │ ├── mpt │ │ └── mpt-7b │ │ │ ├── README.md │ │ │ ├── mpt-7b_anli_0-shot.json │ │ │ ├── mpt-7b_arithmetic_5-shot.json │ │ │ ├── mpt-7b_bbh_3-shot.json │ │ │ ├── mpt-7b_blimp_0-shot.json │ │ │ ├── mpt-7b_common_sense_reasoning_0-shot.json │ │ │ ├── mpt-7b_glue_0-shot.json │ │ │ ├── mpt-7b_human_alignment_0-shot.json │ │ │ ├── mpt-7b_lambada_0-shot.json │ │ │ ├── mpt-7b_mmlu_5-shot.json │ │ │ ├── mpt-7b_pawsx_0-shot.json │ │ │ ├── mpt-7b_reading_comprehension_0-shot.json │ │ │ ├── mpt-7b_superglue_0-shot.json │ │ │ ├── mpt-7b_unscramble_0-shot.json │ │ │ ├── mpt-7b_xcopa_0-shot.json │ │ │ ├── mpt-7b_xnli_0-shot.json │ │ │ ├── mpt-7b_xstory_cloze_0-shot.json │ │ │ └── mpt-7b_xwinograd_0-shot.json │ ├── opt │ │ ├── opt-1.3b │ │ │ ├── README.md │ │ │ └── opt-1.3b.json │ │ ├── opt-125m │ │ │ ├── README.md │ │ │ └── opt-125m.json │ │ ├── opt-13b │ │ │ ├── README.md │ │ │ └── opt-13b.json │ │ ├── opt-2.7b │ │ │ ├── README.md │ │ │ └── opt-2.7b.json │ │ ├── opt-30b │ │ │ ├── README.md │ │ │ └── opt-30b.json │ │ ├── opt-350m │ │ │ ├── README.md │ │ │ └── opt-350m.json │ │ ├── opt-6.7b │ │ │ ├── README.md │ │ │ └── opt-6.7b.json │ │ └── opt-66b │ │ │ ├── README.md │ │ │ └── opt-66b.json │ └── xglm │ │ ├── xglm-1.7B │ │ ├── README.md │ │ ├── xglm-1.7B_common_sense_reasoning_0-shot.json │ │ ├── xglm-1.7B_gsm8k_8-shot.json │ │ ├── xglm-1.7B_mathematical_reasoning_few_shot_5-shot.json │ │ ├── xglm-1.7B_pawsx_0-shot.json │ │ ├── xglm-1.7B_xcopa_0-shot.json │ │ ├── xglm-1.7B_xnli_0-shot.json │ │ ├── xglm-1.7B_xstory_cloze_0-shot.json │ │ └── xglm-1.7B_xwinograd_0-shot.json │ │ ├── xglm-2.9B │ │ ├── README.md │ │ ├── xglm-2.9B_common_sense_reasoning_0-shot.json │ │ ├── xglm-2.9B_pawsx_0-shot.json │ │ ├── xglm-2.9B_xcopa_0-shot.json │ │ ├── xglm-2.9B_xnli_0-shot.json │ │ ├── xglm-2.9B_xstory_cloze_0-shot.json │ │ └── xglm-2.9B_xwinograd_0-shot.json │ │ ├── xglm-4.5B │ │ ├── README.md │ │ ├── xglm-4.5B_common_sense_reasoning_0-shot.json │ │ ├── xglm-4.5B_gsm8k_8-shot.json │ │ ├── xglm-4.5B_pawsx_0-shot.json │ │ ├── xglm-4.5B_xcopa_0-shot.json │ │ ├── xglm-4.5B_xnli_0-shot.json │ │ ├── xglm-4.5B_xstory_cloze_0-shot.json │ │ └── xglm-4.5B_xwinograd_0-shot.json │ │ ├── xglm-564M │ │ ├── README.md │ │ ├── xglm-564M_common_sense_reasoning_0-shot.json │ │ ├── xglm-564M_gsm8k_8-shot.json │ │ ├── xglm-564M_lambada_openai_0-shot.json │ │ ├── xglm-564M_mathematical_reasoning_few_shot_5-shot.json │ │ ├── xglm-564M_pawsx_0-shot.json │ │ ├── xglm-564M_xcopa_0-shot.json │ │ ├── xglm-564M_xnli_0-shot.json │ │ ├── xglm-564M_xstory_cloze_0-shot.json │ │ └── xglm-564M_xwinograd_0-shot.json │ │ └── xglm-7.5B │ │ ├── README.md │ │ ├── xglm-7.5B_common_sense_reasoning_0-shot.json │ │ ├── xglm-7.5B_gsm8k_8-shot.json │ │ ├── xglm-7.5B_mathematical_reasoning_few_shot_5-shot.json │ │ ├── xglm-7.5B_pawsx_0-shot.json │ │ ├── xglm-7.5B_xcopa_0-shot.json │ │ ├── xglm-7.5B_xnli_0-shot.json │ │ ├── xglm-7.5B_xstory_cloze_0-shot.json │ │ └── xglm-7.5B_xwinograd_0-shot.json │ ├── scripts │ ├── __init__.py │ ├── clean_training_data │ │ ├── README.md │ │ ├── __init__.py │ │ ├── compress_and_package.py │ │ ├── generate_13_grams.py │ │ ├── investigate_pile.py │ │ ├── janitor_util.cpp │ │ ├── process_sorted_buckets.py │ │ └── sort_13_gram_buckets.py │ ├── cost_estimate.py │ ├── get_prompts.py │ ├── make_gpt2_test_cases.py │ ├── make_table_results.py │ ├── make_table_tasks.py │ ├── regression.py │ └── write_out.py │ ├── setup.py │ ├── templates │ ├── new_multiple_choice_task.py │ └── new_task.py │ └── tests │ ├── test_description_dict.py │ ├── test_evaluator.py │ ├── test_generate_13_grams.py │ ├── test_janitor.py │ ├── test_misc.py │ ├── test_models.py │ ├── test_tasks.py │ ├── test_utils.py │ ├── test_version_stable.py │ └── testdata │ ├── anagrams1-v0-greedy_until │ ├── anagrams1-v0-res.json │ ├── anagrams2-v0-greedy_until │ ├── anagrams2-v0-res.json │ ├── anli_r1-v0-loglikelihood │ ├── anli_r1-v0-res.json │ ├── anli_r2-v0-loglikelihood │ ├── anli_r2-v0-res.json │ ├── anli_r3-v0-loglikelihood │ ├── anli_r3-v0-res.json │ ├── arc_challenge-v0-loglikelihood │ ├── arc_challenge-v0-res.json │ ├── arc_easy-v0-loglikelihood │ ├── arc_easy-v0-res.json │ ├── arithmetic_1dc-v0-loglikelihood │ ├── arithmetic_1dc-v0-res.json │ ├── arithmetic_2da-v0-loglikelihood │ ├── arithmetic_2da-v0-res.json │ ├── arithmetic_2dm-v0-loglikelihood │ ├── arithmetic_2dm-v0-res.json │ ├── arithmetic_2ds-v0-loglikelihood │ ├── arithmetic_2ds-v0-res.json │ ├── arithmetic_3da-v0-loglikelihood │ ├── arithmetic_3da-v0-res.json │ ├── arithmetic_3ds-v0-loglikelihood │ ├── arithmetic_3ds-v0-res.json │ ├── arithmetic_4da-v0-loglikelihood │ ├── arithmetic_4da-v0-res.json │ ├── arithmetic_4ds-v0-loglikelihood │ ├── arithmetic_4ds-v0-res.json │ ├── arithmetic_5da-v0-loglikelihood │ ├── arithmetic_5da-v0-res.json │ ├── arithmetic_5ds-v0-loglikelihood │ ├── arithmetic_5ds-v0-res.json │ ├── blimp_adjunct_island-v0-loglikelihood │ ├── blimp_adjunct_island-v0-res.json │ ├── blimp_anaphor_gender_agreement-v0-loglikelihood │ ├── blimp_anaphor_gender_agreement-v0-res.json │ ├── blimp_anaphor_number_agreement-v0-loglikelihood │ ├── blimp_anaphor_number_agreement-v0-res.json │ ├── blimp_animate_subject_passive-v0-loglikelihood │ ├── blimp_animate_subject_passive-v0-res.json │ ├── blimp_animate_subject_trans-v0-loglikelihood │ ├── blimp_animate_subject_trans-v0-res.json │ ├── blimp_causative-v0-loglikelihood │ ├── blimp_causative-v0-res.json │ ├── blimp_complex_NP_island-v0-loglikelihood │ ├── blimp_complex_NP_island-v0-res.json │ ├── blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood │ ├── blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json │ ├── blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood │ ├── blimp_coordinate_structure_constraint_object_extraction-v0-res.json │ ├── blimp_determiner_noun_agreement_1-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_1-v0-res.json │ ├── blimp_determiner_noun_agreement_2-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_2-v0-res.json │ ├── blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_irregular_1-v0-res.json │ ├── blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_irregular_2-v0-res.json │ ├── blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_with_adj_2-v0-res.json │ ├── blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_with_adj_irregular_1-v0-res.json │ ├── blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_with_adj_irregular_2-v0-res.json │ ├── blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood │ ├── blimp_determiner_noun_agreement_with_adjective_1-v0-res.json │ ├── blimp_distractor_agreement_relational_noun-v0-loglikelihood │ ├── blimp_distractor_agreement_relational_noun-v0-res.json │ ├── blimp_distractor_agreement_relative_clause-v0-loglikelihood │ ├── blimp_distractor_agreement_relative_clause-v0-res.json │ ├── blimp_drop_argument-v0-loglikelihood │ ├── blimp_drop_argument-v0-res.json │ ├── blimp_ellipsis_n_bar_1-v0-loglikelihood │ ├── blimp_ellipsis_n_bar_1-v0-res.json │ ├── blimp_ellipsis_n_bar_2-v0-loglikelihood │ ├── blimp_ellipsis_n_bar_2-v0-res.json │ ├── blimp_existential_there_object_raising-v0-loglikelihood │ ├── blimp_existential_there_object_raising-v0-res.json │ ├── blimp_existential_there_quantifiers_1-v0-loglikelihood │ ├── blimp_existential_there_quantifiers_1-v0-res.json │ ├── blimp_existential_there_quantifiers_2-v0-loglikelihood │ ├── blimp_existential_there_quantifiers_2-v0-res.json │ ├── blimp_existential_there_subject_raising-v0-loglikelihood │ ├── blimp_existential_there_subject_raising-v0-res.json │ ├── blimp_expletive_it_object_raising-v0-loglikelihood │ ├── blimp_expletive_it_object_raising-v0-res.json │ ├── blimp_inchoative-v0-loglikelihood │ ├── blimp_inchoative-v0-res.json │ ├── blimp_intransitive-v0-loglikelihood │ ├── blimp_intransitive-v0-res.json │ ├── blimp_irregular_past_participle_adjectives-v0-loglikelihood │ ├── blimp_irregular_past_participle_adjectives-v0-res.json │ ├── blimp_irregular_past_participle_verbs-v0-loglikelihood │ ├── blimp_irregular_past_participle_verbs-v0-res.json │ ├── blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood │ ├── blimp_irregular_plural_subject_verb_agreement_1-v0-res.json │ ├── blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood │ ├── blimp_irregular_plural_subject_verb_agreement_2-v0-res.json │ ├── blimp_left_branch_island_echo_question-v0-loglikelihood │ ├── blimp_left_branch_island_echo_question-v0-res.json │ ├── blimp_left_branch_island_simple_question-v0-loglikelihood │ ├── blimp_left_branch_island_simple_question-v0-res.json │ ├── blimp_matrix_question_npi_licensor_present-v0-loglikelihood │ ├── blimp_matrix_question_npi_licensor_present-v0-res.json │ ├── blimp_npi_present_1-v0-loglikelihood │ ├── blimp_npi_present_1-v0-res.json │ ├── blimp_npi_present_2-v0-loglikelihood │ ├── blimp_npi_present_2-v0-res.json │ ├── blimp_only_npi_licensor_present-v0-loglikelihood │ ├── blimp_only_npi_licensor_present-v0-res.json │ ├── blimp_only_npi_scope-v0-loglikelihood │ ├── blimp_only_npi_scope-v0-res.json │ ├── blimp_passive_1-v0-loglikelihood │ ├── blimp_passive_1-v0-res.json │ ├── blimp_passive_2-v0-loglikelihood │ ├── blimp_passive_2-v0-res.json │ ├── blimp_principle_A_c_command-v0-loglikelihood │ ├── blimp_principle_A_c_command-v0-res.json │ ├── blimp_principle_A_case_1-v0-loglikelihood │ ├── blimp_principle_A_case_1-v0-res.json │ ├── blimp_principle_A_case_2-v0-loglikelihood │ ├── blimp_principle_A_case_2-v0-res.json │ ├── blimp_principle_A_domain_1-v0-loglikelihood │ ├── blimp_principle_A_domain_1-v0-res.json │ ├── blimp_principle_A_domain_2-v0-loglikelihood │ ├── blimp_principle_A_domain_2-v0-res.json │ ├── blimp_principle_A_domain_3-v0-loglikelihood │ ├── blimp_principle_A_domain_3-v0-res.json │ ├── blimp_principle_A_reconstruction-v0-loglikelihood │ ├── blimp_principle_A_reconstruction-v0-res.json │ ├── blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood │ ├── blimp_regular_plural_subject_verb_agreement_1-v0-res.json │ ├── blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood │ ├── blimp_regular_plural_subject_verb_agreement_2-v0-res.json │ ├── blimp_sentential_negation_npi_licensor_present-v0-loglikelihood │ ├── blimp_sentential_negation_npi_licensor_present-v0-res.json │ ├── blimp_sentential_negation_npi_scope-v0-loglikelihood │ ├── blimp_sentential_negation_npi_scope-v0-res.json │ ├── blimp_sentential_subject_island-v0-loglikelihood │ ├── blimp_sentential_subject_island-v0-res.json │ ├── blimp_superlative_quantifiers_1-v0-loglikelihood │ ├── blimp_superlative_quantifiers_1-v0-res.json │ ├── blimp_superlative_quantifiers_2-v0-loglikelihood │ ├── blimp_superlative_quantifiers_2-v0-res.json │ ├── blimp_tough_vs_raising_1-v0-loglikelihood │ ├── blimp_tough_vs_raising_1-v0-res.json │ ├── blimp_tough_vs_raising_2-v0-loglikelihood │ ├── blimp_tough_vs_raising_2-v0-res.json │ ├── blimp_transitive-v0-loglikelihood │ ├── blimp_transitive-v0-res.json │ ├── blimp_wh_island-v0-loglikelihood │ ├── blimp_wh_island-v0-res.json │ ├── blimp_wh_questions_object_gap-v0-loglikelihood │ ├── blimp_wh_questions_object_gap-v0-res.json │ ├── blimp_wh_questions_subject_gap-v0-loglikelihood │ ├── blimp_wh_questions_subject_gap-v0-res.json │ ├── blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood │ ├── blimp_wh_questions_subject_gap_long_distance-v0-res.json │ ├── blimp_wh_vs_that_no_gap-v0-loglikelihood │ ├── blimp_wh_vs_that_no_gap-v0-res.json │ ├── blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood │ ├── blimp_wh_vs_that_no_gap_long_distance-v0-res.json │ ├── blimp_wh_vs_that_with_gap-v0-loglikelihood │ ├── blimp_wh_vs_that_with_gap-v0-res.json │ ├── blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood │ ├── blimp_wh_vs_that_with_gap_long_distance-v0-res.json │ ├── boolq-v0-loglikelihood │ ├── boolq-v0-res.json │ ├── boolq-v1-loglikelihood │ ├── boolq-v1-res.json │ ├── cb-v0-loglikelihood │ ├── cb-v0-res.json │ ├── cb-v1-loglikelihood │ ├── cb-v1-res.json │ ├── cola-v0-loglikelihood │ ├── cola-v0-res.json │ ├── copa-v0-loglikelihood │ ├── copa-v0-res.json │ ├── coqa-v0-greedy_until │ ├── coqa-v0-res.json │ ├── coqa-v1-greedy_until │ ├── coqa-v1-res.json │ ├── crows_pairs_english-v0-loglikelihood │ ├── crows_pairs_english-v0-res.json │ ├── crows_pairs_english_age-v0-loglikelihood │ ├── crows_pairs_english_age-v0-res.json │ ├── crows_pairs_english_autre-v0-loglikelihood │ ├── crows_pairs_english_autre-v0-res.json │ ├── crows_pairs_english_disability-v0-loglikelihood │ ├── crows_pairs_english_disability-v0-res.json │ ├── crows_pairs_english_gender-v0-loglikelihood │ ├── crows_pairs_english_gender-v0-res.json │ ├── crows_pairs_english_nationality-v0-loglikelihood │ ├── crows_pairs_english_nationality-v0-res.json │ ├── crows_pairs_english_physical_appearance-v0-loglikelihood │ ├── crows_pairs_english_physical_appearance-v0-res.json │ ├── crows_pairs_english_race_color-v0-loglikelihood │ ├── crows_pairs_english_race_color-v0-res.json │ ├── crows_pairs_english_religion-v0-loglikelihood │ ├── crows_pairs_english_religion-v0-res.json │ ├── crows_pairs_english_sexual_orientation-v0-loglikelihood │ ├── crows_pairs_english_sexual_orientation-v0-res.json │ ├── crows_pairs_english_socioeconomic-v0-loglikelihood │ ├── crows_pairs_english_socioeconomic-v0-res.json │ ├── crows_pairs_french-v0-loglikelihood │ ├── crows_pairs_french-v0-res.json │ ├── crows_pairs_french_age-v0-loglikelihood │ ├── crows_pairs_french_age-v0-res.json │ ├── crows_pairs_french_autre-v0-loglikelihood │ ├── crows_pairs_french_autre-v0-res.json │ ├── crows_pairs_french_disability-v0-loglikelihood │ ├── crows_pairs_french_disability-v0-res.json │ ├── crows_pairs_french_gender-v0-loglikelihood │ ├── crows_pairs_french_gender-v0-res.json │ ├── crows_pairs_french_nationality-v0-loglikelihood │ ├── crows_pairs_french_nationality-v0-res.json │ ├── crows_pairs_french_physical_appearance-v0-loglikelihood │ ├── crows_pairs_french_physical_appearance-v0-res.json │ ├── crows_pairs_french_race_color-v0-loglikelihood │ ├── crows_pairs_french_race_color-v0-res.json │ ├── crows_pairs_french_religion-v0-loglikelihood │ ├── crows_pairs_french_religion-v0-res.json │ ├── crows_pairs_french_sexual_orientation-v0-loglikelihood │ ├── crows_pairs_french_sexual_orientation-v0-res.json │ ├── crows_pairs_french_socioeconomic-v0-loglikelihood │ ├── crows_pairs_french_socioeconomic-v0-res.json │ ├── cycle_letters-v0-greedy_until │ ├── cycle_letters-v0-res.json │ ├── drop-v0-greedy_until │ ├── drop-v0-res.json │ ├── drop-v1-greedy_until │ ├── drop-v1-res.json │ ├── ethics_cm-v0-loglikelihood │ ├── ethics_cm-v0-res.json │ ├── ethics_deontology-v0-loglikelihood │ ├── ethics_deontology-v0-res.json │ ├── ethics_justice-v0-loglikelihood │ ├── ethics_justice-v0-res.json │ ├── ethics_utilitarianism-v0-loglikelihood │ ├── ethics_utilitarianism-v0-res.json │ ├── ethics_utilitarianism_original-v0-loglikelihood │ ├── ethics_utilitarianism_original-v0-res.json │ ├── ethics_virtue-v0-loglikelihood │ ├── ethics_virtue-v0-res.json │ ├── gpt3_test_0deb8e9bde8e8327bbc48157f638ff3ba06b0cd816dad2beb8ad90f7fbe795c7.pkl │ ├── gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl │ ├── gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl │ ├── gpt3_test_cfd11f555a5a63b6dfa114a55a932e51b724cdd44d4842586b9ce37260bf7aaa.pkl │ ├── gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl │ ├── gsm8k-v0-greedy_until │ ├── gsm8k-v0-res.json │ ├── headqa-v0-loglikelihood │ ├── headqa-v0-res.json │ ├── headqa_en-v0-loglikelihood │ ├── headqa_en-v0-res.json │ ├── headqa_es-v0-loglikelihood │ ├── headqa_es-v0-res.json │ ├── hellaswag-v0-loglikelihood │ ├── hellaswag-v0-res.json │ ├── hendrycksTest-abstract_algebra-v0-loglikelihood │ ├── hendrycksTest-abstract_algebra-v0-res.json │ ├── hendrycksTest-anatomy-v0-loglikelihood │ ├── hendrycksTest-anatomy-v0-res.json │ ├── hendrycksTest-astronomy-v0-loglikelihood │ ├── hendrycksTest-astronomy-v0-res.json │ ├── hendrycksTest-business_ethics-v0-loglikelihood │ ├── hendrycksTest-business_ethics-v0-res.json │ ├── hendrycksTest-clinical_knowledge-v0-loglikelihood │ ├── hendrycksTest-clinical_knowledge-v0-res.json │ ├── hendrycksTest-college_biology-v0-loglikelihood │ ├── hendrycksTest-college_biology-v0-res.json │ ├── hendrycksTest-college_chemistry-v0-loglikelihood │ ├── hendrycksTest-college_chemistry-v0-res.json │ ├── hendrycksTest-college_computer_science-v0-loglikelihood │ ├── hendrycksTest-college_computer_science-v0-res.json │ ├── hendrycksTest-college_mathematics-v0-loglikelihood │ ├── hendrycksTest-college_mathematics-v0-res.json │ ├── hendrycksTest-college_medicine-v0-loglikelihood │ ├── hendrycksTest-college_medicine-v0-res.json │ ├── hendrycksTest-college_physics-v0-loglikelihood │ ├── hendrycksTest-college_physics-v0-res.json │ ├── hendrycksTest-computer_security-v0-loglikelihood │ ├── hendrycksTest-computer_security-v0-res.json │ ├── hendrycksTest-conceptual_physics-v0-loglikelihood │ ├── hendrycksTest-conceptual_physics-v0-res.json │ ├── hendrycksTest-econometrics-v0-loglikelihood │ ├── hendrycksTest-econometrics-v0-res.json │ ├── hendrycksTest-electrical_engineering-v0-loglikelihood │ ├── hendrycksTest-electrical_engineering-v0-res.json │ ├── hendrycksTest-elementary_mathematics-v0-loglikelihood │ ├── hendrycksTest-elementary_mathematics-v0-res.json │ ├── hendrycksTest-formal_logic-v0-loglikelihood │ ├── hendrycksTest-formal_logic-v0-res.json │ ├── hendrycksTest-global_facts-v0-loglikelihood │ ├── hendrycksTest-global_facts-v0-res.json │ ├── hendrycksTest-high_school_biology-v0-loglikelihood │ ├── hendrycksTest-high_school_biology-v0-res.json │ ├── hendrycksTest-high_school_chemistry-v0-loglikelihood │ ├── hendrycksTest-high_school_chemistry-v0-res.json │ ├── hendrycksTest-high_school_computer_science-v0-loglikelihood │ ├── hendrycksTest-high_school_computer_science-v0-res.json │ ├── hendrycksTest-high_school_european_history-v0-loglikelihood │ ├── hendrycksTest-high_school_european_history-v0-res.json │ ├── hendrycksTest-high_school_geography-v0-loglikelihood │ ├── hendrycksTest-high_school_geography-v0-res.json │ ├── hendrycksTest-high_school_government_and_politics-v0-loglikelihood │ ├── hendrycksTest-high_school_government_and_politics-v0-res.json │ ├── hendrycksTest-high_school_macroeconomics-v0-loglikelihood │ ├── hendrycksTest-high_school_macroeconomics-v0-res.json │ ├── hendrycksTest-high_school_mathematics-v0-loglikelihood │ ├── hendrycksTest-high_school_mathematics-v0-res.json │ ├── hendrycksTest-high_school_microeconomics-v0-loglikelihood │ ├── hendrycksTest-high_school_microeconomics-v0-res.json │ ├── hendrycksTest-high_school_physics-v0-loglikelihood │ ├── hendrycksTest-high_school_physics-v0-res.json │ ├── hendrycksTest-high_school_psychology-v0-loglikelihood │ ├── hendrycksTest-high_school_psychology-v0-res.json │ ├── hendrycksTest-high_school_statistics-v0-loglikelihood │ ├── hendrycksTest-high_school_statistics-v0-res.json │ ├── hendrycksTest-high_school_us_history-v0-loglikelihood │ ├── hendrycksTest-high_school_us_history-v0-res.json │ ├── hendrycksTest-high_school_world_history-v0-loglikelihood │ ├── hendrycksTest-high_school_world_history-v0-res.json │ ├── hendrycksTest-human_aging-v0-loglikelihood │ ├── hendrycksTest-human_aging-v0-res.json │ ├── hendrycksTest-human_sexuality-v0-loglikelihood │ ├── hendrycksTest-human_sexuality-v0-res.json │ ├── hendrycksTest-international_law-v0-loglikelihood │ ├── hendrycksTest-international_law-v0-res.json │ ├── hendrycksTest-jurisprudence-v0-loglikelihood │ ├── hendrycksTest-jurisprudence-v0-res.json │ ├── hendrycksTest-logical_fallacies-v0-loglikelihood │ ├── hendrycksTest-logical_fallacies-v0-res.json │ ├── hendrycksTest-machine_learning-v0-loglikelihood │ ├── hendrycksTest-machine_learning-v0-res.json │ ├── hendrycksTest-management-v0-loglikelihood │ ├── hendrycksTest-management-v0-res.json │ ├── hendrycksTest-marketing-v0-loglikelihood │ ├── hendrycksTest-marketing-v0-res.json │ ├── hendrycksTest-medical_genetics-v0-loglikelihood │ ├── hendrycksTest-medical_genetics-v0-res.json │ ├── hendrycksTest-miscellaneous-v0-loglikelihood │ ├── hendrycksTest-miscellaneous-v0-res.json │ ├── hendrycksTest-moral_disputes-v0-loglikelihood │ ├── hendrycksTest-moral_disputes-v0-res.json │ ├── hendrycksTest-moral_scenarios-v0-loglikelihood │ ├── hendrycksTest-moral_scenarios-v0-res.json │ ├── hendrycksTest-nutrition-v0-loglikelihood │ ├── hendrycksTest-nutrition-v0-res.json │ ├── hendrycksTest-philosophy-v0-loglikelihood │ ├── hendrycksTest-philosophy-v0-res.json │ ├── hendrycksTest-prehistory-v0-loglikelihood │ ├── hendrycksTest-prehistory-v0-res.json │ ├── hendrycksTest-professional_accounting-v0-loglikelihood │ ├── hendrycksTest-professional_accounting-v0-res.json │ ├── hendrycksTest-professional_law-v0-loglikelihood │ ├── hendrycksTest-professional_law-v0-res.json │ ├── hendrycksTest-professional_medicine-v0-loglikelihood │ ├── hendrycksTest-professional_medicine-v0-res.json │ ├── hendrycksTest-professional_psychology-v0-loglikelihood │ ├── hendrycksTest-professional_psychology-v0-res.json │ ├── hendrycksTest-public_relations-v0-loglikelihood │ ├── hendrycksTest-public_relations-v0-res.json │ ├── hendrycksTest-security_studies-v0-loglikelihood │ ├── hendrycksTest-security_studies-v0-res.json │ ├── hendrycksTest-sociology-v0-loglikelihood │ ├── hendrycksTest-sociology-v0-res.json │ ├── hendrycksTest-us_foreign_policy-v0-loglikelihood │ ├── hendrycksTest-us_foreign_policy-v0-res.json │ ├── hendrycksTest-virology-v0-loglikelihood │ ├── hendrycksTest-virology-v0-res.json │ ├── hendrycksTest-world_religions-v0-loglikelihood │ ├── hendrycksTest-world_religions-v0-res.json │ ├── iwslt17-ar-en-v0-greedy_until │ ├── iwslt17-ar-en-v0-res.json │ ├── iwslt17-en-ar-v0-greedy_until │ ├── iwslt17-en-ar-v0-res.json │ ├── lambada-v0-loglikelihood │ ├── lambada-v0-res.json │ ├── lambada_cloze-v0-loglikelihood │ ├── lambada_cloze-v0-res.json │ ├── lambada_mt_de-v0-loglikelihood │ ├── lambada_mt_de-v0-res.json │ ├── lambada_mt_en-v0-loglikelihood │ ├── lambada_mt_en-v0-res.json │ ├── lambada_mt_es-v0-loglikelihood │ ├── lambada_mt_es-v0-res.json │ ├── lambada_mt_fr-v0-loglikelihood │ ├── lambada_mt_fr-v0-res.json │ ├── lambada_mt_it-v0-loglikelihood │ ├── lambada_mt_it-v0-res.json │ ├── lambada_openai-v0-loglikelihood │ ├── lambada_openai-v0-res.json │ ├── lambada_openai_cloze-v0-loglikelihood │ ├── lambada_openai_cloze-v0-res.json │ ├── lambada_openai_mt_de-v0-loglikelihood │ ├── lambada_openai_mt_de-v0-res.json │ ├── lambada_openai_mt_en-v0-loglikelihood │ ├── lambada_openai_mt_en-v0-res.json │ ├── lambada_openai_mt_es-v0-loglikelihood │ ├── lambada_openai_mt_es-v0-res.json │ ├── lambada_openai_mt_fr-v0-loglikelihood │ ├── lambada_openai_mt_fr-v0-res.json │ ├── lambada_openai_mt_it-v0-loglikelihood │ ├── lambada_openai_mt_it-v0-res.json │ ├── lambada_standard-v0-loglikelihood │ ├── lambada_standard-v0-res.json │ ├── lambada_standard_cloze-v0-loglikelihood │ ├── lambada_standard_cloze-v0-res.json │ ├── logiqa-v0-loglikelihood │ ├── logiqa-v0-res.json │ ├── math_algebra-v0-greedy_until │ ├── math_algebra-v0-res.json │ ├── math_algebra-v1-greedy_until │ ├── math_algebra-v1-res.json │ ├── math_counting_and_prob-v0-greedy_until │ ├── math_counting_and_prob-v0-res.json │ ├── math_counting_and_prob-v1-greedy_until │ ├── math_counting_and_prob-v1-res.json │ ├── math_geometry-v0-greedy_until │ ├── math_geometry-v0-res.json │ ├── math_geometry-v1-greedy_until │ ├── math_geometry-v1-res.json │ ├── math_intermediate_algebra-v0-greedy_until │ ├── math_intermediate_algebra-v0-res.json │ ├── math_intermediate_algebra-v1-greedy_until │ ├── math_intermediate_algebra-v1-res.json │ ├── math_num_theory-v0-greedy_until │ ├── math_num_theory-v0-res.json │ ├── math_num_theory-v1-greedy_until │ ├── math_num_theory-v1-res.json │ ├── math_prealgebra-v0-greedy_until │ ├── math_prealgebra-v0-res.json │ ├── math_prealgebra-v1-greedy_until │ ├── math_prealgebra-v1-res.json │ ├── math_precalc-v0-greedy_until │ ├── math_precalc-v0-res.json │ ├── math_precalc-v1-greedy_until │ ├── math_precalc-v1-res.json │ ├── mathqa-v0-loglikelihood │ ├── mathqa-v0-res.json │ ├── mc_taco-v0-loglikelihood │ ├── mc_taco-v0-res.json │ ├── mnli-v0-loglikelihood │ ├── mnli-v0-res.json │ ├── mnli_mismatched-v0-loglikelihood │ ├── mnli_mismatched-v0-res.json │ ├── mrpc-v0-loglikelihood │ ├── mrpc-v0-res.json │ ├── multirc-v0-loglikelihood │ ├── multirc-v0-res.json │ ├── multirc-v1-loglikelihood │ ├── multirc-v1-res.json │ ├── mutual-v0-loglikelihood │ ├── mutual-v0-res.json │ ├── mutual-v1-loglikelihood │ ├── mutual-v1-res.json │ ├── mutual_plus-v0-loglikelihood │ ├── mutual_plus-v0-res.json │ ├── mutual_plus-v1-loglikelihood │ ├── mutual_plus-v1-res.json │ ├── openbookqa-v0-loglikelihood │ ├── openbookqa-v0-res.json │ ├── pile_arxiv-v0-loglikelihood_rolling │ ├── pile_arxiv-v0-res.json │ ├── pile_arxiv-v1-loglikelihood_rolling │ ├── pile_arxiv-v1-res.json │ ├── pile_bookcorpus2-v0-loglikelihood_rolling │ ├── pile_bookcorpus2-v0-res.json │ ├── pile_bookcorpus2-v1-loglikelihood_rolling │ ├── pile_bookcorpus2-v1-res.json │ ├── pile_books3-v0-loglikelihood_rolling │ ├── pile_books3-v0-res.json │ ├── pile_books3-v1-loglikelihood_rolling │ ├── pile_books3-v1-res.json │ ├── pile_dm-mathematics-v0-loglikelihood_rolling │ ├── pile_dm-mathematics-v0-res.json │ ├── pile_dm-mathematics-v1-loglikelihood_rolling │ ├── pile_dm-mathematics-v1-res.json │ ├── pile_enron-v0-loglikelihood_rolling │ ├── pile_enron-v0-res.json │ ├── pile_enron-v1-loglikelihood_rolling │ ├── pile_enron-v1-res.json │ ├── pile_europarl-v0-loglikelihood_rolling │ ├── pile_europarl-v0-res.json │ ├── pile_europarl-v1-loglikelihood_rolling │ ├── pile_europarl-v1-res.json │ ├── pile_freelaw-v0-loglikelihood_rolling │ ├── pile_freelaw-v0-res.json │ ├── pile_freelaw-v1-loglikelihood_rolling │ ├── pile_freelaw-v1-res.json │ ├── pile_github-v0-loglikelihood_rolling │ ├── pile_github-v0-res.json │ ├── pile_github-v1-loglikelihood_rolling │ ├── pile_github-v1-res.json │ ├── pile_gutenberg-v0-loglikelihood_rolling │ ├── pile_gutenberg-v0-res.json │ ├── pile_gutenberg-v1-loglikelihood_rolling │ ├── pile_gutenberg-v1-res.json │ ├── pile_hackernews-v0-loglikelihood_rolling │ ├── pile_hackernews-v0-res.json │ ├── pile_hackernews-v1-loglikelihood_rolling │ ├── pile_hackernews-v1-res.json │ ├── pile_nih-exporter-v0-loglikelihood_rolling │ ├── pile_nih-exporter-v0-res.json │ ├── pile_nih-exporter-v1-loglikelihood_rolling │ ├── pile_nih-exporter-v1-res.json │ ├── pile_opensubtitles-v0-loglikelihood_rolling │ ├── pile_opensubtitles-v0-res.json │ ├── pile_opensubtitles-v1-loglikelihood_rolling │ ├── pile_opensubtitles-v1-res.json │ ├── pile_openwebtext2-v0-loglikelihood_rolling │ ├── pile_openwebtext2-v0-res.json │ ├── pile_openwebtext2-v1-loglikelihood_rolling │ ├── pile_openwebtext2-v1-res.json │ ├── pile_philpapers-v0-loglikelihood_rolling │ ├── pile_philpapers-v0-res.json │ ├── pile_philpapers-v1-loglikelihood_rolling │ ├── pile_philpapers-v1-res.json │ ├── pile_pile-cc-v0-loglikelihood_rolling │ ├── pile_pile-cc-v0-res.json │ ├── pile_pile-cc-v1-loglikelihood_rolling │ ├── pile_pile-cc-v1-res.json │ ├── pile_pubmed-abstracts-v0-loglikelihood_rolling │ ├── pile_pubmed-abstracts-v0-res.json │ ├── pile_pubmed-abstracts-v1-loglikelihood_rolling │ ├── pile_pubmed-abstracts-v1-res.json │ ├── pile_pubmed-central-v0-loglikelihood_rolling │ ├── pile_pubmed-central-v0-res.json │ ├── pile_pubmed-central-v1-loglikelihood_rolling │ ├── pile_pubmed-central-v1-res.json │ ├── pile_stackexchange-v0-loglikelihood_rolling │ ├── pile_stackexchange-v0-res.json │ ├── pile_stackexchange-v1-loglikelihood_rolling │ ├── pile_stackexchange-v1-res.json │ ├── pile_ubuntu-irc-v0-loglikelihood_rolling │ ├── pile_ubuntu-irc-v0-res.json │ ├── pile_ubuntu-irc-v1-loglikelihood_rolling │ ├── pile_ubuntu-irc-v1-res.json │ ├── pile_uspto-v0-loglikelihood_rolling │ ├── pile_uspto-v0-res.json │ ├── pile_uspto-v1-loglikelihood_rolling │ ├── pile_uspto-v1-res.json │ ├── pile_wikipedia-v0-loglikelihood_rolling │ ├── pile_wikipedia-v0-res.json │ ├── pile_wikipedia-v1-loglikelihood_rolling │ ├── pile_wikipedia-v1-res.json │ ├── pile_youtubesubtitles-v0-loglikelihood_rolling │ ├── pile_youtubesubtitles-v0-res.json │ ├── pile_youtubesubtitles-v1-loglikelihood_rolling │ ├── pile_youtubesubtitles-v1-res.json │ ├── piqa-v0-loglikelihood │ ├── piqa-v0-res.json │ ├── prost-v0-loglikelihood │ ├── prost-v0-res.json │ ├── pubmedqa-v0-loglikelihood │ ├── pubmedqa-v0-res.json │ ├── qa4mre_2011-v0-loglikelihood │ ├── qa4mre_2011-v0-res.json │ ├── qa4mre_2012-v0-loglikelihood │ ├── qa4mre_2012-v0-res.json │ ├── qa4mre_2013-v0-loglikelihood │ ├── qa4mre_2013-v0-res.json │ ├── qnli-v0-loglikelihood │ ├── qnli-v0-res.json │ ├── qqp-v0-loglikelihood │ ├── qqp-v0-res.json │ ├── race-v0-loglikelihood │ ├── race-v0-res.json │ ├── random_insertion-v0-greedy_until │ ├── random_insertion-v0-res.json │ ├── record-v0-loglikelihood │ ├── record-v0-res.json │ ├── reversed_words-v0-greedy_until │ ├── reversed_words-v0-res.json │ ├── rte-v0-loglikelihood │ ├── rte-v0-res.json │ ├── sciq-v0-loglikelihood │ ├── sciq-v0-res.json │ ├── squad2-v0-greedy_until │ ├── squad2-v0-loglikelihood │ ├── squad2-v0-res.json │ ├── squad2-v1-greedy_until │ ├── squad2-v1-loglikelihood │ ├── squad2-v1-res.json │ ├── sst-v0-loglikelihood │ ├── sst-v0-res.json │ ├── swag-v0-loglikelihood │ ├── swag-v0-res.json │ ├── textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl │ ├── textsynth_test_0c1c14571add7903b89e588c8212572b95bb57b334fc0752c89a7e045a5f63ae.pkl │ ├── textsynth_test_3092d07756f3e1d010c07524cc8a2ecba7f0c19f9e39f2aaf2bf440bfe328004.pkl │ ├── textsynth_test_434076260b6af3a46b7a5eaceec3306a5872c400a3872f744280b237455a0f8e.pkl │ ├── textsynth_test_49c47ae40e11f349f2f6b492128188b1b2bc103a421c676ee4b2142a68b43516.pkl │ ├── textsynth_test_4fd8d66a6dad7f602b40e5d7dc298d6fe329299d086a4659743a41f4a4012659.pkl │ ├── textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl │ ├── textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl │ ├── textsynth_test_7209c4617547bfe17cb9e7f5f735fe35822d650aefdc5fbeeaf0c1724effbe09.pkl │ ├── textsynth_test_7afdc285388e51094e12645f305328c759574fa3ec9751631025f8ad5ebf9f3e.pkl │ ├── textsynth_test_9d5f33dbfe1e254928c89f5ed85e4c010d888065f55a8f1b863bc1eb0340a5f2.pkl │ ├── textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl │ ├── textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl │ ├── textsynth_test_e7ad1e9f52a39e1ddd1e50f3c57ffa4546728dd150a67c0a0ddc8675c04e15d1.pkl │ ├── textsynth_test_f4bfe4beb605bd52a8ab6be3c9293639e7e2261d98de58159d15ccb83131bf4e.pkl │ ├── toxigen-v0-loglikelihood │ ├── toxigen-v0-res.json │ ├── triviaqa-v0-loglikelihood │ ├── triviaqa-v0-res.json │ ├── triviaqa-v1-loglikelihood │ ├── triviaqa-v1-res.json │ ├── truthfulqa_gen-v0-greedy_until │ ├── truthfulqa_gen-v0-res.json │ ├── truthfulqa_gen-v1-greedy_until │ ├── truthfulqa_gen-v1-res.json │ ├── truthfulqa_mc-v0-loglikelihood │ ├── truthfulqa_mc-v0-res.json │ ├── truthfulqa_mc-v1-loglikelihood │ ├── truthfulqa_mc-v1-res.json │ ├── webqs-v0-loglikelihood │ ├── webqs-v0-res.json │ ├── wic-v0-loglikelihood │ ├── wic-v0-res.json │ ├── wikitext-v0-loglikelihood_rolling │ ├── wikitext-v0-res.json │ ├── wikitext-v1-loglikelihood_rolling │ ├── wikitext-v1-res.json │ ├── winogrande-v0-loglikelihood │ ├── winogrande-v0-res.json │ ├── wmt14-en-fr-v0-greedy_until │ ├── wmt14-en-fr-v0-res.json │ ├── wmt14-fr-en-v0-greedy_until │ ├── wmt14-fr-en-v0-res.json │ ├── wmt16-de-en-v0-greedy_until │ ├── wmt16-de-en-v0-res.json │ ├── wmt16-en-de-v0-greedy_until │ ├── wmt16-en-de-v0-res.json │ ├── wmt16-en-ro-v0-greedy_until │ ├── wmt16-en-ro-v0-res.json │ ├── wmt16-ro-en-v0-greedy_until │ ├── wmt16-ro-en-v0-res.json │ ├── wmt20-cs-en-v0-greedy_until │ ├── wmt20-cs-en-v0-res.json │ ├── wmt20-de-en-v0-greedy_until │ ├── wmt20-de-en-v0-res.json │ ├── wmt20-de-fr-v0-greedy_until │ ├── wmt20-de-fr-v0-res.json │ ├── wmt20-en-cs-v0-greedy_until │ ├── wmt20-en-cs-v0-res.json │ ├── wmt20-en-de-v0-greedy_until │ ├── wmt20-en-de-v0-res.json │ ├── wmt20-en-iu-v0-greedy_until │ ├── wmt20-en-iu-v0-res.json │ ├── wmt20-en-ja-v0-greedy_until │ ├── wmt20-en-ja-v0-res.json │ ├── wmt20-en-ja-v1-greedy_until │ ├── wmt20-en-ja-v1-res.json │ ├── wmt20-en-km-v0-greedy_until │ ├── wmt20-en-km-v0-res.json │ ├── wmt20-en-pl-v0-greedy_until │ ├── wmt20-en-pl-v0-res.json │ ├── wmt20-en-ps-v0-greedy_until │ ├── wmt20-en-ps-v0-res.json │ ├── wmt20-en-ru-v0-greedy_until │ ├── wmt20-en-ru-v0-res.json │ ├── wmt20-en-ta-v0-greedy_until │ ├── wmt20-en-ta-v0-res.json │ ├── wmt20-en-zh-v0-greedy_until │ ├── wmt20-en-zh-v0-res.json │ ├── wmt20-en-zh-v1-greedy_until │ ├── wmt20-en-zh-v1-res.json │ ├── wmt20-fr-de-v0-greedy_until │ ├── wmt20-fr-de-v0-res.json │ ├── wmt20-iu-en-v0-greedy_until │ ├── wmt20-iu-en-v0-res.json │ ├── wmt20-ja-en-v0-greedy_until │ ├── wmt20-ja-en-v0-res.json │ ├── wmt20-km-en-v0-greedy_until │ ├── wmt20-km-en-v0-res.json │ ├── wmt20-pl-en-v0-greedy_until │ ├── wmt20-pl-en-v0-res.json │ ├── wmt20-ps-en-v0-greedy_until │ ├── wmt20-ps-en-v0-res.json │ ├── wmt20-ru-en-v0-greedy_until │ ├── wmt20-ru-en-v0-res.json │ ├── wmt20-ta-en-v0-greedy_until │ ├── wmt20-ta-en-v0-res.json │ ├── wmt20-zh-en-v0-greedy_until │ ├── wmt20-zh-en-v0-res.json │ ├── wnli-v0-loglikelihood │ ├── wnli-v0-res.json │ ├── wnli-v1-loglikelihood │ ├── wnli-v1-res.json │ ├── wsc-v0-loglikelihood │ ├── wsc-v0-res.json │ ├── wsc273-v0-loglikelihood │ └── wsc273-v0-res.json └── main.py /.ipynb_checkpoints/INSTALLATION-checkpoint.md: -------------------------------------------------------------------------------- 1 | conda env create --file environment.yaml 2 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/LICENSE-checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/.ipynb_checkpoints/LICENSE-checkpoint -------------------------------------------------------------------------------- /.ipynb_checkpoints/README-checkpoint.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/.ipynb_checkpoints/README-checkpoint.md -------------------------------------------------------------------------------- /.ipynb_checkpoints/main-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/.ipynb_checkpoints/main-checkpoint.py -------------------------------------------------------------------------------- /INSTALLATION.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/INSTALLATION.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/README.md -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/environment.yaml -------------------------------------------------------------------------------- /lib/.ipynb_checkpoints/modelling_llama_mod-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lib/.ipynb_checkpoints/modelling_llama_mod-checkpoint.py -------------------------------------------------------------------------------- /lib/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lib/data.py -------------------------------------------------------------------------------- /lib/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lib/eval.py -------------------------------------------------------------------------------- /lib/modelling_llama_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lib/modelling_llama_mod.py -------------------------------------------------------------------------------- /lib/scoring_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lib/scoring_model.py -------------------------------------------------------------------------------- /lora_ft/.ipynb_checkpoints/finetune_lm-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/.ipynb_checkpoints/finetune_lm-checkpoint.py -------------------------------------------------------------------------------- /lora_ft/evaluate_ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/evaluate_ppl.py -------------------------------------------------------------------------------- /lora_ft/finetune_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/finetune_lm.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/.coveragerc -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/.flake8 -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/.gitignore: -------------------------------------------------------------------------------- 1 | env 2 | *.pyc 3 | data/ 4 | lm_cache 5 | .idea 6 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/.ipynb_checkpoints/README-checkpoint.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/.ipynb_checkpoints/README-checkpoint.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/.pre-commit-config.yaml -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/CITATION.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/CITATION.bib -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @haileyschoelkopf @lintangsutawika 2 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/LICENSE.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/docs/decontamination.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/docs/decontamination.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/docs/description_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/docs/description_guide.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/docs/task_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/docs/task_guide.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/docs/task_table.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/docs/task_table.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/ignore.txt: -------------------------------------------------------------------------------- 1 | ROUGE 2 | rouge 3 | nin 4 | maka 5 | mor 6 | te 7 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval.egg-info/PKG-INFO -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval.egg-info/requires.txt -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | lm_eval 2 | scripts 3 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/.ipynb_checkpoints/base-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/.ipynb_checkpoints/base-checkpoint.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/.ipynb_checkpoints/evaluator-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/.ipynb_checkpoints/evaluator-checkpoint.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/base.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/asdiv/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/asdiv/asdiv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/asdiv/asdiv.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/asdiv/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/asdiv/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/dyck_languages.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/dyck_languages.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/hyperbaton.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/hyperbaton.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/navigate.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/navigate.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/ruin_names.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/ruin_names.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/snarks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/bigbench_resources/snarks.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/coqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/coqa/coqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/coqa/coqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/coqa/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/coqa/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/drop/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/drop/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/drop/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/drop/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/drop/drop.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/headqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/headqa/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/headqa/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/headqa/headqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/headqa/headqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_ethics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_ethics/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_ethics/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_ethics/hendrycks_ethics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_ethics/hendrycks_ethics.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_math/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_math/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_math/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_math/hendrycks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/hendrycks_math/hendrycks_math.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/logiqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/logiqa/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/logiqa/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/logiqa/logiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/logiqa/logiqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/mutual/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/mutual/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/mutual/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/mutual/mutual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/mutual/mutual.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/pile/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/pile/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/pile/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/pile/pile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/pile/pile.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/quac/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/quac/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/quac/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/quac/quac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/quac/quac.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/sat_analogies/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/sat_analogies/sat_analogies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/sat_analogies/sat_analogies.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/unscramble/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/unscramble/dataset_infos.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/unscramble/dataset_infos.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/datasets/unscramble/unscramble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/datasets/unscramble/unscramble.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/decontamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/decontamination/archiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/decontamination/archiver.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/decontamination/decontaminate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/decontamination/decontaminate.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/decontamination/janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/decontamination/janitor.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/evaluator.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/metrics.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/models/__init__.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/models/anthropic_llms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/models/anthropic_llms.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/models/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/models/dummy.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/models/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/models/gpt2.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/models/gpt3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/models/gpt3.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/models/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/models/huggingface.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/models/textsynth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/models/textsynth.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/.ipynb_checkpoints/__init__-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/.ipynb_checkpoints/__init__-checkpoint.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/.ipynb_checkpoints/arc-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/.ipynb_checkpoints/arc-checkpoint.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/.ipynb_checkpoints/gsm8k-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/.ipynb_checkpoints/gsm8k-checkpoint.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/__init__.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/anli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/anli.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/arc.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/arithmetic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/arithmetic.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/asdiv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/asdiv.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/bigbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/bigbench.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/blimp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/blimp.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/cbt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/cbt.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/coqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/coqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/crowspairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/crowspairs.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/drop.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/glue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/glue.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/gsm8k.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/headqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/headqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/hellaswag.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/hendrycks_ethics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/hendrycks_ethics.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/hendrycks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/hendrycks_math.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/hendrycks_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/hendrycks_test.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/json.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/lambada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/lambada.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/lambada_cloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/lambada_cloze.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/lambada_multilingual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/lambada_multilingual.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/logiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/logiqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/mathqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/mathqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/mc_taco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/mc_taco.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/mgsm.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/mutual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/mutual.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/naturalqs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/naturalqs.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/openbookqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/openbookqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/pawsx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/pawsx.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/pile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/pile.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/piqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/piqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/prost.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/prost.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/pubmedqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/pubmedqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/qa4mre.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/qa4mre.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/qasper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/qasper.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/quac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/quac.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/race.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/race.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/sat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/sat.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/sciq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/sciq.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/squad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/squad.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/storycloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/storycloze.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/superglue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/superglue.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/swag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/swag.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/toxigen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/toxigen.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/translation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/translation.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/triviaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/triviaqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/truthfulqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/truthfulqa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/unscramble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/unscramble.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/webqs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/webqs.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/wikitext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/wikitext.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/winogrande.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/winogrande.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/wsc273.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/wsc273.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/xcopa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/xcopa.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/xnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/xnli.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/xstorycloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/xstorycloze.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/tasks/xwinograd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/tasks/xwinograd.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/lm_eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/lm_eval/utils.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/main.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/pile_statistics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/pile_statistics.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_xwinograd_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b1/bloom-1b1_xwinograd_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_xwinograd_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-1b7/bloom-1b7_xwinograd_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_xwinograd_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-3b/bloom-3b_xwinograd_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-560m/bloom-560m_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_bbh_3-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_bbh_3-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_xwinograd_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/bloom/bloom-7b1/bloom-7b1_xwinograd_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_bbh_3-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_bbh_3-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_blimp_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_blimp_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_glue_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_glue_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_lambada_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_lambada_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_mmlu_5-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_mmlu_5-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_superglue_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_superglue_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_xwinograd_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-13B/llama-13B_xwinograd_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_bbh_3-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_bbh_3-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_mmlu_5-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_mmlu_5-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_xwinograd_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-30B/llama-30B_xwinograd_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_anli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_anli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_arithmetic_5-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_arithmetic_5-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_bbh_3-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_bbh_3-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_blimp_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_blimp_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_glue_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_glue_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_lambada_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_lambada_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_mmlu_5-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_mmlu_5-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_unscramble_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_unscramble_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/llama/llama-7B/llama-7B_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_anli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_anli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_arithmetic_5-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_arithmetic_5-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_bbh_3-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_bbh_3-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_blimp_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_blimp_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_glue_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_glue_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_lambada_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_lambada_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_mmlu_5-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_mmlu_5-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_superglue_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_superglue_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_unscramble_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_unscramble_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xstory_cloze_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xstory_cloze_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xwinograd_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/mpt/mpt-7b/mpt-7b_xwinograd_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-1.3b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-1.3b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-1.3b/opt-1.3b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-1.3b/opt-1.3b.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-125m/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-125m/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-125m/opt-125m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-125m/opt-125m.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-13b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-13b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-13b/opt-13b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-13b/opt-13b.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-2.7b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-2.7b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-2.7b/opt-2.7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-2.7b/opt-2.7b.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-30b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-30b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-30b/opt-30b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-30b/opt-30b.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-350m/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-350m/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-350m/opt-350m.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-350m/opt-350m.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-6.7b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-6.7b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-6.7b/opt-6.7b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-6.7b/opt-6.7b.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-66b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-66b/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/opt/opt-66b/opt-66b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/opt/opt-66b/opt-66b.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-1.7B/xglm-1.7B_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/xglm-2.9B_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/xglm-2.9B_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/xglm-2.9B_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/xglm-2.9B_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/xglm-2.9B_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-2.9B/xglm-2.9B_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-4.5B/xglm-4.5B_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-564M/xglm-564M_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_gsm8k_8-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_gsm8k_8-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_pawsx_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_pawsx_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_xcopa_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_xcopa_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_xnli_0-shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/results/xglm/xglm-7.5B/xglm-7.5B_xnli_0-shot.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/clean_training_data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/clean_training_data/README.md -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/clean_training_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/clean_training_data/compress_and_package.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/clean_training_data/compress_and_package.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/clean_training_data/generate_13_grams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/clean_training_data/generate_13_grams.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/clean_training_data/investigate_pile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/clean_training_data/investigate_pile.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/clean_training_data/janitor_util.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/clean_training_data/janitor_util.cpp -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/clean_training_data/sort_13_gram_buckets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/clean_training_data/sort_13_gram_buckets.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/cost_estimate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/cost_estimate.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/get_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/get_prompts.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/make_gpt2_test_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/make_gpt2_test_cases.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/make_table_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/make_table_results.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/make_table_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/make_table_tasks.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/regression.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/scripts/write_out.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/scripts/write_out.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/setup.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/templates/new_multiple_choice_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/templates/new_multiple_choice_task.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/templates/new_task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/templates/new_task.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_description_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_description_dict.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_evaluator.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_generate_13_grams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_generate_13_grams.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_janitor.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_misc.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_models.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_tasks.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_utils.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/test_version_stable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/test_version_stable.py -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anagrams1-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anagrams1-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anagrams1-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anagrams1-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anagrams2-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anagrams2-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anagrams2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anagrams2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anli_r1-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anli_r1-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anli_r1-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anli_r1-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anli_r2-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anli_r2-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anli_r2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anli_r2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anli_r3-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anli_r3-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/anli_r3-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/anli_r3-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arc_challenge-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arc_challenge-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arc_challenge-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arc_challenge-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arc_easy-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arc_easy-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arc_easy-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_1dc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_1dc-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_1dc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_1dc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2da-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2da-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2da-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2da-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2dm-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_2ds-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3da-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3ds-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3ds-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3ds-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_3ds-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4da-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4ds-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4ds-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4ds-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_4ds-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5da-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5da-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5da-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5da-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5ds-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5ds-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5ds-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/arithmetic_5ds-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_adjunct_island-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_causative-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_causative-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_causative-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_causative-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_complex_NP_island-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_complex_NP_island-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_drop_argument-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_inchoative-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_inchoative-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_inchoative-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_inchoative-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_intransitive-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_intransitive-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_intransitive-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_intransitive-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_1-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_1-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_1-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_1-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_npi_present_2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_only_npi_scope-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_only_npi_scope-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_only_npi_scope-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_only_npi_scope-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_1-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_1-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_1-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_1-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_passive_2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_principle_A_case_1-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_principle_A_case_1-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_principle_A_case_2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_principle_A_case_2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_1-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_1-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_tough_vs_raising_2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_transitive-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_island-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_island-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_island-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_island-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/boolq-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/boolq-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/boolq-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/boolq-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/boolq-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/boolq-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/boolq-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/boolq-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cb-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cb-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cb-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cb-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cola-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cola-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/copa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/copa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/coqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/coqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/coqa-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/coqa-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english_age-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english_age-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english_autre-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_english_autre-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french_age-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french_age-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french_autre-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french_autre-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french_gender-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/crows_pairs_french_gender-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cycle_letters-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cycle_letters-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/cycle_letters-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/cycle_letters-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/drop-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/drop-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/drop-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/drop-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_cm-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_cm-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_cm-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_cm-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_deontology-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_justice-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_justice-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_justice-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_justice-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_utilitarianism-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_utilitarianism-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_virtue-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_virtue-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/ethics_virtue-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/ethics_virtue-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/gsm8k-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/gsm8k-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/headqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/headqa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/headqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/headqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/headqa_en-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/headqa_en-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/headqa_en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/headqa_en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/headqa_es-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/headqa_es-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/headqa_es-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/headqa_es-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hellaswag-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hellaswag-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hellaswag-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hellaswag-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-anatomy-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-anatomy-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-astronomy-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-astronomy-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-human_aging-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-human_aging-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-management-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-management-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-marketing-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-marketing-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-nutrition-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-nutrition-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-philosophy-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-philosophy-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-prehistory-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-prehistory-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-sociology-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-sociology-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-virology-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/hendrycksTest-virology-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-ar-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-ar-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-ar-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-ar-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-en-ar-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-en-ar-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-en-ar-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/iwslt17-en-ar-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_cloze-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_cloze-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_cloze-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_cloze-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_de-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_de-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_de-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_de-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_en-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_en-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_es-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_fr-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_fr-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_fr-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_fr-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_it-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_it-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_it-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_mt_it-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_cloze-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_cloze-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_cloze-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_cloze-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_de-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_de-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_de-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_de-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_en-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_en-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_es-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_es-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_es-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_es-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_fr-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_it-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_it-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_it-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_openai_mt_it-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_standard-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_standard-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_standard-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_standard-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/lambada_standard_cloze-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/lambada_standard_cloze-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/logiqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/logiqa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/logiqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/logiqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_algebra-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_counting_and_prob-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_geometry-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_intermediate_algebra-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_num_theory-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_prealgebra-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/math_precalc-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mathqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mathqa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mathqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mathqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mc_taco-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mc_taco-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mc_taco-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mc_taco-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mnli-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mnli-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mnli_mismatched-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mrpc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mrpc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/multirc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/multirc-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/multirc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/multirc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/multirc-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/multirc-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/multirc-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/multirc-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/mutual_plus-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/openbookqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/openbookqa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/openbookqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/openbookqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_arxiv-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_bookcorpus2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_bookcorpus2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_bookcorpus2-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_bookcorpus2-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_books3-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_dm-mathematics-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_enron-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_europarl-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_europarl-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_europarl-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_europarl-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_freelaw-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_github-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_gutenberg-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_gutenberg-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_gutenberg-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_gutenberg-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_hackernews-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_hackernews-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_hackernews-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_hackernews-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_nih-exporter-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_nih-exporter-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_nih-exporter-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_nih-exporter-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_opensubtitles-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_openwebtext2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_openwebtext2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_openwebtext2-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_openwebtext2-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_philpapers-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_philpapers-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_philpapers-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_philpapers-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pile-cc-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-abstracts-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-abstracts-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-abstracts-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-abstracts-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-central-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-central-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-central-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_pubmed-central-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_stackexchange-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_stackexchange-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_stackexchange-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_stackexchange-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_ubuntu-irc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_ubuntu-irc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_ubuntu-irc-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_ubuntu-irc-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_uspto-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_wikipedia-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_wikipedia-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_wikipedia-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_wikipedia-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_youtubesubtitles-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_youtubesubtitles-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pile_youtubesubtitles-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pile_youtubesubtitles-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/piqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/piqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/prost-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/prost-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/prost-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/prost-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pubmedqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pubmedqa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/pubmedqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/pubmedqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2011-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2011-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2011-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2011-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2012-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2013-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2013-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2013-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qa4mre_2013-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qnli-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qnli-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/qqp-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/qqp-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/race-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/race-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/race-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/race-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/random_insertion-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/random_insertion-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/random_insertion-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/random_insertion-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/record-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/record-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/record-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/record-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/reversed_words-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/reversed_words-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/reversed_words-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/reversed_words-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/rte-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/rte-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/rte-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/rte-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/sciq-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/sciq-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/sciq-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/sciq-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/squad2-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/squad2-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/squad2-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/squad2-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/squad2-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/squad2-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/squad2-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/squad2-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/squad2-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/squad2-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/squad2-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/squad2-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/sst-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/sst-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/sst-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/swag-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/swag-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/swag-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/toxigen-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/toxigen-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/toxigen-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/toxigen-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/triviaqa-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_gen-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/truthfulqa_mc-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/webqs-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/webqs-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/webqs-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/webqs-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wic-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wic-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wic-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wic-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v0-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v0-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v1-loglikelihood_rolling: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v1-loglikelihood_rolling -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wikitext-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/winogrande-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/winogrande-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/winogrande-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/winogrande-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt14-en-fr-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt14-en-fr-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt14-en-fr-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt14-en-fr-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt14-fr-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt14-fr-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt14-fr-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt14-fr-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-de-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-de-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-de-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-de-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-de-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-de-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-de-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-de-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-ro-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-ro-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-ro-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-en-ro-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-ro-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-ro-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt16-ro-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt16-ro-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-cs-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-cs-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-cs-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-cs-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-fr-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-fr-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-fr-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-de-fr-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-cs-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-cs-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-cs-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-cs-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-de-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-de-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-de-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-de-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-iu-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-iu-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-iu-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-iu-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ja-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-km-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-km-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-km-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-km-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-pl-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-pl-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-pl-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-pl-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ps-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ps-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ps-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ps-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ru-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ru-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ru-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ru-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ta-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ta-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ta-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-ta-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v1-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-en-zh-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-fr-de-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-fr-de-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-fr-de-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-fr-de-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-iu-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-iu-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-iu-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-iu-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ja-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ja-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ja-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ja-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-km-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-pl-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-pl-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-pl-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-pl-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ps-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ps-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ps-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ps-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ru-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ru-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ru-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ru-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ta-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ta-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ta-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-ta-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-zh-en-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-zh-en-v0-greedy_until -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wmt20-zh-en-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wmt20-zh-en-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wnli-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wnli-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wnli-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wnli-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wnli-v1-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wnli-v1-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wnli-v1-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wsc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wsc-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wsc-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wsc-v0-res.json -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wsc273-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wsc273-v0-loglikelihood -------------------------------------------------------------------------------- /lora_ft/lm-evaluation-harness/tests/testdata/wsc273-v0-res.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/lora_ft/lm-evaluation-harness/tests/testdata/wsc273-v0-res.json -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ldery/Bonsai/HEAD/main.py --------------------------------------------------------------------------------