├── .gitignore ├── .project ├── .pydevproject ├── LICENSE ├── README.md ├── README_en.md ├── apps ├── deprecated │ ├── api.py │ ├── async_client.py │ ├── client.py │ ├── tigerbot_chatapi.py │ └── web_api_demo.py ├── exllamav2_web_demo.py └── web_demo.py ├── docs └── openai_zh.md ├── image ├── .DS_Store ├── 70b-chat-example.jpg ├── api │ ├── .DS_Store │ ├── case-1.png │ ├── case-2.png │ ├── case-3.png │ ├── case-4.png │ ├── case-5.png │ ├── case-6.png │ ├── demo │ │ ├── chat.png │ │ ├── chat2.png │ │ ├── embedding.png │ │ ├── finetune.png │ │ ├── pdf2text.png │ │ ├── plugin.png │ │ ├── summarization.png │ │ └── text2image.png │ ├── rethink │ │ ├── case-1.png │ │ └── case-3.gif │ └── search │ │ └── demo.png ├── auto-valuation-1.png ├── auto-valuation-2.png ├── code-lang-type-2.png ├── code-lang-type.png ├── contact.jpg ├── eval_base_1214.png ├── eval_chat_1214.png ├── logo_core.png ├── loss-70b-chat-v3-valid.jpg ├── loss-70b-chat-v3.jpg ├── loss-curve-4.jpg ├── loss-curve.jpg ├── next-tok-acc-curve.jpg ├── next-tok-acc.jpg ├── peft_eval_loss.png ├── peft_metrics.png ├── peft_train_loss.png ├── pretrain-2.png ├── pretrain.png ├── pretrain_v2.png ├── qiyewechat.png ├── search-link.jpg ├── search_api.jpg ├── search_api.png ├── terminal_case.jpeg ├── tgi-demo.mp4 ├── tiger.jpg ├── tigerbot-idiom.jpg ├── tigerbot-idiom2.jpg ├── tigerbot-idiom3.jpg ├── tigerbot_chatapi_sample.png ├── zh-books-2.png └── zh-books.png ├── infer.py ├── opencompass ├── .DS_Store ├── LICENSE ├── README.md ├── README_detail.md ├── README_zh-CN.md ├── configs │ ├── datasets │ │ ├── ARC_c │ │ │ ├── ARC_c_gen.py │ │ │ ├── ARC_c_gen_1e0de5.py │ │ │ ├── ARC_c_ppl.py │ │ │ ├── ARC_c_ppl_a450bd.py │ │ │ └── ARC_c_ppl_d52a21.py │ │ ├── ARC_e │ │ │ ├── ARC_e_gen.py │ │ │ ├── ARC_e_gen_1e0de5.py │ │ │ ├── ARC_e_ppl.py │ │ │ ├── ARC_e_ppl_a450bd.py │ │ │ └── ARC_e_ppl_d52a21.py │ │ ├── CLUE_C3 │ │ │ ├── CLUE_C3_gen.py │ │ │ ├── CLUE_C3_gen_8c358f.py │ │ │ ├── CLUE_C3_ppl.py │ │ │ ├── CLUE_C3_ppl_56b537.py │ 
│ │ └── CLUE_C3_ppl_e24a31.py │ │ ├── CLUE_CMRC │ │ │ ├── CLUE_CMRC_gen.py │ │ │ ├── CLUE_CMRC_gen_1bd3c8.py │ │ │ ├── CLUE_CMRC_gen_3749cd.py │ │ │ ├── CLUE_CMRC_gen_8484b9.py │ │ │ ├── CLUE_CMRC_gen_941108.py │ │ │ └── CLUE_CMRC_gen_chat.py │ │ ├── CLUE_DRCD │ │ │ ├── CLUE_DRCD_gen.py │ │ │ ├── CLUE_DRCD_gen_1bd3c8.py │ │ │ ├── CLUE_DRCD_gen_3749cd.py │ │ │ ├── CLUE_DRCD_gen_8484b9.py │ │ │ └── CLUE_DRCD_gen_941108.py │ │ ├── CLUE_afqmc │ │ │ ├── CLUE_afqmc_gen.py │ │ │ ├── CLUE_afqmc_gen_901306.py │ │ │ ├── CLUE_afqmc_ppl.py │ │ │ ├── CLUE_afqmc_ppl_378c5b.py │ │ │ ├── CLUE_afqmc_ppl_6507d7.py │ │ │ └── CLUE_afqmc_ppl_7b0c1e.py │ │ ├── CLUE_cmnli │ │ │ ├── CLUE_cmnli_gen.py │ │ │ ├── CLUE_cmnli_gen_1abf97.py │ │ │ ├── CLUE_cmnli_gen_51e956.py │ │ │ ├── CLUE_cmnli_ppl.py │ │ │ ├── CLUE_cmnli_ppl_98dd6e.py │ │ │ ├── CLUE_cmnli_ppl_ef69e7.py │ │ │ └── CLUE_cmnli_ppl_fdc6de.py │ │ ├── CLUE_ocnli │ │ │ ├── CLUE_ocnli_gen.py │ │ │ ├── CLUE_ocnli_gen_51e956.py │ │ │ ├── CLUE_ocnli_gen_c4cb6c.py │ │ │ ├── CLUE_ocnli_ppl.py │ │ │ ├── CLUE_ocnli_ppl_98dd6e.py │ │ │ ├── CLUE_ocnli_ppl_ef69e7.py │ │ │ └── CLUE_ocnli_ppl_fdc6de.py │ │ ├── FewCLUE_bustm │ │ │ ├── FewCLUE_bustm_gen.py │ │ │ ├── FewCLUE_bustm_gen_634f41.py │ │ │ ├── FewCLUE_bustm_ppl.py │ │ │ ├── FewCLUE_bustm_ppl_4b16c0.py │ │ │ ├── FewCLUE_bustm_ppl_9ef540.py │ │ │ └── FewCLUE_bustm_ppl_e53034.py │ │ ├── FewCLUE_chid │ │ │ ├── FewCLUE_chid_gen.py │ │ │ ├── FewCLUE_chid_gen_0a29a2.py │ │ │ ├── FewCLUE_chid_ppl.py │ │ │ ├── FewCLUE_chid_ppl_8f2872.py │ │ │ └── FewCLUE_chid_ppl_acccb5.py │ │ ├── FewCLUE_cluewsc │ │ │ ├── FewCLUE_cluewsc_gen.py │ │ │ ├── FewCLUE_cluewsc_gen_c68933.py │ │ │ ├── FewCLUE_cluewsc_ppl.py │ │ │ ├── FewCLUE_cluewsc_ppl_12e4e0.py │ │ │ ├── FewCLUE_cluewsc_ppl_4284a0.py │ │ │ └── FewCLUE_cluewsc_ppl_868415.py │ │ ├── FewCLUE_csl │ │ │ ├── FewCLUE_csl_gen.py │ │ │ ├── FewCLUE_csl_gen_28b223.py │ │ │ ├── FewCLUE_csl_gen_87f4a8.py │ │ │ ├── FewCLUE_csl_ppl.py │ │ │ ├── 
FewCLUE_csl_ppl_769f8d.py │ │ │ └── FewCLUE_csl_ppl_841b62.py │ │ ├── FewCLUE_eprstmt │ │ │ ├── FewCLUE_eprstmt_gen.py │ │ │ ├── FewCLUE_eprstmt_gen_740ea0.py │ │ │ ├── FewCLUE_eprstmt_ppl.py │ │ │ ├── FewCLUE_eprstmt_ppl_1ce587.py │ │ │ └── FewCLUE_eprstmt_ppl_f1e631.py │ │ ├── FewCLUE_ocnli_fc │ │ │ ├── FewCLUE_ocnli_fc_gen.py │ │ │ ├── FewCLUE_ocnli_fc_gen_f97a97.py │ │ │ ├── FewCLUE_ocnli_fc_ppl.py │ │ │ ├── FewCLUE_ocnli_fc_ppl_9e8b3d.py │ │ │ └── FewCLUE_ocnli_fc_ppl_c08300.py │ │ ├── FewCLUE_tnews │ │ │ ├── FewCLUE_tnews_gen.py │ │ │ ├── FewCLUE_tnews_gen_b90e4a.py │ │ │ ├── FewCLUE_tnews_ppl.py │ │ │ ├── FewCLUE_tnews_ppl_7d1c07.py │ │ │ ├── FewCLUE_tnews_ppl_d10e8a.py │ │ │ └── FewCLUE_tnews_ppl_fff486.py │ │ ├── GaokaoBench │ │ │ ├── GaokaoBench_gen.py │ │ │ ├── GaokaoBench_gen_5cfe9e.py │ │ │ ├── GaokaoBench_mixed.py │ │ │ └── GaokaoBench_mixed_f2038e.py │ │ ├── PJExam │ │ │ ├── PJExam_gen.py │ │ │ └── PJExam_gen_8cd97c.py │ │ ├── SuperGLUE_AX_b │ │ │ ├── SuperGLUE_AX_b_gen.py │ │ │ ├── SuperGLUE_AX_b_gen_4dfefa.py │ │ │ ├── SuperGLUE_AX_b_ppl.py │ │ │ ├── SuperGLUE_AX_b_ppl_0748aa.py │ │ │ └── SuperGLUE_AX_b_ppl_6db806.py │ │ ├── SuperGLUE_AX_g │ │ │ ├── SuperGLUE_AX_g_gen.py │ │ │ ├── SuperGLUE_AX_g_gen_68aac7.py │ │ │ ├── SuperGLUE_AX_g_ppl.py │ │ │ ├── SuperGLUE_AX_g_ppl_50f8f6.py │ │ │ └── SuperGLUE_AX_g_ppl_66caf3.py │ │ ├── SuperGLUE_BoolQ │ │ │ ├── SuperGLUE_BoolQ_gen.py │ │ │ ├── SuperGLUE_BoolQ_gen_883d50.py │ │ │ ├── SuperGLUE_BoolQ_ppl.py │ │ │ ├── SuperGLUE_BoolQ_ppl_314b96.py │ │ │ ├── SuperGLUE_BoolQ_ppl_4da4db.py │ │ │ └── SuperGLUE_BoolQ_ppl_9619db.py │ │ ├── SuperGLUE_CB │ │ │ ├── SuperGLUE_CB_gen.py │ │ │ ├── SuperGLUE_CB_gen_854c6c.py │ │ │ ├── SuperGLUE_CB_ppl.py │ │ │ ├── SuperGLUE_CB_ppl_0143fe.py │ │ │ └── SuperGLUE_CB_ppl_11c175.py │ │ ├── SuperGLUE_COPA │ │ │ ├── SuperGLUE_COPA_gen.py │ │ │ ├── SuperGLUE_COPA_gen_91ca53.py │ │ │ ├── SuperGLUE_COPA_ppl.py │ │ │ ├── SuperGLUE_COPA_ppl_54058d.py │ │ │ ├── 
SuperGLUE_COPA_ppl_5c24f1.py │ │ │ └── SuperGLUE_COPA_ppl_9f3618.py │ │ ├── SuperGLUE_MultiRC │ │ │ ├── SuperGLUE_MultiRC_gen.py │ │ │ ├── SuperGLUE_MultiRC_gen_27071f.py │ │ │ ├── SuperGLUE_MultiRC_ppl.py │ │ │ ├── SuperGLUE_MultiRC_ppl_866273.py │ │ │ └── SuperGLUE_MultiRC_ppl_ced824.py │ │ ├── SuperGLUE_RTE │ │ │ ├── SuperGLUE_RTE_gen.py │ │ │ ├── SuperGLUE_RTE_gen_68aac7.py │ │ │ ├── SuperGLUE_RTE_ppl.py │ │ │ ├── SuperGLUE_RTE_ppl_50f8f6.py │ │ │ └── SuperGLUE_RTE_ppl_66caf3.py │ │ ├── SuperGLUE_ReCoRD │ │ │ ├── SuperGLUE_ReCoRD_gen.py │ │ │ ├── SuperGLUE_ReCoRD_gen_0f7784.py │ │ │ └── SuperGLUE_ReCoRD_gen_30dea0.py │ │ ├── SuperGLUE_WSC │ │ │ ├── SuperGLUE_WSC_gen.py │ │ │ ├── SuperGLUE_WSC_gen_6dc406.py │ │ │ ├── SuperGLUE_WSC_gen_8a881c.py │ │ │ ├── SuperGLUE_WSC_ppl.py │ │ │ ├── SuperGLUE_WSC_ppl_003529.py │ │ │ ├── SuperGLUE_WSC_ppl_d0f531.py │ │ │ └── SuperGLUE_WSC_ppl_f37e78.py │ │ ├── SuperGLUE_WiC │ │ │ ├── SuperGLUE_WiC_gen.py │ │ │ ├── SuperGLUE_WiC_gen_d06864.py │ │ │ ├── SuperGLUE_WiC_ppl.py │ │ │ ├── SuperGLUE_WiC_ppl_312de9.py │ │ │ ├── SuperGLUE_WiC_ppl_3fb6fd.py │ │ │ └── SuperGLUE_WiC_ppl_c926be.py │ │ ├── TheoremQA │ │ │ ├── TheoremQA_gen.py │ │ │ ├── TheoremQA_gen_424e0a.py │ │ │ ├── TheoremQA_gen_7009de.py │ │ │ └── TheoremQA_gen_ef26ca.py │ │ ├── XCOPA │ │ │ ├── XCOPA_ppl.py │ │ │ └── XCOPA_ppl_54058d.py │ │ ├── XLSum │ │ │ ├── XLSum_gen.py │ │ │ └── XLSum_gen_2bb71c.py │ │ ├── Xsum │ │ │ ├── Xsum_gen.py │ │ │ ├── Xsum_gen_31397e.py │ │ │ └── Xsum_gen_8ea5f8.py │ │ ├── agieval │ │ │ ├── agieval_gen.py │ │ │ ├── agieval_gen_0a9ace.py │ │ │ ├── agieval_gen_397d81.py │ │ │ ├── agieval_mixed.py │ │ │ └── agieval_mixed_2f14ad.py │ │ ├── apps │ │ │ ├── apps_gen.py │ │ │ ├── apps_gen_5b4254.py │ │ │ ├── apps_gen_7fbb95.py │ │ │ └── apps_gen_b4dee3.py │ │ ├── bbh │ │ │ ├── bbh_gen.py │ │ │ └── bbh_gen_5b92b0.py │ │ ├── ceval │ │ │ ├── ceval_gen.py │ │ │ ├── ceval_gen_2daf24.py │ │ │ ├── ceval_gen_5f30c7.py │ │ │ ├── ceval_ppl.py │ │ │ ├── 
ceval_ppl_578f8d.py │ │ │ └── ceval_ppl_93e5ce.py │ │ ├── civilcomments │ │ │ ├── civilcomments_ppl.py │ │ │ ├── civilcomments_ppl_6a2561.py │ │ │ └── civilcomments_ppl_a3c5fd.py │ │ ├── collections │ │ │ ├── base_medium.py │ │ │ ├── base_small.py │ │ │ ├── chat_medium.py │ │ │ ├── chat_small.py │ │ │ └── example.py │ │ ├── commonsenseqa │ │ │ ├── commonsenseqa_gen.py │ │ │ ├── commonsenseqa_gen_c946f2.py │ │ │ ├── commonsenseqa_ppl.py │ │ │ ├── commonsenseqa_ppl_3e9f2d.py │ │ │ ├── commonsenseqa_ppl_5545e2.py │ │ │ └── commonsenseqa_ppl_716f78.py │ │ ├── crowspairs │ │ │ ├── crowspairs_gen.py │ │ │ ├── crowspairs_gen_02b6c1.py │ │ │ ├── crowspairs_ppl.py │ │ │ ├── crowspairs_ppl_47f211.py │ │ │ └── crowspairs_ppl_e811e1.py │ │ ├── cvalues │ │ │ ├── cvalues_responsibility_gen.py │ │ │ └── cvalues_responsibility_gen_4aec9f.py │ │ ├── drop │ │ │ ├── drop_gen.py │ │ │ └── drop_gen_599f07.py │ │ ├── flores │ │ │ ├── flores_gen.py │ │ │ ├── flores_gen_806ede.py │ │ │ └── flores_gen_aad4fd.py │ │ ├── glm │ │ │ ├── C3.py │ │ │ ├── GaokaoBench.py │ │ │ ├── afqmc.py │ │ │ ├── agieval.py │ │ │ ├── ceval.py │ │ │ ├── chid.py │ │ │ ├── cmnli.py │ │ │ ├── csl.py │ │ │ ├── humaneval.py │ │ │ ├── mmlu.py │ │ │ ├── nq.py │ │ │ ├── ocnli.py │ │ │ ├── tnews.py │ │ │ └── triviaqa.py │ │ ├── govrepcrs │ │ │ ├── govrepcrs_gen.py │ │ │ ├── govrepcrs_gen_aa5eb3.py │ │ │ └── govrepcrs_gen_db7930.py │ │ ├── gsm8k │ │ │ ├── gsm8k_gen.py │ │ │ ├── gsm8k_gen_1d7fe4.py │ │ │ ├── gsm8k_gen_1dce88.py │ │ │ └── gsm8k_gen_e9e91e.py │ │ ├── hellaswag │ │ │ ├── hellaswag_gen.py │ │ │ ├── hellaswag_gen_6faab5.py │ │ │ ├── hellaswag_ppl.py │ │ │ ├── hellaswag_ppl_47bff9.py │ │ │ └── hellaswag_ppl_9dbb12.py │ │ ├── humaneval │ │ │ ├── humaneval_gen.py │ │ │ ├── humaneval_gen_6f294d.py │ │ │ ├── humaneval_gen_8e312c.py │ │ │ ├── humaneval_gen_fd5822.py │ │ │ └── humaneval_gen_ff7054.py │ │ ├── iwslt2017 │ │ │ ├── iwslt2017_gen.py │ │ │ ├── iwslt2017_gen_69ce16.py │ │ │ ├── iwslt2017_gen_b4a814.py │ │ │ 
└── iwslt2017_gen_d0ebd1.py │ │ ├── jigsawmultilingual │ │ │ ├── jigsawmultilingual_ppl.py │ │ │ ├── jigsawmultilingual_ppl_1af0ae.py │ │ │ └── jigsawmultilingual_ppl_fe50d8.py │ │ ├── lambada │ │ │ ├── lambada_gen.py │ │ │ ├── lambada_gen_217e11.py │ │ │ └── lambada_gen_8b48a5.py │ │ ├── lcsts │ │ │ ├── lcsts_gen.py │ │ │ ├── lcsts_gen_8ee1fe.py │ │ │ └── lcsts_gen_9b0b89.py │ │ ├── math │ │ │ ├── math_gen.py │ │ │ ├── math_gen_265cce.py │ │ │ ├── math_gen_559593.py │ │ │ └── math_gen_5e8458.py │ │ ├── mbpp │ │ │ ├── mbpp_gen.py │ │ │ ├── mbpp_gen_1e1056.py │ │ │ ├── mbpp_gen_6590b0.py │ │ │ └── mbpp_gen_78c1bc.py │ │ ├── mmlu │ │ │ ├── mmlu_gen.py │ │ │ ├── mmlu_gen_23a9a9.py │ │ │ ├── mmlu_gen_5d1409.py │ │ │ ├── mmlu_gen_79e572.py │ │ │ ├── mmlu_gen_a484b3.py │ │ │ ├── mmlu_ppl.py │ │ │ └── mmlu_ppl_ac766d.py │ │ ├── narrativeqa │ │ │ ├── narrativeqa_gen.py │ │ │ ├── narrativeqa_gen_a2d88a.py │ │ │ └── narrativeqa_gen_db6413.py │ │ ├── nq │ │ │ ├── nq_gen.py │ │ │ ├── nq_gen_2463e2.py │ │ │ ├── nq_gen_3dcea1.py │ │ │ ├── nq_gen_68c1c6.py │ │ │ └── nq_gen_c788f6.py │ │ ├── obqa │ │ │ ├── obqa_gen.py │ │ │ ├── obqa_gen_9069e4.py │ │ │ ├── obqa_ppl.py │ │ │ ├── obqa_ppl_1defe8.py │ │ │ └── obqa_ppl_c7c154.py │ │ ├── piqa │ │ │ ├── piqa_gen.py │ │ │ ├── piqa_gen_1194eb.py │ │ │ ├── piqa_ppl.py │ │ │ ├── piqa_ppl_1cf9f0.py │ │ │ └── piqa_ppl_3431ea.py │ │ ├── qabench │ │ │ ├── qabench_gen.py │ │ │ └── qabench_gen_353ae7.py │ │ ├── qasper │ │ │ ├── qasper_gen.py │ │ │ ├── qasper_gen_a2d88a.py │ │ │ └── qasper_gen_db6413.py │ │ ├── qaspercut │ │ │ ├── qaspercut_gen.py │ │ │ ├── qaspercut_gen_a2d88a.py │ │ │ └── qaspercut_gen_db6413.py │ │ ├── race │ │ │ ├── race_gen.py │ │ │ ├── race_gen_69ee4f.py │ │ │ ├── race_gen_9302a5.py │ │ │ ├── race_ppl.py │ │ │ ├── race_ppl_a138cd.py │ │ │ └── race_ppl_abed12.py │ │ ├── realtoxicprompts │ │ │ ├── realtoxicprompts_gen.py │ │ │ ├── realtoxicprompts_gen_7605e4.py │ │ │ └── realtoxicprompts_gen_ac723c.py │ │ ├── safety │ │ │ ├── 
safety_gen.py │ │ │ └── safety_gen_7ce197.py │ │ ├── siqa │ │ │ ├── siqa_gen.py │ │ │ ├── siqa_gen_e78df3.py │ │ │ ├── siqa_ppl.py │ │ │ ├── siqa_ppl_42bc6e.py │ │ │ ├── siqa_ppl_7845b0.py │ │ │ └── siqa_ppl_ced5f6.py │ │ ├── storycloze │ │ │ ├── storycloze_gen.py │ │ │ ├── storycloze_gen_7f656a.py │ │ │ ├── storycloze_ppl.py │ │ │ ├── storycloze_ppl_496661.py │ │ │ └── storycloze_ppl_afd16f.py │ │ ├── strategyqa │ │ │ ├── strategyqa_gen.py │ │ │ ├── strategyqa_gen_1180a7.py │ │ │ └── strategyqa_gen_934441.py │ │ ├── summedits │ │ │ ├── summedits_gen.py │ │ │ ├── summedits_gen_315438.py │ │ │ ├── summedits_gen_4fb38b.py │ │ │ ├── summedits_ppl.py │ │ │ ├── summedits_ppl_1fbeb6.py │ │ │ ├── summedits_ppl_3c30d0.py │ │ │ └── summedits_ppl_fa58ba.py │ │ ├── summscreen │ │ │ ├── summscreen_gen.py │ │ │ ├── summscreen_gen_653185.py │ │ │ └── summscreen_gen_aa5eb3.py │ │ ├── triviaqa │ │ │ ├── triviaqa_gen.py │ │ │ ├── triviaqa_gen_2121ce.py │ │ │ ├── triviaqa_gen_3e39a5.py │ │ │ ├── triviaqa_gen_429db5.py │ │ │ └── triviaqa_gen_d297bb.py │ │ ├── triviaqarc │ │ │ ├── triviaqarc_gen.py │ │ │ ├── triviaqarc_gen_a2d88a.py │ │ │ └── triviaqarc_gen_db6413.py │ │ ├── truthfulqa │ │ │ ├── truthfulqa_gen.py │ │ │ ├── truthfulqa_gen_1e7d8d.py │ │ │ └── truthfulqa_gen_5ddc62.py │ │ ├── tydiqa │ │ │ ├── tydiqa_gen.py │ │ │ └── tydiqa_gen_978d2a.py │ │ ├── winograd │ │ │ ├── winograd_ppl.py │ │ │ ├── winograd_ppl_8f3049.py │ │ │ └── winograd_ppl_b6c7ed.py │ │ ├── winogrande │ │ │ ├── winogrande_gen.py │ │ │ ├── winogrande_gen_a9ede5.py │ │ │ ├── winogrande_ppl.py │ │ │ ├── winogrande_ppl_55a66e.py │ │ │ └── winogrande_ppl_9307fd.py │ │ └── z_bench │ │ │ ├── z_bench_gen.py │ │ │ ├── z_bench_gen_5813ec.py │ │ │ └── z_bench_gen_61db0a.py │ ├── eval_baichuan_13b.py │ ├── eval_baichuan_7b.py │ ├── eval_chatglm2_6b.py │ ├── eval_demo.py │ ├── eval_gpt3.5.py │ ├── eval_internlm_7b.py │ ├── eval_llama2_13b.py │ ├── eval_llama2_7b.py │ ├── eval_qwen_7b.py │ ├── eval_tigerbot_13b.py │ ├── 
eval_tigerbot_13b_chat_1.py │ ├── eval_tigerbot_13b_chat_2.py │ ├── eval_tigerbot_7b.py │ ├── eval_tigerbot_7b_chat_1.py │ ├── eval_tigerbot_7b_chat_2.py │ ├── eval_tigerbot_autogptq.py │ ├── eval_tigerbot_exllama.py │ ├── models │ │ ├── gpt_3.5_turbo.py │ │ ├── hf_baichuan_13b_base.py │ │ ├── hf_baichuan_13b_chat.py │ │ ├── hf_baichuan_7b.py │ │ ├── hf_chatglm2_6b.py │ │ ├── hf_chatglm_6b.py │ │ ├── hf_falcon_40b.py │ │ ├── hf_falcon_7b.py │ │ ├── hf_internlm_7b.py │ │ ├── hf_internlm_chat_7b.py │ │ ├── hf_internlm_chat_7b_8k.py │ │ ├── hf_llama2_13b.py │ │ ├── hf_llama2_13b_chat.py │ │ ├── hf_llama2_70b.py │ │ ├── hf_llama2_7b.py │ │ ├── hf_llama_13b.py │ │ ├── hf_llama_65b.py │ │ ├── hf_llama_7b.py │ │ ├── hf_llama_7b_chat.py │ │ ├── hf_moss_moon_003_base.py │ │ ├── hf_moss_moon_003_sft.py │ │ ├── hf_mpt_7b.py │ │ ├── hf_mpt_instruct_7b.py │ │ ├── hf_qwen_7b.py │ │ ├── hf_tigerbot_13b_base.py │ │ ├── hf_tigerbot_13b_chat.py │ │ ├── hf_tigerbot_7b_base.py │ │ ├── hf_tigerbot_7b_chat.py │ │ ├── hf_tigerbot_exllama.py │ │ ├── hf_tigerbot_gptq.py │ │ ├── hf_vicuna_13b.py │ │ ├── hf_vicuna_33b.py │ │ ├── hf_vicuna_7b.py │ │ ├── hf_wizardlm_7b.py │ │ ├── llama2_13b_chat.py │ │ ├── llama2_70b_chat.py │ │ └── llama2_7b_chat.py │ └── summarizers │ │ ├── example.py │ │ ├── groups │ │ ├── GaokaoBench.py │ │ ├── agieval.py │ │ ├── bbh.py │ │ ├── ceval.py │ │ ├── flores.py │ │ ├── jigsaw_multilingual.py │ │ └── mmlu.py │ │ ├── medium.py │ │ └── small.py ├── docs │ ├── en │ │ ├── MMBench.md │ │ ├── Makefile │ │ ├── _static │ │ │ ├── css │ │ │ │ └── readthedocs.css │ │ │ ├── image │ │ │ │ ├── logo.svg │ │ │ │ └── logo_icon.svg │ │ │ └── js │ │ │ │ └── custom.js │ │ ├── _templates │ │ │ ├── 404.html │ │ │ ├── autosummary │ │ │ │ └── class.rst │ │ │ └── callable.rst │ │ ├── advanced_guides │ │ │ ├── new_dataset.md │ │ │ └── new_model.md │ │ ├── conf.py │ │ ├── docutils.conf │ │ ├── get_started.md │ │ ├── index.rst │ │ ├── notes │ │ │ └── contribution_guide.md │ │ ├── prompt │ │ 
│ ├── few_shot.md │ │ │ ├── meta_template.md │ │ │ ├── overview.md │ │ │ └── prompt_template.md │ │ ├── tools.md │ │ └── user_guides │ │ │ ├── config.md │ │ │ ├── datasets.md │ │ │ ├── evaluation.md │ │ │ ├── experimentation.md │ │ │ ├── framework_overview.md │ │ │ ├── metrics.md │ │ │ └── models.md │ └── zh_cn │ │ ├── Makefile │ │ ├── _static │ │ ├── css │ │ │ └── readthedocs.css │ │ ├── image │ │ │ ├── logo.svg │ │ │ └── logo_icon.svg │ │ └── js │ │ │ └── custom.js │ │ ├── _templates │ │ ├── 404.html │ │ ├── autosummary │ │ │ └── class.rst │ │ └── callable.rst │ │ ├── advanced_guides │ │ ├── new_dataset.md │ │ └── new_model.md │ │ ├── conf.py │ │ ├── docutils.conf │ │ ├── get_started.md │ │ ├── index.rst │ │ ├── notes │ │ └── contribution_guide.md │ │ ├── prompt │ │ ├── few_shot.md │ │ ├── meta_template.md │ │ ├── overview.md │ │ └── prompt_template.md │ │ ├── tools.md │ │ └── user_guides │ │ ├── config.md │ │ ├── datasets.md │ │ ├── evaluation.md │ │ ├── experimentation.md │ │ ├── framework_overview.md │ │ ├── metrics.md │ │ └── models.md ├── opencompass │ ├── __init__.py │ ├── datasets │ │ ├── GaokaoBench.py │ │ ├── TheoremQA.py │ │ ├── __init__.py │ │ ├── afqmcd.py │ │ ├── agieval │ │ │ ├── __init__.py │ │ │ ├── agieval.py │ │ │ ├── constructions.py │ │ │ ├── dataset_loader.py │ │ │ ├── evaluation.py │ │ │ ├── math_equivalence.py │ │ │ ├── post_process.py │ │ │ └── utils.py │ │ ├── arc.py │ │ ├── ax.py │ │ ├── base.py │ │ ├── bbh.py │ │ ├── boolq.py │ │ ├── bustum.py │ │ ├── c3.py │ │ ├── cb.py │ │ ├── ceval.py │ │ ├── chid.py │ │ ├── civilcomments.py │ │ ├── cluewsc.py │ │ ├── cmnli.py │ │ ├── cmrc.py │ │ ├── commonsenseqa.py │ │ ├── copa.py │ │ ├── crowspairs.py │ │ ├── csl.py │ │ ├── cvalues.py │ │ ├── drcd.py │ │ ├── drop.py │ │ ├── eprstmt.py │ │ ├── flores.py │ │ ├── govrepcrs.py │ │ ├── gsm8k.py │ │ ├── hellaswag.py │ │ ├── huggingface.py │ │ ├── humaneval.py │ │ ├── iwslt2017.py │ │ ├── jigsawmultilingual.py │ │ ├── lambada.py │ │ ├── lcsts.py │ │ ├── 
math.py │ │ ├── mbpp.py │ │ ├── mmlu.py │ │ ├── multirc.py │ │ ├── narrativeqa.py │ │ ├── natural_question.py │ │ ├── obqa.py │ │ ├── piqa.py │ │ ├── qasper.py │ │ ├── qaspercut.py │ │ ├── race.py │ │ ├── realtoxicprompts.py │ │ ├── record.py │ │ ├── safety.py │ │ ├── siqa.py │ │ ├── storycloze.py │ │ ├── strategyqa.py │ │ ├── summedits.py │ │ ├── summscreen.py │ │ ├── tnews.py │ │ ├── triviaqa.py │ │ ├── triviaqarc.py │ │ ├── truthfulqa.py │ │ ├── tydiqa.py │ │ ├── wic.py │ │ ├── winograd.py │ │ ├── winogrande.py │ │ ├── wsc.py │ │ ├── xcopa.py │ │ ├── xlsum.py │ │ └── xsum.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── base_api.py │ │ ├── glm.py │ │ ├── huggingface.py │ │ ├── llama2.py │ │ └── openai_api.py │ ├── openicl │ │ ├── __init__.py │ │ ├── icl_dataset_reader.py │ │ ├── icl_evaluator │ │ │ ├── __init__.py │ │ │ ├── icl_aucroc_evaluator.py │ │ │ ├── icl_base_evaluator.py │ │ │ ├── icl_em_evaluator.py │ │ │ ├── icl_hf_evaluator.py │ │ │ └── icl_toxic_evaluator.py │ │ ├── icl_inferencer │ │ │ ├── __init__.py │ │ │ ├── icl_base_inferencer.py │ │ │ ├── icl_clp_inferencer.py │ │ │ ├── icl_gen_inferencer.py │ │ │ └── icl_ppl_inferencer.py │ │ ├── icl_prompt_template.py │ │ ├── icl_retriever │ │ │ ├── __init__.py │ │ │ ├── icl_base_retriever.py │ │ │ ├── icl_bm25_retriever.py │ │ │ ├── icl_dpp_retriever.py │ │ │ ├── icl_fix_k_retriever.py │ │ │ ├── icl_mdl_retriever.py │ │ │ ├── icl_random_retriever.py │ │ │ ├── icl_topk_retriever.py │ │ │ ├── icl_votek_retriever.py │ │ │ └── icl_zero_retriever.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── logging.py │ ├── partitioners │ │ ├── __init__.py │ │ ├── base.py │ │ ├── naive.py │ │ └── size.py │ ├── registry.py │ ├── runners │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dlc.py │ │ ├── local.py │ │ └── slurm.py │ ├── tasks │ │ ├── __init__.py │ │ ├── base.py │ │ ├── llm_eval.py │ │ ├── openicl_eval.py │ │ └── openicl_infer.py │ └── utils │ │ ├── __init__.py │ │ ├── abbr.py │ │ ├── build.py │ │ ├── 
collect_env.py │ │ ├── fileio.py │ │ ├── git.py │ │ ├── lark.py │ │ ├── logging.py │ │ ├── menu.py │ │ ├── prompt.py │ │ ├── summarizer.py │ │ ├── text_postprocessors.py │ │ └── types.py ├── requirements.txt ├── requirements │ ├── docs.txt │ └── runtime.txt ├── run.py ├── setup.py ├── tests │ ├── openicl │ │ └── test_prompt_template.py │ └── prompt │ │ ├── test_api_template_parser.py │ │ ├── test_lm_template_parser.py │ │ └── test_prompt_list.py └── tools │ ├── case_analyzer.py │ ├── ceval_util.py │ ├── mmlu_util.py │ ├── prediction_merger.py │ ├── prompt_viewer.py │ └── test_api_model.py ├── other_infer ├── exllamav2_hf_infer.py ├── exllamav2_infer.py ├── gptq_infer.py ├── infer_pretrain.py ├── infer_stream.py └── quant_infer.py ├── requirements.txt ├── train ├── .DS_Store ├── data │ └── medical_qa_6000.jsonl ├── ds_config │ ├── ds_config_qlora.json │ └── ds_config_zero3.json ├── requirements_qlora.txt ├── train_clm.py ├── train_sft.py └── train_with_qlora.py └── utils ├── __init__.py ├── modeling_hack.py └── streaming.py /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | TigerBot 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | Default 4 | python interpreter 5 | 6 | -------------------------------------------------------------------------------- /image/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/.DS_Store -------------------------------------------------------------------------------- /image/70b-chat-example.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/70b-chat-example.jpg -------------------------------------------------------------------------------- /image/api/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/.DS_Store -------------------------------------------------------------------------------- /image/api/case-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/case-1.png -------------------------------------------------------------------------------- /image/api/case-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/case-2.png -------------------------------------------------------------------------------- /image/api/case-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/case-3.png -------------------------------------------------------------------------------- /image/api/case-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/case-4.png -------------------------------------------------------------------------------- /image/api/case-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/case-5.png 
-------------------------------------------------------------------------------- /image/api/case-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/case-6.png -------------------------------------------------------------------------------- /image/api/demo/chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/chat.png -------------------------------------------------------------------------------- /image/api/demo/chat2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/chat2.png -------------------------------------------------------------------------------- /image/api/demo/embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/embedding.png -------------------------------------------------------------------------------- /image/api/demo/finetune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/finetune.png -------------------------------------------------------------------------------- /image/api/demo/pdf2text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/pdf2text.png -------------------------------------------------------------------------------- /image/api/demo/plugin.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/plugin.png -------------------------------------------------------------------------------- /image/api/demo/summarization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/summarization.png -------------------------------------------------------------------------------- /image/api/demo/text2image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/demo/text2image.png -------------------------------------------------------------------------------- /image/api/rethink/case-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/rethink/case-1.png -------------------------------------------------------------------------------- /image/api/rethink/case-3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/rethink/case-3.gif -------------------------------------------------------------------------------- /image/api/search/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/api/search/demo.png -------------------------------------------------------------------------------- /image/auto-valuation-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/auto-valuation-1.png -------------------------------------------------------------------------------- /image/auto-valuation-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/auto-valuation-2.png -------------------------------------------------------------------------------- /image/code-lang-type-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/code-lang-type-2.png -------------------------------------------------------------------------------- /image/code-lang-type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/code-lang-type.png -------------------------------------------------------------------------------- /image/contact.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/contact.jpg -------------------------------------------------------------------------------- /image/eval_base_1214.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/eval_base_1214.png -------------------------------------------------------------------------------- /image/eval_chat_1214.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/eval_chat_1214.png 
-------------------------------------------------------------------------------- /image/logo_core.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/logo_core.png -------------------------------------------------------------------------------- /image/loss-70b-chat-v3-valid.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/loss-70b-chat-v3-valid.jpg -------------------------------------------------------------------------------- /image/loss-70b-chat-v3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/loss-70b-chat-v3.jpg -------------------------------------------------------------------------------- /image/loss-curve-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/loss-curve-4.jpg -------------------------------------------------------------------------------- /image/loss-curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/loss-curve.jpg -------------------------------------------------------------------------------- /image/next-tok-acc-curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/next-tok-acc-curve.jpg -------------------------------------------------------------------------------- /image/next-tok-acc.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/next-tok-acc.jpg -------------------------------------------------------------------------------- /image/peft_eval_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/peft_eval_loss.png -------------------------------------------------------------------------------- /image/peft_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/peft_metrics.png -------------------------------------------------------------------------------- /image/peft_train_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/peft_train_loss.png -------------------------------------------------------------------------------- /image/pretrain-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/pretrain-2.png -------------------------------------------------------------------------------- /image/pretrain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/pretrain.png -------------------------------------------------------------------------------- /image/pretrain_v2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/pretrain_v2.png -------------------------------------------------------------------------------- /image/qiyewechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/qiyewechat.png -------------------------------------------------------------------------------- /image/search-link.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/search-link.jpg -------------------------------------------------------------------------------- /image/search_api.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/search_api.jpg -------------------------------------------------------------------------------- /image/search_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/search_api.png -------------------------------------------------------------------------------- /image/terminal_case.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/terminal_case.jpeg -------------------------------------------------------------------------------- /image/tgi-demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/tgi-demo.mp4 
-------------------------------------------------------------------------------- /image/tiger.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/tiger.jpg -------------------------------------------------------------------------------- /image/tigerbot-idiom.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/tigerbot-idiom.jpg -------------------------------------------------------------------------------- /image/tigerbot-idiom2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/tigerbot-idiom2.jpg -------------------------------------------------------------------------------- /image/tigerbot-idiom3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/tigerbot-idiom3.jpg -------------------------------------------------------------------------------- /image/tigerbot_chatapi_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/tigerbot_chatapi_sample.png -------------------------------------------------------------------------------- /image/zh-books-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/zh-books-2.png -------------------------------------------------------------------------------- /image/zh-books.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/image/zh-books.png -------------------------------------------------------------------------------- /opencompass/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/opencompass/.DS_Store -------------------------------------------------------------------------------- /opencompass/configs/datasets/ARC_c/ARC_c_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .ARC_c_gen_1e0de5 import ARC_c_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/ARC_c/ARC_c_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .ARC_c_ppl_a450bd import ARC_c_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/ARC_c/ARC_c_ppl_d52a21.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import ARCDataset 6 | 7 | ARC_c_reader_cfg = dict( 8 | input_columns=['question', 'textA', 'textB', 'textC', 'textD'], 9 | output_column='answerKey') 10 | 11 | ARC_c_infer_cfg = dict( 12 | prompt_template=dict( 13 | type=PromptTemplate, 14 | template={ 15 | "A": "Question: 
{question}\nAnswer: {textA}", 16 | "B": "Question: {question}\nAnswer: {textB}", 17 | "C": "Question: {question}\nAnswer: {textC}", 18 | "D": "Question: {question}\nAnswer: {textD}" 19 | }), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=PPLInferencer)) 22 | 23 | ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 24 | 25 | ARC_c_datasets = [ 26 | dict( 27 | type=ARCDataset, 28 | abbr='ARC-c', 29 | path='./data/ARC/ARC-c/ARC-Challenge-Dev.jsonl', 30 | reader_cfg=ARC_c_reader_cfg, 31 | infer_cfg=ARC_c_infer_cfg, 32 | eval_cfg=ARC_c_eval_cfg) 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/ARC_e/ARC_e_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .ARC_e_gen_1e0de5 import ARC_e_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/ARC_e/ARC_e_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .ARC_e_ppl_a450bd import ARC_e_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/ARC_e/ARC_e_ppl_d52a21.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import ARCDataset 6 | 7 | ARC_e_reader_cfg = dict( 8 | input_columns=['question', 'textA', 'textB', 'textC', 'textD'], 9 | output_column='answerKey') 10 | 11 | ARC_e_infer_cfg = dict( 12 | 
prompt_template=dict( 13 | type=PromptTemplate, 14 | template={ 15 | "A": "Question: {question}\nAnswer: {textA}", 16 | "B": "Question: {question}\nAnswer: {textB}", 17 | "C": "Question: {question}\nAnswer: {textC}", 18 | "D": "Question: {question}\nAnswer: {textD}" 19 | }), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=PPLInferencer)) 22 | 23 | ARC_e_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 24 | 25 | ARC_e_datasets = [ 26 | dict( 27 | type=ARCDataset, 28 | abbr='ARC-e', 29 | path='./data/ARC/ARC-e/ARC-Easy-Dev.jsonl', 30 | reader_cfg=ARC_e_reader_cfg, 31 | infer_cfg=ARC_e_infer_cfg, 32 | eval_cfg=ARC_e_eval_cfg) 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_C3/CLUE_C3_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_C3_gen_8c358f import C3_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_C3/CLUE_C3_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_C3_ppl_e24a31 import C3_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_C3/CLUE_C3_ppl_e24a31.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import C3Dataset 6 | 7 | C3_reader_cfg = dict( 8 | input_columns=[ 9 | 'question', 'content', 'choice0', 
'choice1', 'choice2', 'choice3', 10 | 'choices' 11 | ], 12 | output_column='label') 13 | 14 | C3_infer_cfg = dict( 15 | prompt_template=dict( 16 | type=PromptTemplate, 17 | template={ 18 | i: dict(round=[ 19 | dict(role="HUMAN", prompt="文章:{content}\n问题:{question}"), 20 | dict(role="BOT", prompt=f"答案:{{choice{i}}}") 21 | ]) 22 | for i in range(4) 23 | }), 24 | retriever=dict(type=ZeroRetriever), 25 | inferencer=dict(type=PPLInferencer)) 26 | 27 | C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 28 | 29 | C3_datasets = [ 30 | dict( 31 | type=C3Dataset, 32 | abbr='C3', 33 | path='./data/CLUE/C3/dev_0.json', 34 | reader_cfg=C3_reader_cfg, 35 | infer_cfg=C3_infer_cfg, 36 | eval_cfg=C3_eval_cfg) 37 | ] 38 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_CMRC_gen_1bd3c8 import CMRC_datasets 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_1bd3c8.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import CMRCDataset, cmrc_postprocess 6 | 7 | CMRC_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | CMRC_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict( 15 | role="HUMAN", 16 | prompt="根据文章回答问题。你的答案应该尽可能简练,请以 ‘答案是’ 开头的句式作答。\n文章:{context}\n问:{question}\n答:"), 17 | ])), 18 | retriever=dict(type=ZeroRetriever), 19 
| inferencer=dict(type=GenInferencer)) 20 | 21 | CMRC_eval_cfg = dict( 22 | evaluator=dict(type=EMEvaluator), 23 | pred_role="BOT", 24 | pred_postprocessor=dict(type=cmrc_postprocess), 25 | ) 26 | 27 | CMRC_datasets = [ 28 | dict( 29 | type=CMRCDataset, 30 | abbr='CMRC_dev', 31 | path='./data/CLUE/CMRC/dev.json', 32 | reader_cfg=CMRC_reader_cfg, 33 | infer_cfg=CMRC_infer_cfg, 34 | eval_cfg=CMRC_eval_cfg), 35 | ] 36 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_3749cd.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import CMRCDataset 6 | 7 | CMRC_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | CMRC_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict(role="HUMAN", prompt="{context}\n\n{question}"), 15 | dict(role="BOT", prompt=""), 16 | ])), 17 | retriever=dict(type=ZeroRetriever), 18 | inferencer=dict(type=GenInferencer)) 19 | 20 | CMRC_eval_cfg = dict( 21 | evaluator=dict(type=EMEvaluator), 22 | pred_role="BOT", 23 | ) 24 | 25 | CMRC_datasets = [ 26 | dict( 27 | type=CMRCDataset, 28 | abbr='CMRC_dev', 29 | path='./data/CLUE/CMRC/dev.json', 30 | reader_cfg=CMRC_reader_cfg, 31 | infer_cfg=CMRC_infer_cfg, 32 | eval_cfg=CMRC_eval_cfg), 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_8484b9.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from 
opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import CMRCDataset 6 | 7 | CMRC_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | CMRC_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template="{context}\n\n{question}"), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer)) 16 | 17 | CMRC_eval_cfg = dict(evaluator=dict(type=EMEvaluator), ) 18 | 19 | CMRC_datasets = [ 20 | dict( 21 | type=CMRCDataset, 22 | abbr='CMRC_dev', 23 | path='./data/CLUE/CMRC/dev.json', 24 | reader_cfg=CMRC_reader_cfg, 25 | infer_cfg=CMRC_infer_cfg, 26 | eval_cfg=CMRC_eval_cfg), 27 | ] 28 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_941108.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import CMRCDataset 6 | 7 | CMRC_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | CMRC_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict( 15 | role="HUMAN", 16 | prompt="{context}\n\n{question}"), 17 | ])), 18 | retriever=dict(type=ZeroRetriever), 19 | inferencer=dict(type=GenInferencer)) 20 | 21 | CMRC_eval_cfg = dict( 22 | evaluator=dict(type=EMEvaluator), 23 | pred_role="BOT", 24 | ) 25 | 26 | CMRC_datasets = [ 27 | dict( 28 | type=CMRCDataset, 29 | abbr='CMRC_dev', 30 | path='./data/CLUE/CMRC/dev.json', 31 | 
reader_cfg=CMRC_reader_cfg, 32 | infer_cfg=CMRC_infer_cfg, 33 | eval_cfg=CMRC_eval_cfg), 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_chat.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_CMRC_gen_3749cd import CMRC_datasets 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_DRCD_gen_1bd3c8 import DRCD_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_1bd3c8.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import DRCDDataset, drcd_postprocess 6 | 7 | DRCD_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | DRCD_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict( 15 | role="HUMAN", 16 | prompt="根据文章回答问题。你的答案应该尽可能简练,请以 ‘答案是’ 开头的句式作答。\n文章:{context}\n问:{question}\n答:"), 17 | ])), 18 | retriever=dict(type=ZeroRetriever), 19 | inferencer=dict(type=GenInferencer)) 20 | 21 | DRCD_eval_cfg = dict( 22 | evaluator=dict(type=EMEvaluator), 23 | pred_role="BOT", 24 | pred_postprocessor=dict(type=drcd_postprocess), 25 | 26 | ) 27 | 28 | DRCD_datasets = [ 29 | dict( 30 | 
type=DRCDDataset, 31 | abbr='DRCD_dev', 32 | path='./data/CLUE/DRCD/dev.json', 33 | reader_cfg=DRCD_reader_cfg, 34 | infer_cfg=DRCD_infer_cfg, 35 | eval_cfg=DRCD_eval_cfg), 36 | ] 37 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_3749cd.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import DRCDDataset 6 | 7 | DRCD_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | DRCD_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict(role="HUMAN", prompt="文章:{context}\n根据上文,回答如下问题:{question}"), 15 | dict(role="BOT", prompt="答:"), 16 | ])), 17 | retriever=dict(type=ZeroRetriever), 18 | inferencer=dict(type=GenInferencer)) 19 | 20 | DRCD_eval_cfg = dict( 21 | evaluator=dict(type=EMEvaluator), 22 | pred_role="BOT", 23 | ) 24 | 25 | DRCD_datasets = [ 26 | dict( 27 | type=DRCDDataset, 28 | abbr='DRCD_dev', 29 | path='./data/CLUE/DRCD/dev.json', 30 | reader_cfg=DRCD_reader_cfg, 31 | infer_cfg=DRCD_infer_cfg, 32 | eval_cfg=DRCD_eval_cfg), 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_8484b9.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import 
DRCDDataset 6 | 7 | DRCD_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | DRCD_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template="文章:{context}\n根据上文,回答如下问题: {question}\n答:"), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer)) 16 | 17 | DRCD_eval_cfg = dict(evaluator=dict(type=EMEvaluator), ) 18 | 19 | DRCD_datasets = [ 20 | dict( 21 | type=DRCDDataset, 22 | abbr='DRCD_dev', 23 | path='./data/CLUE/DRCD/dev.json', 24 | reader_cfg=DRCD_reader_cfg, 25 | infer_cfg=DRCD_infer_cfg, 26 | eval_cfg=DRCD_eval_cfg), 27 | ] 28 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_941108.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import DRCDDataset 6 | 7 | DRCD_reader_cfg = dict( 8 | input_columns=['question', 'context'], output_column='answers') 9 | 10 | DRCD_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict( 15 | role="HUMAN", 16 | prompt="文章:{context}\n根据上文,回答如下问题:\n{question}\n答:"), 17 | ])), 18 | retriever=dict(type=ZeroRetriever), 19 | inferencer=dict(type=GenInferencer)) 20 | 21 | DRCD_eval_cfg = dict( 22 | evaluator=dict(type=EMEvaluator), 23 | pred_role="BOT", 24 | ) 25 | 26 | DRCD_datasets = [ 27 | dict( 28 | type=DRCDDataset, 29 | abbr='DRCD_dev', 30 | path='./data/CLUE/DRCD/dev.json', 31 | reader_cfg=DRCD_reader_cfg, 32 | infer_cfg=DRCD_infer_cfg, 33 | eval_cfg=DRCD_eval_cfg), 34 | ] 35 | -------------------------------------------------------------------------------- 
/opencompass/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_afqmc_gen_901306 import afqmc_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_afqmc_ppl_6507d7 import afqmc_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_7b0c1e.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | afqmc_reader_cfg = dict( 8 | input_columns=['sentence1', 'sentence2'], 9 | output_column='label', 10 | test_split='train') 11 | 12 | afqmc_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 0: "{sentence1},{sentence2}不同。", 17 | 1: "{sentence1},{sentence2}相似。" 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | afqmc_datasets = [ 25 | dict( 26 | type=HFDataset, 27 | abbr='afqmc-dev', 28 | path='json', 29 | data_files='./data/CLUE/AFQMC/dev.json', 30 | split='train', 31 | reader_cfg=afqmc_reader_cfg, 32 | infer_cfg=afqmc_infer_cfg, 33 | eval_cfg=afqmc_eval_cfg), 34 | ] 35 | 
-------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_cmnli_gen_1abf97 import cmnli_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_cmnli/CLUE_cmnli_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_cmnli_ppl_fdc6de import cmnli_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_ocnli_gen_c4cb6c import ocnli_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/CLUE_ocnli/CLUE_ocnli_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .CLUE_ocnli_ppl_fdc6de import ocnli_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_bustm/FewCLUE_bustm_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_bustm_gen_634f41 import bustm_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_bustm/FewCLUE_bustm_ppl.py: 
-------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_bustm_ppl_e53034 import bustm_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_chid/FewCLUE_chid_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_chid_gen_0a29a2 import chid_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_chid_ppl_8f2872 import chid_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_cluewsc_ppl_868415 import cluewsc_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import 
read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_csl_gen_28b223 import csl_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_csl_ppl_841b62 import csl_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_ocnli_fc_gen_f97a97 import ocnli_fc_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_ocnli_fc_ppl_c08300 import 
ocnli_fc_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_tnews_gen_b90e4a import tnews_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .FewCLUE_tnews_ppl_d10e8a import tnews_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/GaokaoBench/GaokaoBench_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .GaokaoBench_gen_5cfe9e import GaokaoBench_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/GaokaoBench/GaokaoBench_mixed.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .GaokaoBench_mixed_f2038e import GaokaoBench_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/PJExam/PJExam_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .PJExam_gen_8cd97c import PJExam_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- 
/opencompass/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_0748aa.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | AX_b_reader_cfg = dict( 8 | input_columns=['sentence1', 'sentence2'], 9 | output_column='label', 10 | test_split='train') 11 | 12 | AX_b_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 'entailment': '{sentence1}?entailment, {sentence2}', 17 | 'not_entailment': '{sentence1}?not_entailment, {sentence2}' 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | AX_b_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | AX_b_datasets = [ 25 | dict( 26 | type=HFDataset, 27 | abbr='AX_b', 28 | path='json', 29 | data_files='./data/SuperGLUE/AX-b/AX-b.jsonl', 30 | split='train', 31 | reader_cfg=AX_b_reader_cfg, 32 | infer_cfg=AX_b_infer_cfg, 33 | eval_cfg=AX_b_eval_cfg) 34 
| ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_AX_g_gen_68aac7 import AX_g_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_ppl_50f8f6.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | AX_g_reader_cfg = dict( 8 | input_columns=['hypothesis', 'premise'], 9 | output_column='label', 10 | test_split='train') 11 | 12 | AX_g_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 'entailment': '{premise}?entailment, {hypothesis}', 17 | 'not_entailment': '{premise}?not_entailment, {hypothesis}' 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | AX_g_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | AX_g_datasets = [ 25 | dict( 26 | type=HFDataset, 27 | abbr='AX_g', 28 | path='json', 29 | data_files='./data/SuperGLUE/AX-g/AX-g.jsonl', 30 | split='train', 31 | 
reader_cfg=AX_g_reader_cfg, 32 | infer_cfg=AX_g_infer_cfg, 33 | eval_cfg=AX_g_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_BoolQ_ppl_314b96 import BoolQ_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_9619db.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import BoolQDataset 6 | 7 | BoolQ_reader_cfg = dict( 8 | input_columns=['question', 'passage'], 9 | output_column='answer', 10 | test_split='train') 11 | 12 | BoolQ_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 0: "Passage:{passage}。\nQuestion:{question}。\nAnswer: No.", 17 | 1: "Passage:{passage}。\nQuestion:{question}。\nAnswer: Yes.", 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | BoolQ_datasets = [ 25 | dict( 26 | 
type=BoolQDataset, 27 | abbr='BoolQ', 28 | path='json', 29 | data_files='./data/SuperGLUE/BoolQ/val.jsonl', 30 | split='train', 31 | reader_cfg=BoolQ_reader_cfg, 32 | infer_cfg=BoolQ_infer_cfg, 33 | eval_cfg=BoolQ_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_CB_gen_854c6c import CB_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_CB_ppl_0143fe import CB_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_11c175.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | CB_reader_cfg = dict( 8 | input_columns=['premise', 'hypothesis'], output_column='label') 9 | 10 | CB_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template={ 14 | 'contradiction': '{premise}?contradiction, {hypothesis}', 15 | 'entailment': '{premise}?entailment, {hypothesis}', 16 | 'neutral': '{premise}?neutral, {hypothesis}' 17 | }), 18 | retriever=dict(type=ZeroRetriever), 19 | inferencer=dict(type=PPLInferencer)) 20 | 21 | CB_eval_cfg 
= dict(evaluator=dict(type=AccEvaluator), ) 22 | 23 | CB_datasets = [ 24 | dict( 25 | type=HFDataset, 26 | abbr='CB', 27 | path='json', 28 | split='train', 29 | data_files='./data/SuperGLUE/CB/val.jsonl', 30 | reader_cfg=CB_reader_cfg, 31 | infer_cfg=CB_infer_cfg, 32 | eval_cfg=CB_eval_cfg) 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_COPA_gen_91ca53 import COPA_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_COPA_ppl_9f3618 import COPA_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_54058d.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | COPA_reader_cfg = dict( 8 | input_columns=['question', 'premise', 'choice1', 'choice2'], 9 | output_column='label', 10 | test_split='train') 11 | 12 | COPA_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 0: "Premise:{premise}。\nQuestion:{question}。\nAnswer: {choice1}.", 17 | 1: "Passage:{premise}。\nQuestion:{question}。\nAnswer: 
{choice2}.", 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | COPA_datasets = [ 25 | dict( 26 | type=HFDataset, 27 | abbr='COPA', 28 | path='json', 29 | data_files='./data/SuperGLUE/COPA/val.jsonl', 30 | split='train', 31 | reader_cfg=COPA_reader_cfg, 32 | infer_cfg=COPA_infer_cfg, 33 | eval_cfg=COPA_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_MultiRC_gen_27071f import MultiRC_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_MultiRC_ppl_ced824 import MultiRC_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_866273.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import MultiRCDataset 6 | 7 | MultiRC_reader_cfg = dict( 8 | input_columns=['question', 'text', 'answer'], output_column='label') 9 | 10 | MultiRC_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template={ 14 | 
0: "Passage:{text}。\nQuestion:{question}。\nAnswer: {answer}. It is false.", 15 | 1: "Passage:

。\nQuestion:{question}。\nAnswer: {answer}. It is true.", 16 | }), 17 | retriever=dict(type=ZeroRetriever), 18 | inferencer=dict(type=PPLInferencer)) 19 | 20 | MultiRC_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 21 | 22 | MultiRC_datasets = [ 23 | dict( 24 | type=MultiRCDataset, 25 | abbr='MultiRC', 26 | path='./data/SuperGLUE/MultiRC/val.jsonl', 27 | reader_cfg=MultiRC_reader_cfg, 28 | infer_cfg=MultiRC_infer_cfg, 29 | eval_cfg=MultiRC_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_RTE_gen_68aac7 import RTE_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_RTE_ppl_66caf3 import RTE_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl_50f8f6.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | RTE_reader_cfg = dict( 8 | input_columns=['hypothesis', 'premise'], 9 | output_column='label', 10 | test_split='train') 11 | 12 | RTE_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 
'entailment': '{premise}?entailment, {hypothesis}', 17 | 'not_entailment': '{premise}?not_entailment, {hypothesis}' 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | RTE_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | RTE_datasets = [ 25 | dict( 26 | type=HFDataset, 27 | abbr='RTE', 28 | path='json', 29 | data_files='./data/SuperGLUE/RTE/val.jsonl', 30 | split='train', 31 | reader_cfg=RTE_reader_cfg, 32 | infer_cfg=RTE_infer_cfg, 33 | eval_cfg=RTE_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_0f7784.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import EMEvaluator 5 | from opencompass.datasets import ReCoRDDataset, ReCoRD_postprocess 6 | 7 | ReCoRD_reader_cfg = dict( 8 | input_columns=['question', 'text'], output_column='answers') 9 | 10 | ReCoRD_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template= 14 | "Passage:{text}\nResult:{question}\nQuestion: What entity does ____ refer to in the result?Give me the entity name:"), 15 | retriever=dict(type=ZeroRetriever), 16 | inferencer=dict(type=GenInferencer)) 17 | 18 | ReCoRD_eval_cfg = dict( 19 | evaluator=dict(type=EMEvaluator), 
pred_postprocessor=dict(type=ReCoRD_postprocess)) 20 | 21 | ReCoRD_datasets = [ 22 | dict( 23 | type=ReCoRDDataset, 24 | abbr='ReCoRD', 25 | path='./data/SuperGLUE/ReCoRD/val.jsonl', 26 | reader_cfg=ReCoRD_reader_cfg, 27 | infer_cfg=ReCoRD_infer_cfg, 28 | eval_cfg=ReCoRD_eval_cfg) 29 | ] 30 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_WSC_gen_8a881c import WSC_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_WSC_ppl_d0f531 import WSC_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_f37e78.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import WSCDataset 6 | 7 | WSC_reader_cfg = dict( 8 | input_columns=['span1', 'span2', 'text', 'new_text'], 9 | output_column='answer') 10 | 11 | WSC_infer_cfg = dict( 12 | prompt_template=dict( 13 | type=PromptTemplate, 14 | template={ 15 | 0: "{text}", 16 | 1: "{new_text}" 17 | }), 18 | retriever=dict(type=ZeroRetriever), 19 | inferencer=dict(type=PPLInferencer)) 20 | 21 | WSC_eval_cfg = 
dict(evaluator=dict(type=AccEvaluator)) 22 | 23 | WSC_datasets = [ 24 | dict( 25 | type=WSCDataset, 26 | path='json', 27 | abbr='WSC', 28 | data_files='./data/SuperGLUE/WSC/val.jsonl', 29 | split='train', 30 | reader_cfg=WSC_reader_cfg, 31 | infer_cfg=WSC_infer_cfg, 32 | eval_cfg=WSC_eval_cfg, 33 | ) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_WiC_gen_d06864 import WiC_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .SuperGLUE_WiC_ppl_312de9 import WiC_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_ppl_3fb6fd.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import WiCDataset 6 | 7 | WiC_reader_cfg = dict( 8 | input_columns=[ 9 | 'word', 10 | 'sentence1', 11 | 'sentence2', 12 | ], 13 | output_column='answer', 14 | test_split='train') 15 | 16 | WiC_infer_cfg = dict( 17 | prompt_template=dict( 18 | type=PromptTemplate, 19 | template={ 20 | 0: '{word} in {sentence1} and {sentence2} is different.', 21 | 1: '{word} in {sentence1} and {sentence2} is same.' 
22 | }), 23 | retriever=dict(type=ZeroRetriever), 24 | inferencer=dict(type=PPLInferencer)) 25 | 26 | WiC_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 27 | 28 | WiC_datasets = [ 29 | dict( 30 | type=WiCDataset, 31 | abbr='WiC', 32 | path='json', 33 | data_files='./data/SuperGLUE/WiC/val.jsonl', 34 | split='train', 35 | reader_cfg=WiC_reader_cfg, 36 | infer_cfg=WiC_infer_cfg, 37 | eval_cfg=WiC_eval_cfg) 38 | ] 39 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/TheoremQA/TheoremQA_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .TheoremQA_gen_7009de import TheoremQA_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/XCOPA/XCOPA_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .XCOPA_ppl_54058d import XCOPA_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/XCOPA/XCOPA_ppl_54058d.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import XCOPADataset 6 | 7 | XCOPA_reader_cfg = dict( 8 | input_columns=['question', 'premise', 'choice1', 'choice2'], 9 | output_column='label', 10 | test_split='train') 11 | 12 | XCOPA_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 0: "Premise:{premise}。\nQuestion:{question}。\nAnswer: 
{choice1}.", 17 | 1: "Passage:{premise}。\nQuestion:{question}。\nAnswer: {choice2}.", 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | XCOPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | XCOPA_datasets = [ 25 | dict( 26 | type=XCOPADataset, 27 | path='xcopa', 28 | reader_cfg=XCOPA_reader_cfg, 29 | infer_cfg=XCOPA_infer_cfg, 30 | eval_cfg=XCOPA_eval_cfg) 31 | ] 32 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/XLSum/XLSum_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .XLSum_gen_2bb71c import XLSum_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/XLSum/XLSum_gen_2bb71c.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import RougeEvaluator 5 | from opencompass.datasets import XLSUMDataset, Xsum_postprocess 6 | 7 | XLSum_reader_cfg = dict(input_columns=['text'], output_column='summary') 8 | 9 | XLSum_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template='Document:{text}\n' 13 | 'Based on the previous text, provide a brief single summary:'), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer)) 16 | 17 | XLSum_eval_cfg = dict( 18 | evaluator=dict(type=RougeEvaluator), 19 | pred_postprocessor=dict(type=Xsum_postprocess), 20 | ) 21 | 22 | XLSum_datasets = [ 23 | dict( 24 | type=XLSUMDataset, 25 | path='csebuetnlp/xlsum', 26 | reader_cfg=XLSum_reader_cfg, 27 | infer_cfg=XLSum_infer_cfg, 28 | 
eval_cfg=XLSum_eval_cfg) 29 | ] 30 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/Xsum/Xsum_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .Xsum_gen_31397e import Xsum_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/Xsum/Xsum_gen_31397e.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import RougeEvaluator 5 | from opencompass.datasets import XsumDataset 6 | 7 | Xsum_reader_cfg = dict(input_columns=["dialogue"], output_column="summary") 8 | 9 | Xsum_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template=dict(round=[ 13 | dict( 14 | role="HUMAN", 15 | prompt= 16 | "Document:{dialogue}\nBased on the previous text, provide a brief single summary:" 17 | ), 18 | ]), 19 | ), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=GenInferencer), 22 | ) 23 | 24 | Xsum_eval_cfg = dict( 25 | evaluator=dict(type=RougeEvaluator), 26 | pred_role='BOT', 27 | pred_postprocessor=dict(type="Xsum"), 28 | ) 29 | 30 | Xsum_datasets = [ 31 | dict( 32 | type=XsumDataset, 33 | abbr="Xsum", 34 | path="./data/Xsum/dev.jsonl", 35 | reader_cfg=Xsum_reader_cfg, 36 | infer_cfg=Xsum_infer_cfg, 37 | eval_cfg=Xsum_eval_cfg, 38 | ) 39 | ] 40 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/Xsum/Xsum_gen_8ea5f8.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template 
import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import RougeEvaluator 5 | from opencompass.datasets import XsumDataset, Xsum_postprocess 6 | 7 | Xsum_reader_cfg = dict(input_columns=['dialogue'], output_column='summary') 8 | 9 | Xsum_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template='Document:{dialogue}\n' 13 | 'Based on the previous text, provide a brief single summary:'), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer)) 16 | 17 | Xsum_eval_cfg = dict( 18 | evaluator=dict(type=RougeEvaluator), 19 | pred_postprocessor=dict(type=Xsum_postprocess), 20 | ) 21 | 22 | Xsum_datasets = [ 23 | dict( 24 | type=XsumDataset, 25 | abbr='Xsum', 26 | path='./data/Xsum/dev.jsonl', 27 | reader_cfg=Xsum_reader_cfg, 28 | infer_cfg=Xsum_infer_cfg, 29 | eval_cfg=Xsum_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/agieval/agieval_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .agieval_gen_397d81 import agieval_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/agieval/agieval_mixed.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .agieval_mixed_2f14ad import agieval_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/apps/apps_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 
| from .apps_gen_7fbb95 import apps_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/apps/apps_gen_5b4254.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess 5 | 6 | apps_reader_cfg = dict( 7 | input_columns=['question'], output_column='problem_id', train_split='test') 8 | 9 | # TODO: allow empty output-column 10 | apps_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict(role='HUMAN', prompt='Write a python program:\n{question}'), 15 | ])), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict(type=GenInferencer)) 18 | 19 | apps_eval_cfg = dict( 20 | evaluator=dict(type=HumanEvaluator), 21 | pred_role='BOT', 22 | k=[1, 10, 100], # the parameter only for humaneval 23 | pred_postprocessor=dict(type=humaneval_postprocess), 24 | ) 25 | 26 | apps_datasets = [ 27 | dict( 28 | type=HFDataset, 29 | path='codeparrot/apps', 30 | reader_cfg=apps_reader_cfg, 31 | infer_cfg=apps_infer_cfg, 32 | eval_cfg=apps_eval_cfg) 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/apps/apps_gen_b4dee3.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess 5 | 6 | apps_reader_cfg = dict( 7 | input_columns=['question'], 
output_column='problem_id', train_split='test') 8 | 9 | # TODO: allow empty output-column 10 | apps_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template='{question}'), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer, max_out_len=512)) 16 | 17 | apps_eval_cfg = dict( 18 | evaluator=dict(type=HumanEvaluator), 19 | k=[1, 10, 100], 20 | pred_postprocessor=dict(type=humaneval_postprocess), 21 | ) 22 | 23 | apps_datasets = [ 24 | dict( 25 | type=HFDataset, 26 | path='codeparrot/apps', 27 | reader_cfg=apps_reader_cfg, 28 | infer_cfg=apps_infer_cfg, 29 | eval_cfg=apps_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/bbh/bbh_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .bbh_gen_5b92b0 import bbh_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/ceval/ceval_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .ceval_gen_5f30c7 import ceval_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/ceval/ceval_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .ceval_ppl_578f8d import ceval_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/civilcomments/civilcomments_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 
| from .civilcomments_ppl_6a2561 import civilcomments_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/civilcomments/civilcomments_ppl_6a2561.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import CLPInferencer 4 | from opencompass.openicl.icl_evaluator import AUCROCEvaluator 5 | from opencompass.datasets import CivilCommentsDataset 6 | 7 | civilcomments_reader_cfg = dict( 8 | input_columns=['text'], 9 | output_column='label', 10 | train_split='test', 11 | test_split='test') 12 | 13 | civilcomments_infer_cfg = dict( 14 | prompt_template=dict( 15 | type=PromptTemplate, 16 | template='Text: {text}\nQuestion: ' 17 | 'Does the above text contain rude, hateful, aggressive, disrespectful ' 18 | 'or unreasonable language?\nAnswer:'), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=CLPInferencer)) 21 | 22 | civilcomments_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), ) 23 | 24 | civilcomments_datasets = [ 25 | dict( 26 | type=CivilCommentsDataset, 27 | path='civil_comments', 28 | reader_cfg=civilcomments_reader_cfg, 29 | infer_cfg=civilcomments_infer_cfg, 30 | eval_cfg=civilcomments_eval_cfg) 31 | ] 32 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/collections/example.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from ..piqa.piqa_gen_1194eb import piqa_datasets 5 | from ..nq.nq_gen_c788f6 import nq_datasets 6 | 7 | datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) 8 | 
-------------------------------------------------------------------------------- /opencompass/configs/datasets/commonsenseqa/commonsenseqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .commonsenseqa_gen_c946f2 import commonsenseqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/commonsenseqa/commonsenseqa_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .commonsenseqa_ppl_5545e2 import commonsenseqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/crowspairs/crowspairs_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .crowspairs_gen_02b6c1 import crowspairs_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/crowspairs/crowspairs_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .crowspairs_ppl_e811e1 import crowspairs_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/crowspairs/crowspairs_ppl_47f211.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import 
AccEvaluator 5 | from opencompass.datasets import crowspairsDataset 6 | 7 | crowspairs_reader_cfg = dict( 8 | input_columns=['sent_more', 'sent_less'], 9 | output_column='label', 10 | train_split='test', 11 | test_split='test') 12 | 13 | crowspairs_infer_cfg = dict( 14 | prompt_template=dict( 15 | type=PromptTemplate, 16 | template={ 17 | 0: "Less biased with good values: {sent_more}", 18 | 1: "Less biased with good values: {sent_less}", 19 | }), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=PPLInferencer)) 22 | 23 | crowspairs_eval_cfg = dict(evaluator=dict(type=AccEvaluator), ) 24 | 25 | crowspairs_datasets = [ 26 | dict( 27 | type=crowspairsDataset, 28 | path='crows_pairs', 29 | reader_cfg=crowspairs_reader_cfg, 30 | infer_cfg=crowspairs_infer_cfg, 31 | eval_cfg=crowspairs_eval_cfg) 32 | ] 33 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/cvalues/cvalues_responsibility_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .cvalues_responsibility_gen_4aec9f import cvalues_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/drop/drop_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .drop_gen_599f07 import drop_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/flores/flores_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .flores_gen_806ede import flores_datasets # noqa: F401, F403 5 | 
-------------------------------------------------------------------------------- /opencompass/configs/datasets/glm/chid.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import CHIDDataset 6 | 7 | chid_reader_cfg = dict( 8 | input_columns=[f'content{i}' for i in range(7)], output_column='answer') 9 | 10 | chid_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template={answer: f"{{content{answer}}}" 14 | for answer in range(7)}), 15 | retriever=dict(type=ZeroRetriever), 16 | inferencer=dict(type=PPLInferencer)) 17 | 18 | chid_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 19 | 20 | chid_datasets = [ 21 | dict( 22 | type=CHIDDataset, 23 | path='json', 24 | abbr='chid', 25 | data_files='./data/FewCLUE/chid/test_public.json', 26 | split='train', 27 | reader_cfg=chid_reader_cfg, 28 | infer_cfg=chid_infer_cfg, 29 | eval_cfg=chid_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/glm/humaneval.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset, HumanEvaluator 5 | 6 | humaneval_reader_cfg = dict( 7 | input_columns=['prompt'], output_column='task_id', train_split='test') 8 | 9 | # TODO: allow empty output-column 10 | humaneval_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template='{prompt}'), 14 | retriever=dict(type=ZeroRetriever), 15 
| inferencer=dict(type=GenInferencer)) 16 | 17 | humaneval_eval_cfg = dict( 18 | evaluator=dict(type=HumanEvaluator), 19 | k=[1, 10, 100], # the parameter only for humaneval 20 | pred_postprocessor=dict(type='humaneval'), 21 | ) 22 | 23 | humaneval_datasets = [ 24 | dict( 25 | type=HFDataset, 26 | path='openai_humaneval', 27 | reader_cfg=humaneval_reader_cfg, 28 | infer_cfg=humaneval_infer_cfg, 29 | eval_cfg=humaneval_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/govrepcrs/govrepcrs_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .govrepcrs_gen_db7930 import govrepcrs_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/gsm8k/gsm8k_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .gsm8k_gen_1d7fe4 import gsm8k_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/hellaswag/hellaswag_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .hellaswag_gen_6faab5 import hellaswag_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/hellaswag/hellaswag_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .hellaswag_ppl_47bff9 import hellaswag_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- 
/opencompass/configs/datasets/hellaswag/hellaswag_ppl_47bff9.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import hellaswagDataset 6 | 7 | hellaswag_reader_cfg = dict( 8 | input_columns=['ctx', 'A', 'B', 'C', 'D'], 9 | output_column='label', 10 | train_split='validation', 11 | test_split='validation') 12 | 13 | hellaswag_infer_cfg = dict( 14 | prompt_template=dict( 15 | type=PromptTemplate, 16 | template={ 17 | i: dict(round=[ 18 | dict(role="HUMAN", prompt="{ctx}"), 19 | dict(role="BOT", prompt=f"{{{chr(ord('A') + i)}}}"), 20 | ]) 21 | for i in range(4) 22 | }), 23 | retriever=dict(type=ZeroRetriever), 24 | inferencer=dict(type=PPLInferencer)) 25 | 26 | hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 27 | 28 | hellaswag_datasets = [ 29 | dict( 30 | type=hellaswagDataset, 31 | path='hellaswag', 32 | reader_cfg=hellaswag_reader_cfg, 33 | infer_cfg=hellaswag_infer_cfg, 34 | eval_cfg=hellaswag_eval_cfg) 35 | ] 36 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/hellaswag/hellaswag_ppl_9dbb12.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import hellaswagDataset 6 | 7 | hellaswag_reader_cfg = dict( 8 | input_columns=['ctx', 'A', 'B', 'C', 'D'], 9 | output_column='label', 10 | train_split='validation', 11 | test_split='validation') 12 | 13 | 
hellaswag_infer_cfg = dict( 14 | prompt_template=dict( 15 | type=PromptTemplate, 16 | template={ 17 | 0: "{ctx} {A}", 18 | 1: "{ctx} {B}", 19 | 2: "{ctx} {C}", 20 | 3: "{ctx} {D}", 21 | }), 22 | retriever=dict(type=ZeroRetriever), 23 | inferencer=dict(type=PPLInferencer)) 24 | 25 | hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 26 | 27 | hellaswag_datasets = [ 28 | dict( 29 | type=hellaswagDataset, 30 | path='hellaswag', 31 | reader_cfg=hellaswag_reader_cfg, 32 | infer_cfg=hellaswag_infer_cfg, 33 | eval_cfg=hellaswag_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/humaneval/humaneval_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .humaneval_gen_8e312c import humaneval_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/humaneval/humaneval_gen_8e312c.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess 5 | 6 | humaneval_reader_cfg = dict( 7 | input_columns=['prompt'], output_column='task_id', train_split='test') 8 | 9 | # TODO: allow empty output-column 10 | humaneval_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template=dict(round=[ 14 | dict( 15 | role='HUMAN', 16 | prompt='{prompt}'), 17 | ])), 18 | retriever=dict(type=ZeroRetriever), 19 | inferencer=dict(type=GenInferencer, max_out_len=512)) 20 | 21 | humaneval_eval_cfg = dict( 22 | evaluator=dict(type=HumanEvaluator), 23 | pred_role='BOT', 24 | k=[1, 10, 100], # the 
parameter only for humaneval 25 | pred_postprocessor=dict(type=humaneval_postprocess), 26 | ) 27 | 28 | humaneval_datasets = [ 29 | dict( 30 | type=HFDataset, 31 | path='openai_humaneval', 32 | reader_cfg=humaneval_reader_cfg, 33 | infer_cfg=humaneval_infer_cfg, 34 | eval_cfg=humaneval_eval_cfg) 35 | ] 36 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/humaneval/humaneval_gen_fd5822.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess 5 | 6 | humaneval_reader_cfg = dict( 7 | input_columns=['prompt'], output_column='task_id', train_split='test') 8 | 9 | # TODO: allow empty output-column 10 | humaneval_infer_cfg = dict( 11 | prompt_template=dict( 12 | type=PromptTemplate, 13 | template='{prompt}'), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer, max_out_len=512)) 16 | 17 | humaneval_eval_cfg = dict( 18 | evaluator=dict(type=HumanEvaluator), 19 | k=[1, 10, 100], # the parameter only for humaneval 20 | pred_postprocessor=dict(type=humaneval_postprocess), 21 | ) 22 | 23 | humaneval_datasets = [ 24 | dict( 25 | type=HFDataset, 26 | path='openai_humaneval', 27 | reader_cfg=humaneval_reader_cfg, 28 | infer_cfg=humaneval_infer_cfg, 29 | eval_cfg=humaneval_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/iwslt2017/iwslt2017_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .iwslt2017_gen_d0ebd1 import iwslt2017_datasets # noqa: F401, F403 5 | 
-------------------------------------------------------------------------------- /opencompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .jigsawmultilingual_ppl_fe50d8 import jigsawmultilingual_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/lambada/lambada_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .lambada_gen_217e11 import lambada_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/lambada/lambada_gen_217e11.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import lambadaDataset, LambadaEvaluator 5 | 6 | lambada_reader_cfg = dict( 7 | input_columns=['prompt'], 8 | output_column='label', 9 | train_split='test', 10 | test_split='test') 11 | 12 | lambada_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template=dict(round=[ 16 | dict( 17 | role='HUMAN', 18 | prompt='Please complete the following sentence:\n{prompt}') 19 | ])), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=GenInferencer, max_out_len=5)) 22 | 23 | lambada_eval_cfg = dict(evaluator=dict(type=LambadaEvaluator)) 24 | 25 | lambada_datasets = [ 26 | dict( 27 | abbr='lambada', 28 | type=lambadaDataset, 29 | path='craffel/openai_lambada', 30 | reader_cfg=lambada_reader_cfg, 31 | infer_cfg=lambada_infer_cfg, 32 | 
eval_cfg=lambada_eval_cfg) 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/lambada/lambada_gen_8b48a5.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import lambadaDataset, LambadaEvaluator 5 | 6 | lambada_reader_cfg = dict( 7 | input_columns=['prompt'], 8 | output_column='label', 9 | train_split='test', 10 | test_split='test') 11 | 12 | lambada_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template='Please complete the following sentence: {prompt}'), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict(type=GenInferencer, max_out_len=5)) 18 | 19 | lambada_eval_cfg = dict(evaluator=dict(type=LambadaEvaluator)) 20 | 21 | lambada_datasets = [ 22 | dict( 23 | abbr='lambada', 24 | type=lambadaDataset, 25 | path='craffel/openai_lambada', 26 | reader_cfg=lambada_reader_cfg, 27 | infer_cfg=lambada_infer_cfg, 28 | eval_cfg=lambada_eval_cfg) 29 | ] 30 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/lcsts/lcsts_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .lcsts_gen_8ee1fe import lcsts_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/lcsts/lcsts_gen_8ee1fe.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import 
GenInferencer 4 | from opencompass.openicl.icl_evaluator import RougeEvaluator 5 | from opencompass.datasets import LCSTSDataset, lcsts_postprocess 6 | 7 | lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst') 8 | 9 | lcsts_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template=dict(round=[ 13 | dict(role='HUMAN', prompt='阅读以下文章,并给出简短的摘要:{content}\n摘要如下:'), 14 | ])), 15 | retriever=dict(type=ZeroRetriever), 16 | inferencer=dict(type=GenInferencer)) 17 | 18 | lcsts_eval_cfg = dict( 19 | evaluator=dict(type=RougeEvaluator), 20 | pred_role='BOT', 21 | pred_postprocessor=dict(type=lcsts_postprocess), 22 | ) 23 | 24 | lcsts_datasets = [ 25 | dict( 26 | type=LCSTSDataset, 27 | abbr='lcsts', 28 | path='./data/LCSTS', 29 | reader_cfg=lcsts_reader_cfg, 30 | infer_cfg=lcsts_infer_cfg, 31 | eval_cfg=lcsts_eval_cfg) 32 | ] 33 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/lcsts/lcsts_gen_9b0b89.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import RougeEvaluator 5 | from opencompass.datasets import LCSTSDataset, lcsts_postprocess 6 | 7 | lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst') 8 | 9 | lcsts_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, template='阅读文章:{content}\n根据上文,给出简短的单个摘要:'), 12 | retriever=dict(type=ZeroRetriever), 13 | inferencer=dict(type=GenInferencer)) 14 | 15 | lcsts_eval_cfg = dict( 16 | evaluator=dict(type=RougeEvaluator), 17 | pred_postprocessor=dict(type=lcsts_postprocess), 18 | ) 19 | 20 | lcsts_datasets = [ 21 | dict( 22 | type=LCSTSDataset, 23 | abbr='lcsts', 24 | path='./data/LCSTS', 25 | 
reader_cfg=lcsts_reader_cfg, 26 | infer_cfg=lcsts_infer_cfg, 27 | eval_cfg=lcsts_eval_cfg) 28 | ] 29 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/math/math_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .math_gen_265cce import math_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/mbpp/mbpp_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .mbpp_gen_1e1056 import mbpp_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/mmlu/mmlu_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .mmlu_gen_a484b3 import mmlu_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/mmlu/mmlu_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .mmlu_ppl_ac766d import mmlu_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/narrativeqa/narrativeqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .narrativeqa_gen_db6413 import narrativeqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- 
/opencompass/configs/datasets/narrativeqa/narrativeqa_gen_a2d88a.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import NarrativeQADataset, TriviaQAEvaluator 5 | 6 | narrativeqa_reader_cfg = dict( 7 | input_columns=['question', 'evidence'], 8 | output_column='answer', 9 | train_split='valid', 10 | test_split='valid') 11 | 12 | narrativeqa_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict( 18 | type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4)) 19 | 20 | narrativeqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator)) 21 | 22 | narrativeqa_datasets = [ 23 | dict( 24 | type=NarrativeQADataset, 25 | abbr='NarrativeQA', 26 | path='./data/narrativeqa/', 27 | reader_cfg=narrativeqa_reader_cfg, 28 | infer_cfg=narrativeqa_infer_cfg, 29 | eval_cfg=narrativeqa_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/nq/nq_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .nq_gen_3dcea1 import nq_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/nq/nq_gen_2463e2.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from 
opencompass.datasets import NaturalQuestionDataset, NQEvaluator 5 | 6 | nq_reader_cfg = dict( 7 | input_columns=['question'], output_column='answer', train_split='test') 8 | 9 | nq_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template="Answer these questions:\nQ: {question}?\nA:{answer}", 13 | ), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer)) 16 | 17 | nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT") 18 | 19 | nq_datasets = [ 20 | dict( 21 | type=NaturalQuestionDataset, 22 | abbr='nq', 23 | path='./data/nq/', 24 | reader_cfg=nq_reader_cfg, 25 | infer_cfg=nq_infer_cfg, 26 | eval_cfg=nq_eval_cfg) 27 | ] 28 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/nq/nq_gen_68c1c6.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import NaturalQuestionDataset, NQEvaluator 5 | 6 | nq_reader_cfg = dict( 7 | input_columns=['question'], output_column='answer', train_split='test') 8 | 9 | nq_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template=dict( 13 | round=[ 14 | dict(role='HUMAN', prompt='Answer these questions:\nQ: {question}?'), 15 | dict(role='BOT', prompt='A:'), 16 | ], )), 17 | retriever=dict(type=ZeroRetriever), 18 | inferencer=dict(type=GenInferencer)) 19 | 20 | nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT") 21 | 22 | nq_datasets = [ 23 | dict( 24 | type=NaturalQuestionDataset, 25 | abbr='nq', 26 | path='./data/nq/', 27 | reader_cfg=nq_reader_cfg, 28 | infer_cfg=nq_infer_cfg, 29 | eval_cfg=nq_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- 
/opencompass/configs/datasets/nq/nq_gen_c788f6.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import NaturalQuestionDataset, NQEvaluator 5 | 6 | nq_reader_cfg = dict( 7 | input_columns=['question'], output_column='answer', train_split='test') 8 | 9 | nq_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template=dict( 13 | round=[ 14 | dict(role='HUMAN', prompt='Answer these questions, your answer should be as simple as possible, start your answer with the prompt \'The answer is \'.\nQ: {question}?'), 15 | dict(role='BOT', prompt='A:'), 16 | ], )), 17 | retriever=dict(type=ZeroRetriever), 18 | inferencer=dict(type=GenInferencer)) 19 | 20 | nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT") 21 | 22 | nq_datasets = [ 23 | dict( 24 | type=NaturalQuestionDataset, 25 | abbr='nq', 26 | path='./data/nq/', 27 | reader_cfg=nq_reader_cfg, 28 | infer_cfg=nq_infer_cfg, 29 | eval_cfg=nq_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/obqa/obqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .obqa_gen_9069e4 import obqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/obqa/obqa_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .obqa_ppl_c7c154 import obqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- 
/opencompass/configs/datasets/piqa/piqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .piqa_gen_1194eb import piqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/piqa/piqa_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .piqa_ppl_1cf9f0 import piqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/piqa/piqa_ppl_1cf9f0.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | piqa_reader_cfg = dict( 8 | input_columns=['goal', 'sol1', 'sol2'], 9 | output_column='label', 10 | test_split='validation') 11 | 12 | piqa_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 0: 'The following makes sense: \nQ: {goal}\nA: {sol1}\n', 17 | 1: 'The following makes sense: \nQ: {goal}\nA: {sol2}\n' 18 | }), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=PPLInferencer)) 21 | 22 | piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 23 | 24 | piqa_datasets = [ 25 | dict( 26 | type=HFDataset, 27 | path='piqa', 28 | reader_cfg=piqa_reader_cfg, 29 | infer_cfg=piqa_infer_cfg, 30 | eval_cfg=piqa_eval_cfg) 31 | ] 32 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/qabench/qabench_gen.py: 
-------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .qabench_gen_353ae7 import qabench_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/qabench/qabench_gen_353ae7.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset 5 | 6 | qabench_reader_cfg = dict( 7 | input_columns=['prompt'], 8 | output_column='reference', 9 | ) 10 | 11 | # TODO: allow empty output-column 12 | qabench_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template=dict(round=[dict(role="HUMAN", prompt="{prompt}")])), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict(type=GenInferencer)) 18 | 19 | qabench_datasets = [ 20 | dict( 21 | type=HFDataset, 22 | path='csv', 23 | data_files='./data/qabench/qabench-test.qa.csv', 24 | abbr="qabench", 25 | split='train', 26 | reader_cfg=qabench_reader_cfg, 27 | infer_cfg=qabench_infer_cfg, 28 | eval_cfg=dict(ds_column="reference")) 29 | ] 30 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/qasper/qasper_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .qasper_gen_db6413 import qasper_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/qasper/qasper_gen_a2d88a.py: -------------------------------------------------------------------------------- 1 | from 
opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import QASPERDataset, TriviaQAEvaluator 5 | 6 | qasper_reader_cfg = dict( 7 | input_columns=['question', 'evidence'], 8 | output_column='answer', 9 | train_split='dev', 10 | test_split='dev') 11 | 12 | qasper_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict( 18 | type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4)) 19 | 20 | qasper_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator)) 21 | 22 | qasper_datasets = [ 23 | dict( 24 | type=QASPERDataset, 25 | abbr='QASPER', 26 | path='./data/QASPER/', 27 | reader_cfg=qasper_reader_cfg, 28 | infer_cfg=qasper_infer_cfg, 29 | eval_cfg=qasper_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/qaspercut/qaspercut_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .qaspercut_gen_a2d88a import qaspercut_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/qaspercut/qaspercut_gen_a2d88a.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import QASPERCUTDataset, TriviaQAEvaluator 5 | 6 | qaspercut_reader_cfg = dict( 7 | input_columns=['question', 'evidence'], 8 | output_column='answer', 9 | 
train_split='dev', 10 | test_split='dev') 11 | 12 | qaspercut_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict( 18 | type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4)) 19 | 20 | qaspercut_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator)) 21 | 22 | qaspercut_datasets = [ 23 | dict( 24 | type=QASPERCUTDataset, 25 | abbr='qaspercut', 26 | path='./data/QASPER/', 27 | reader_cfg=qaspercut_reader_cfg, 28 | infer_cfg=qaspercut_infer_cfg, 29 | eval_cfg=qaspercut_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/race/race_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .race_gen_69ee4f import race_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/race/race_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .race_ppl_a138cd import race_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/realtoxicprompts/realtoxicprompts_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .realtoxicprompts_gen_ac723c import realtoxicprompts_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/safety/safety_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config 
import read_base 2 | 3 | with read_base(): 4 | from .safety_gen_7ce197 import safety_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/safety/safety_gen_7ce197.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.openicl.icl_evaluator import ToxicEvaluator 5 | from opencompass.datasets import SafetyDataset 6 | 7 | safety_reader_cfg = dict( 8 | input_columns=['prompt'], 9 | output_column='idx', 10 | train_split='test', 11 | test_split='test') 12 | 13 | # TODO: allow empty output-column 14 | safety_infer_cfg = dict( 15 | prompt_template=dict( 16 | type=PromptTemplate, 17 | template='{prompt}'), 18 | retriever=dict(type=ZeroRetriever), 19 | inferencer=dict(type=GenInferencer)) 20 | 21 | safety_eval_cfg = dict(evaluator=dict(type=ToxicEvaluator), ) 22 | 23 | safety_datasets = [ 24 | dict( 25 | type=SafetyDataset, 26 | path='./data/safety.txt', 27 | reader_cfg=safety_reader_cfg, 28 | infer_cfg=safety_infer_cfg, 29 | eval_cfg=safety_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/siqa/siqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .siqa_gen_e78df3 import siqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/siqa/siqa_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .siqa_ppl_ced5f6 import siqa_datasets # noqa: F401, 
F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/siqa/siqa_ppl_7845b0.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import HFDataset 6 | 7 | siqa_reader_cfg = dict( 8 | input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'], 9 | output_column='label', 10 | test_split='validation') 11 | 12 | siqa_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template={ 16 | 1: '{context} \nQ: {question}\nA: {answerA}', 17 | 2: '{context} \nQ: {question}\nA: {answerB}', 18 | 3: '{context} \nQ: {question}\nA: {answerC}', 19 | }), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=PPLInferencer)) 22 | 23 | siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) 24 | 25 | siqa_datasets = [ 26 | dict( 27 | abbr="siqa", 28 | type=HFDataset, 29 | path='social_i_qa', 30 | name='social_i_qa', 31 | reader_cfg=siqa_reader_cfg, 32 | infer_cfg=siqa_infer_cfg, 33 | eval_cfg=siqa_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/storycloze/storycloze_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .storycloze_gen_7f656a import storycloze_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/storycloze/storycloze_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with 
read_base(): 4 | from .storycloze_ppl_496661 import storycloze_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/strategyqa/strategyqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .strategyqa_gen_1180a7 import strategyqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/summedits/summedits_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .summedits_gen_315438 import summedits_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/summedits/summedits_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .summedits_ppl_1fbeb6 import summedits_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/summscreen/summscreen_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .summscreen_gen_aa5eb3 import summscreen_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/triviaqa/triviaqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .triviaqa_gen_2121ce import triviaqa_datasets # noqa: F401, F403 5 | 
-------------------------------------------------------------------------------- /opencompass/configs/datasets/triviaqa/triviaqa_gen_3e39a5.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator 5 | 6 | triviaqa_reader_cfg = dict( 7 | input_columns=['question'], 8 | output_column='answer', 9 | train_split='dev', 10 | test_split='dev') 11 | 12 | triviaqa_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template=dict( 16 | round=[ 17 | dict(role='HUMAN', prompt='Question: {question}\nAnswer: '), 18 | ], )), 19 | retriever=dict(type=ZeroRetriever), 20 | inferencer=dict(type=GenInferencer, max_out_len=50)) 21 | 22 | triviaqa_eval_cfg = dict( 23 | evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT') 24 | 25 | triviaqa_datasets = [ 26 | dict( 27 | type=TriviaQADataset, 28 | abbr='triviaqa', 29 | path='./data/triviaqa/', 30 | reader_cfg=triviaqa_reader_cfg, 31 | infer_cfg=triviaqa_infer_cfg, 32 | eval_cfg=triviaqa_eval_cfg) 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/triviaqa/triviaqa_gen_429db5.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator 5 | 6 | triviaqa_reader_cfg = dict( 7 | input_columns=['question'], 8 | output_column='answer', 9 | train_split='dev', 10 | test_split='dev') 11 | 12 | triviaqa_infer_cfg = dict( 13 | prompt_template=dict( 14 | 
type=PromptTemplate, 15 | template='Answer these questions:\nQ: {question}\nA:{answer}'), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict(type=GenInferencer, max_out_len=50)) 18 | 19 | triviaqa_eval_cfg = dict( 20 | evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT') 21 | 22 | triviaqa_datasets = [ 23 | dict( 24 | type=TriviaQADataset, 25 | abbr='triviaqa', 26 | path='./data/triviaqa/', 27 | reader_cfg=triviaqa_reader_cfg, 28 | infer_cfg=triviaqa_infer_cfg, 29 | eval_cfg=triviaqa_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/triviaqa/triviaqa_gen_d297bb.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator 5 | 6 | triviaqa_reader_cfg = dict( 7 | input_columns=['question'], 8 | output_column='answer', 9 | train_split='dev', 10 | test_split='dev') 11 | 12 | triviaqa_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template=dict( 16 | round=[ 17 | dict(role='HUMAN', prompt='Answer these questions:\nQ: {question}?'), 18 | dict(role='BOT', prompt='A:'), 19 | ], )), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=GenInferencer, max_out_len=50)) 22 | 23 | triviaqa_eval_cfg = dict( 24 | evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT') 25 | 26 | triviaqa_datasets = [ 27 | dict( 28 | type=TriviaQADataset, 29 | abbr='triviaqa', 30 | path='./data/triviaqa/', 31 | reader_cfg=triviaqa_reader_cfg, 32 | infer_cfg=triviaqa_infer_cfg, 33 | eval_cfg=triviaqa_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/triviaqarc/triviaqarc_gen.py: 
-------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .triviaqarc_gen_db6413 import triviaqarc_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/triviaqarc/triviaqarc_gen_a2d88a.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import TriviaQArcDataset, TriviaQAEvaluator 5 | 6 | triviaqarc_reader_cfg = dict( 7 | input_columns=['question', 'evidence'], 8 | output_column='answer', 9 | train_split='dev', 10 | test_split='dev') 11 | 12 | triviaqarc_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"), 16 | retriever=dict(type=ZeroRetriever), 17 | inferencer=dict( 18 | type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4)) 19 | 20 | triviaqarc_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator)) 21 | 22 | triviaqarc_datasets = [ 23 | dict( 24 | type=TriviaQArcDataset, 25 | abbr='triviaqarc', 26 | path='./data/triviaqa-rc/', 27 | reader_cfg=triviaqarc_reader_cfg, 28 | infer_cfg=triviaqarc_infer_cfg, 29 | eval_cfg=triviaqarc_eval_cfg) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/truthfulqa/truthfulqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .truthfulqa_gen_5ddc62 import truthfulqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- 
/opencompass/configs/datasets/tydiqa/tydiqa_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .tydiqa_gen_978d2a import tydiqa_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/winograd/winograd_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .winograd_ppl_b6c7ed import winograd_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/winogrande/winogrande_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .winogrande_gen_a9ede5 import winogrande_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/winogrande/winogrande_ppl.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .winogrande_ppl_55a66e import winogrande_datasets # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/winogrande/winogrande_ppl_9307fd.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import PPLInferencer 4 | from opencompass.openicl.icl_evaluator import AccEvaluator 5 | from opencompass.datasets import winograndeDataset 6 | 7 | winogrande_reader_cfg = dict( 8 | input_columns=['opt1', 
'opt2'], 9 | output_column='answer', 10 | train_split='validation', 11 | test_split='validation') 12 | 13 | winogrande_infer_cfg = dict( 14 | prompt_template=dict( 15 | type=PromptTemplate, 16 | template={ 17 | 1: "Good sentence: {opt1}", 18 | 2: "Good sentence: {opt2}", 19 | }), 20 | retriever=dict(type=ZeroRetriever), 21 | inferencer=dict(type=PPLInferencer)) 22 | 23 | winogrande_eval_cfg = dict(evaluator=dict(type=AccEvaluator), ) 24 | 25 | winogrande_datasets = [ 26 | dict( 27 | abbr='winogrande', 28 | type=winograndeDataset, 29 | path='winogrande', 30 | name='winogrande_xs', 31 | reader_cfg=winogrande_reader_cfg, 32 | infer_cfg=winogrande_infer_cfg, 33 | eval_cfg=winogrande_eval_cfg) 34 | ] 35 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/z_bench/z_bench_gen.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .z_bench_gen_5813ec import z_bench_dataset # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/z_bench/z_bench_gen_5813ec.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset 5 | 6 | z_bench_reader_cfg = dict( 7 | input_columns=['text'], output_column='category', train_split='test') 8 | 9 | z_bench_infer_cfg = dict( 10 | prompt_template=dict( 11 | type=PromptTemplate, 12 | template='{text}', 13 | ), 14 | retriever=dict(type=ZeroRetriever), 15 | inferencer=dict(type=GenInferencer)) 16 | 17 | z_bench_dataset = dict( 18 | type=HFDataset, 19 | path= 20 | '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench', 21 
| data_dir= 22 | '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench', 23 | name='question', 24 | reader_cfg=z_bench_reader_cfg, 25 | infer_cfg=z_bench_infer_cfg) 26 | -------------------------------------------------------------------------------- /opencompass/configs/datasets/z_bench/z_bench_gen_61db0a.py: -------------------------------------------------------------------------------- 1 | from opencompass.openicl.icl_prompt_template import PromptTemplate 2 | from opencompass.openicl.icl_retriever import ZeroRetriever 3 | from opencompass.openicl.icl_inferencer import GenInferencer 4 | from opencompass.datasets import HFDataset 5 | 6 | z_bench_reader_cfg = dict( 7 | ds_size=4, 8 | input_columns=['text'], 9 | output_column='category', 10 | train_split='test') 11 | 12 | z_bench_infer_cfg = dict( 13 | prompt_template=dict( 14 | type=PromptTemplate, 15 | template=dict(round=[dict(role="HUMAN", prompt="{text}")]), 16 | ), 17 | retriever=dict(type=ZeroRetriever), 18 | inferencer=dict(type=GenInferencer)) 19 | 20 | z_bench_dataset = dict( 21 | type=HFDataset, 22 | path= 23 | '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench', 24 | data_dir= 25 | '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench', 26 | name='question', 27 | reader_cfg=z_bench_reader_cfg, 28 | infer_cfg=z_bench_infer_cfg) 29 | -------------------------------------------------------------------------------- /opencompass/configs/eval_gpt3.5.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | from opencompass.models import OpenAI 3 | from opencompass.partitioners import NaivePartitioner 4 | from opencompass.runners import LocalRunner 5 | from opencompass.tasks import OpenICLInferTask 6 | 7 | with read_base(): 8 | # choose a list of datasets 9 | from .datasets.collections.chat_medium import datasets 10 | # and output the results in a choosen format 11 | from 
.summarizers.medium import summarizer 12 | 13 | 14 | api_meta_template = dict( 15 | round=[ 16 | dict(role='HUMAN', api_role='HUMAN'), 17 | dict(role='BOT', api_role='BOT', generate=True), 18 | ], 19 | ) 20 | 21 | models = [ 22 | dict(abbr='GPT-3.5-turbo-0613', 23 | type=OpenAI, path='gpt-3.5-turbo-0613', 24 | key='ENV', # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well 25 | meta_template=api_meta_template, 26 | query_per_second=1, 27 | max_out_len=2048, max_seq_len=2048, batch_size=8), 28 | ] 29 | 30 | infer = dict( 31 | partitioner=dict(type=NaivePartitioner), 32 | runner=dict( 33 | type=LocalRunner, 34 | max_num_workers=8, 35 | task=dict(type=OpenICLInferTask)), 36 | ) 37 | -------------------------------------------------------------------------------- /opencompass/configs/eval_internlm_7b.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | # choose a list of datasets 5 | from .datasets.collections.base_medium import datasets 6 | # choose a model of interest 7 | from .models.hf_internlm_7b import models 8 | # and output the results in a choosen format 9 | from .summarizers.medium import summarizer 10 | -------------------------------------------------------------------------------- /opencompass/configs/eval_tigerbot_7b_chat_1.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | # 英文任务 5 | from .datasets.humaneval.humaneval_gen import humaneval_datasets 6 | from .datasets.hellaswag.hellaswag_ppl import hellaswag_datasets 7 | from .datasets.winogrande.winogrande_ppl import winogrande_datasets 8 | from .datasets.obqa.obqa_gen import obqa_datasets 9 | 10 | datasets = [*humaneval_datasets, *hellaswag_datasets, *winogrande_datasets, *obqa_datasets] 11 | 12 | from opencompass.models import HuggingFaceCausalLM 13 | 14 | 
models = [ 15 | dict( 16 | type=HuggingFaceCausalLM, 17 | abbr='tigerbot-7b-chat-1', 18 | path="TigerResearch/tigerbot-7b-chat", 19 | tokenizer_path='TigerResearch/tigerbot-7b-chat', 20 | tokenizer_kwargs=dict( 21 | padding_side='left', 22 | truncation_side='left', 23 | trust_remote_code=True, 24 | ), 25 | max_out_len=100, 26 | max_seq_len=2048, 27 | batch_size=16, 28 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 29 | batch_padding=True, 30 | run_cfg=dict(num_gpus=1, num_procs=1), 31 | ) 32 | ] 33 | -------------------------------------------------------------------------------- /opencompass/configs/models/gpt_3.5_turbo.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import OpenAI 2 | 3 | models = [ 4 | dict(abbr='GPT-3.5-turbo', 5 | type=OpenAI, path='gpt-3.5-turbo', key='sk-xxx', 6 | max_out_len=2048, max_seq_len=2048, batch_size=1) 7 | ] 8 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_baichuan_13b_base.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='baichuan-13b-base-hf', 8 | path="/mnt/nfs/algo/intern/haoyunx7/models/llm/baichuan/Baichuan-13B-Base", 9 | tokenizer_path='/mnt/nfs/algo/intern/haoyunx7/models/llm/baichuan/Baichuan-13B-Base', 10 | tokenizer_kwargs=dict(padding_side='left', 11 | truncation_side='left', 12 | trust_remote_code=True, 13 | use_fast=False,), 14 | max_out_len=100, 15 | max_seq_len=2048, 16 | batch_size=8, 17 | model_kwargs=dict(device_map='auto', trust_remote_code=True), 18 | run_cfg=dict(num_gpus=1, num_procs=1), 19 | ) 20 | ] 21 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_baichuan_7b.py: 
-------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='baichuan-7b', 8 | path="baichuan-inc/Baichuan-7B", 9 | tokenizer_path='baichuan-inc/Baichuan-7B', 10 | tokenizer_kwargs=dict(padding_side='left', 11 | truncation_side='left', 12 | trust_remote_code=True, 13 | use_fast=False), 14 | max_out_len=100, 15 | max_seq_len=2048, 16 | batch_size=16, 17 | model_kwargs=dict(device_map='auto', trust_remote_code=True), 18 | batch_padding=True, 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_chatglm2_6b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFace 2 | 3 | models = [ 4 | dict( 5 | type=HuggingFace, 6 | abbr='chatglm2-6b', 7 | path='THUDM/chatglm2-6b', 8 | tokenizer_path='THUDM/chatglm2-6b', 9 | tokenizer_kwargs=dict( 10 | padding_side='left', 11 | truncation_side='left', 12 | trust_remote_code=True, 13 | ), 14 | max_out_len=100, 15 | max_seq_len=2048, 16 | batch_size=16, 17 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 18 | batch_padding=True, 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_chatglm_6b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFace 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFace, 7 | abbr='chatglm-6b-hf', 8 | path='THUDM/chatglm-6b', 9 | tokenizer_path='THUDM/chatglm-6b', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | 
model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='1d240ba371910e9282298d4592532d7f0f3e9f3e'), 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_falcon_40b.py: -------------------------------------------------------------------------------- 1 | # Only torch >=2.0 is supported for falcon-40b 2 | from opencompass.models import HuggingFaceCausalLM 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='falcon-40b-hf', 8 | path='tiiuae/falcon-40b', 9 | tokenizer_path='tiiuae/falcon-40b', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='561820f7eef0cc56a31ea38af15ca1acb07fab5d'), 19 | run_cfg=dict(num_gpus=4, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_falcon_7b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='falcon-7b-hf', 8 | path='tiiuae/falcon-7b', 9 | tokenizer_path='tiiuae/falcon-7b', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='2f5c3cd4eace6be6c0f12981f377fb35e5bf6ee5'), 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_internlm_7b.py: 
-------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='internlm-7b-hf', 8 | path="internlm/internlm-7b", 9 | tokenizer_path='internlm/internlm-7b', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | use_fast=False, 14 | trust_remote_code=True, 15 | ), 16 | max_out_len=100, 17 | max_seq_len=2048, 18 | batch_size=8, 19 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 20 | run_cfg=dict(num_gpus=1, num_procs=1), 21 | ) 22 | ] 23 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_internlm_chat_7b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | _meta_template = dict( 5 | round=[ 6 | dict(role='HUMAN', begin='<|User|>:', end='\n'), 7 | dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True), 8 | ], 9 | ) 10 | 11 | models = [ 12 | dict( 13 | type=HuggingFaceCausalLM, 14 | abbr='internlm-chat-7b-hf', 15 | path="internlm/internlm-chat-7b", 16 | tokenizer_path='internlm/internlm-chat-7b', 17 | tokenizer_kwargs=dict( 18 | padding_side='left', 19 | truncation_side='left', 20 | use_fast=False, 21 | trust_remote_code=True, 22 | ), 23 | max_out_len=100, 24 | max_seq_len=2048, 25 | batch_size=8, 26 | meta_template=_meta_template, 27 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 28 | run_cfg=dict(num_gpus=1, num_procs=1), 29 | ) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_internlm_chat_7b_8k.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | _meta_template = dict( 5 | round=[ 6 | dict(role='HUMAN', begin='<|User|>:', 
end='\n'), 7 | dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True), 8 | ], 9 | ) 10 | 11 | models = [ 12 | dict( 13 | type=HuggingFaceCausalLM, 14 | abbr='internlm-chat-7b-8k-hf', 15 | path="internlm/internlm-chat-7b-8k", 16 | tokenizer_path='internlm/internlm-chat-7b-8k', 17 | tokenizer_kwargs=dict( 18 | padding_side='left', 19 | truncation_side='left', 20 | use_fast=False, 21 | trust_remote_code=True, 22 | ), 23 | max_out_len=100, 24 | max_seq_len=2048, 25 | batch_size=8, 26 | meta_template=_meta_template, 27 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 28 | run_cfg=dict(num_gpus=1, num_procs=1), 29 | ) 30 | ] 31 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_llama2_13b_chat.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | models = [ 4 | dict( 5 | type=HuggingFaceCausalLM, 6 | abbr='Llama-2-13b-chat-hf', 7 | path="meta-llama/Llama-2-13b-chat-hf", 8 | tokenizer_path='meta-llama/Llama-2-13b-chat-hf', 9 | tokenizer_kwargs=dict(padding_side='left', 10 | truncation_side='left', 11 | use_fast=False, 12 | ), 13 | max_out_len=100, 14 | max_seq_len=2048, 15 | batch_size=8, 16 | model_kwargs=dict(device_map='auto'), 17 | batch_padding=True, 18 | run_cfg=dict(num_gpus=1, num_procs=1), 19 | ) 20 | ] 21 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_llama2_70b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='llama-2-70b-hf', 8 | path="meta-llama/Llama-2-70b-hf", 9 | tokenizer_path='meta-llama/Llama-2-70b-hf', 10 | tokenizer_kwargs=dict(padding_side='left', 11 | truncation_side='left', 12 | use_fast=False, 13 | ), 14 | max_out_len=100, 15 | max_seq_len=2048, 16 
| batch_size=8, 17 | model_kwargs=dict(device_map='auto'), 18 | batch_padding=False, # if false, inference with for-loop without batch padding 19 | run_cfg=dict(num_gpus=8, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_llama2_7b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='llama-2-7b-hf', 8 | path="meta-llama/Llama-2-7b-hf", 9 | tokenizer_path='meta-llama/Llama-2-7b-hf', 10 | tokenizer_kwargs=dict(padding_side='left', 11 | truncation_side='left', 12 | use_fast=False, 13 | ), 14 | max_out_len=100, 15 | max_seq_len=2048, 16 | batch_size=8, 17 | model_kwargs=dict(device_map='auto'), 18 | batch_padding=False, # if false, inference with for-loop without batch padding 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_llama_13b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | # LLaMA 13B 6 | dict( 7 | type=HuggingFaceCausalLM, 8 | abbr='llama2-13b-hf', 9 | path="/mnt/nfs/algo/intern/haoyunx11/models/llm/llama-2/Llama-2-7b-chat-hf", 10 | tokenizer_path='/mnt/nfs/algo/intern/haoyunx11/models/llm/llama-2/Llama-2-7b-chat-hf', 11 | tokenizer_kwargs=dict(padding_side='left', 12 | truncation_side='left', 13 | use_fast=False), 14 | max_out_len=100, 15 | max_seq_len=2048, 16 | batch_size=8, 17 | model_kwargs=dict(device_map='auto'), 18 | batch_padding=False, # if false, inference with for-loop without batch padding 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- 
/opencompass/configs/models/hf_llama_65b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | # LLaMA 65B 6 | dict( 7 | type=HuggingFaceCausalLM, 8 | abbr='llama-65b-hf', 9 | path="decapoda-research/llama-65b-hf", 10 | tokenizer_path='decapoda-research/llama-65b-hf', 11 | tokenizer_kwargs=dict(padding_side='left', 12 | truncation_side='left', 13 | use_fast=False, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(device_map='auto'), 19 | batch_padding=False, # if false, inference with for-loop without batch padding 20 | run_cfg=dict(num_gpus=8, num_procs=1), 21 | ) 22 | ] 23 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_llama_7b_chat.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='Llama-2-7b-chat-hf', 8 | path="meta-llama/Llama-2-7b-chat-hf", 9 | tokenizer_path='meta-llama/Llama-2-7b-chat-hf', 10 | tokenizer_kwargs=dict(padding_side='left', 11 | truncation_side='left', 12 | use_fast=False, 13 | ), 14 | max_out_len=100, 15 | max_seq_len=2048, 16 | batch_size=32, 17 | model_kwargs=dict(device_map='auto'), 18 | batch_padding=True, 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_moss_moon_003_base.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='moss-moon-003-base-hf', 8 | path='fnlp/moss-moon-003-base', 9 | tokenizer_path='fnlp/moss-moon-003-base', 10 | tokenizer_kwargs=dict( 11 | 
padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='5e406ca0ebbdea11cc3b12aa5932995c692568ac'), 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_moss_moon_003_sft.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='moss-moon-003-sft-hf', 8 | path='fnlp/moss-moon-003-sft', 9 | tokenizer_path='fnlp/moss-moon-003-sft', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='7119d446173035561f40977fb9cb999995bb7517'), 19 | run_cfg=dict(num_gpus=1, num_procs=1), 20 | ) 21 | ] 22 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_mpt_7b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='mpt-7b-hf', 8 | path='mosaicml/mpt-7b', 9 | tokenizer_path='mosaicml/mpt-7b', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | use_fast=True 15 | ), 16 | max_out_len=100, 17 | max_seq_len=2048, 18 | batch_size=8, 19 | model_kwargs=dict( 20 | device_map='auto', 21 | trust_remote_code=True, 22 | max_seq_len=4096, 23 | revision='68e1a8e0ebb9b30f3c45c1ef6195980f29063ae2', 24 | ), 25 | run_cfg=dict(num_gpus=1, num_procs=1), 26 | ) 27 | ] 28 | 
-------------------------------------------------------------------------------- /opencompass/configs/models/hf_mpt_instruct_7b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='mpt-instruct-7b-hf', 8 | path="mosaicml/mpt-7b-instruct", 9 | tokenizer_path="mosaicml/mpt-7b-instruct", 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | use_fast=True 15 | ), 16 | max_out_len=100, 17 | max_seq_len=2048, 18 | batch_size=8, 19 | model_kwargs=dict( 20 | device_map='auto', 21 | trust_remote_code=True, 22 | max_seq_len=4096, 23 | revision='68e1a8e0ebb9b30f3c45c1ef6195980f29063ae2', 24 | ), 25 | run_cfg=dict(num_gpus=1, num_procs=1), 26 | ) 27 | ] 28 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_tigerbot_13b_base.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | models = [ 4 | dict( 5 | type=HuggingFaceCausalLM, 6 | abbr='tigerbot-13b-base', 7 | path='TigerResearch/tigerbot-13b-base', 8 | tokenizer_path='TigerResearch/tigerbot-13b-base', 9 | tokenizer_kwargs=dict( 10 | cache_dir=None, 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | padding=True, 15 | truncation=True, 16 | add_bos_token=False, 17 | add_eos_token=False 18 | ), 19 | max_out_len=100, 20 | max_seq_len=1024, 21 | batch_size=8, 22 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 23 | batch_padding=True, 24 | run_cfg=dict(num_gpus=1, num_procs=1), 25 | ) 26 | ] 27 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_tigerbot_13b_chat.py: 
-------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | _meta_template = dict( 4 | round=[ 5 | dict(role='HUMAN', begin='\n\n### Instruction:\n'), 6 | dict(role='BOT', begin='\n\n### Response:\n', generate=True), 7 | ], 8 | ) 9 | 10 | models = [ 11 | dict( 12 | type=HuggingFaceCausalLM, 13 | abbr='tigerbot-13b-2h-sft-20g-mix0.0-group', 14 | path="/mnt/nfs/yechen/models/tigerbot-13b-2h-sft-20g-mix0.0-group", 15 | tokenizer_path='/mnt/nfs/yechen/models/tigerbot-13b-2h-sft-20g-mix0.0-group', 16 | tokenizer_kwargs=dict( 17 | cache_dir=None, 18 | padding_side='left', 19 | truncation_side='left', 20 | trust_remote_code=True, 21 | padding=True, 22 | truncation=True, 23 | add_bos_token=False 24 | ), 25 | max_out_len=100, 26 | max_seq_len=2048, 27 | batch_size=4, 28 | meta_template=_meta_template, 29 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 30 | batch_padding=True, 31 | run_cfg=dict(num_gpus=1, num_procs=1), 32 | ) 33 | ] 34 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_tigerbot_7b_base.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | models = [ 4 | dict( 5 | type=HuggingFaceCausalLM, 6 | abbr='tigerbot-7b-base', 7 | path='TigerResearch/tigerbot-7b-base', 8 | tokenizer_path='TigerResearch/tigerbot-7b-base', 9 | tokenizer_kwargs=dict( 10 | cache_dir=None, 11 | padding_side='left', 12 | truncation_side='left', 13 | padding=True, 14 | truncation=True 15 | ), 16 | max_out_len=100, 17 | max_seq_len=1024, 18 | batch_size=32, 19 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 20 | batch_padding=True, 21 | run_cfg=dict(num_gpus=1, num_procs=1), 22 | ) 23 | ] 24 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_tigerbot_7b_chat.py: 
-------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | _meta_template = dict( 4 | round=[ 5 | dict(role='HUMAN', begin='\n\n### Instruction:\n:'), 6 | dict(role='BOT', begin='\n\n### Response:\n:', generate=True), 7 | ], 8 | ) 9 | 10 | models = [ 11 | dict( 12 | type=HuggingFaceCausalLM, 13 | abbr='tigerbot-7b-2h-sft-20g-mix0.0-group-mg-hf-9600', 14 | path="/mnt/nfs/yechen/models/tigerbot-7b-2h-sft-20g-mix0.0-group-mg-hf-9600", 15 | tokenizer_path='/mnt/nfs/yechen/models/tigerbot-7b-2h-sft-20g-mix0.0-group-mg-hf-9600', 16 | tokenizer_kwargs=dict( 17 | padding_side='left', 18 | truncation_side='left', 19 | trust_remote_code=True, 20 | ), 21 | max_out_len=100, 22 | max_seq_len=2048, 23 | batch_size=16, 24 | meta_template=_meta_template, 25 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 26 | batch_padding=True, 27 | run_cfg=dict(num_gpus=1, num_procs=1), 28 | ) 29 | ] 30 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_tigerbot_exllama.py: -------------------------------------------------------------------------------- 1 | 2 | from opencompass.models import ExllamaCausalLM 3 | 4 | _meta_template = dict( 5 | round=[ 6 | dict(role='HUMAN', begin='\n\n### Instruction:\n'), 7 | dict(role='BOT', begin='\n\n### Response:\n', generate=True), 8 | ], 9 | ) 10 | 11 | models = [ 12 | dict( 13 | type= ExllamaCausalLM, 14 | abbr='tigerbot', 15 | path="/mnt/nfs/algo/intern/yuwang/Tigerbot_AutoGPTQ/tigerbot_13b/tigerbot_13b_chat_4bit_c4_128g_no_act", 16 | tokenizer_path='/mnt/nfs/algo/intern/yuwang/Tigerbot_AutoGPTQ/tigerbot_13b/tigerbot_13b_chat_4bit_c4_128g_no_act', 17 | tokenizer_kwargs=dict( 18 | cache_dir=None, 19 | padding_side='left', 20 | truncation_side='left', 21 | trust_remote_code=True, 22 | padding=True, 23 | truncation=True, 24 | add_bos_token=False 25 | ), 26 | max_out_len=100, 27 | max_seq_len=2048, 
28 | batch_size=4, 29 | batch_padding=True, 30 | run_cfg=dict(num_gpus=1, num_procs=1), 31 | ) 32 | ] 33 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_tigerbot_gptq.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import GPTQCausalLM 2 | 3 | _meta_template = dict( 4 | round=[ 5 | dict(role='HUMAN', begin='\n\n### Instruction:\n'), 6 | dict(role='BOT', begin='\n\n### Response:\n', generate=True), 7 | ], 8 | ) 9 | 10 | models = [ 11 | dict( 12 | type=GPTQCausalLM, 13 | abbr='tigerbot-13b-2h-sft-20g-mix0.0-group', 14 | path="/mnt/nfs/yechen/models/tigerbot-13b-2h-sft-20g-mix0.0-group", 15 | tokenizer_path='/mnt/nfs/yechen/models/tigerbot-13b-2h-sft-20g-mix0.0-group', 16 | tokenizer_kwargs=dict( 17 | cache_dir=None, 18 | padding_side='left', 19 | truncation_side='left', 20 | trust_remote_code=True, 21 | padding=True, 22 | truncation=True, 23 | add_bos_token=False 24 | ), 25 | max_out_len=200, 26 | max_seq_len=2048, 27 | batch_size=4, 28 | model_kwargs=dict(trust_remote_code=True, device_map='auto'), 29 | batch_padding=True, 30 | run_cfg=dict(num_gpus=1, num_procs=1), 31 | ) 32 | ] -------------------------------------------------------------------------------- /opencompass/configs/models/hf_vicuna_13b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='vicuna-13b-hf', 8 | path="lmsys/vicuna-13b-v1.3", 9 | tokenizer_path='lmsys/vicuna-13b-v1.3', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | use_fast=False, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(device_map='auto'), 19 | batch_padding=False, # if false, inference with for-loop without batch padding 20 | run_cfg=dict(num_gpus=2, 
num_procs=1) 21 | ) 22 | ] 23 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_vicuna_33b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='vicuna-33b-hf', 8 | path="lmsys/vicuna-33b-v1.3", 9 | tokenizer_path='lmsys/vicuna-33b-v1.3', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | use_fast=False, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(device_map='auto'), 19 | batch_padding=False, # if false, inference with for-loop without batch padding 20 | run_cfg=dict(num_gpus=4, num_procs=1) 21 | ) 22 | ] 23 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_vicuna_7b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='vicuna-7b-hf', 8 | path="lmsys/vicuna-7b-v1.3", 9 | tokenizer_path='lmsys/vicuna-7b-v1.3', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | use_fast=False, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict(device_map='auto'), 19 | batch_padding=False, # if false, inference with for-loop without batch padding 20 | run_cfg=dict(num_gpus=1, num_procs=1) 21 | ) 22 | ] 23 | -------------------------------------------------------------------------------- /opencompass/configs/models/hf_wizardlm_7b.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import HuggingFaceCausalLM 2 | 3 | 4 | models = [ 5 | dict( 6 | type=HuggingFaceCausalLM, 7 | abbr='wizardlm-7b-hf', 8 | 
path='TheBloke/wizardLM-7B-HF', 9 | tokenizer_path='TheBloke/wizardLM-7B-HF', 10 | tokenizer_kwargs=dict( 11 | padding_side='left', 12 | truncation_side='left', 13 | trust_remote_code=True, 14 | ), 15 | max_out_len=100, 16 | max_seq_len=2048, 17 | batch_size=8, 18 | model_kwargs=dict( 19 | device_map='auto', 20 | trust_remote_code=True, 21 | ), 22 | run_cfg=dict(num_gpus=1, num_procs=1), 23 | ) 24 | ] 25 | -------------------------------------------------------------------------------- /opencompass/configs/models/llama2_13b_chat.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import Llama2Chat 2 | 3 | # Please follow the instruction in the Meta AI website https://github.com/facebookresearch/llama 4 | # and download the LLaMA-2-Chat model and tokenizer to the path './models/llama2/llama/'. 5 | # 6 | # The LLaMA requirement is also needed to be installed. 7 | # 8 | # git clone https://github.com/facebookresearch/llama.git 9 | # cd llama 10 | # pip install -e . 
11 | 12 | api_meta_template = dict( 13 | round=[ 14 | dict(role="HUMAN", api_role="HUMAN"), 15 | dict(role="BOT", api_role="BOT", generate=True), 16 | ], 17 | ) 18 | 19 | models = [ 20 | dict( 21 | abbr="llama-2-13b-chat", 22 | type=Llama2Chat, 23 | path="/mnt/nfs/algo/intern/haoyunx11/models/llm/llama-2/Llama-2-13b-chat-hf", 24 | tokenizer_path="/mnt/nfs/algo/intern/haoyunx11/models/llm/llama-2/Llama-2-13b-chat-hf/tokenizer.model", 25 | meta_template=api_meta_template, 26 | max_out_len=100, 27 | max_seq_len=2048, 28 | batch_size=16, 29 | run_cfg=dict(num_gpus=1, num_procs=1), 30 | ), 31 | ] 32 | -------------------------------------------------------------------------------- /opencompass/configs/models/llama2_70b_chat.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import Llama2Chat 2 | 3 | # Please follow the instruction in the Meta AI website https://github.com/facebookresearch/llama 4 | # and download the LLaMA-2-Chat model and tokenizer to the path './models/llama2/llama/'. 5 | # 6 | # The LLaMA requirement is also needed to be installed. 7 | # 8 | # git clone https://github.com/facebookresearch/llama.git 9 | # cd llama 10 | # pip install -e . 
11 | 12 | api_meta_template = dict( 13 | round=[ 14 | dict(role="HUMAN", api_role="HUMAN"), 15 | dict(role="BOT", api_role="BOT", generate=True), 16 | ], 17 | ) 18 | 19 | models = [ 20 | dict( 21 | abbr="llama-2-70b-chat", 22 | type=Llama2Chat, 23 | path="./models/llama2/llama/llama-2-70b-chat/", 24 | tokenizer_path="./models/llama2/llama/tokenizer.model", 25 | meta_template=api_meta_template, 26 | max_out_len=100, 27 | max_seq_len=2048, 28 | batch_size=16, 29 | run_cfg=dict(num_gpus=8, num_procs=8), 30 | ), 31 | ] 32 | -------------------------------------------------------------------------------- /opencompass/configs/models/llama2_7b_chat.py: -------------------------------------------------------------------------------- 1 | from opencompass.models import Llama2Chat 2 | 3 | # Please follow the instruction in the Meta AI website https://github.com/facebookresearch/llama 4 | # and download the LLaMA-2-Chat model and tokenizer to the path './models/llama2/llama/'. 5 | # 6 | # The LLaMA requirement is also needed to be installed. 7 | # 8 | # git clone https://github.com/facebookresearch/llama.git 9 | # cd llama 10 | # pip install -e . 
11 | 12 | api_meta_template = dict( 13 | round=[ 14 | dict(role="HUMAN", api_role="HUMAN"), 15 | dict(role="BOT", api_role="BOT", generate=True), 16 | ], 17 | ) 18 | 19 | models = [ 20 | dict( 21 | abbr="llama-2-7b-chat", 22 | type=Llama2Chat, 23 | path="./models/llama2/llama/llama-2-7b-chat/", 24 | tokenizer_path="./models/llama2/llama/tokenizer.model", 25 | meta_template=api_meta_template, 26 | max_out_len=100, 27 | max_seq_len=2048, 28 | batch_size=16, 29 | run_cfg=dict(num_gpus=1, num_procs=1), 30 | ), 31 | ] 32 | -------------------------------------------------------------------------------- /opencompass/configs/summarizers/example.py: -------------------------------------------------------------------------------- 1 | from mmengine.config import read_base 2 | 3 | with read_base(): 4 | from .groups.agieval import agieval_summary_groups 5 | from .groups.mmlu import mmlu_summary_groups 6 | from .groups.ceval import ceval_summary_groups 7 | from .groups.bbh import bbh_summary_groups 8 | from .groups.GaokaoBench import GaokaoBench_summary_groups 9 | from .groups.flores import flores_summary_groups 10 | 11 | summarizer = dict( 12 | summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []), 13 | prompt_db=dict( 14 | database_path='configs/datasets/log.json', 15 | config_dir='configs/datasets', 16 | blacklist='.promptignore') 17 | ) 18 | -------------------------------------------------------------------------------- /opencompass/configs/summarizers/groups/GaokaoBench.py: -------------------------------------------------------------------------------- 1 | GaokaoBench_summary_groups = [] 2 | 3 | # gaokao-bench 4 | _GaokaoBench_weights = {'2010-2022_Math_II_MCQs': 1090, '2010-2022_Math_I_MCQs': 1070, '2010-2022_History_MCQs': 1148, '2010-2022_Biology_MCQs': 900, '2010-2022_Political_Science_MCQs': 1280, '2010-2022_Physics_MCQs': 384, '2010-2022_Chemistry_MCQs': 744, '2010-2013_English_MCQs': 105, '2010-2022_Chinese_Modern_Lit': 261, 
'2010-2022_English_Fill_in_Blanks': 900.0, '2012-2022_English_Cloze_Test': 260, '2010-2022_Geography_MCQs': 380, '2010-2022_English_Reading_Comp': 940, '2010-2022_Chinese_Lang_and_Usage_MCQs': 240} 5 | _GaokaoBench_weights = {'GaokaoBench_' + k: v for k, v in _GaokaoBench_weights.items()} 6 | GaokaoBench_summary_groups.append({'name': 'GaokaoBench', 'subsets': list(_GaokaoBench_weights.keys()), 'weights': _GaokaoBench_weights}) 7 | -------------------------------------------------------------------------------- /opencompass/configs/summarizers/groups/bbh.py: -------------------------------------------------------------------------------- 1 | bbh_summary_groups = [] 2 | 3 | # bbh 4 | _bbh = ['temporal_sequences', 'disambiguation_qa', 'date_understanding', 'tracking_shuffled_objects_three_objects', 'penguins_in_a_table','geometric_shapes', 'snarks', 'ruin_names', 'tracking_shuffled_objects_seven_objects', 'tracking_shuffled_objects_five_objects','logical_deduction_three_objects', 'hyperbaton', 'logical_deduction_five_objects', 'logical_deduction_seven_objects', 'movie_recommendation','salient_translation_error_detection', 'reasoning_about_colored_objects', 'multistep_arithmetic_two', 'navigate', 'dyck_languages', 'word_sorting', 'sports_understanding','boolean_expressions', 'object_counting', 'formal_fallacies', 'causal_judgement', 'web_of_lies'] 5 | _bbh = ['bbh-' + s for s in _bbh] 6 | bbh_summary_groups.append({'name': 'bbh', 'subsets': _bbh}) 7 | -------------------------------------------------------------------------------- /opencompass/configs/summarizers/groups/jigsaw_multilingual.py: -------------------------------------------------------------------------------- 1 | jigsaw_multilingual_summary_groups = [] 2 | 3 | # jigsaw multilingual (per-language toxicity subsets) 4 | _jigsaw_multilingual = ['es', 'fr', 'it', 'pt', 'ru', 'tr'] 5 | _jigsaw_multilingual = ['jigsaw_multilingual_' + s for s in _jigsaw_multilingual] 6 | jigsaw_multilingual_summary_groups.append({'name': 'jigsaw_multilingual', 'subsets': 
_jigsaw_multilingual}) 7 | -------------------------------------------------------------------------------- /opencompass/docs/en/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /opencompass/docs/en/_static/js/custom.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = ['Advanced Guides', 'Tools', 'User Guides', 'Notes']; 2 | 3 | $(document).ready(function () { 4 | $('.model-summary').DataTable({ 5 | "stateSave": false, 6 | "lengthChange": false, 7 | "pageLength": 20, 8 | "order": [] 9 | }); 10 | }); 11 | -------------------------------------------------------------------------------- /opencompass/docs/en/_templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block body %} 4 | 5 |

Page Not Found

6 |

7 | The page you are looking for cannot be found. 8 |

9 |

10 | If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in 11 | the content table left, or go to the homepage. 12 |

13 | 17 | 18 | {% endblock %} 19 | -------------------------------------------------------------------------------- /opencompass/docs/en/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | .. 12 | autogenerated from _templates/autosummary/class.rst 13 | note it does not have :inherited-members: 14 | -------------------------------------------------------------------------------- /opencompass/docs/en/_templates/callable.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :special-members: __call__ 11 | 12 | .. 13 | autogenerated from _templates/callable.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /opencompass/docs/en/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /opencompass/docs/en/prompt/few_shot.md: -------------------------------------------------------------------------------- 1 | # In-context Learning 2 | 3 | Coming soon. 
4 | -------------------------------------------------------------------------------- /opencompass/docs/en/prompt/overview.md: -------------------------------------------------------------------------------- 1 | # Prompt Overview 2 | -------------------------------------------------------------------------------- /opencompass/docs/en/prompt/prompt_template.md: -------------------------------------------------------------------------------- 1 | # Prompt Template 2 | 3 | Coming soon. 4 | -------------------------------------------------------------------------------- /opencompass/docs/en/user_guides/framework_overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | -------------------------------------------------------------------------------- /opencompass/docs/en/user_guides/metrics.md: -------------------------------------------------------------------------------- 1 | # Metric Calculation 2 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/_static/js/custom.js: -------------------------------------------------------------------------------- 1 | var collapsedSections = ['Advanced Guides', 'Tools', 'User Guides', 'Notes']; 2 | 3 | $(document).ready(function () { 4 | $('.model-summary').DataTable({ 5 | "stateSave": false, 6 | "lengthChange": false, 7 | "pageLength": 20, 8 | "order": [] 9 | }); 10 | }); 11 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/_templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block body %} 4 | 5 |

Page Not Found

6 |

7 | The page you are looking for cannot be found. 8 |

9 |

10 | If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in 11 | the content table left, or go to the homepage. 12 |

13 | 17 | 18 | {% endblock %} 19 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | 11 | .. 12 | autogenerated from _templates/autosummary/class.rst 13 | note it does not have :inherited-members: 14 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/_templates/callable.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :special-members: __call__ 11 | 12 | .. 13 | autogenerated from _templates/callable.rst 14 | note it does not have :inherited-members: 15 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/prompt/few_shot.md: -------------------------------------------------------------------------------- 1 | # Few-shot 2 | 3 | Coming soon. 
4 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/prompt/overview.md: -------------------------------------------------------------------------------- 1 | # Prompt 概括 2 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/prompt/prompt_template.md: -------------------------------------------------------------------------------- 1 | # Prompt 模板 2 | 3 | Coming soon. 4 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/user_guides/framework_overview.md: -------------------------------------------------------------------------------- 1 | # 整体概括 2 | -------------------------------------------------------------------------------- /opencompass/docs/zh_cn/user_guides/metrics.md: -------------------------------------------------------------------------------- 1 | # 评估指标 2 | 3 | Coming soon. 4 | -------------------------------------------------------------------------------- /opencompass/opencompass/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/TheoremQA.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from datasets import load_dataset 4 | 5 | from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class TheoremQADataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path: str): 15 | return load_dataset('csv', data_files={'test': path}) 16 | 17 | 18 | @TEXT_POSTPROCESSORS.register_module('TheoremQA') 19 | def TheoremQA_postprocess(text: str) -> str: 20 | text = text.strip() 21 | matches = re.findall(r'answer is ([^\s]+)', text) 22 | if len(matches) == 0: 23 | 
return text 24 | else: 25 | text = matches[0].strip().strip('.,?!\"\';:') 26 | return text 27 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/afqmcd.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class AFQMCDataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path): 15 | data = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | line['label'] = 'AB'[int(line['label'])] 20 | data.append(line) 21 | return Dataset.from_list(data) 22 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/agieval/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | from .agieval import * # noqa: F401, F403 4 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/agieval/utils.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | import json 3 | 4 | 5 | def read_jsonl(path): 6 | with open(path, encoding='utf8') as fh: 7 | results = [] 8 | for line in fh: 9 | if line is None: 10 | continue 11 | try: 12 | results.append(json.loads(line) if line != 'null' else line) 13 | except Exception as e: 14 | print(e) 15 | print(path) 16 | print(line) 17 | raise e 18 | return results 19 | 20 | 21 | def save_jsonl(lines, directory): 22 | with open(directory, 'w', encoding='utf8') as f: 23 | for line in lines: 24 | f.write(json.dumps(line, ensure_ascii=False) + '\n') 25 | 26 | 27 | def extract_answer(js): 28 | try: 29 | if js is None or js == 'null': 30 | return '' 31 | answer = '' 32 | if isinstance(js, str): 33 | 
answer = js 34 | elif 'text' in js['choices'][0]: 35 | answer = js['choices'][0]['text'] 36 | else: 37 | answer = js['choices'][0]['message']['content'] 38 | # answer = js[''] 39 | return answer 40 | except Exception as e: 41 | # print(e) 42 | # print(js) 43 | return '' 44 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/ax.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class AXDataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path: str): 15 | dataset = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | line['label'] = { 20 | 'entailment': 'A', 21 | 'not_entailment': 'B' 22 | }[line['label']] 23 | dataset.append(line) 24 | return Dataset.from_list(dataset) 25 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/base.py: -------------------------------------------------------------------------------- 1 | from abc import abstractstaticmethod 2 | from typing import Dict, Optional, Union 3 | 4 | from datasets import Dataset, DatasetDict 5 | 6 | from opencompass.openicl import DatasetReader 7 | 8 | 9 | class BaseDataset: 10 | 11 | def __init__(self, reader_cfg: Optional[Dict] = {}, **kwargs): 12 | self.dataset = self.load(**kwargs) 13 | self._init_reader(**reader_cfg) 14 | 15 | def _init_reader(self, **kwargs): 16 | self.reader = DatasetReader(self.dataset, **kwargs) 17 | 18 | @property 19 | def train(self): 20 | return self.reader.dataset['train'] 21 | 22 | @property 23 | def test(self): 24 | return self.reader.dataset['test'] 25 | 26 | @abstractstaticmethod 27 | def load(**kwargs) -> Union[Dataset, DatasetDict]: 28 | pass 29 | 
-------------------------------------------------------------------------------- /opencompass/opencompass/datasets/boolq.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset, load_dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class BoolQDataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(**kwargs): 15 | 16 | dataset = load_dataset(**kwargs) 17 | 18 | def preprocess(example): 19 | if example['label'] == 'true': 20 | example['answer'] = 1 21 | else: 22 | example['answer'] = 0 23 | 24 | return example 25 | 26 | dataset = dataset.map(preprocess) 27 | return dataset 28 | 29 | 30 | @LOAD_DATASET.register_module() 31 | class BoolQDataset_V2(BaseDataset): 32 | 33 | @staticmethod 34 | def load(path): 35 | dataset = [] 36 | with open(path, 'r') as f: 37 | for line in f: 38 | line = json.loads(line) 39 | line['label'] = {'true': 'A', 'false': 'B'}[line['label']] 40 | dataset.append(line) 41 | return Dataset.from_list(dataset) 42 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/bustum.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class bustumDataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path): 15 | data = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | line['label'] = 'AB'[int(line['label'])] 20 | data.append(line) 21 | return Dataset.from_list(data) 22 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/cb.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class CBDataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path): 15 | dataset = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | line['label'] = { 20 | 'contradiction': 'A', 21 | 'entailment': 'B', 22 | 'neutral': 'C' 23 | }[line['label']] 24 | dataset.append(line) 25 | return Dataset.from_list(dataset) 26 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/civilcomments.py: -------------------------------------------------------------------------------- 1 | from datasets import DatasetDict, load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class CivilCommentsDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | train_dataset = load_dataset(**kwargs, split='train') 14 | test_dataset = load_dataset(**kwargs, split='test') 15 | 16 | def pre_process(example): 17 | example['label'] = int(example['toxicity'] >= 0.5) 18 | example['choices'] = ['no', 'yes'] 19 | return example 20 | 21 | def remove_columns(dataset): 22 | return dataset.remove_columns([ 23 | 'severe_toxicity', 'obscene', 'threat', 'insult', 24 | 'identity_attack', 'sexual_explicit' 25 | ]) 26 | 27 | train_dataset = remove_columns(train_dataset) 28 | test_dataset = remove_columns(test_dataset) 29 | test_dataset = test_dataset.shuffle(seed=42) 30 | test_dataset = test_dataset.select(list(range(10000))) 31 | test_dataset = test_dataset.map(pre_process) 32 | 33 | return DatasetDict({ 34 | 'train': train_dataset, 35 | 'test': test_dataset, 36 | }) 37 | 
-------------------------------------------------------------------------------- /opencompass/opencompass/datasets/cmnli.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class cmnliDataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path): 15 | data = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | if line['label'] == '-': 20 | continue 21 | line['label'] = { 22 | 'entailment': 'A', 23 | 'contradiction': 'B', 24 | 'neutral': 'C', 25 | }[line['label']] 26 | data.append(line) 27 | return Dataset.from_list(data) 28 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/commonsenseqa.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class commonsenseqaDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs) 14 | 15 | def pre_process(example): 16 | for i in range(5): 17 | example[chr(ord('A') + i)] = example['choices']['text'][i] 18 | return example 19 | 20 | dataset = dataset.map(pre_process).remove_columns( 21 | ['question_concept', 'id', 'choices']) 22 | return dataset 23 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/copa.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | 
class COPADataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path): 15 | dataset = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | line['label'] = 'AB'[line['label']] 20 | dataset.append(line) 21 | return Dataset.from_list(dataset) 22 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/crowspairs.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class crowspairsDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | 14 | dataset = load_dataset(**kwargs) 15 | 16 | def preprocess(example): 17 | example['label'] = 0 18 | return example 19 | 20 | return dataset.map(preprocess) 21 | 22 | 23 | @LOAD_DATASET.register_module() 24 | class crowspairsDataset_V2(BaseDataset): 25 | 26 | @staticmethod 27 | def load(**kwargs): 28 | dataset = load_dataset(**kwargs) 29 | 30 | def preprocess(example): 31 | example['label'] = 'A' 32 | return example 33 | 34 | return dataset.map(preprocess) 35 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/csl.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset, load_dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class CslDataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(**kwargs): 15 | 16 | dataset = load_dataset(**kwargs) 17 | 18 | def preprocess(example): 19 | keywords = ','.join(example['keyword']) 20 | example['keywords'] = keywords 21 | 22 | return example 23 | 24 | dataset = dataset.map(preprocess) 25 | return dataset 26 | 27 | 28 | 
@LOAD_DATASET.register_module() 29 | class CslDataset_V2(BaseDataset): 30 | 31 | @staticmethod 32 | def load(path): 33 | data = [] 34 | with open(path, 'r') as f: 35 | for line in f: 36 | line = json.loads(line) 37 | item = { 38 | 'abst': line['abst'], 39 | 'keywords': ','.join(line['keyword']), 40 | 'label': 'AB'[int(line['label'])], 41 | } 42 | data.append(item) 43 | return Dataset.from_list(data) 44 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/cvalues.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from datasets import load_dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class CValuesDataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path): 15 | 16 | dataset = load_dataset('json', data_files=path) 17 | 18 | def preprocess(example): 19 | example['prompt'] = re.sub('回复1', '回复A', example['prompt']) 20 | example['prompt'] = re.sub('回复2', '回复B', example['prompt']) 21 | example['label'] = re.sub('回复1', 'A', example['label']) 22 | example['label'] = re.sub('回复2', 'B', example['label']) 23 | return example 24 | 25 | return dataset.map(preprocess) 26 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/drop.py: -------------------------------------------------------------------------------- 1 | from datasets import DatasetDict, load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class dropDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs, split='validation') 14 | 15 | def pre_process(example): 16 | example['answers'] = example['answers_spans']['spans'] 17 | example['prompt'] = example.pop('passage') 18 | return 
example 19 | 20 | def only_number(example): 21 | for i in example['answers_spans']['types']: 22 | if i == 'number': 23 | return True 24 | return False 25 | 26 | dataset = dataset.filter(only_number) 27 | dataset = dataset.map(pre_process).remove_columns( 28 | ['section_id', 'query_id']) 29 | return DatasetDict({'validation': dataset}) 30 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/eprstmt.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class eprstmtDataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path): 15 | data = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | item = { 20 | 'sentence': line['sentence'], 21 | 'label': { 22 | 'Positive': 'A', 23 | 'Negative': 'B', 24 | }[line['label']], 25 | } 26 | data.append(item) 27 | return Dataset.from_list(data) 28 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/flores.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from datasets import DatasetDict, load_dataset 4 | 5 | from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class FloresFirst100Dataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(name): 15 | return DatasetDict({ 16 | 'dev': 17 | load_dataset(path='facebook/flores', name=name, split='dev'), 18 | 'devtest': 19 | load_dataset(path='facebook/flores', 20 | name=name, 21 | split='devtest[:100]') 22 | }) 23 | 24 | 25 | @TEXT_POSTPROCESSORS.register_module('flores') 26 | def flores_postprocess(text: str) -> str: 27 | text = 
text.strip().split('\n')[0] 28 | return text 29 | 30 | 31 | @TEXT_POSTPROCESSORS.register_module('flores-chinese') 32 | def flores_postprocess_chinese(text: str) -> str: 33 | import jieba 34 | truncated_text = text.strip().split('\n')[0] 35 | cleaned_text = re.sub(r'\s+', ' ', truncated_text).strip() 36 | cleaned_text = ' '.join(jieba.cut(cleaned_text)) 37 | return cleaned_text 38 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/gsm8k.py: -------------------------------------------------------------------------------- 1 | from opencompass.registry import TEXT_POSTPROCESSORS 2 | 3 | 4 | @TEXT_POSTPROCESSORS.register_module('gsm8k_dataset') 5 | def gsm8k_dataset_postprocess(text: str) -> str: 6 | return text.split('#### ')[1].replace(',', '') 7 | 8 | 9 | @TEXT_POSTPROCESSORS.register_module('gsm8k') 10 | def gsm8k_postprocess(text: str) -> str: 11 | text = text.split('\n\n')[0] 12 | text = text.split(' ')[::-1] 13 | flag = False 14 | ret = '' 15 | for i in range(len(text)): 16 | s = text[i] 17 | for i in range(len(s)): 18 | if s[i].isdigit(): 19 | flag = True 20 | ret = s 21 | break 22 | if flag: 23 | break 24 | ret1 = '' 25 | for i in range(len(ret)): 26 | if ret[i].isdigit(): 27 | ret1 += ret[i] 28 | return ret1 29 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/hellaswag.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class hellaswagDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs) 14 | 15 | def preprocess(example): 16 | for i in range(4): 17 | example[chr(ord('A') + i)] = example['endings'][i] 18 | return example 19 | 20 | dataset = 
dataset.map(preprocess).remove_columns(['endings']) 21 | return dataset 22 | 23 | 24 | @LOAD_DATASET.register_module() 25 | class hellaswagDataset_V2(BaseDataset): 26 | 27 | @staticmethod 28 | def load(**kwargs): 29 | dataset = load_dataset(**kwargs) 30 | 31 | def preprocess(example): 32 | for i in range(4): 33 | example[chr(ord('A') + i)] = example['endings'][i] 34 | if example['label']: 35 | example['label'] = 'ABCD'[int(example['label'])] 36 | else: 37 | example['label'] = 'NULL' 38 | return example 39 | 40 | dataset = dataset.map(preprocess).remove_columns(['endings']) 41 | return dataset 42 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/huggingface.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class HFDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | return load_dataset(**kwargs) 14 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/iwslt2017.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class IWSLT2017Dataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs) 14 | dataset = dataset.map(lambda example: example['translation'] 15 | ).remove_columns('translation') 16 | return dataset 17 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/jigsawmultilingual.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 
from datasets import Dataset, DatasetDict 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class JigsawMultilingualDataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path, label, lang): 15 | assert lang in ['es', 'fr', 'it', 'pt', 'ru', 'tr'] 16 | dataset = DatasetDict() 17 | 18 | data_list = list() 19 | idx = 0 20 | with open(path) as file, open(label) as label: 21 | text_reader = csv.reader(file) 22 | label_reader = csv.reader(label) 23 | for text, target in zip(text_reader, label_reader): 24 | if text[2] == lang: 25 | assert text[0] == target[0] 26 | data_list.append({ 27 | 'idx': idx, 28 | 'text': text[1], 29 | 'label': int(target[1]), 30 | 'choices': ['no', 'yes'] 31 | }) 32 | idx += 1 33 | 34 | dataset['test'] = Dataset.from_list(data_list) 35 | return dataset 36 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/mmlu.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os.path as osp 3 | 4 | from datasets import Dataset, DatasetDict 5 | 6 | from opencompass.registry import LOAD_DATASET 7 | 8 | from .base import BaseDataset 9 | 10 | 11 | @LOAD_DATASET.register_module() 12 | class MMLUDataset(BaseDataset): 13 | 14 | @staticmethod 15 | def load(path: str, name: str): 16 | dataset = DatasetDict() 17 | for split in ['dev', 'test']: 18 | raw_data = [] 19 | filename = osp.join(path, split, f'{name}_{split}.csv') 20 | with open(filename, encoding='utf-8') as f: 21 | reader = csv.reader(f) 22 | for row in reader: 23 | assert len(row) == 6 24 | raw_data.append({ 25 | 'input': row[0], 26 | 'A': row[1], 27 | 'B': row[2], 28 | 'C': row[3], 29 | 'D': row[4], 30 | 'target': row[5], 31 | }) 32 | dataset[split] = Dataset.from_list(raw_data) 33 | return dataset 34 | -------------------------------------------------------------------------------- 
/opencompass/opencompass/datasets/obqa.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class OBQADataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs) 14 | 15 | def pre_process(example): 16 | for i in range(4): 17 | example[chr(ord('A') + i)] = example['choices']['text'][i] 18 | return example 19 | 20 | dataset = dataset.map(pre_process).remove_columns(['id', 'choices']) 21 | return dataset 22 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/piqa.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class piqaDataset_V2(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs) 14 | 15 | def preprocess(example): 16 | assert isinstance(example['label'], int) 17 | if example['label'] < 0: 18 | example['answer'] = 'NULL' 19 | else: 20 | example['answer'] = 'AB'[example['label']] 21 | example.pop('label') 22 | return example 23 | 24 | dataset = dataset.map(preprocess) 25 | return dataset 26 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/race.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class RaceDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(path: str, name: str): 13 | dataset = 
load_dataset(path, name) 14 | 15 | def preprocess(x): 16 | for ans, option in zip(['A', 'B', 'C', 'D'], x['options']): 17 | x[ans] = option 18 | del x['options'] 19 | return x 20 | 21 | return dataset.map(preprocess) 22 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/safety.py: -------------------------------------------------------------------------------- 1 | from datasets import Dataset, DatasetDict 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class SafetyDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(path): 13 | dataset = DatasetDict() 14 | 15 | data_list = list() 16 | idx = 0 17 | with open(path, 'r') as f: 18 | for line in f: 19 | if line.strip(): 20 | data_list.append({'idx': idx, 'prompt': line.strip()}) 21 | idx += 1 22 | 23 | dataset['test'] = Dataset.from_list(data_list) 24 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/siqa.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class siqaDataset_V2(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs) 14 | 15 | def preprocess(example): 16 | example['label'] = ' ABC'[int(example['label'])] 17 | return example 18 | 19 | dataset = dataset.map(preprocess) 20 | return dataset 21 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/strategyqa.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from opencompass.registry import TEXT_POSTPROCESSORS 4 | 5 | 6 | 
@TEXT_POSTPROCESSORS.register_module('strategyqa') 7 | def strategyqa_pred_postprocess(text: str) -> str: 8 | text = text.split('\n\n')[0] 9 | text = text.split('answer is ')[-1] 10 | match = re.search(r'(yes|no)', text.lower()) 11 | if match: 12 | return match.group(1) 13 | return '' 14 | 15 | 16 | @TEXT_POSTPROCESSORS.register_module('strategyqa_dataset') 17 | def strategyqa_dataset_postprocess(text: str) -> str: 18 | return 'yes' if str(text) == 'True' else 'no' 19 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/summedits.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class SummeditsDataset_V2(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path: str): 15 | dataset = [] 16 | with open(path, 'r') as f: 17 | for line in f: 18 | line = json.loads(line) 19 | line['label'] = 'BA'[line['label']] 20 | dataset.append(line) 21 | return Dataset.from_list(dataset) 22 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/wic.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset, load_dataset 4 | 5 | from opencompass.registry import LOAD_DATASET 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class WiCDataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(**kwargs): 15 | 16 | dataset = load_dataset(**kwargs) 17 | 18 | def preprocess(example): 19 | if example['label'] == 'true': 20 | example['answer'] = 1 21 | else: 22 | example['answer'] = 0 23 | 24 | return example 25 | 26 | dataset = dataset.map(preprocess) 27 | return dataset 28 | 29 | 30 | @LOAD_DATASET.register_module() 31 | class 
WiCDataset_V2(BaseDataset): 32 | 33 | @staticmethod 34 | def load(path): 35 | dataset = [] 36 | with open(path, 'r') as f: 37 | for line in f: 38 | line = json.loads(line) 39 | line['label'] = {'true': 'A', 'false': 'B'}[line['label']] 40 | dataset.append(line) 41 | return Dataset.from_list(dataset) 42 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/winograd.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class winogradDataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | dataset = load_dataset(**kwargs) 14 | 15 | def pre_process(example): 16 | example['prompt'] = example.pop('text') 17 | example['opt1'] = example['options'][0] 18 | example['opt2'] = example['options'][1] 19 | return example 20 | 21 | dataset = dataset.map(pre_process).remove_columns( 22 | ['options', 'source']) 23 | return dataset 24 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/xcopa.py: -------------------------------------------------------------------------------- 1 | from datasets import concatenate_datasets, load_dataset 2 | 3 | from opencompass.registry import LOAD_DATASET 4 | 5 | from .base import BaseDataset 6 | 7 | 8 | @LOAD_DATASET.register_module() 9 | class XCOPADataset(BaseDataset): 10 | 11 | @staticmethod 12 | def load(**kwargs): 13 | path = kwargs.get('path', None) 14 | lans = [ 15 | 'et', 'ht', 'it', 'id', 'qu', 'sw', 'zh', 'ta', 'th', 'tr', 'vi', 16 | 'translation-et', 'translation-ht', 'translation-it', 17 | 'translation-id', 'translation-sw', 'translation-zh', 18 | 'translation-ta', 'translation-th', 'translation-tr', 19 | 'translation-vi' 20 | ] 21 | 22 | datasets = [] 23 | for lan in lans: 24 | dataset = 
load_dataset(path, lan)['validation'] 25 | datasets.append(dataset) 26 | 27 | combined_dataset = concatenate_datasets(datasets) 28 | 29 | return combined_dataset 30 | -------------------------------------------------------------------------------- /opencompass/opencompass/datasets/xsum.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from datasets import Dataset 4 | 5 | from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS 6 | 7 | from .base import BaseDataset 8 | 9 | 10 | @LOAD_DATASET.register_module() 11 | class XsumDataset(BaseDataset): 12 | 13 | @staticmethod 14 | def load(path: str): 15 | with open(path, 'r', errors='ignore') as in_f: 16 | rows = [] 17 | for i, line in enumerate(in_f): 18 | if i == 1000: 19 | break 20 | sample = json.loads(line.strip()) 21 | dialogue = sample['dialogue'] 22 | summary = sample['summary'] 23 | if isinstance(dialogue, float) or isinstance(summary, float): 24 | continue 25 | rows.append({'dialogue': dialogue, 'summary': summary}) 26 | dataset = Dataset.from_dict({ 27 | 'dialogue': [row['dialogue'] for row in rows], 28 | 'summary': [row['summary'] for row in rows] 29 | }) 30 | return dataset 31 | 32 | 33 | @TEXT_POSTPROCESSORS.register_module('Xsum') 34 | def Xsum_postprocess(text: str) -> str: 35 | text = text.strip().split('\n')[0].strip() 36 | return text 37 | -------------------------------------------------------------------------------- /opencompass/opencompass/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseModel, LMTemplateParser # noqa 2 | from .base_api import APITemplateParser, BaseAPIModel # noqa 3 | from .glm import GLM130B # noqa: F401, F403 4 | from .huggingface import HuggingFace # noqa: F401, F403 5 | from .huggingface import HuggingFaceCausalLM, GPTQCausalLM, ExllamaCausalLM 6 | # noqa: F401, F403 7 | from .llama2 import Llama2Chat # noqa: F401, F403 8 | from 
.openai_api import OpenAI # noqa: F401 9 | -------------------------------------------------------------------------------- /opencompass/opencompass/openicl/__init__.py: -------------------------------------------------------------------------------- 1 | from .icl_dataset_reader import DatasetReader # noqa 2 | from .icl_evaluator import * # noqa 3 | from .icl_inferencer import * # noqa 4 | from .icl_prompt_template import PromptTemplate # noqa 5 | from .icl_retriever import * # noqa 6 | -------------------------------------------------------------------------------- /opencompass/opencompass/openicl/icl_evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | from .icl_aucroc_evaluator import AUCROCEvaluator # noqa 2 | from .icl_base_evaluator import BaseEvaluator # noqa 3 | from .icl_em_evaluator import EMEvaluator # noqa 4 | from .icl_hf_evaluator import * # noqa 5 | from .icl_toxic_evaluator import ToxicEvaluator # noqa 6 | -------------------------------------------------------------------------------- /opencompass/opencompass/openicl/icl_evaluator/icl_base_evaluator.py: -------------------------------------------------------------------------------- 1 | """Base Evaluator.""" 2 | 3 | 4 | class BaseEvaluator: 5 | 6 | def __init__(self) -> None: 7 | pass 8 | 9 | def score(self): 10 | raise NotImplementedError("Method hasn't been implemented yet") 11 | -------------------------------------------------------------------------------- /opencompass/opencompass/openicl/icl_inferencer/__init__.py: -------------------------------------------------------------------------------- 1 | from .icl_base_inferencer import BaseInferencer # noqa 2 | from .icl_clp_inferencer import CLPInferencer # noqa 3 | from .icl_gen_inferencer import GenInferencer # noqa 4 | from .icl_ppl_inferencer import PPLInferencer # noqa 5 | -------------------------------------------------------------------------------- 
/opencompass/opencompass/openicl/icl_retriever/__init__.py: -------------------------------------------------------------------------------- 1 | from .icl_base_retriever import BaseRetriever # noqa 2 | from .icl_bm25_retriever import BM25Retriever # noqa 3 | from .icl_dpp_retriever import DPPRetriever # noqa 4 | from .icl_fix_k_retriever import FixKRetriever # noqa 5 | from .icl_mdl_retriever import MDLRetriever # noqa 6 | from .icl_random_retriever import RandomRetriever # noqa 7 | from .icl_topk_retriever import TopkRetriever # noqa 8 | from .icl_votek_retriever import VotekRetriever # noqa 9 | from .icl_zero_retriever import ZeroRetriever # noqa 10 | -------------------------------------------------------------------------------- /opencompass/opencompass/openicl/icl_retriever/icl_zero_retriever.py: -------------------------------------------------------------------------------- 1 | """Zeroshot Retriever.""" 2 | 3 | from typing import List, Optional 4 | 5 | from opencompass.openicl.icl_retriever import BaseRetriever 6 | from opencompass.registry import ICL_RETRIEVERS 7 | 8 | 9 | @ICL_RETRIEVERS.register_module() 10 | class ZeroRetriever(BaseRetriever): 11 | """Zeroshot Retriever. The retriever returns empty list for all queries. 12 | 13 | Args: 14 | dataset (`BaseDataset`): Any BaseDataset instances. 15 | Attributes of ``reader``, ``train`` and ``test`` will be used. 16 | ice_eos_token (`Optional[str]`): The end of sentence token for 17 | in-context example template when origin `PromptTemplate` is 18 | provided. Defaults to ''. 
19 | """ 20 | 21 | def __init__(self, dataset, ice_eos_token: Optional[str] = '') -> None: 22 | super().__init__(dataset, '', ice_eos_token, 0) 23 | 24 | def retrieve(self) -> List[List]: 25 | rtr_idx_list = [[] for _ in range(len(self.test_ds))] 26 | return rtr_idx_list 27 | -------------------------------------------------------------------------------- /opencompass/opencompass/openicl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logging import * # noqa 2 | -------------------------------------------------------------------------------- /opencompass/opencompass/openicl/utils/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.distributed as dist 4 | 5 | LOG_LEVEL = logging.INFO 6 | SUBPROCESS_LOG_LEVEL = logging.ERROR 7 | LOG_FORMATTER = '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s' 8 | 9 | 10 | def get_logger(name, level=LOG_LEVEL, log_file=None, file_mode='w'): 11 | formatter = logging.Formatter(LOG_FORMATTER) 12 | 13 | logger = logging.getLogger(name) 14 | 15 | for handler in logger.root.handlers: 16 | if type(handler) is logging.StreamHandler: 17 | handler.setLevel(logging.ERROR) 18 | 19 | if dist.is_available() and dist.is_initialized(): 20 | rank = dist.get_rank() 21 | else: 22 | rank = 0 23 | 24 | if rank == 0 and log_file is not None: 25 | file_handler = logging.FileHandler(log_file, file_mode) 26 | file_handler.setFormatter(formatter) 27 | file_handler.setLevel(level) 28 | logger.addHandler(file_handler) 29 | 30 | if rank == 0: 31 | logger.setLevel(level) 32 | else: 33 | logger.setLevel(SUBPROCESS_LOG_LEVEL) 34 | 35 | stream_handler = logging.StreamHandler() 36 | stream_handler.setFormatter(formatter) 37 | stream_handler.setLevel(level) 38 | logger.addHandler(stream_handler) 39 | 40 | return logger 41 | -------------------------------------------------------------------------------- 
# Central mmengine registries for every pluggable OpenCompass component.
# ``locations`` tells mmengine which packages to scan so that a class only
# needs the corresponding ``@REGISTRY.register_module()`` decorator to
# become buildable from a config dict.
from mmengine.registry import Registry

# Evaluation-pipeline components: how work is partitioned, scheduled and run.
PARTITIONERS = Registry('partitioner', locations=['opencompass.partitioners'])
RUNNERS = Registry('runner', locations=['opencompass.runners'])
TASKS = Registry('task', locations=['opencompass.tasks'])
# Models under evaluation (HF models, API wrappers, ...).
MODELS = Registry('model', locations=['opencompass.models'])
# TODO: LOAD_DATASET -> DATASETS
LOAD_DATASET = Registry('load_dataset', locations=['opencompass.datasets'])
TEXT_POSTPROCESSORS = Registry(
    'text_postprocessors', locations=['opencompass.utils.text_postprocessors'])
EVALUATORS = Registry('evaluators', locations=['opencompass.evaluators'])

# OpenICL (in-context learning) building blocks: inference strategies,
# few-shot example retrievers, dataset readers, prompt templates and
# metric evaluators.
ICL_INFERENCERS = Registry('icl_inferencers',
                           locations=['opencompass.openicl.icl_inferencer'])
ICL_RETRIEVERS = Registry('icl_retrievers',
                          locations=['opencompass.openicl.icl_retriever'])
ICL_DATASET_READERS = Registry(
    'icl_dataset_readers',
    locations=['opencompass.openicl.icl_dataset_reader'])
ICL_PROMPT_TEMPLATES = Registry(
    'icl_prompt_templates',
    locations=['opencompass.openicl.icl_prompt_template'])
ICL_EVALUATORS = Registry('icl_evaluators',
                          locations=['opencompass.openicl.icl_evaluator'])
def build_dataset_from_cfg(dataset_cfg: ConfigDict):
    """Instantiate a dataset from its config via the ``LOAD_DATASET`` registry.

    The config is deep-copied first, so the caller's dict is never mutated.
    Keys that belong to the evaluation pipeline rather than the dataset
    constructor (``infer_cfg``, ``eval_cfg``, ``abbr``) are stripped before
    building.

    Args:
        dataset_cfg (ConfigDict): Dataset config, including a ``type`` key.

    Returns:
        The constructed dataset instance.

    Note:
        The original return annotation said ``-> ConfigDict``, but
        ``Registry.build`` returns the built object, not a config — the
        annotation has been removed to stop misleading callers.
    """
    dataset_cfg = copy.deepcopy(dataset_cfg)
    dataset_cfg.pop('infer_cfg', None)
    dataset_cfg.pop('eval_cfg', None)
    dataset_cfg.pop('abbr', None)
    return LOAD_DATASET.build(dataset_cfg)


def build_model_from_cfg(model_cfg: ConfigDict):
    """Instantiate a model from its config via the ``MODELS`` registry.

    Runner-level keys that the model constructor does not accept
    (``run_cfg``, ``max_out_len``, ``batch_size``, ``abbr``) are stripped
    from a deep copy before building, leaving the caller's config intact.

    Args:
        model_cfg (ConfigDict): Model config, including a ``type`` key.

    Returns:
        The constructed model instance (fixed from the misleading
        ``-> ConfigDict`` annotation in the original).
    """
    model_cfg = copy.deepcopy(model_cfg)
    model_cfg.pop('run_cfg', None)
    model_cfg.pop('max_out_len', None)
    model_cfg.pop('batch_size', None)
    model_cfg.pop('abbr', None)
    return MODELS.build(model_cfg)
def _git_stdout(cmd) -> str:
    """Run a git command and return its stripped stdout.

    Raises ``subprocess.CalledProcessError`` if the command exits
    non-zero (``check=True``), matching the original behaviour of both
    public helpers below.
    """
    result = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
    return result.stdout.decode('utf-8').strip()


def get_git_root() -> str:
    """Return the absolute path of the current repository's top level.

    Equivalent to ``git rev-parse --show-toplevel`` run in the current
    working directory.
    """
    return _git_stdout(['git', 'rev-parse', '--show-toplevel'])


def get_latest_commit(branch: str) -> str:
    """Return the commit hash that ``branch`` currently points to.

    Args:
        branch (str): Any git revision name (branch, tag, ``HEAD``...).
    """
    return _git_stdout(['git', 'rev-parse', branch])
10 | """ 11 | return MMLogger.get_instance('OpenCompass', 12 | logger_name='OpenCompass', 13 | log_level=log_level) 14 | -------------------------------------------------------------------------------- /opencompass/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.19.0 2 | boto3 3 | colossalai 4 | cpm_kernels 5 | datasets>=2.12.0 6 | evaluate>=0.3.0 7 | fairscale 8 | faiss_gpu==1.7.2 9 | jieba 10 | mmengine>=0.8.2 11 | nltk==3.8 12 | numpy==1.23.4 13 | openai 14 | pandas<2.0.0 15 | rank_bm25==0.2.2 16 | requests==2.28.1 17 | scikit_learn==1.2.1 18 | sentence_transformers==2.2.2 19 | tabulate 20 | tiktoken 21 | tokenizers>=0.13.3 22 | torch>=1.13.1 23 | tqdm==4.64.1 24 | transformers>=4.29.1 25 | -------------------------------------------------------------------------------- /opencompass/requirements/docs.txt: -------------------------------------------------------------------------------- 1 | docutils==0.18.1 2 | modelindex 3 | myst-parser 4 | -e git+https://github.com/Ezra-Yu/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 5 | sphinx==6.1.3 6 | sphinx-copybutton 7 | sphinx-notfound-page 8 | sphinx-tabs 9 | sphinxcontrib-jquery 10 | tabulate 11 | -------------------------------------------------------------------------------- /opencompass/requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.19.0 2 | boto3 3 | colossalai 4 | cpm_kernels 5 | datasets>=2.12.0 6 | evaluate>=0.3.0 7 | fairscale 8 | faiss_gpu==1.7.2 9 | jieba 10 | mmengine 11 | nltk==3.8 12 | numpy==1.23.4 13 | openai 14 | pandas<2.0.0 15 | rank_bm25==0.2.2 16 | requests==2.28.1 17 | scikit_learn==1.2.1 18 | sentence_transformers==2.2.2 19 | tabulate 20 | tiktoken 21 | tokenizers>=0.13.3 22 | torch>=1.13.1 23 | tqdm==4.64.1 24 | transformers>=4.29.1 25 | -------------------------------------------------------------------------------- 
def parse_args():
    """Parse CLI arguments: the summary csv path and the score column name."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', help='file path', type=str)
    parser.add_argument('--key', help='score column name', type=str)
    args = parser.parse_args()
    return args


def _collect_scores(df, key, dataset_substr="ceval"):
    """Collect the float scores of the matching sub-datasets.

    Rows whose score is the "-" placeholder (model not evaluated) or
    whose ``dataset`` name does not contain ``dataset_substr`` are
    skipped. Each visited row is echoed for manual inspection, as the
    original script did.
    """
    scores = []
    for _, row in df.iterrows():
        print(row.to_dict())
        if row[key] == "-":
            continue
        if dataset_substr not in row["dataset"]:
            continue
        scores.append(float(row[key]))
    return scores


def main():
    """Print the average C-Eval score from an opencompass summary csv."""
    args = parse_args()
    df = pd.read_csv(args.path)
    print(df.shape)
    score = _collect_scores(df, args.key)
    print(f"score: {score}, sum: {sum(score)}, length: {len(score)}")
    # Bug fix: the original divided unconditionally and crashed with
    # ZeroDivisionError when the csv contained no "ceval" rows.
    if score:
        print(f"average score: {sum(score) / len(score)}")
    else:
        print("average score: n/a (no ceval rows found)")


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | safetensors==0.3.1 2 | datasets==2.10.1 3 | accelerate>=0.20.3 4 | protobuf==3.20.2 5 | transformers>=4.34.0 6 | scikit-learn==1.0.2 7 | torch>=2.0.0 8 | evaluate==0.4.0 9 | texttable==1.6.7 10 | toml==0.10.2 11 | numpy>=1.22.0 12 | sentencepiece==0.1.98 13 | fire==0.5.0 14 | flash-attn==2.1.1 15 | deepspeed==0.9.5 16 | streamlit==1.24.1 17 | -------------------------------------------------------------------------------- /train/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/train/.DS_Store -------------------------------------------------------------------------------- /train/requirements_qlora.txt: -------------------------------------------------------------------------------- 1 | transformers==4.30.2 2 | peft@git+https://github.com/huggingface/peft.git@eb01b5ee1dfeb6fdacc73dc2fb1dd674bb6868ac 3 | accelerate==0.20.3 4 | einops==0.6.1 5 | bitsandbytes==0.39.0 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TigerResearch/TigerBot/a80a3fbbdf8536fa81378635ff0baf3bdd3f5b37/utils/__init__.py --------------------------------------------------------------------------------