├── README.md ├── code └── release_code_aaai_ck12 │ ├── __pycache__ │ └── evaluator_opt.cpython-38.pyc │ ├── eval.py │ ├── eval_llama2.py │ ├── eval_results │ ├── chatglm2_zero_shot.json │ └── llama2-zero_shot_128.json │ ├── evaluator_opt.py │ ├── evaluators │ ├── __pycache__ │ │ ├── Yi.cpython-38.pyc │ │ ├── baichuan_wo_compute_metric.cpython-38.pyc │ │ ├── bluelm.cpython-38.pyc │ │ ├── chatglm3.cpython-38.pyc │ │ ├── chatglm_wo_compute_metric.cpython-38.pyc │ │ ├── chatglm_wo_compute_metric3.cpython-38.pyc │ │ ├── chatgpt.cpython-38.pyc │ │ ├── evaluator.cpython-38.pyc │ │ ├── llama2.cpython-38.pyc │ │ ├── minimax.cpython-38.pyc │ │ ├── moss.cpython-38.pyc │ │ ├── moss_wo_compute_metric.cpython-38.pyc │ │ ├── qianwen_wo_compute_metric.cpython-38.pyc │ │ ├── skywork.cpython-38.pyc │ │ └── skywork_TEST.cpython-38.pyc │ ├── baichuan_wo_compute_metric.py │ ├── chatglm_wo_compute_metric.py │ ├── evaluator.py │ └── llama2.py │ ├── few_shot_cot_examples.json │ ├── few_shot_examples.json │ ├── inference_hf.sh │ ├── inference_vllm.sh │ └── metric.py └── images ├── che.png ├── exp4.png ├── exp5.png ├── math_results.png ├── overview.png ├── pie.png ├── result.png └── title.png /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/README.md -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/__pycache__/evaluator_opt.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/__pycache__/evaluator_opt.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/eval.py -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/eval_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/eval_llama2.py -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/eval_results/chatglm2_zero_shot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/eval_results/chatglm2_zero_shot.json -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/eval_results/llama2-zero_shot_128.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/eval_results/llama2-zero_shot_128.json -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluator_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluator_opt.py -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/Yi.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/Yi.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/baichuan_wo_compute_metric.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/baichuan_wo_compute_metric.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/bluelm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/bluelm.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/chatglm3.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/chatglm3.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/chatglm_wo_compute_metric.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/chatglm_wo_compute_metric.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/chatglm_wo_compute_metric3.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/chatglm_wo_compute_metric3.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/chatgpt.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/chatgpt.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/evaluator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/evaluator.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/llama2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/llama2.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/minimax.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/minimax.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/moss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/moss.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/moss_wo_compute_metric.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/moss_wo_compute_metric.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/qianwen_wo_compute_metric.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/qianwen_wo_compute_metric.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/skywork.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/skywork.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/__pycache__/skywork_TEST.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/__pycache__/skywork_TEST.cpython-38.pyc -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/baichuan_wo_compute_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/baichuan_wo_compute_metric.py -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/chatglm_wo_compute_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/chatglm_wo_compute_metric.py -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/evaluator.py -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/evaluators/llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/evaluators/llama2.py -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/few_shot_cot_examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/few_shot_cot_examples.json -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/few_shot_examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/few_shot_examples.json -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/inference_hf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/inference_hf.sh -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/inference_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/inference_vllm.sh -------------------------------------------------------------------------------- /code/release_code_aaai_ck12/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/code/release_code_aaai_ck12/metric.py -------------------------------------------------------------------------------- /images/che.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/che.png -------------------------------------------------------------------------------- /images/exp4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/exp4.png -------------------------------------------------------------------------------- /images/exp5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/exp5.png -------------------------------------------------------------------------------- /images/math_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/math_results.png -------------------------------------------------------------------------------- /images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/overview.png -------------------------------------------------------------------------------- /images/pie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/pie.png -------------------------------------------------------------------------------- /images/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/result.png -------------------------------------------------------------------------------- /images/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tal-tech/chinese-k12-evaluation/HEAD/images/title.png --------------------------------------------------------------------------------