├── .gitignore ├── assets ├── algorithm.png ├── figure1.png ├── prmbench.png └── processbench.png ├── examples ├── ds_config.json ├── py_scripts │ ├── code_snippets.py │ ├── pool_based_active_learning.py │ └── test_actprm_on_processbench.py └── scripts │ └── pool_based_active_learning.sh ├── pyproject.toml ├── readme.md ├── requirements.txt ├── setup.py └── src └── active_prm ├── __init__.py ├── dataset.py ├── eval ├── PRMBench │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __main__.py │ ├── docs │ │ ├── assets │ │ │ ├── main_fig.pdf │ │ │ ├── main_fig.svg │ │ │ └── main_logo.png │ │ ├── data_format.md │ │ └── document.md │ ├── mr_annotate │ │ ├── __init__.py │ │ ├── annotation │ │ │ ├── simple_judge_turn_to_annotate_version.ipynb │ │ │ └── turn_to_annotate_version.ipynb │ │ └── build_data │ │ │ ├── __init__.py │ │ │ ├── generate_by_4o.py │ │ │ ├── generate_by_gemini.py │ │ │ ├── generate_data │ │ │ ├── __init__.py │ │ │ ├── dataset │ │ │ │ └── generate_dataset.py │ │ │ ├── generate_by_4o.py │ │ │ └── generate_by_gemini.py │ │ │ ├── model_inference │ │ │ ├── gemini_api │ │ │ │ └── gemini_inference.py │ │ │ └── qwq │ │ │ │ ├── inferencer │ │ │ │ ├── __init__.py │ │ │ │ ├── qwq_inferencer.py │ │ │ │ └── qwq_inferencer_dataset.py │ │ │ │ ├── run_inference.py │ │ │ │ └── test_qwq.ipynb │ │ │ └── prompts │ │ │ ├── __init__.py │ │ │ ├── classifications │ │ │ ├── __init__.py │ │ │ ├── circular.py │ │ │ ├── confidence.py │ │ │ ├── counterfactual.py │ │ │ ├── deception.py │ │ │ ├── domain_inconsistency.py │ │ │ ├── missing_condition.py │ │ │ ├── redundency.py │ │ │ └── step_contradiction.py │ │ │ ├── prompt_new.py │ │ │ ├── prompts.py │ │ │ └── prompts_test.txt │ ├── mr_eval │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── bon_eval │ │ │ ├── bon_eval.py │ │ │ ├── evaluate │ │ │ │ └── eval_on_gpt.py │ │ │ ├── policy_gen │ │ │ │ ├── __init__.py │ │ │ │ ├── dataset │ │ │ │ │ ├── base_dataset.py │ │ │ │ │ ├── fix_data.ipynb │ │ │ │ │ ├── load_data.ipynb │ │ │ │ │ └── validate_data.ipynb │ │ │ │ └── vllm_inference.py │ │ │ └── prm_eval_utils.py │ │ ├── evaluator.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── abstract_model.py │ │ │ ├── ensemble_prm.py │ │ │ ├── gemini_models.py │ │ │ ├── llama3_1_8b_prm.py │ │ │ ├── llemma7b_prm.py │ │ │ ├── math_shepherd.py │ │ │ ├── mathminos_mistral.py │ │ │ ├── openai_models.py │ │ │ ├── pure_prm.py │ │ │ ├── qwen_math_rm_fsdp.py │ │ │ ├── qwen_prm.py │ │ │ ├── qwen_qwq.py │ │ │ ├── reasoneval.py │ │ │ ├── skywork_prm.py │ │ │ └── vllm_models.py │ │ ├── scripts │ │ │ └── examples │ │ │ │ ├── accelerate_configs │ │ │ │ ├── 1gpu.yaml │ │ │ │ ├── 4gpus.yaml │ │ │ │ ├── 4gpus_deepspeed.yaml │ │ │ │ ├── cpu.yaml │ │ │ │ ├── zero3_inference.json │ │ │ │ └── zero3_offload_inference.json │ │ │ │ ├── api_eval.sh │ │ │ │ ├── direct_run.sh │ │ │ │ ├── example_configs │ │ │ │ ├── all_prms.yaml │ │ │ │ └── initial_test_gpt4o.yaml │ │ │ │ └── local_multi_gpu_eval.sh │ │ ├── tasks │ │ │ ├── __init__.py │ │ │ ├── base_dataset │ │ │ │ ├── __init__.py │ │ │ │ └── base_evaluation_dataset.py │ │ │ ├── prmbench_bon │ │ │ │ ├── __init__.py │ │ │ │ ├── config.yaml │ │ │ │ └── task.py │ │ │ ├── prmbench_bon_subset50 │ │ │ │ ├── __init__.py │ │ │ │ ├── config.yaml │ │ │ │ └── task.py │ │ │ ├── prmbench_stem │ │ │ │ ├── __init__.py │ │ │ │ ├── config.yaml │ │ │ │ └── task.py │ │ │ ├── prmtest_classified │ │ │ │ ├── __init__.py │ │ │ │ ├── config.yaml │ │ │ │ ├── data │ │ │ │ │ ├── debug │ │ │ │ │ │ └── debug.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_circular.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_confidence.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_counterfactual.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_deception.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_domain_inconsistency.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_missing_condition.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_redundency.jsonl │ │ │ │ │ ├── prm800k_test_p1_4o_step_contradiction.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_circular.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_confidence.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_counterfactual.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_deception.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_domain_inconsistency.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_missing_condition.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_redundency.jsonl │ │ │ │ │ ├── prm800k_test_p2_4o_step_contradiction.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_circular.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_confidence.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_counterfactual.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_deception.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_domain_inconsistency.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_missing_condition.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_redundency.jsonl │ │ │ │ │ ├── prm800k_train_p1_first400_4o_step_contradiction.jsonl │ │ │ │ │ └── prm_test_p12_multi_solutions.jsonl │ │ │ │ └── task.py │ │ │ └── prmtest_classified_subset400 │ │ │ │ ├── __init__.py │ │ │ │ ├── config.yaml │ │ │ │ ├── data │ │ │ │ └── target_data.jsonl │ │ │ │ └── task.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── arguments.py │ │ │ ├── log_utils.py │ │ │ ├── model_utils.py │ │ │ ├── prompts.py │ │ │ ├── task_utils.py │ │ │ └── utils.py │ ├── mr_visualize │ │ ├── data_visualization │ │ │ └── generate_instance.ipynb │ │ └── eval_res_view │ │ │ ├── draw_figure │ │ │ ├── draw_correlation.ipynb │ │ │ ├── draw_error_step_distribution.ipynb │ │ │ ├── draw_radar.ipynb │ │ │ ├── draw_step_acc.ipynb │ │ │ └── res │ │ │ │ ├── correlation.pdf │ │ │ │ ├── error_position_distribution.pdf │ │ │ │ ├── llava_radar.pdf │ │ │ │ ├── llava_radar.svg │ │ │ │ └── step_acc.pdf │ │ │ ├── draw_tabs │ │ │ ├── bias_table.ipynb │ │ │ ├── data_statistic.ipynb │ │ │ ├── draw_error_cases.ipynb │ │ │ ├── form_excel_strs.ipynb │ │ │ ├── form_latex_appendix.ipynb │ │ │ ├── form_latex_strs.ipynb │ │ │ └── prmbench_bon │ │ │ │ └── form_latex_strs.ipynb │ │ │ └── form_latex_strs.ipynb │ ├── requirements.txt │ ├── setup.py │ └── vis_res.py └── processbench.py ├── models ├── __init__.py ├── nets.py ├── qwen2_ensemble_prm │ ├── configuration_qwen2.py │ └── modeling_qwen2.py └── utils.py ├── trainer ├── __init__.py ├── active_sft_config.py └── active_sft_trainer.py └── utils ├── processor.py └── worker.py /.gitignore: -------------------------------------------------------------------------------- 1 | .git 2 | .nvim 3 | .argo 4 | .DS_Store 5 | 6 | -------------------------------------------------------------------------------- /assets/algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/assets/algorithm.png -------------------------------------------------------------------------------- /assets/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/assets/figure1.png -------------------------------------------------------------------------------- /assets/prmbench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/assets/prmbench.png -------------------------------------------------------------------------------- /assets/processbench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/assets/processbench.png -------------------------------------------------------------------------------- /examples/ds_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/examples/ds_config.json -------------------------------------------------------------------------------- /examples/py_scripts/code_snippets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/examples/py_scripts/code_snippets.py -------------------------------------------------------------------------------- /examples/py_scripts/pool_based_active_learning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/examples/py_scripts/pool_based_active_learning.py -------------------------------------------------------------------------------- /examples/py_scripts/test_actprm_on_processbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/examples/py_scripts/test_actprm_on_processbench.py -------------------------------------------------------------------------------- /examples/scripts/pool_based_active_learning.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/examples/scripts/pool_based_active_learning.sh -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/pyproject.toml -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/readme.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | deepspeed 3 | transformers 4 | datasets 5 | trl>=0.15.2 6 | vllm 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/setup.py -------------------------------------------------------------------------------- /src/active_prm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/dataset.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/.gitignore -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/LICENSE -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/README.md -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/__main__.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/docs/assets/main_fig.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/docs/assets/main_fig.pdf -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/docs/assets/main_fig.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/docs/assets/main_fig.svg -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/docs/assets/main_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/docs/assets/main_logo.png -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/docs/data_format.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/docs/data_format.md -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/docs/document.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/docs/document.md -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/annotation/simple_judge_turn_to_annotate_version.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/annotation/simple_judge_turn_to_annotate_version.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/annotation/turn_to_annotate_version.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/annotation/turn_to_annotate_version.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_by_4o.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_by_4o.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_by_gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_by_gemini.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_data/dataset/generate_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_data/dataset/generate_dataset.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_data/generate_by_4o.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_data/generate_by_4o.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_data/generate_by_gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/generate_data/generate_by_gemini.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/gemini_api/gemini_inference.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/inferencer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/inferencer/__init__.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/inferencer/qwq_inferencer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/inferencer/qwq_inferencer.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/inferencer/qwq_inferencer_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/inferencer/qwq_inferencer_dataset.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/run_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/run_inference.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/test_qwq.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/model_inference/qwq/test_qwq.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/__init__.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/__init__.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/circular.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/circular.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/confidence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/confidence.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/counterfactual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/counterfactual.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/deception.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/deception.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/domain_inconsistency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/domain_inconsistency.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/missing_condition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/missing_condition.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/redundency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/redundency.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/step_contradiction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/classifications/step_contradiction.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/prompt_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/prompt_new.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/prompts.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/prompts_test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_annotate/build_data/prompts/prompts_test.txt -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/__main__.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/bon_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/bon_eval.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/evaluate/eval_on_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/evaluate/eval_on_gpt.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/base_dataset.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/fix_data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/fix_data.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/load_data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/load_data.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/validate_data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/dataset/validate_data.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/vllm_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/policy_gen/vllm_inference.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/bon_eval/prm_eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/bon_eval/prm_eval_utils.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/evaluator.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/__init__.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/abstract_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/abstract_model.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/ensemble_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/ensemble_prm.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/gemini_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/gemini_models.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/llama3_1_8b_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/llama3_1_8b_prm.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/llemma7b_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/llemma7b_prm.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/math_shepherd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/math_shepherd.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/mathminos_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/mathminos_mistral.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/openai_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/openai_models.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/pure_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/pure_prm.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/qwen_math_rm_fsdp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/qwen_math_rm_fsdp.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/qwen_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/qwen_prm.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/qwen_qwq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/qwen_qwq.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/reasoneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/reasoneval.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/skywork_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/skywork_prm.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/models/vllm_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/models/vllm_models.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/1gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/1gpu.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/4gpus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/4gpus.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/4gpus_deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/4gpus_deepspeed.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/cpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/cpu.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/zero3_inference.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/zero3_inference.json -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/zero3_offload_inference.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/accelerate_configs/zero3_offload_inference.json -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/api_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/api_eval.sh -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/direct_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/direct_run.sh -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/example_configs/all_prms.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/example_configs/all_prms.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/example_configs/initial_test_gpt4o.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/example_configs/initial_test_gpt4o.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/scripts/examples/local_multi_gpu_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/scripts/examples/local_multi_gpu_eval.sh -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/__init__.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/base_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/base_dataset/base_evaluation_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/base_dataset/base_evaluation_dataset.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon/config.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon/task.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon_subset50/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon_subset50/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon_subset50/config.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon_subset50/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_bon_subset50/task.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_stem/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_stem/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_stem/config.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_stem/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmbench_stem/task.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/config.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/debug/debug.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/debug/debug.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_circular.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_circular.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_confidence.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_confidence.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_counterfactual.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_counterfactual.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_deception.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_deception.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_domain_inconsistency.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_domain_inconsistency.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_missing_condition.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_missing_condition.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_redundency.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_redundency.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_step_contradiction.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p1_4o_step_contradiction.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_circular.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_circular.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_confidence.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_confidence.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_counterfactual.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_counterfactual.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_deception.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_deception.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_domain_inconsistency.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_domain_inconsistency.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_missing_condition.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_missing_condition.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_redundency.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_redundency.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_step_contradiction.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_test_p2_4o_step_contradiction.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_circular.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_circular.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_confidence.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_confidence.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_counterfactual.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_counterfactual.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_deception.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_deception.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_domain_inconsistency.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_domain_inconsistency.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_missing_condition.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_missing_condition.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_redundency.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_redundency.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_step_contradiction.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm800k_train_p1_first400_4o_step_contradiction.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm_test_p12_multi_solutions.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/data/prm_test_p12_multi_solutions.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified/task.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified_subset400/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified_subset400/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified_subset400/config.yaml -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified_subset400/data/target_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified_subset400/data/target_data.jsonl -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified_subset400/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/tasks/prmtest_classified_subset400/task.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/utils/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/utils/arguments.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/utils/log_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/utils/log_utils.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/utils/model_utils.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/utils/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/utils/prompts.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/utils/task_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/utils/task_utils.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_eval/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_eval/utils/utils.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/data_visualization/generate_instance.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/data_visualization/generate_instance.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_correlation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_correlation.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_error_step_distribution.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_error_step_distribution.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_radar.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_radar.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_step_acc.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/draw_step_acc.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/correlation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/correlation.pdf -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/error_position_distribution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/error_position_distribution.pdf -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/llava_radar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/llava_radar.pdf -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/llava_radar.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/llava_radar.svg -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/step_acc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_figure/res/step_acc.pdf -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/bias_table.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/bias_table.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/data_statistic.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/data_statistic.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/draw_error_cases.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/draw_error_cases.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/form_excel_strs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/form_excel_strs.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/form_latex_appendix.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/form_latex_appendix.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/form_latex_strs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/form_latex_strs.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/prmbench_bon/form_latex_strs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/draw_tabs/prmbench_bon/form_latex_strs.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/form_latex_strs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/mr_visualize/eval_res_view/form_latex_strs.ipynb -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/requirements.txt -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/setup.py -------------------------------------------------------------------------------- /src/active_prm/eval/PRMBench/vis_res.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/PRMBench/vis_res.py -------------------------------------------------------------------------------- /src/active_prm/eval/processbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/eval/processbench.py -------------------------------------------------------------------------------- /src/active_prm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/models/__init__.py -------------------------------------------------------------------------------- /src/active_prm/models/nets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/models/nets.py -------------------------------------------------------------------------------- /src/active_prm/models/qwen2_ensemble_prm/configuration_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/models/qwen2_ensemble_prm/configuration_qwen2.py -------------------------------------------------------------------------------- /src/active_prm/models/qwen2_ensemble_prm/modeling_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/models/qwen2_ensemble_prm/modeling_qwen2.py -------------------------------------------------------------------------------- /src/active_prm/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/models/utils.py -------------------------------------------------------------------------------- /src/active_prm/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/trainer/__init__.py -------------------------------------------------------------------------------- /src/active_prm/trainer/active_sft_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/trainer/active_sft_config.py -------------------------------------------------------------------------------- /src/active_prm/trainer/active_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/trainer/active_sft_trainer.py -------------------------------------------------------------------------------- /src/active_prm/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/utils/processor.py -------------------------------------------------------------------------------- /src/active_prm/utils/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sail-sg/ActivePRM/HEAD/src/active_prm/utils/worker.py --------------------------------------------------------------------------------