├── .gitignore ├── README.md ├── cases ├── case-0.png ├── case-1.png └── case-2.png ├── config ├── archer │ ├── accelerate_config.yaml │ ├── archer_config.yaml │ └── default.yaml ├── ds_configs │ └── stage3-cosine.json ├── llama3-1 │ └── StepTool_ppo.json ├── qwen2 │ └── StepTool_ppo.json └── toolllama │ └── StepTool_ppo.json ├── data ├── model_predictions_converted │ └── qwen2 │ │ └── G123_example.json └── reward_annotation │ └── qwen2 │ └── G123_example_5.json ├── data_eval └── pass_rate_results │ ├── baseline-archer_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-archer_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-eto_cot │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-eto_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-ppo_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-ppo_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-rft_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-rft_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── steptool_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── steptool_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── toolllama_sft_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ └── toolllama_sft_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json ├── data_train ├── eto │ └── dpo_data_example.csv ├── llama3-1 │ ├── gpt4_dfs_G123_for_sft_example.json │ └── step_grained_for_ppo_example.csv ├── qwen2 │ ├── gpt4_dfs_G123_for_sft_example.json │ └── step_grained_for_ppo_example.csv ├── rft │ └── rft_data_example.json └── toolllama │ └── step_grained_for_ppo_example.csv ├── requirements.txt ├── scripts ├── baseline-archer │ ├── build_data.sh │ └── train_archer.sh ├── baseline-eto │ └── train_dpo.sh ├── baseline-ppo │ └── train_toolllama.sh ├── baseline-rft │ └── train_rft.sh ├── reward │ └── annotation_with_gpt.sh ├── sft │ ├── train_llama3-1.sh │ └── train_qwen2.sh └── steptool_train │ ├── train_llama3-1.sh │ ├── train_qwen2.sh │ └── train_toolllama.sh ├── scripts_eval ├── baseline-archer │ ├── inference_archer_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── baseline-eto │ ├── inference_eto_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── baseline-ppo │ ├── inference_ppo_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── baseline-rft │ ├── inference_rft_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── llama3-1 │ ├── inference_llama3-1_vllm.sh │ ├── run_conver_answer.sh │ ├── run_pass_rate.sh │ └── run_preference.sh ├── qwen2 │ ├── inference_qwen2_vllm.sh │ ├── run_convert_answer.sh │ ├── run_pass_rate.sh │ └── run_preference.sh ├── steptool │ ├── inference_steptool_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── toolllama-sft │ ├── inference_toolllama_vllm.sh │ ├── run_conver_answer.sh │ └── run_pass_rate.sh └── toolllama │ └── run_preference.sh ├── src ├── baseline-archer │ ├── archer_agent.py │ ├── archer_critic.py │ ├── archer_data.py │ ├── archer_environment.py │ ├── archer_trainer.py │ ├── build_archer_data.py │ ├── offpolicy_train_loop.py │ └── run.py ├── baseline-eto │ └── dpo_train.py ├── baseline-ppo │ └── ppo.py ├── baseline-rft │ └── rft.py ├── reward │ ├── annotation_by_rules.ipynb │ ├── annotation_with_gpt.py │ ├── evaluators │ │ ├── evaluator.py │ │ └── gpt-4-turbo-2024-04-09 │ │ │ ├── config.yaml │ │ │ └── template.txt │ └── openai_key.json ├── sft │ ├── llama3-1.py │ └── qwen2.py └── steptool │ ├── step_ppo.py │ └── step_ppotrainer.py └── stabletoolbench ├── config.yml ├── server ├── config.yml ├── main.py ├── requirements.txt └── utils.py ├── solvable_queries ├── test_instruction │ ├── G1_category.json │ ├── G1_instruction.json │ ├── G1_tool.json │ ├── G2_category.json │ ├── G2_instruction.json │ └── G3_instruction.json └── test_query_ids │ ├── G1_category.json │ ├── G1_instruction.json │ ├── G1_tool.json │ ├── G2_category.json │ ├── G2_instruction.json │ └── G3_instruction.json └── toolbench ├── inference ├── Algorithms │ ├── DFS.py │ ├── __init__.py │ ├── base_search.py │ └── single_chain.py ├── Downstream_tasks │ ├── __init__.py │ ├── base_env.py │ ├── rapidapi.py │ └── rapidapi_multithread.py ├── LLM │ ├── __init__.py │ ├── base_io.py │ ├── chatgpt_model.py │ ├── llama3_sft_model.py │ ├── qwen2_sft_model.py │ ├── retriever.py │ └── tool_llama_vllm.py ├── LLM_rank │ ├── __init__.py │ └── rank_candidate.py ├── Prompts │ ├── ReAct_prompts.py │ ├── Tree_search_prompts.py │ ├── __init__.py │ └── rank_prompts.py ├── Tree │ ├── Tree.py │ └── __init__.py ├── callbacks │ └── ServerEventCallback.py ├── qa_pipeline.py ├── qa_pipeline_multithread.py ├── qa_pipeline_open_domain.py ├── server.py ├── toolbench_server.py └── utils.py ├── model ├── __init__.py ├── apply_delta.py ├── compression.py ├── make_delta.py └── model_adapter.py ├── tool_conversation.py ├── tooleval ├── README.md ├── README_ZH.md ├── ToolBench.code-workspace ├── __init__.py ├── automatic_eval_sample.py ├── convert_answers.py ├── convert_to_answer_format.py ├── dataset │ └── __init__.py ├── eval_and_update_leaderboard.py ├── eval_pass_rate.py ├── eval_preference.py ├── eval_process_reward.py ├── evaluation │ ├── __init__.py │ ├── dataclass.py │ ├── methodcls.py │ └── usereval.py ├── evaluators │ ├── __init__.py │ ├── registered_cls │ │ ├── __init__.py │ │ ├── base.py │ │ ├── rtl.py │ │ ├── tooleval.py │ │ └── utils.py │ ├── tooleval_gpt-3.5-turbo_default │ │ ├── config.yaml │ │ └── template.txt │ ├── tooleval_gpt-3.5-turbo_fn │ │ ├── config.yaml │ │ └── template.txt │ └── tooleval_gpt-3.5-turbo_normalized │ │ ├── config.yaml │ │ └── template.txt ├── evaluators_comparison.py ├── requirements.txt ├── results │ ├── default_evalset │ │ ├── DFS │ │ │ └── win.csv │ │ └── gpt-3.5-turbo_CoT │ │ │ ├── G1_category.json │ │ │ ├── G1_instruction.json │ │ │ ├── G1_tool.json │ │ │ ├── G2_category.json │ │ │ ├── G2_instruction.json │ │ │ └── G3_instruction.json │ ├── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv │ └── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv └── utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/README.md -------------------------------------------------------------------------------- /cases/case-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/cases/case-0.png -------------------------------------------------------------------------------- /cases/case-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/cases/case-1.png -------------------------------------------------------------------------------- /cases/case-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/cases/case-2.png -------------------------------------------------------------------------------- /config/archer/accelerate_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/config/archer/accelerate_config.yaml -------------------------------------------------------------------------------- /config/archer/archer_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/config/archer/archer_config.yaml -------------------------------------------------------------------------------- /config/archer/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/config/archer/default.yaml -------------------------------------------------------------------------------- /config/ds_configs/stage3-cosine.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/config/ds_configs/stage3-cosine.json -------------------------------------------------------------------------------- /config/llama3-1/StepTool_ppo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/config/llama3-1/StepTool_ppo.json -------------------------------------------------------------------------------- /config/qwen2/StepTool_ppo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/config/qwen2/StepTool_ppo.json -------------------------------------------------------------------------------- /config/toolllama/StepTool_ppo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/config/toolllama/StepTool_ppo.json -------------------------------------------------------------------------------- /data/model_predictions_converted/qwen2/G123_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data/model_predictions_converted/qwen2/G123_example.json -------------------------------------------------------------------------------- /data/reward_annotation/qwen2/G123_example_5.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data/reward_annotation/qwen2/G123_example_5.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_cot/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_cot/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-archer_dfs/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-archer_dfs/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_cot/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_cot/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-eto_dfs/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-eto_dfs/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_cot/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_cot/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-ppo_dfs/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-ppo_dfs/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_cot/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_cot/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/baseline-rft_dfs/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/baseline-rft_dfs/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_cot/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_cot/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/steptool_dfs/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/steptool_dfs/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G2_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G2_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_cot/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_cot/G3_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G1_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G1_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G1_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G1_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G1_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G1_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G1_tool.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G1_tool.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G1_tool.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G2_category.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G2_category.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G2_category.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G2_instruction.json -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G3_instruction.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G3_instruction.csv -------------------------------------------------------------------------------- /data_eval/pass_rate_results/toolllama_sft_dfs/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_eval/pass_rate_results/toolllama_sft_dfs/G3_instruction.json -------------------------------------------------------------------------------- /data_train/eto/dpo_data_example.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_train/eto/dpo_data_example.csv -------------------------------------------------------------------------------- /data_train/llama3-1/gpt4_dfs_G123_for_sft_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_train/llama3-1/gpt4_dfs_G123_for_sft_example.json -------------------------------------------------------------------------------- /data_train/llama3-1/step_grained_for_ppo_example.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_train/llama3-1/step_grained_for_ppo_example.csv -------------------------------------------------------------------------------- /data_train/qwen2/gpt4_dfs_G123_for_sft_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_train/qwen2/gpt4_dfs_G123_for_sft_example.json -------------------------------------------------------------------------------- /data_train/qwen2/step_grained_for_ppo_example.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_train/qwen2/step_grained_for_ppo_example.csv -------------------------------------------------------------------------------- /data_train/rft/rft_data_example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_train/rft/rft_data_example.json -------------------------------------------------------------------------------- /data_train/toolllama/step_grained_for_ppo_example.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/data_train/toolllama/step_grained_for_ppo_example.csv -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/baseline-archer/build_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/baseline-archer/build_data.sh -------------------------------------------------------------------------------- /scripts/baseline-archer/train_archer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/baseline-archer/train_archer.sh -------------------------------------------------------------------------------- /scripts/baseline-eto/train_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/baseline-eto/train_dpo.sh -------------------------------------------------------------------------------- /scripts/baseline-ppo/train_toolllama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/baseline-ppo/train_toolllama.sh -------------------------------------------------------------------------------- /scripts/baseline-rft/train_rft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/baseline-rft/train_rft.sh -------------------------------------------------------------------------------- /scripts/reward/annotation_with_gpt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/reward/annotation_with_gpt.sh -------------------------------------------------------------------------------- /scripts/sft/train_llama3-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/sft/train_llama3-1.sh -------------------------------------------------------------------------------- /scripts/sft/train_qwen2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/sft/train_qwen2.sh -------------------------------------------------------------------------------- /scripts/steptool_train/train_llama3-1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/steptool_train/train_llama3-1.sh -------------------------------------------------------------------------------- /scripts/steptool_train/train_qwen2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/steptool_train/train_qwen2.sh -------------------------------------------------------------------------------- /scripts/steptool_train/train_toolllama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts/steptool_train/train_toolllama.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-archer/inference_archer_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-archer/inference_archer_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-archer/run_convert_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-archer/run_convert_answer.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-archer/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-archer/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-eto/inference_eto_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-eto/inference_eto_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-eto/run_convert_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-eto/run_convert_answer.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-eto/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-eto/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-ppo/inference_ppo_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-ppo/inference_ppo_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-ppo/run_convert_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-ppo/run_convert_answer.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-ppo/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-ppo/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-rft/inference_rft_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-rft/inference_rft_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-rft/run_convert_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-rft/run_convert_answer.sh -------------------------------------------------------------------------------- /scripts_eval/baseline-rft/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/baseline-rft/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/llama3-1/inference_llama3-1_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/llama3-1/inference_llama3-1_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/llama3-1/run_conver_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/llama3-1/run_conver_answer.sh -------------------------------------------------------------------------------- /scripts_eval/llama3-1/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/llama3-1/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/llama3-1/run_preference.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/llama3-1/run_preference.sh -------------------------------------------------------------------------------- /scripts_eval/qwen2/inference_qwen2_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/qwen2/inference_qwen2_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/qwen2/run_convert_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/qwen2/run_convert_answer.sh -------------------------------------------------------------------------------- /scripts_eval/qwen2/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/qwen2/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/qwen2/run_preference.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/qwen2/run_preference.sh -------------------------------------------------------------------------------- /scripts_eval/steptool/inference_steptool_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/steptool/inference_steptool_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/steptool/run_convert_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/steptool/run_convert_answer.sh -------------------------------------------------------------------------------- /scripts_eval/steptool/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/steptool/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/toolllama-sft/inference_toolllama_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/toolllama-sft/inference_toolllama_vllm.sh -------------------------------------------------------------------------------- /scripts_eval/toolllama-sft/run_conver_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/toolllama-sft/run_conver_answer.sh -------------------------------------------------------------------------------- /scripts_eval/toolllama-sft/run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/toolllama-sft/run_pass_rate.sh -------------------------------------------------------------------------------- /scripts_eval/toolllama/run_preference.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/scripts_eval/toolllama/run_preference.sh -------------------------------------------------------------------------------- /src/baseline-archer/archer_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/archer_agent.py -------------------------------------------------------------------------------- /src/baseline-archer/archer_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/archer_critic.py -------------------------------------------------------------------------------- /src/baseline-archer/archer_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/archer_data.py -------------------------------------------------------------------------------- /src/baseline-archer/archer_environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/archer_environment.py -------------------------------------------------------------------------------- /src/baseline-archer/archer_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/archer_trainer.py -------------------------------------------------------------------------------- /src/baseline-archer/build_archer_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/build_archer_data.py -------------------------------------------------------------------------------- /src/baseline-archer/offpolicy_train_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/offpolicy_train_loop.py -------------------------------------------------------------------------------- /src/baseline-archer/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-archer/run.py -------------------------------------------------------------------------------- /src/baseline-eto/dpo_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-eto/dpo_train.py -------------------------------------------------------------------------------- /src/baseline-ppo/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-ppo/ppo.py -------------------------------------------------------------------------------- /src/baseline-rft/rft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/baseline-rft/rft.py -------------------------------------------------------------------------------- /src/reward/annotation_by_rules.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/reward/annotation_by_rules.ipynb -------------------------------------------------------------------------------- /src/reward/annotation_with_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/reward/annotation_with_gpt.py -------------------------------------------------------------------------------- /src/reward/evaluators/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/reward/evaluators/evaluator.py -------------------------------------------------------------------------------- /src/reward/evaluators/gpt-4-turbo-2024-04-09/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/reward/evaluators/gpt-4-turbo-2024-04-09/config.yaml -------------------------------------------------------------------------------- /src/reward/evaluators/gpt-4-turbo-2024-04-09/template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/reward/evaluators/gpt-4-turbo-2024-04-09/template.txt -------------------------------------------------------------------------------- /src/reward/openai_key.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/reward/openai_key.json -------------------------------------------------------------------------------- /src/sft/llama3-1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/sft/llama3-1.py -------------------------------------------------------------------------------- /src/sft/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/sft/qwen2.py -------------------------------------------------------------------------------- /src/steptool/step_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/steptool/step_ppo.py -------------------------------------------------------------------------------- /src/steptool/step_ppotrainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/src/steptool/step_ppotrainer.py -------------------------------------------------------------------------------- /stabletoolbench/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/config.yml -------------------------------------------------------------------------------- /stabletoolbench/server/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/server/config.yml -------------------------------------------------------------------------------- /stabletoolbench/server/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/server/main.py -------------------------------------------------------------------------------- /stabletoolbench/server/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/server/requirements.txt -------------------------------------------------------------------------------- /stabletoolbench/server/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/server/utils.py -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_instruction/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_instruction/G1_category.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_instruction/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_instruction/G1_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_instruction/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_instruction/G1_tool.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_instruction/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_instruction/G2_category.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_instruction/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_instruction/G2_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_instruction/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_instruction/G3_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_query_ids/G1_category.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_query_ids/G1_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_query_ids/G1_tool.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_query_ids/G2_category.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_query_ids/G2_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/solvable_queries/test_query_ids/G3_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Algorithms/DFS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Algorithms/DFS.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Algorithms/base_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Algorithms/base_search.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Algorithms/single_chain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Algorithms/single_chain.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Downstream_tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Downstream_tasks/base_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Downstream_tasks/base_env.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Downstream_tasks/rapidapi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Downstream_tasks/rapidapi.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Downstream_tasks/rapidapi_multithread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Downstream_tasks/rapidapi_multithread.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/base_io.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def base_io(input_str): 4 | pass -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/chatgpt_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/LLM/chatgpt_model.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/llama3_sft_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/LLM/llama3_sft_model.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/qwen2_sft_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/LLM/qwen2_sft_model.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/LLM/retriever.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/tool_llama_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/LLM/tool_llama_vllm.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM_rank/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM_rank/rank_candidate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/LLM_rank/rank_candidate.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/ReAct_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Prompts/ReAct_prompts.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/Tree_search_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Prompts/Tree_search_prompts.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/rank_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Prompts/rank_prompts.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Tree/Tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/Tree/Tree.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Tree/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/callbacks/ServerEventCallback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/callbacks/ServerEventCallback.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/qa_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/qa_pipeline.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/qa_pipeline_multithread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/qa_pipeline_multithread.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/qa_pipeline_open_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/qa_pipeline_open_domain.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/server.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/toolbench_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/toolbench_server.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/inference/utils.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/model/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/model/apply_delta.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/compression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/model/compression.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/model/make_delta.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/model_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/model/model_adapter.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tool_conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tool_conversation.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/README.md -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/README_ZH.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/README_ZH.md -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/ToolBench.code-workspace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/ToolBench.code-workspace -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/automatic_eval_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/automatic_eval_sample.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/convert_answers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/convert_answers.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/convert_to_answer_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/convert_to_answer_format.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/eval_and_update_leaderboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/eval_and_update_leaderboard.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/eval_pass_rate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/eval_pass_rate.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/eval_preference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/eval_preference.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/eval_process_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/eval_process_reward.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluation/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluation/dataclass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluation/dataclass.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluation/methodcls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluation/methodcls.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluation/usereval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluation/usereval.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/registered_cls/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/registered_cls/base.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/rtl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/registered_cls/rtl.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/tooleval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/registered_cls/tooleval.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/registered_cls/utils.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/config.yaml -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/template.txt -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/config.yaml -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/template.txt -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/config.yaml -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/template.txt -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators_comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/evaluators_comparison.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/requirements.txt -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/DFS/win.csv: -------------------------------------------------------------------------------- 1 | ,Method,Win Rate,Std Error 2 | 0,DFS,, 3 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_category.json -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_tool.json -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_category.json -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G3_instruction.json -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/tooleval/utils.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/HEAD/stabletoolbench/toolbench/utils.py --------------------------------------------------------------------------------