├── .gitignore ├── README.md ├── cases ├── case-0.png ├── case-1.png └── case-2.png ├── config ├── archer │ ├── accelerate_config.yaml │ ├── archer_config.yaml │ └── default.yaml ├── ds_configs │ └── stage3-cosine.json ├── llama3-1 │ └── StepTool_ppo.json ├── qwen2 │ └── StepTool_ppo.json └── toolllama │ └── StepTool_ppo.json ├── data ├── model_predictions_converted │ └── qwen2 │ │ └── G123_example.json └── reward_annotation │ └── qwen2 │ └── G123_example_5.json ├── data_eval └── pass_rate_results │ ├── baseline-archer_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-archer_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-eto_cot │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-eto_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-ppo_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-ppo_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-rft_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── baseline-rft_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── steptool_cot │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── steptool_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ ├── toolllama_sft_cot │ ├── G1_category.csv │ ├── G1_category.json 
│ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.csv │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json │ └── toolllama_sft_dfs │ ├── G1_category.csv │ ├── G1_category.json │ ├── G1_instruction.csv │ ├── G1_instruction.json │ ├── G1_tool.csv │ ├── G1_tool.json │ ├── G2_category.csv │ ├── G2_category.json │ ├── G2_instruction.json │ ├── G3_instruction.csv │ └── G3_instruction.json ├── data_train ├── eto │ └── dpo_data_example.csv ├── llama3-1 │ ├── gpt4_dfs_G123_for_sft_example.json │ └── step_grained_for_ppo_example.csv ├── qwen2 │ ├── gpt4_dfs_G123_for_sft_example.json │ └── step_grained_for_ppo_example.csv ├── rft │ └── rft_data_example.json └── toolllama │ └── step_grained_for_ppo_example.csv ├── requirements.txt ├── scripts ├── baseline-archer │ ├── build_data.sh │ └── train_archer.sh ├── baseline-eto │ └── train_dpo.sh ├── baseline-ppo │ └── train_toolllama.sh ├── baseline-rft │ └── train_rft.sh ├── reward │ └── annotation_with_gpt.sh ├── sft │ ├── train_llama3-1.sh │ └── train_qwen2.sh └── steptool_train │ ├── train_llama3-1.sh │ ├── train_qwen2.sh │ └── train_toolllama.sh ├── scripts_eval ├── baseline-archer │ ├── inference_archer_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── baseline-eto │ ├── inference_eto_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── baseline-ppo │ ├── inference_ppo_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── baseline-rft │ ├── inference_rft_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── llama3-1 │ ├── inference_llama3-1_vllm.sh │ ├── run_conver_answer.sh │ ├── run_pass_rate.sh │ └── run_preference.sh ├── qwen2 │ ├── inference_qwen2_vllm.sh │ ├── run_convert_answer.sh │ ├── run_pass_rate.sh │ └── run_preference.sh ├── steptool │ ├── inference_steptool_vllm.sh │ ├── run_convert_answer.sh │ └── run_pass_rate.sh ├── toolllama-sft │ ├── inference_toolllama_vllm.sh │ ├── run_conver_answer.sh │ └── run_pass_rate.sh └── toolllama │ └── run_preference.sh ├── src ├── baseline-archer │ ├── archer_agent.py │ ├── archer_critic.py │ ├── archer_data.py │ ├── archer_environment.py │ ├── archer_trainer.py │ ├── build_archer_data.py │ ├── offpolicy_train_loop.py │ └── run.py ├── baseline-eto │ └── dpo_train.py ├── baseline-ppo │ └── ppo.py ├── baseline-rft │ └── rft.py ├── reward │ ├── annotation_by_rules.ipynb │ ├── annotation_with_gpt.py │ ├── evaluators │ │ ├── evaluator.py │ │ └── gpt-4-turbo-2024-04-09 │ │ │ ├── config.yaml │ │ │ └── template.txt │ └── openai_key.json ├── sft │ ├── llama3-1.py │ └── qwen2.py └── steptool │ ├── step_ppo.py │ └── step_ppotrainer.py └── stabletoolbench ├── config.yml ├── server ├── config.yml ├── main.py ├── requirements.txt └── utils.py ├── solvable_queries ├── test_instruction │ ├── G1_category.json │ ├── G1_instruction.json │ ├── G1_tool.json │ ├── G2_category.json │ ├── G2_instruction.json │ └── G3_instruction.json └── test_query_ids │ ├── G1_category.json │ ├── G1_instruction.json │ ├── G1_tool.json │ ├── G2_category.json │ ├── G2_instruction.json │ └── G3_instruction.json └── toolbench ├── inference ├── Algorithms │ ├── DFS.py │ ├── __init__.py │ ├── base_search.py │ └── single_chain.py ├── Downstream_tasks │ ├── __init__.py │ ├── base_env.py │ ├── rapidapi.py │ └── rapidapi_multithread.py ├── LLM │ ├── __init__.py │ ├── base_io.py │ ├── chatgpt_model.py │ ├── llama3_sft_model.py │ ├── qwen2_sft_model.py │ ├── retriever.py │ └── 
tool_llama_vllm.py ├── LLM_rank │ ├── __init__.py │ └── rank_candidate.py ├── Prompts │ ├── ReAct_prompts.py │ ├── Tree_search_prompts.py │ ├── __init__.py │ └── rank_prompts.py ├── Tree │ ├── Tree.py │ └── __init__.py ├── callbacks │ └── ServerEventCallback.py ├── qa_pipeline.py ├── qa_pipeline_multithread.py ├── qa_pipeline_open_domain.py ├── server.py ├── toolbench_server.py └── utils.py ├── model ├── __init__.py ├── apply_delta.py ├── compression.py ├── make_delta.py └── model_adapter.py ├── tool_conversation.py ├── tooleval ├── README.md ├── README_ZH.md ├── ToolBench.code-workspace ├── __init__.py ├── automatic_eval_sample.py ├── convert_answers.py ├── convert_to_answer_format.py ├── dataset │ └── __init__.py ├── eval_and_update_leaderboard.py ├── eval_pass_rate.py ├── eval_preference.py ├── eval_process_reward.py ├── evaluation │ ├── __init__.py │ ├── dataclass.py │ ├── methodcls.py │ └── usereval.py ├── evaluators │ ├── __init__.py │ ├── registered_cls │ │ ├── __init__.py │ │ ├── base.py │ │ ├── rtl.py │ │ ├── tooleval.py │ │ └── utils.py │ ├── tooleval_gpt-3.5-turbo_default │ │ ├── config.yaml │ │ └── template.txt │ ├── tooleval_gpt-3.5-turbo_fn │ │ ├── config.yaml │ │ └── template.txt │ └── tooleval_gpt-3.5-turbo_normalized │ │ ├── config.yaml │ │ └── template.txt ├── evaluators_comparison.py ├── requirements.txt ├── results │ ├── default_evalset │ │ ├── DFS │ │ │ └── win.csv │ │ └── gpt-3.5-turbo_CoT │ │ │ ├── G1_category.json │ │ │ ├── G1_instruction.json │ │ │ ├── G1_tool.json │ │ │ ├── G2_category.json │ │ │ ├── G2_instruction.json │ │ │ └── G3_instruction.json │ ├── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv │ └── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv └── utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | wandb/ 2 | sft_ckpts/ 3 | **/__pycache__/ 4 | ckpts/ 5 | data_eval/* 6 | !data_eval/pass_rate_results 7 | experimental_results/ 8 | core* -------------------------------------------------------------------------------- /cases/case-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/cases/case-0.png -------------------------------------------------------------------------------- /cases/case-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/cases/case-1.png -------------------------------------------------------------------------------- /cases/case-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/cases/case-2.png -------------------------------------------------------------------------------- /config/archer/accelerate_config.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: true 3 | distributed_type: MULTI_GPU 4 | downcast_bf16: 'no' 5 | gpu_ids: 0,1,2,3 6 | machine_rank: 0 7 | main_training_function: main 8 | mixed_precision: 'bf16' 9 | num_machines: 1 10 | num_processes: 4 11 | rdzv_backend: static 12 | same_network: true 13 | tpu_env: [] 14 | tpu_use_cluster: false 15 | tpu_use_sudo: false 16 | use_cpu: false 17 | 
--------------------------------------------------------------------------------
/config/archer/archer_config.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - default
3 |   - _self_
4 | 
5 | # checkpoint
6 | checkpoint_path: null
7 | basemodel: 'toolllama'
8 | save_path: 'output/archer_baseline/'
9 | env_load_path: 'data_train/archer/'
10 | 
11 | # model
12 | agent_type: "archer_toolllama"
13 | policy_lm: 'ToolBench/ToolLLaMA-2-7b-v2'
14 | max_new_tokens: 512
15 | use_bfloat16: True
16 | use_lora: True
17 | eos_str: ''
18 | 
19 | save_freq: 50
20 | eval_freq: 5
21 | 
22 | capacity: 100000 #replay buffer size
23 | rollout_size: 16 #number of rollout trajectories for each update
24 | eval_size: 4 #number of trajectories for evaluation
25 | batch_size: 4
26 | iterations: 100 #total number of iterations
27 | epochs: 20 #number of epochs for the critic each iteration
28 | actor_epochs: 1 #number of epochs for the actor each iteration
29 | warmup_iter: 10 #number of iterations without updating the policy
30 | grad_accum_steps: 8
31 | do_sample: True
32 | temperature: 1.0
33 | critic_lr: 1e-5
34 | lm_lr: 2e-6
35 | env_idx: null #set to null if don't want to reset to a specific environment
36 | gamma: 0.95 #discount factor
37 | tau: 0.1 #soft update parameter
38 | max_grad_norm: 10.0
39 | 
40 | # wandb logging
41 | use_wandb: True
42 | project_name: 'archer_baseline'
43 | run_name: 'toolllama_archer_iter100_epoch20_actor1'
44 | 
--------------------------------------------------------------------------------
/config/archer/default.yaml:
--------------------------------------------------------------------------------
1 | #cache directory of transformer
2 | cache_dir: '~/.cache/huggingface/hub/'
3 | 
4 | #token
5 | huggingface_token: ''
6 | wandb_key: ""
7 | 
8 | policy_lm: "gpt2"
9 | critic_lm: "roberta-base"
10 | agent_type: "archer_toolllama"
11 | use_baseline: False
12 | use_lora: False
13 | max_new_tokens: 32
14 | save_freq: 25
15 | eval_freq: 25
16 | 
17 | #training hyperparameters
18 | capacity: 100000 #replay buffer size
19 | rollout_size: 128 #number of rollout trajectories for each update
20 | eval_size: 32 #number of trajectories for evaluation
21 | batch_size: 8
22 | iterations: 2000 #total number of iterations
23 | epochs: 50 #number of epochs for the critic each iteration
24 | actor_epochs: 3 #number of epochs for the actor each iteration
25 | warmup_iter: 20 #number of iterations without updating the policy
26 | grad_accum_steps: 32
27 | do_sample: True
28 | temperature: 1.0
29 | critic_lr: 1e-5
30 | lm_lr: 1e-5
31 | env_idx: null #set to null if don't want to reset to a specific environment
32 | gamma: 0.95 #discount factor
33 | tau: 0.1 #soft update parameter
34 | max_grad_norm: 1.0
35 | 
36 | use_wandb: False
--------------------------------------------------------------------------------
/config/ds_configs/stage3-cosine.json:
--------------------------------------------------------------------------------
1 | {
2 |     "bf16": {
3 |         "enabled": "auto"
4 |     },
5 |     "fp16": {
6 |         "enabled": "auto",
7 |         "loss_scale": 0,
8 |         "loss_scale_window": 1000,
9 |         "initial_scale_power": 16,
10 |         "hysteresis": 2,
11 |         "min_loss_scale": 1
12 |     },
13 |     "zero_optimization": {
14 |         "stage": 3,
15 |         "offload_optimizer": {
16 |             "device": "cpu",
17 |             "pin_memory": true
18 |         },
19 |         "offload_param": {
20 |             "device": "cpu",
21 |             "pin_memory": true
22 |         },
23 |         "overlap_comm": true,
24 |         "contiguous_gradients": true,
25 |         "sub_group_size": 1e9,
26 |         "reduce_bucket_size": "auto",
27 |         "stage3_prefetch_bucket_size": "auto",
28 |         "stage3_param_persistence_threshold": "auto",
29 |         "stage3_max_live_parameters": 1e9,
30 |         "stage3_max_reuse_distance": 1e9,
31 |         "gather_16bit_weights_on_model_save": true
32 |     },
33 |     "gradient_accumulation_steps": "auto",
34 |     "gradient_clipping": "auto",
35 |     "steps_per_print": 1e5,
36 |     "train_batch_size": "auto",
37 |     "train_micro_batch_size_per_gpu": "auto",
38 |     "wall_clock_breakdown": false
39 | }
--------------------------------------------------------------------------------
/config/llama3-1/StepTool_ppo.json:
--------------------------------------------------------------------------------
1 | {
2 |     "peft_kwargs": {
3 |         "r": 8,
4 |         "lora_alpha": 16,
5 |         "bias": "none",
6 |         "task_type": "CAUSAL_LM"
7 |     },
8 |     "ppo_kwargs": {
9 |         "learning_rate": 1e-5,
10 |         "log_with": "wandb",
11 |         "remove_unused_columns": false,
12 |         "batch_size": 8,
13 |         "mini_batch_size": 2,
14 |         "gradient_accumulation_steps": 4,
15 |         "kl_penalty": "kl",
16 |         "init_kl_coef": 0.3,
17 |         "target_kl": 6,
18 |         "target": 6,
19 |         "horizon": 10000,
20 |         "gamma": 0.99
21 |     }
22 | }
--------------------------------------------------------------------------------
/config/qwen2/StepTool_ppo.json:
--------------------------------------------------------------------------------
1 | {
2 |     "peft_kwargs": {
3 |         "target_modules": ["gate_proj", "o_proj", "k_proj", "q_proj", "up_proj", "down_proj", "v_proj"],
4 |         "r": 8,
5 |         "lora_alpha": 16,
6 |         "bias": "none",
7 |         "task_type": "CAUSAL_LM"
8 |     },
9 |     "ppo_kwargs": {
10 |         "learning_rate": 1e-5,
11 |         "log_with": "wandb",
12 |         "remove_unused_columns": false,
13 |         "batch_size": 8,
14 |         "mini_batch_size": 2,
15 |         "gradient_accumulation_steps": 4,
16 |         "kl_penalty": "kl",
17 |         "init_kl_coef": 0.3,
18 |         "target_kl": 6,
19 |         "target": 6,
20 |         "horizon": 10000,
21 |         "gamma": 0.99
22 |     }
23 | }
--------------------------------------------------------------------------------
/config/toolllama/StepTool_ppo.json:
--------------------------------------------------------------------------------
1 | {
2 |     "peft_kwargs": {
3 |         "r": 16,
4 |         "lora_alpha": 16,
5 |         "bias": "none",
6 |         "task_type": "CAUSAL_LM"
7 |     },
8 |     "ppo_kwargs": {
9 |         "seed": 2024,
10 |         "learning_rate": 1e-5,
11 |         "log_with": "wandb",
12 |         "remove_unused_columns": false,
13 |         "batch_size": 8,
14 |         "mini_batch_size": 2,
15 |         "gradient_accumulation_steps": 4,
16 |         "kl_penalty": "kl",
17 |         "init_kl_coef": 0.3,
18 |         "target_kl": 6,
19 |         "target": 6,
20 |         "horizon": 10000,
21 |         "gamma": 0.99
22 |     }
23 | }
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate==0.33.0
2 | datasets==2.21.0
3 | trl==0.10.1
4 | wandb==0.17.8
5 | fastapi==0.95.1
6 | gradio==3.23.0
7 | httpx==0.24.0
8 | markdown-it-py==2.2.0
9 | numpy==1.24.3
10 | prompt_toolkit==3.0.47
11 | pydantic==1.10.7
12 | requests==2.32.3
13 | rich==13.3.5
14 | rouge==1.0.1
15 | sentencepiece==0.1.99
16 | shortuuid==1.0.11
17 | tiktoken==0.4.0
18 | tokenizers==0.19.1
19 | transformers==4.43.1
20 | uvicorn==0.22.0
21 | bitsandbytes==0.43.3
22 | peft==0.5.0
23 | langchain==0.0.229
24 | deepspeed==0.14.5
25 | sentence-transformers==2.2.2
26 | tensorboard==2.17.1
27 | openai==1.42.0
28 | scipy==1.14.1
29 | termcolor==2.4.0
30 | Flask==3.0.3
31 | Flask-Cors==4.0.1
32 | backoff==2.2.1
33 | slowapi==0.1.9
34 | httpx==0.24.0
35 | omegaconf==2.3.0
36 | 
--------------------------------------------------------------------------------
/scripts/baseline-archer/build_data.sh:
--------------------------------------------------------------------------------
1 | export DATA_FILE="data_train/toolllama/step_grained_for_ppo.csv"
2 | export SAVE_PATH="data_train/archer"
3 | 
4 | python src/baseline-archer/build_archer_data.py
--------------------------------------------------------------------------------
/scripts/baseline-archer/train_archer.sh:
--------------------------------------------------------------------------------
1 | 
2 | export ARCHER_CONFIG_NAME="archer_config.yaml"
3 | 
4 | accelerate launch --config_file config/archer/accelerate_config.yaml src/baseline-archer/run.py
--------------------------------------------------------------------------------
/scripts/baseline-eto/train_dpo.sh:
--------------------------------------------------------------------------------
1 | export TRAIN_PATH="data_train/eto"
2 | export CUDA_VISIBLE_DEVICES=0,1
3 | export WANDB_PROJECT="baselines"
4 | 
5 | python src/baseline-eto/dpo_train.py \
6 |     --model_name_or_path ToolBench/ToolLLaMA-2-7b-v2 \
7 |     --data_path ${TRAIN_PATH}/dpo_data_example.csv \
8 |     --bf16 True \
9 |     --output_dir "output/eto_baseline-3epoch" \
10 |     --report_to "wandb" \
11 |     --run_name "eto_baseline-3epoch" \
12 |     --num_train_epochs 3 \
13 |     --per_device_train_batch_size 1 \
14 |     --per_device_eval_batch_size 1 \
15 |     --gradient_accumulation_steps 8 \
16 |     --eval_strategy "epoch" \
17 |     --save_strategy "epoch" \
18 |     --save_total_limit 10 \
19 |     --seed 2024 \
20 |     --learning_rate 1e-4 \
21 |     --lr_scheduler_type "cosine" \
22 |     --logging_steps 1 \
23 |     --model_max_length 8192 \
24 |     --max_prompt_length 7000 \
25 |     --beta 0.1
--------------------------------------------------------------------------------
/scripts/baseline-ppo/train_toolllama.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=./
2 | export TRAIN_PATH="data_train"
3 | export TRAIN_SET="step_grained_for_ppo_example"
4 | export CUDA_VISIBLE_DEVICES="0,1,2,3"
5 | 
6 | export MODEL_TYPE="toolllama"
7 | # load the base model after sft pretrain
8 | export MODEL_PATH="ToolBench/ToolLLaMA-2-7b-v2"
9 | 
10 | python src/baseline-ppo/ppo.py \
11 |     --model_path ${MODEL_PATH} \
12 |     --model_type ${MODEL_TYPE} \
13 |     --config_path config/${MODEL_TYPE}/StepTool_ppo.json \
14 |     --data_file ${TRAIN_PATH}/${MODEL_TYPE}/${TRAIN_SET}.csv \
15 |     --max_context_len 4096 \
16 |     --max_response_len 1024 \
17 |     --epochs 5
18 | 
--------------------------------------------------------------------------------
/scripts/baseline-rft/train_rft.sh:
--------------------------------------------------------------------------------
1 | export TRAIN_PATH="data_train/rft"
2 | export NCCL_P2P_DISABLE=1
3 | export NCCL_IB_DISABLE=1
4 | export CUDA_VISIBLE_DEVICES=0,1,2,3
5 | export WANDB_PROJECT="baselines"
6 | torchrun \
7 |     --nproc_per_node 4 \
8 |     --nnodes 1 \
9 |     --node_rank 0 \
10 |     --master_addr localhost \
11 |     --master_port 6601 \
12 |     src/baseline-rft/rft.py \
13 |     --model_name_or_path ToolBench/ToolLLaMA-2-7b-v2 \
14 |     --data_path ${TRAIN_PATH}/rft_data_example.json \
15 |     --bf16 True \
16 |     --output_dir "output/rft_baseline-3epoch" \
17 |     --report_to "wandb" \
18 |     --run_name "rft_baseline-3epoch" \
19 |     --num_train_epochs 3 \
20 |     --per_device_train_batch_size 2 \
21 |     --per_device_eval_batch_size 2 \
22 |     --gradient_accumulation_steps 8 \
23 |     --eval_strategy "epoch" \
24 |     --save_strategy "epoch" \
25 |     --save_total_limit 10 \
26 |     --seed 2024 \
27 |     --learning_rate 5e-5 \
28 |     --weight_decay 0. \
29 |     --warmup_ratio 0.04 \
30 |     --lr_scheduler_type "cosine" \
31 |     --logging_steps 1 \
32 |     --model_max_length 8192 \
33 |     --gradient_checkpointing True \
34 |     --lazy_preprocess False \
35 |     --deepspeed config/ds_configs/stage3-cosine.json
36 | 
--------------------------------------------------------------------------------
/scripts/reward/annotation_with_gpt.sh:
--------------------------------------------------------------------------------
1 | # cd ../../toolbench/tooleval
2 | # export API_POOL_FILE=path/to/your/openai_key_json_file.json
3 | export PYTHONPATH="./:./stabletoolbench/toolbench/tooleval"
4 | export API_POOL_FILE=src/reward/openai_key.json
5 | export CONVERTED_ANSWER_PATH=data/model_predictions_converted
6 | export SAVE_PATH=data/reward_annotation/
7 | mkdir -p ${SAVE_PATH}
8 | 
9 | # export CANDIDATE_MODEL="virtual_qwen2_sft_dfs_fix_epoch3"
10 | export CANDIDATE_MODEL="qwen2"
11 | export EVAL_MODEL="gpt-4-turbo-2024-04-09"
12 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL}
13 | # unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy
14 | # --evaluators_cfg_path \
15 | python src/reward/annotation_with_gpt.py \
16 |     --converted_answer_path ${CONVERTED_ANSWER_PATH}/${CANDIDATE_MODEL} \
17 |     --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \
18 |     --reference_model ${CANDIDATE_MODEL} \
19 |     --evaluator ${EVAL_MODEL} \
20 |     --max_eval_threads 1 \
21 |     --task_num 5 \
22 |     --evaluate_times 3 \
23 |     --test_set G123_example \
--------------------------------------------------------------------------------
/scripts/sft/train_llama3-1.sh:
--------------------------------------------------------------------------------
1 | export NCCL_P2P_DISABLE=1
2 | export NCCL_IB_DISABLE=1
3 | export CUDA_VISIBLE_DEVICES=0,1,2,3
4 | export TRAIN_PATH="data_train"
5 | export TRAIN_SET="gpt4_dfs_G123_for_sft"
6 | 
7 | export MODEL_PATH="meta-llama/Meta-Llama-3.1-8B-Instruct"
8 | export MODEL_TYPE="llama3-1"
9 | export OUTPUT_DIR="sft_ckpts"
10 | export WANDB_PROJECT="SFT-Llama3-1"
11 | export WANDB_RUN_NAME="sft_with_gpt4_paths"
12 | 
13 | torchrun \
14 |     --nproc_per_node 4 \
15 |     --nnodes 1 \
16 |     --node_rank 0 \
17 |     --master_addr localhost \
18 |     --master_port 6601 \
19 |     src/sft/llama3-1.py \
20 |     --model_name_or_path ${MODEL_PATH} \
21 |     --data_path ${TRAIN_PATH}/${MODEL_TYPE}/${TRAIN_SET}.json \
22 |     --bf16 True \
23 |     --output_dir ${OUTPUT_DIR}/${MODEL_TYPE} \
24 |     --report_to "wandb" \
25 |     --run_name ${WANDB_RUN_NAME} \
26 |     --num_train_epochs 5 \
27 |     --per_device_train_batch_size 1 \
28 |     --per_device_eval_batch_size 1 \
29 |     --gradient_accumulation_steps 4 \
30 |     --eval_strategy "steps" \
31 |     --eval_steps 400 \
32 |     --save_strategy "steps" \
33 |     --save_steps 400 \
34 |     --save_total_limit 10 \
35 |     --learning_rate 2e-5 \
36 |     --weight_decay 0. \
37 |     --warmup_ratio 0.04 \
38 |     --lr_scheduler_type "cosine" \
39 |     --logging_steps 1 \
40 |     --model_max_length 8192 \
41 |     --gradient_checkpointing True \
42 |     --lazy_preprocess False \
43 |     --deepspeed config/ds_configs/stage3-cosine.json
44 | 
--------------------------------------------------------------------------------
/scripts/sft/train_qwen2.sh:
--------------------------------------------------------------------------------
1 | export NCCL_P2P_DISABLE=1
2 | export NCCL_IB_DISABLE=1
3 | export CUDA_VISIBLE_DEVICES=0,1,2,3
4 | export TRAIN_PATH="data_train"
5 | export TRAIN_SET="gpt4_dfs_G123_for_sft"
6 | 
7 | export MODEL_PATH="Qwen/Qwen2-7B-Instruct"
8 | export MODEL_TYPE="qwen2"
9 | export OUTPUT_DIR="sft_ckpts"
10 | export WANDB_PROJECT="SFT-Qwen2"
11 | export WANDB_RUN_NAME="sft_with_gpt4_paths"
12 | 
13 | torchrun \
14 |     --nproc_per_node 4 \
15 |     --nnodes 1 \
16 |     --node_rank 0 \
17 |     --master_addr localhost \
18 |     --master_port 6601 \
19 |     src/sft/qwen2.py \
20 |     --model_name_or_path ${MODEL_PATH} \
21 |     --data_path ${TRAIN_PATH}/${MODEL_TYPE}/${TRAIN_SET}.json \
22 |     --bf16 True \
23 |     --output_dir ${OUTPUT_DIR}/${MODEL_TYPE} \
24 |     --report_to "wandb" \
25 |     --run_name ${WANDB_RUN_NAME} \
26 |     --num_train_epochs 5 \
27 |     --per_device_train_batch_size 1 \
28 |     --per_device_eval_batch_size 1 \
29 |     --gradient_accumulation_steps 4 \
30 |     --eval_strategy "steps" \
31 |     --eval_steps 400 \
32 |     --save_strategy "steps" \
33 |     --save_steps 400 \
34 |     --save_total_limit 10 \
35 |     --learning_rate 2e-5 \
36 |     --weight_decay 0. \
37 |     --warmup_ratio 0.04 \
38 |     --lr_scheduler_type "cosine" \
39 |     --logging_steps 1 \
40 |     --model_max_length 8192 \
41 |     --gradient_checkpointing True \
42 |     --lazy_preprocess False \
43 |     --deepspeed config/ds_configs/stage3-cosine.json
44 | 
--------------------------------------------------------------------------------
/scripts/steptool_train/train_llama3-1.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=./
2 | export TRAIN_PATH="data_train"
3 | export TRAIN_SET="step_grained_for_ppo_example"
4 | export CUDA_VISIBLE_DEVICES="0,1,2,3"
5 | 
6 | export MODEL_TYPE="llama3-1"
7 | # load the base model after sft pretrain
8 | export MODEL_PATH="sft-ckpts/llama3-1/checkpoint-3600"
9 | 
10 | python src/steptool/step_ppo.py \
11 |     --model_path ${MODEL_PATH} \
12 |     --model_type ${MODEL_TYPE} \
13 |     --config_path config/${MODEL_TYPE}/StepTool_ppo.json \
14 |     --data_file ${TRAIN_PATH}/${MODEL_TYPE}/${TRAIN_SET}.csv \
15 |     --epochs 5
16 | 
17 | 
--------------------------------------------------------------------------------
/scripts/steptool_train/train_qwen2.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=./
2 | export TRAIN_PATH="data_train"
3 | export TRAIN_SET="step_grained_for_ppo_example"
4 | export CUDA_VISIBLE_DEVICES="0,1,2,3"
5 | 
6 | export MODEL_TYPE="qwen2"
7 | # load the base model after sft pretrain
8 | export MODEL_PATH="sft-ckpts/qwen2/checkpoint-3639"
9 | 
10 | python src/steptool/step_ppo.py \
11 |     --model_path ${MODEL_PATH} \
12 |     --model_type ${MODEL_TYPE} \
13 |     --config_path config/${MODEL_TYPE}/StepTool_ppo.json \
14 |     --data_file ${TRAIN_PATH}/${MODEL_TYPE}/${TRAIN_SET}.csv \
15 |     --epochs 5
16 | 
17 | 
--------------------------------------------------------------------------------
/scripts/steptool_train/train_toolllama.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=./
2 | 
export TRAIN_PATH="data_train" 3 | export TRAIN_SET="step_grained_for_ppo_example" 4 | export CUDA_VISIBLE_DEVICES="0,1,2,3" 5 | 6 | export MODEL_TYPE="toolllama" 7 | # load the base model after sft pretrain 8 | export MODEL_PATH="ToolBench/ToolLLaMA-2-7b-v2" 9 | 10 | python src/steptool/step_ppo.py \ 11 | --model_path ${MODEL_PATH} \ 12 | --model_type ${MODEL_TYPE} \ 13 | --config_path config/${MODEL_TYPE}/StepTool_ppo.json \ 14 | --data_file ${TRAIN_PATH}/${MODEL_TYPE}/${TRAIN_SET}.csv \ 15 | --max_context_len 4096 \ 16 | --max_response_len 1024 \ 17 | --epochs 5 18 | -------------------------------------------------------------------------------- /scripts_eval/baseline-archer/inference_archer_vllm.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8084/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://localhost:8081/virtual" # the address of api server 5 | export MODEL_PATH="baseline-archer" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 | 8 | export OUTPUT_DIR="data_eval/answer/baseline-archer_cot" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model ToolLLaMA_vllm \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | --method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/baseline-archer/run_convert_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export MODEL_NAME=baseline-archer_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 6 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/baseline-archer/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="baseline-archer_cot" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | 
--reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 1 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/baseline-eto/inference_eto_vllm.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8084/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://localhost:8081/virtual" # the address of api server 5 | export MODEL_PATH="baseline-eto" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 | 8 | export OUTPUT_DIR="data_eval/answer/baseline-eto_cot" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model ToolLLaMA_vllm \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | --method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/baseline-eto/run_convert_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export MODEL_NAME=baseline-eto_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 6 | export test_set=G2_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/baseline-eto/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="baseline-eto_dfs" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G2_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | --reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 15 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/baseline-ppo/inference_ppo_vllm.sh: 
-------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8084/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://localhost:8081/virtual" # the address of api server 5 | export MODEL_PATH="baseline-ppo" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 | 8 | export OUTPUT_DIR="data_eval/answer/baseline-ppo_cot" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model ToolLLaMA_vllm \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | --method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/baseline-ppo/run_convert_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export MODEL_NAME=baseline-ppo_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 6 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/baseline-ppo/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="baseline-ppo_dfs" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | --reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 1 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/baseline-rft/inference_rft_vllm.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8084/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://localhost:8081/virtual" # the address of api server 5 | export MODEL_PATH="baseline-rft" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 
| 8 | export OUTPUT_DIR="data_eval/answer/baseline-rft_cot" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model ToolLLaMA_vllm \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | --method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/baseline-rft/run_convert_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export MODEL_NAME=baseline-rft_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 6 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/baseline-rft/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="baseline-rft_dfs" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | --reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 1 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/llama3-1/inference_llama3-1_vllm.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8085/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://127.0.0.1:8081/virtual" # the address of api server 5 | export MODEL_PATH="llama3-1" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 | 8 | export OUTPUT_DIR="data_eval/answer/virtual_llama3-1_dfs" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model llama3 \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | 
--method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/llama3-1/run_conver_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../data_eval/model_predictions_converted 4 | export MODEL_NAME=virtual_llama3-1_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 6 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/llama3-1/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="virtual_llama3-1_dfs" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | --reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 1 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/llama3-1/run_preference.sh: -------------------------------------------------------------------------------- 1 | cd toolbench/tooleval 2 | export CONVERTED_ANSWER_PATH=../../data_eval/model_predictions_converted 3 | export SAVE_PATH=../../data_eval/preference_results 4 | export PASS_RATE_PATH=../../data_eval/pass_rate_results 5 | 6 | export REFERENCE_MODEL=virtual_gpt3.5-0125_dfs # change it accordingly 7 | export CANDIDATE_MODEL=virtual_llama3-1_dfs # change it accordingly 8 | 9 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 10 | mkdir -p ${SAVE_PATH}/${REFERENCE_MODEL}_${CANDIDATE_MODEL} 11 | 12 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 13 | 14 | python eval_preference.py \ 15 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 16 | --reference_model ${REFERENCE_MODEL} \ 17 | --output_model ${CANDIDATE_MODEL} \ 18 | --test_ids ../../solvable_queries/test_query_ids/ \ 19 | --save_path ${SAVE_PATH}/${REFERENCE_MODEL}_${CANDIDATE_MODEL} \ 20 | --pass_rate_result_path ${PASS_RATE_PATH} \ 21 | --max_eval_threads 30 \ 22 | --evaluate_times 3 \ 23 | --test_set ${test_set} \ 24 | # --overwrite -------------------------------------------------------------------------------- 
/scripts_eval/qwen2/inference_qwen2_vllm.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8084/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://localhost:8081/virtual" # the address of api server 5 | export MODEL_PATH="qwen2" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 | 8 | export OUTPUT_DIR="data_eval/answer/virtual_qwen2_dfs" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model qwen2 \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | --method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/qwen2/run_convert_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../data_eval/model_predictions_converted 4 | export MODEL_NAME=virtual_qwen2_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 6 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/qwen2/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="virtual_qwen2_dfs" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | --reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 1 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/qwen2/run_preference.sh: -------------------------------------------------------------------------------- 1 | cd toolbench/tooleval 2 | export CONVERTED_ANSWER_PATH=../../data_eval/model_predictions_converted 3 | export SAVE_PATH=../../data_eval/preference_results 4 | export PASS_RATE_PATH=../../data_eval/pass_rate_results 5 | 6 | export REFERENCE_MODEL=virtual_gpt3.5-0125_dfs # change it accordingly 7 | export 
CANDIDATE_MODEL=virtual_qwen2_dfs # change it accordingly 8 | 9 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 10 | mkdir -p ${SAVE_PATH}/${REFERENCE_MODEL}_${CANDIDATE_MODEL} 11 | 12 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 13 | 14 | python eval_preference.py \ 15 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 16 | --reference_model ${REFERENCE_MODEL} \ 17 | --output_model ${CANDIDATE_MODEL} \ 18 | --test_ids ../../solvable_queries/test_query_ids/ \ 19 | --save_path ${SAVE_PATH}/${REFERENCE_MODEL}_${CANDIDATE_MODEL} \ 20 | --pass_rate_result_path ${PASS_RATE_PATH} \ 21 | --max_eval_threads 30 \ 22 | --evaluate_times 3 \ 23 | --test_set ${test_set} \ 24 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/steptool/inference_steptool_vllm.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8084/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://localhost:8081/virtual" # the address of api server 5 | export MODEL_PATH="steptool" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 | 8 | export OUTPUT_DIR="data_eval/answer/steptool_cot" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model ToolLLaMA_vllm \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | --method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/steptool/run_convert_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export MODEL_NAME=steptool_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 6 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/steptool/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="steptool_cot" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 
13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | --reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 1 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite -------------------------------------------------------------------------------- /scripts_eval/toolllama-sft/inference_toolllama_vllm.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench 2 | export PYTHONPATH=./ 3 | export VLLM_API_BASE="http://127.0.0.1:8083/v1/" # the address of vllm.server 4 | export SERVICE_URL="http://127.0.0.1:8081/virtual" # the address of api server 5 | export MODEL_PATH="toolllama" # the name of vllm.server 6 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 7 | 8 | export OUTPUT_DIR="data_eval/answer/toolllama_sft_dfs" # change it accordingly 9 | 10 | group=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | mkdir -p $OUTPUT_DIR; mkdir -p $OUTPUT_DIR/$group 12 | python toolbench/inference/qa_pipeline_multithread.py \ 13 | --backbone_model ToolLLaMA_vllm \ 14 | --model_path ${MODEL_PATH} \ 15 | --max_observation_length 1024 \ 16 | --method ${STRATEGY} \ 17 | --input_query_file solvable_queries/test_instruction/${group}.json \ 18 | --output_answer_file $OUTPUT_DIR/$group \ 19 | --max_query_count 30 \ 20 | --num_thread 4 -------------------------------------------------------------------------------- /scripts_eval/toolllama-sft/run_conver_answer.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export RAW_ANSWER_PATH=../../../data_eval/answer 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export MODEL_NAME=toolllama_sft_dfs # change it accordingly 5 | export STRATEGY="DFS_woFilter_w2" # or CoT@1 DFS_woFilter_w2 6 | export test_set=G1_tool # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 7 | 8 | mkdir -p ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 9 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 10 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 11 | 12 | python convert_to_answer_format.py\ 13 | --answer_dir ${answer_dir} \ 14 | --method ${STRATEGY} \ 15 | --output ${output_file} -------------------------------------------------------------------------------- /scripts_eval/toolllama-sft/run_pass_rate.sh: -------------------------------------------------------------------------------- 1 | cd stabletoolbench/toolbench/tooleval 2 | export API_POOL_FILE=../../openai_key.json 3 | export CONVERTED_ANSWER_PATH=../../../data_eval/model_predictions_converted 4 | export SAVE_PATH=../../../data_eval/pass_rate_results 5 | mkdir -p ${SAVE_PATH} 6 | export CANDIDATE_MODEL="toolllama_sft_dfs" # change it accordingly 7 | export EVAL_MODEL=gpt-4-turbo-2024-04-09 8 | mkdir -p ${SAVE_PATH}/${CANDIDATE_MODEL} 9 | 10 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction 11 | 12 | python eval_pass_rate.py \ 13 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 14 | --save_path ${SAVE_PATH}/${CANDIDATE_MODEL} \ 15 | --reference_model ${CANDIDATE_MODEL} \ 16 | --test_ids ../../solvable_queries/test_query_ids \ 17 | --max_eval_threads 15 \ 18 | --evaluate_times 3 \ 19 | --test_set ${test_set} \ 20 | # --overwrite 
--------------------------------------------------------------------------------
/scripts_eval/toolllama/run_preference.sh:
--------------------------------------------------------------------------------
1 | cd toolbench/tooleval
2 | export CONVERTED_ANSWER_PATH=../../data_eval/model_predictions_converted
3 | export SAVE_PATH=../../data_eval/preference_results
4 | export PASS_RATE_PATH=../../data_eval/pass_rate_results
5 | 
6 | export REFERENCE_MODEL=virtual_gpt3.5-0125_dfs # change it accordingly
7 | export CANDIDATE_MODEL=virtual_toolllama_dfs # change it accordingly
8 | 
9 | export EVAL_MODEL=gpt-4-turbo-2024-04-09
10 | mkdir -p ${SAVE_PATH}/${REFERENCE_MODEL}_${CANDIDATE_MODEL}
11 | 
12 | export test_set=G1_instruction # G1_category, G1_tool, G2_category, G2_instruction, G3_instruction
13 | 
14 | python eval_preference.py \
15 |     --converted_answer_path ${CONVERTED_ANSWER_PATH} \
16 |     --reference_model ${REFERENCE_MODEL} \
17 |     --output_model ${CANDIDATE_MODEL} \
18 |     --test_ids ../../solvable_queries/test_query_ids/ \
19 |     --save_path ${SAVE_PATH}/${REFERENCE_MODEL}_${CANDIDATE_MODEL} \
20 |     --pass_rate_result_path ${PASS_RATE_PATH} \
21 |     --max_eval_threads 30 \
22 |     --evaluate_times 3 \
23 |     --test_set ${test_set} \
24 |     # --overwrite
--------------------------------------------------------------------------------
/src/baseline-archer/archer_critic.py:
--------------------------------------------------------------------------------
1 | # Ref: https://github.com/YifeiZhou02/ArCHer
2 | 
3 | # @misc{zhou2024archer,
4 | #       title={ArCHer: Training Language Model Agents via Hierarchical Multi-Turn RL},
5 | #       author={Yifei Zhou and Andrea Zanette and Jiayi Pan and Sergey Levine and Aviral Kumar},
6 | #       year={2024},
7 | #       eprint={2402.19446},
8 | #       archivePrefix={arXiv},
9 | #       primaryClass={cs.LG}
10 | # }
11 | 
12 | import torch
13 | from transformers import AutoTokenizer, AutoModel
14 | import torch.nn as nn
15 | import numpy as np
16 | from transformers import RobertaTokenizer, RobertaModel
17 | class DoubleCritic(torch.nn.Module):
18 |     def __init__(self, device, accelerator, critic_lm, cache_dir, in_dim, out_dim):
19 |         super(DoubleCritic, self).__init__()
20 |         self.device = device
21 |         self.accelerator = accelerator
22 |         self.base_lm = AutoModel.from_pretrained(critic_lm, cache_dir=cache_dir).to(device)
23 |         self.base_tokenizer = AutoTokenizer.from_pretrained(critic_lm, cache_dir=cache_dir)
24 |         self.base_tokenizer.truncation_side = 'left'
25 |         self.critic1 = nn.Sequential(nn.Linear(in_dim*2, in_dim),\
26 |                                      nn.ReLU(),\
27 |                                      nn.Linear(in_dim, in_dim),\
28 |                                      nn.ReLU(),\
29 |                                      nn.Linear(in_dim, out_dim)).to(device)
30 |         self.critic2 = nn.Sequential(nn.Linear(in_dim*2, in_dim),\
31 |                                      nn.ReLU(),\
32 |                                      nn.Linear(in_dim, in_dim),\
33 |                                      nn.ReLU(),\
34 |                                      nn.Linear(in_dim, out_dim)).to(device)
35 |         self.v_critic1 = nn.Sequential(nn.Linear(in_dim, in_dim),\
36 |                                        nn.ReLU(),\
37 |                                        nn.Linear(in_dim, in_dim),\
38 |                                        nn.ReLU(),\
39 |                                        nn.Linear(in_dim, out_dim)).to(device)
40 |         self.v_critic2 = nn.Sequential(nn.Linear(in_dim, in_dim),\
41 |                                        nn.ReLU(),\
42 |                                        nn.Linear(in_dim, in_dim),\
43 |                                        nn.ReLU(),\
44 |                                        nn.Linear(in_dim, out_dim)).to(device)
45 | 
46 |     # def prepare(self):
47 |     #     self.base_lm, self.critic1, self.critic2, self.v_critic1, self.v_critic2 = \
48 |     #         self.accelerator.prepare(self.base_lm, self.critic1, self.critic2, self.v_critic1, self.v_critic2)
49 | 
50 |     def forward(self, observation, action, detach_model=False):
51 |         state_actions = [o + a for o,a in zip(observation, action)]
52 |         obs_ids = self.base_tokenizer(observation, padding = True, return_tensors='pt', max_length=512, truncation = True).to(self.device)
53 |         # breakpoint()
54 |         if detach_model:
55 |             with torch.no_grad():
56 |                 lm_states = self.base_lm(**obs_ids).pooler_output
57 |         else:
58 |             lm_states = self.base_lm(**obs_ids).pooler_output
59 |         action_ids = self.base_tokenizer(action, padding = True, return_tensors='pt', max_length=512, truncation = True).to(self.device)
60 |         # breakpoint()
61 |         if detach_model:
62 |             with torch.no_grad():
63 |                 action_states = self.base_lm(**action_ids).pooler_output
64 |         else:
65 |             action_states = self.base_lm(**action_ids).pooler_output
66 |         q_states = torch.cat([lm_states, action_states], dim = 1)
67 |         # print(action.size())
68 |         return self.critic1(q_states), self.critic2(q_states), self.v_critic1(lm_states), self.v_critic2(lm_states)
--------------------------------------------------------------------------------
/src/baseline-archer/archer_data.py:
--------------------------------------------------------------------------------
1 | # Ref: https://github.com/YifeiZhou02/ArCHer
2 | 
3 | # @misc{zhou2024archer,
4 | #       title={ArCHer: Training Language Model Agents via Hierarchical Multi-Turn RL},
5 | #       author={Yifei Zhou and Andrea Zanette and Jiayi Pan and Sergey Levine and Aviral Kumar},
6 | #       year={2024},
7 | #       eprint={2402.19446},
8 | #       archivePrefix={arXiv},
9 | #       primaryClass={cs.LG}
10 | # }
11 | 
12 | from torch.utils.data import Dataset, DataLoader
13 | import numpy as np
14 | class DummyDataset(Dataset):
15 |     def __init__(self, buffer):
16 |         self.buffer = buffer
17 | 
18 |     def __len__(self):
19 |         return len(self.buffer)
20 | 
21 |     def __getitem__(self, idx):
22 |         return self.buffer[idx]
23 | 
24 | 
25 | class ReplayBuffer:
26 |     def __init__(self, batch_size=2, capacity=10000):
27 |         self.max_size = capacity
28 |         self.size = 0
29 |         self.observations = None
30 |         self.rewards = None
31 |         self.next_observations = None
32 |         self.dones = None
33 |         self.batch_size = batch_size
34 |         self.actions = None
35 |         self.mc_returns = None
36 | 
37 |     def sample(self, batch_size=None):
38 |         if batch_size is None:
39 |             batch_size = self.batch_size
40 |         rand_indices = np.random.randint(0, self.size, size=(batch_size,)) % self.max_size
41 |         return {
42 |             "observation": self.observations[rand_indices],
43 |             "action": self.actions[rand_indices],
44 |             "reward": self.rewards[rand_indices],
45 |             "next_observation": self.next_observations[rand_indices],
46 |             "done": self.dones[rand_indices],
47 |             "mc_return": self.mc_returns[rand_indices],
48 |         }
49 | 
50 |     def __len__(self):
51 |         return self.size
52 | 
53 |     def insert(
54 |         self,
55 |         /,
56 |         observation,
57 |         action,
58 |         reward: np.ndarray,
59 |         next_observation,
60 |         done: np.ndarray,
61 |         mc_return,
62 |         **kwargs
63 |     ):
64 |         """
65 |         Insert a single transition into the replay buffer.
66 | 
67 |         Use like:
68 |             replay_buffer.insert(
69 |                 observation=observation,
70 |                 action=action,
71 |                 reward=reward,
72 |                 next_observation=next_observation,
73 |                 done=done,
74 |             )
75 |         """
76 |         if isinstance(reward, (float, int)):
77 |             reward = np.array(reward)
78 |         if isinstance(mc_return, (float, int)):
79 |             mc_return = np.array(mc_return)
80 |         if isinstance(done, bool):
81 |             done = np.array(done)
82 |         # print(next_observation)
83 |         # if isinstance(prompt_actionaction, int):
84 |         #     action = np.array(action, dtype=np.int64)
85 | 
86 |         if self.observations is None:
87 |             self.observations = np.array(['']*self.max_size, dtype = 'object')
88 |             self.actions = np.array(['']*self.max_size, dtype = 'object')
89 |             self.rewards = np.empty((self.max_size, *reward.shape), dtype=reward.dtype)
90 |             self.next_observations = np.array(['']*self.max_size, dtype = 'object')
91 |             self.dones = np.empty((self.max_size, *done.shape), dtype=done.dtype)
92 |             self.mc_returns = np.empty((self.max_size, *mc_return.shape), dtype=mc_return.dtype)
93 | 
94 |         assert reward.shape == ()
95 |         assert done.shape == ()
96 | 
97 |         self.observations[self.size % self.max_size] = observation
98 |         self.actions[self.size % self.max_size] = action
99 |         self.rewards[self.size % self.max_size] = reward
100 |         self.next_observations[self.size % self.max_size] = next_observation
101 |         self.dones[self.size % self.max_size] = done
102 |         self.mc_returns[self.size % self.max_size] = mc_return
103 | 
104 |         self.size += 1
--------------------------------------------------------------------------------
/src/baseline-archer/archer_environment.py:
--------------------------------------------------------------------------------
1 | # Ref: https://github.com/YifeiZhou02/ArCHer
2 | 
3 | # @misc{zhou2024archer,
4 | #       title={ArCHer: Training Language Model Agents via Hierarchical Multi-Turn RL},
5 | #       author={Yifei Zhou and Andrea Zanette and Jiayi Pan and Sergey Levine and Aviral Kumar},
6 | #       year={2024},
7 | #       eprint={2402.19446},
8 | #       archivePrefix={arXiv},
9 | #       primaryClass={cs.LG}
10 | # }
11 | 
12 | from tqdm import tqdm
13 | import numpy as np
14 | 
15 | def add_trajectory_reward(trajectory):
16 |     """
17 |     add trajectory reward to the dict of each interaction
18 |     """
19 |     trajectory_reward = np.sum([d["reward"] for d in trajectory])
20 |     for d in trajectory:
21 |         d.update({"trajectory_reward": trajectory_reward})
22 |     return trajectory
23 | 
24 | def add_mc_return(trajectory, gamma = 0.95):
25 |     """
26 |     add the discounted Monte-Carlo return to the dict of each interaction
27 |     """
28 |     trajectory_rewards = np.array([d["reward"] for d in trajectory]).reshape(1, -1)
29 |     gamma_row = np.cumprod(np.ones((1, trajectory_rewards.shape[1]))*gamma)
30 |     gamma_matrix = np.triu(gamma_row.reshape(1, -1 )/ gamma_row.reshape(-1, 1))
31 |     mc_returns = np.sum(trajectory_rewards*gamma_matrix, axis = 1)
32 |     for d, mc in zip(trajectory, mc_returns):
33 |         d.update({"mc_return": mc})
34 |     return trajectory
35 | 
36 | 
37 | def batch_interact_environment(agent, tokenizer, env, num_trajectories,\
38 |                                post_f = lambda x: x, use_tqdm = True, decode_f = lambda x: x,
39 |                                env_idx = None):
40 |     """
41 |     in a batched way, interact with the environments to get a list of trajectories
42 |     [[{"observation":, "next_observation":, "reward":, "done":},...],...]
43 | post_f: function to add additional attributes to the trajectory 44 | """ 45 | bsize = env.bsize 46 | all_trajectories = [] 47 | for num_t in tqdm(range(num_trajectories//bsize), disable = not use_tqdm): 48 | done = False 49 | trajectories = [[] for _ in range(bsize)] 50 | # obs = reset_to(env, 69) 51 | batch_obs = env.reset(idx=env_idx) 52 | batch_done = [False,]*bsize 53 | steps = 0 54 | while not all(batch_done): 55 | steps += 1 56 | # print(f"Environment stpes {str(steps)}") 57 | action = agent.get_action(batch_obs) 58 | batch_return = env.step(decode_f(action)) 59 | for i,result in zip(range(bsize), batch_return): 60 | if result is None: 61 | continue 62 | next_obs, r, done = result 63 | trajectories[i].append({"observation": batch_obs[i], \ 64 | "next_observation": next_obs, \ 65 | "reward": r, \ 66 | "done": done, \ 67 | "action": action[i]}) 68 | batch_obs[i] = next_obs 69 | batch_done[i] = done 70 | # obs = next_obs 71 | print(trajectories[0][-1]["next_observation"]) 72 | all_trajectories += [post_f(add_mc_return(add_trajectory_reward(trajectory)))\ 73 | for trajectory in trajectories] 74 | # breakpoint() 75 | # trajectories.append(post_f(add_trajectory_reward(trajectory))) 76 | return all_trajectories 77 | -------------------------------------------------------------------------------- /src/baseline-archer/build_archer_data.py: -------------------------------------------------------------------------------- 1 | from archer_data import ReplayBuffer 2 | import pandas as pd 3 | import numpy as np 4 | import os 5 | import torch 6 | import json 7 | 8 | model = "toolllama" 9 | buffer_batch_size = 2 10 | tool_data_file = os.environ.get("DATA_FILE", None) 11 | 12 | # bsize = 4 13 | df = pd.read_csv(tool_data_file, sep="\t") 14 | 15 | 16 | # build origin trajectory 17 | trajectories = [[] for _ in range(len(df))] 18 | 19 | MAX_LEN = 1024 20 | 21 | # TODO 22 | for i in range(0, len(df)): 23 | prompt_list = eval(df.iloc[i]["prompt"]) 24 | response_list = eval(df.iloc[i]["response"]) 25 | reward_list = eval(df.iloc[i]["reward"]) 26 | 27 | obs = prompt_list[0] 28 | next_obs = obs + response_list[0] + prompt_list[1] 29 | done = False 30 | if len(obs) > MAX_LEN: 31 | obs = obs[-MAX_LEN:] 32 | if len(next_obs) > MAX_LEN: 33 | 34 | next_obs = next_obs[-MAX_LEN:] 35 | trajectories[i].append({"observation": obs, \ 36 | "next_observation": next_obs, \ 37 | "reward": reward_list[0], \ 38 | "done": done, \ 39 | "action": response_list[0]}) 40 | for j in range(1, len(response_list)): 41 | obs = next_obs 42 | next_obs = obs + response_list[j] 43 | if j+1 < len(response_list): 44 | next_obs += prompt_list[j+1] 45 | else: 46 | done = True 47 | 48 | if len(obs) > MAX_LEN: 49 | obs = obs[-MAX_LEN:] 50 | if len(next_obs) > MAX_LEN: 51 | next_obs = next_obs[-MAX_LEN:] 52 | trajectories[i].append({"observation": obs, \ 53 | "next_observation": next_obs, \ 54 | "reward": reward_list[j], \ 55 | "done": done, \ 56 | "action": response_list[j]}) 57 | 58 | 59 | def add_trajectory_reward(trajectory): 60 | """ 61 | add trajectory reward to the dict of each interaction 62 | """ 63 | trajectory_reward = np.sum([d["reward"] for d in trajectory]) 64 | for d in trajectory: 65 | d.update({"trajectory_reward": trajectory_reward}) 66 | return trajectory 67 | 68 | def add_mc_return(trajectory, gamma = 0.95): 69 | """ 70 | add trajectory reward to the dict of each interaction 71 | """ 72 | trajectory_rewards = np.array([d["reward"] for d in trajectory]).reshape(1, -1) 73 | gamma_row = np.cumprod(np.ones((1, 
trajectory_rewards.shape[1]))*gamma) 74 | gamma_matrix = np.triu(gamma_row.reshape(1, -1 )/ gamma_row.reshape(-1, 1)) 75 | mc_returns = np.sum(trajectory_rewards*gamma_matrix, axis = 1) 76 | for d, mc in zip(trajectory, mc_returns): 77 | d.update({"mc_return": mc}) 78 | 79 | return trajectory 80 | 81 | all_trajectories = [add_mc_return(add_trajectory_reward(trajectory))\ 82 | for trajectory in trajectories] 83 | 84 | # save to json 85 | trajectory_json = {} 86 | for i in range(len(all_trajectories)): 87 | trajectory_json[i] = all_trajectories[i] 88 | 89 | with open("trajectories.json", "w") as f: 90 | json.dump(trajectory_json, f, indent=4, ensure_ascii=False) 91 | 92 | 93 | # build replay_buffer 94 | replay_buffer= ReplayBuffer(batch_size=buffer_batch_size) 95 | 96 | data = sum(all_trajectories, []) 97 | for t in data: 98 | replay_buffer.insert(**t) 99 | 100 | print(">>> Saving Replay Buffer") 101 | save_path = os.environ.get("SAVE_PATH", "save") 102 | os.makedirs(save_path, exist_ok=True) 103 | torch.save(replay_buffer, os.path.join(save_path, 'replay_buffer.pt')) 104 | torch.save(all_trajectories, os.path.join(save_path, 'trajectories.pt')) 105 | -------------------------------------------------------------------------------- /src/baseline-archer/offpolicy_train_loop.py: -------------------------------------------------------------------------------- 1 | from archer_environment import batch_interact_environment 2 | from archer_data import DummyDataset, ReplayBuffer 3 | import numpy as np 4 | from torch.utils.data import Dataset, DataLoader 5 | from tqdm import tqdm 6 | from archer_trainer import ArcherTrainer 7 | import wandb 8 | import threading 9 | import os 10 | import torch 11 | import time 12 | def offpolicy_train_loop(env,\ 13 | eval_env,\ 14 | agent,\ 15 | tokenizer,\ 16 | accelerator,\ 17 | warmup_iter: int = 20, 18 | rollout_size: int = 50,\ 19 | eval_size: int = 1, 20 | batch_size: int = 2, 21 | capacity: int = 500000, 22 | iterations: int = 10,\ 23 | epochs:int = 3, \ 24 | grad_accum_steps: int = 1,\ 25 | env_idx:int = None,\ 26 | do_sample: bool = False,\ 27 | temperature: float = 2.0,\ 28 | critic_lr: float= 1e-3,\ 29 | lm_lr: float = 1e-5,\ 30 | gamma: float = 0.9, 31 | tau: float = 0.1, 32 | use_wandb: bool = False, 33 | env_load_path: str = '', 34 | actor_epochs: int = 3, 35 | max_grad_norm: float = 0.01, 36 | save_path: str = None, 37 | save_freq: int = 25, 38 | eval_freq: int = 25, 39 | agent_type: str = "archer", 40 | decode_f: callable = lambda x: x, 41 | **kwargs): 42 | if agent_type.lower() == "archer_toolllama": 43 | trainer = ArcherTrainer(agent=agent,\ 44 | accelerator=accelerator,\ 45 | tokenizer=tokenizer,\ 46 | critic_lr = critic_lr,\ 47 | lm_lr = lm_lr,\ 48 | gamma = gamma,\ 49 | tau = tau,\ 50 | epochs = epochs,\ 51 | actor_epochs = actor_epochs, 52 | grad_accum_steps=grad_accum_steps, 53 | max_grad_norm=max_grad_norm) 54 | replay_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity) 55 | 56 | os.makedirs(save_path, exist_ok=True) 57 | all_trajectories = torch.load(os.path.join(env_load_path, 'trajectories.pt')) 58 | info = {"rollout.mean": np.mean([d[0]["trajectory_reward"] for d in all_trajectories]),\ 59 | "rollout.max": np.max([d[0]["trajectory_reward"] for d in all_trajectories]),\ 60 | "rollout.min": np.min([d[0]["trajectory_reward"] for d in all_trajectories])} 61 | 62 | replay_buffer = torch.load(os.path.join(env_load_path, 'replay_buffer.pt')) 63 | agent.prepare() 64 | #main training loop 65 | print(">>>start iterations") 66 | for i in 
tqdm(range(iterations)): # pre collected in replay_buffer.pt 67 | info = {} 68 | all_trajectories = torch.load(os.path.join(env_load_path, 'trajectories.pt')) 69 | replay_buffer = torch.load(os.path.join(env_load_path, 'replay_buffer.pt')) 70 | print("Training") 71 | if 'filtered' in agent_type.lower(): 72 | filtered_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity) 73 | episode_rewards = [d[0]["trajectory_reward"] for d in all_trajectories] 74 | cutoff = np.quantile(episode_rewards, 1 - 0.1) 75 | print("Episode Reward Cutoff: ", cutoff) 76 | filtered_trajectories = list(filter(lambda x: x[0]["trajectory_reward"] >= cutoff, all_trajectories)) 77 | data = sum(filtered_trajectories, []) 78 | for d in data: 79 | filtered_buffer.insert(**d) 80 | info.update(trainer.update(filtered_buffer, no_update_actor = (i < warmup_iter))) 81 | else: 82 | # data = list(filter(lambda x: x["reward"] >0, data)) 83 | info.update(trainer.update(replay_buffer, no_update_actor = (i < warmup_iter))) 84 | if use_wandb and accelerator.is_main_process: 85 | wandb.log(info) 86 | if (i+1) % save_freq == 0 and save_path is not None and accelerator.is_main_process: 87 | print("Saving") 88 | trainer.save(os.path.join(save_path, 'trainer.pt'), save_dir=save_path) 89 | torch.save(replay_buffer, os.path.join(save_path, 'replay_buffer.pt')) 90 | # return model -------------------------------------------------------------------------------- /src/baseline-archer/run.py: -------------------------------------------------------------------------------- 1 | # Ref: https://github.com/YifeiZhou02/ArCHer 2 | 3 | # @misc{zhou2024archer, 4 | # title={ArCHer: Training Language Model Agents via Hierarchical Multi-Turn RL}, 5 | # author={Yifei Zhou and Andrea Zanette and Jiayi Pan and Sergey Levine and Aviral Kumar}, 6 | # year={2024}, 7 | # eprint={2402.19446}, 8 | # archivePrefix={arXiv}, 9 | # primaryClass={cs.LG} 10 | # } 11 | 12 | import torch 13 | import transformers 14 | from tqdm import tqdm 15 | from archer_agent import ArcherAgent 16 | from offpolicy_train_loop import offpolicy_train_loop 17 | 18 | import torch.nn as nn 19 | import numpy as np 20 | import wandb 21 | from omegaconf import DictConfig, OmegaConf 22 | import os 23 | import hydra 24 | from accelerate import Accelerator 25 | from datetime import timedelta 26 | from accelerate import DistributedDataParallelKwargs, InitProcessGroupKwargs 27 | transformers.logging.set_verbosity_error() 28 | 29 | CONFIG_NAME = os.environ.get("ARCHER_CONFIG_NAME", None) 30 | @hydra.main(version_base=None, config_path="../../config/archer/", config_name=CONFIG_NAME) 31 | def main(config: "DictConfig"): 32 | print(">>> Configuration file: "+CONFIG_NAME+"<<<") 33 | print(OmegaConf.to_yaml(config)) 34 | try: 35 | from huggingface_hub import login 36 | login(token=config.huggingface_token) 37 | except: 38 | print(">>> Huggingface token not found.") 39 | 40 | accelerator = Accelerator(InitProcessGroupKwargs(timeout=timedelta(18000))) 41 | device = accelerator.device 42 | 43 | decode_f = lambda x:x 44 | # load decision model 45 | if config.agent_type.lower() == "archer_toolllama": 46 | print(">>> Using ArCHer agent with ToolLLAMA") 47 | agent = ArcherAgent(device=device, accelerator=accelerator, 48 | temperature=config.temperature, do_sample=config.do_sample, 49 | policy_lm=config.policy_lm, critic_lm=config.critic_lm, 50 | cache_dir=config.cache_dir, max_new_tokens=config.max_new_tokens, 51 | use_lora=config.use_lora, 52 | eos_str=config.eos_str) 53 | else: 54 | raise 
NotImplementedError("Agent not implemented.") 55 | tokenizer = agent.tokenizer 56 | if config.checkpoint_path is not None: 57 | state_dict = torch.load(config.checkpoint_path, map_location=device)['model_state_dict'] 58 | agent.model.load_state_dict(state_dict) 59 | 60 | if config.use_wandb and accelerator.is_main_process: 61 | wandb.login(key=config.wandb_key) 62 | wandb.init(project=config.project_name, name=config.run_name, config=dict(config)) 63 | 64 | offpolicy_train_loop(env = None, 65 | agent = agent, 66 | tokenizer = tokenizer, 67 | eval_env = None, 68 | accelerator = accelerator, 69 | decode_f=decode_f, 70 | **config) 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /src/baseline-eto/dpo_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import transformers 3 | from peft import LoraConfig, get_peft_model 4 | from dataclasses import dataclass, field 5 | from typing import Optional 6 | 7 | from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, AutoConfig 8 | from datasets import load_dataset 9 | from transformers.integrations import deepspeed 10 | from trl import ( 11 | DPOTrainer, 12 | DPOConfig 13 | ) 14 | 15 | @dataclass 16 | class ModelArguments: 17 | model_name_or_path: Optional[str] = field(default="facebook/opt-125m") 18 | 19 | @dataclass 20 | class DataArguments: 21 | data_path: str = field( 22 | default=None, metadata={"help": "Path to the training data."} 23 | ) 24 | 25 | @dataclass 26 | class TrainingArguments(DPOConfig): 27 | beta: float = field(default=0.2, metadata={"help": "The beta factor in DPO loss. Higher beta means less divergence from the initial policy. For the IPO loss, beta is the regularization parameter denoted by tau in the paper."}) 28 | model_max_length: int = field( 29 | default=8192, 30 | metadata={ 31 | "help": "Expanded maximum sequence length. Sequences will be right padded (and possibly truncated)." 32 | }, 33 | ) 34 | 35 | @dataclass 36 | class LoraArguments: 37 | lora_r: int = 16 38 | lora_alpha: int = 16 39 | lora_dropout: float = 0.05 40 | lora_bias: str = "none" 41 | 42 | class DPOTrain(): 43 | 44 | def __init__(self): 45 | pass 46 | 47 | def print_trainable_parameters(self, model): 48 | """ 49 | Prints the number of trainable parameters in the model. 
50 | """ 51 | trainable_params = 0 52 | all_param = 0 53 | for _, param in model.named_parameters(): 54 | all_param += param.numel() 55 | if param.requires_grad: 56 | trainable_params += param.numel() 57 | print( 58 | f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {100 * trainable_params / all_param}" 59 | ) 60 | 61 | def run(self): 62 | global local_rank 63 | 64 | parser = transformers.HfArgumentParser( 65 | (ModelArguments, DataArguments, TrainingArguments, LoraArguments) 66 | ) 67 | model_args, data_args, training_args, lora_args = parser.parse_args_into_dataclasses() 68 | 69 | device_map = "auto" 70 | 71 | self.tokenizer = AutoTokenizer.from_pretrained( 72 | model_args.model_name_or_path, 73 | model_max_length=training_args.model_max_length, 74 | padding_side="right", 75 | use_fast=False, 76 | ) 77 | self.tokenizer.pad_token = self.tokenizer.unk_token 78 | 79 | # train_dataset = self.get_dpo_dataset(self.data_file) 80 | dataset = load_dataset('csv', data_files=data_args.data_path, delimiter='\t') 81 | print(dataset.keys()) 82 | train_val = dataset["train"].train_test_split( 83 | test_size=0.02, shuffle=True, seed=2024 84 | ) 85 | train_dataset = train_val["train"] 86 | val_dataset = train_val["test"] 87 | 88 | # Set RoPE scaling factor 89 | model_config = AutoConfig.from_pretrained( 90 | model_args.model_name_or_path, 91 | rope_scaling = { 92 | "factor": 2.0, 93 | "type": "linear" 94 | }, 95 | use_cache = False 96 | ) 97 | model_load_kwargs = { 98 | 'low_cpu_mem_usage': not deepspeed.is_deepspeed_zero3_enabled(), 99 | } 100 | model = AutoModelForCausalLM.from_pretrained( 101 | model_args.model_name_or_path, 102 | config = model_config, 103 | device_map=device_map, 104 | trust_remote_code=True, 105 | torch_dtype=torch.bfloat16, 106 | **model_load_kwargs 107 | ) 108 | 109 | lora_config = LoraConfig( 110 | r=lora_args.lora_r, 111 | lora_alpha=lora_args.lora_alpha, 112 | bias=lora_args.lora_bias, 113 | task_type="CAUSAL_LM", 114 | ) 115 | model = get_peft_model(model, lora_config) 116 | self.print_trainable_parameters(model) 117 | 118 | dpo_trainer = DPOTrainer( 119 | model=model, 120 | ref_model=None, 121 | args=training_args, 122 | train_dataset=train_dataset, 123 | eval_dataset=val_dataset, 124 | tokenizer=self.tokenizer, 125 | ) 126 | dpo_trainer.train() 127 | dpo_trainer.save_model() 128 | 129 | 130 | if __name__ == "__main__": 131 | DPOTrain_ = DPOTrain() 132 | DPOTrain_.run() -------------------------------------------------------------------------------- /src/baseline-ppo/ppo.py: -------------------------------------------------------------------------------- 1 | # PPO (Final Reward) 2 | 3 | import json 4 | import time 5 | from tqdm import tqdm 6 | import os 7 | import torch 8 | from peft import LoraConfig 9 | 10 | from argparse import ArgumentParser 11 | from transformers import AutoTokenizer 12 | from accelerate import Accelerator 13 | from datasets import load_dataset 14 | 15 | from trl import ( 16 | PPOTrainer, 17 | PPOConfig, 18 | AutoModelForCausalLMWithValueHead, 19 | ) 20 | 21 | import wandb 22 | import numpy as np 23 | import random 24 | 25 | def set_seed(seed): 26 | random.seed(seed) 27 | os.environ['PYTHONHASHSEED'] = str(seed) 28 | np.random.seed(seed) 29 | torch.manual_seed(seed) 30 | torch.cuda.manual_seed(seed) 31 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
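# Expected layout of the training data file (inferred from formatting_func and
# run below; not documented elsewhere): a tab-separated file with the columns
#   prompt   - full conversation context fed to the policy model
#   response - the generated action to be scored
#   reward   - a stringified Python list of rewards; only its last element is
#              used here, i.e. this baseline optimizes the final trajectory
#              reward, matching the "PPO (Final Reward)" header above.
# set_seed above fixes the Python, NumPy and PyTorch RNGs; for fully
# deterministic CUDA kernels one could additionally set
# torch.backends.cudnn.deterministic = True (not done in this script).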
32 | 33 | 34 | class PPOTrain(): 35 | @staticmethod 36 | def parse_args(): 37 | parser = ArgumentParser() 38 | parser.add_argument('--config_path', default="config/dpo-test.json", type=str, required=True, help='Path to the config file') 39 | parser.add_argument('--model_path', default="ToolBench/ToolLLaMA-2-7b-v2", type=str, help='Path to the model') 40 | parser.add_argument('--data_file', required=True, type=str, help='Path to the data file') 41 | parser.add_argument('--model_type', default="ToolLlama", type=str, help='Type of the model') 42 | parser.add_argument('--epochs', default=3, type=int, help='Number of epochs to train') 43 | parser.add_argument('--max_length', default=1024, type=int, help='Max length of the input') 44 | parser.add_argument('--max_context_len', default=4096, type=int, help='Max context length') 45 | parser.add_argument('--max_response_len', default=1200, type=int, help='Max response length') 46 | return parser.parse_args() 47 | 48 | def __init__(self, args): 49 | self.config_path = args.config_path 50 | self.model_path = args.model_path 51 | self.data_file = args.data_file 52 | self.max_length = args.max_length 53 | self.epochs = args.epochs 54 | self.max_length = args.max_length 55 | self.max_context_len = args.max_context_len 56 | self.max_response_len = args.max_response_len 57 | wandb_project = "baseline-PPO" 58 | wandb_run_name = f"{args.model_type}" 59 | wandb.init(project=wandb_project, name=wandb_run_name) 60 | 61 | 62 | def print_trainable_parameters(self, model): 63 | """ 64 | Prints the number of trainable parameters in the model. 65 | """ 66 | trainable_params = 0 67 | all_param = 0 68 | for _, param in model.named_parameters(): 69 | all_param += param.numel() 70 | if param.requires_grad: 71 | trainable_params += param.numel() 72 | print( 73 | f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {100 * trainable_params / all_param}" 74 | ) 75 | 76 | def formatting_func(self, examples): 77 | input_text = examples["prompt"] 78 | examples["query"] = self.tokenizer.encode(input_text, return_tensors='pt').squeeze(0) 79 | 80 | max_context_len = 4096 81 | max_response_len = 1200 82 | while len(examples["query"]) > max_context_len: 83 | examples["query"] = examples["query"][-max_context_len:] 84 | 85 | 86 | examples['response'] = self.tokenizer.encode(examples["response"], return_tensors='pt').squeeze(0) 87 | if len(examples['response']) > max_response_len: 88 | examples['response'] = examples['response'][:self.max_response_len] 89 | examples["label"] = torch.tensor(eval(examples["reward"])[-1], dtype=torch.float16) 90 | return examples 91 | 92 | def train(self, epochs: int = 1): 93 | base_dir = os.path.join('ckpts/', f'baseline-ppo_'+str(int(time.time()))) 94 | 95 | batch_steps = 0 96 | for epoch in range(epochs): 97 | print(f"==========================Epoch {epoch}==========================") 98 | 99 | for batch_id, batch in tqdm(enumerate(self.ppo_trainer.dataloader)): 100 | batch_steps += 1 101 | query_tensors, response_tensors = batch['query'], batch['response'] 102 | rewards = batch['label'] 103 | stats = self.ppo_trainer.step(query_tensors, response_tensors, rewards) 104 | self.ppo_trainer.log_stats(stats, batch, rewards, columns_to_log=[]) 105 | 106 | if batch_steps % 100 == 0: 107 | os.makedirs(base_dir, exist_ok=True) 108 | self.ppo_trainer.save_pretrained(os.path.join(base_dir, f'batch-{batch_steps}')) 109 | os.makedirs(base_dir, exist_ok=True) 110 | self.ppo_trainer.save_pretrained(os.path.join(base_dir, 
f'epoch-{epoch}')) 111 | 112 | 113 | def run(self): 114 | set_seed(2024) 115 | 116 | with open(self.config_path, 'r') as config_f: 117 | config = json.load(config_f) 118 | 119 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, 120 | device_map= {"": Accelerator().process_index}) 121 | dataset = load_dataset('csv', data_files=self.data_file, delimiter='\t') 122 | 123 | peft_kwargs = config.get('peft_kwargs', {}) 124 | peft_config = LoraConfig(**peft_kwargs) 125 | 126 | formatted_dataset = dataset.map(self.formatting_func, batched=False, load_from_cache_file=False) 127 | formatted_dataset.set_format(type="torch") 128 | train_dataset = formatted_dataset["train"] 129 | 130 | ppo_kwargs = config.get('ppo_kwargs', {}) 131 | ppo_config = PPOConfig(**ppo_kwargs) 132 | 133 | model = AutoModelForCausalLMWithValueHead.from_pretrained( 134 | self.model_path, 135 | low_cpu_mem_usage=True, 136 | device_map="auto", 137 | peft_config=peft_config, 138 | torch_dtype=torch.bfloat16, 139 | ) 140 | 141 | self.print_trainable_parameters(model) 142 | 143 | def collator(data): 144 | return dict((key, [d[key] for d in data]) for key in data[0]) 145 | 146 | if self.tokenizer.pad_token is None: 147 | self.tokenizer.pad_token = self.tokenizer.eos_token 148 | model.config.pad_token_id = model.config.eos_token_id 149 | 150 | self.ppo_trainer = PPOTrainer( 151 | config=ppo_config, 152 | model=model, 153 | dataset=train_dataset, 154 | tokenizer=self.tokenizer, 155 | data_collator=collator 156 | ) 157 | 158 | self.train(epochs=args.epochs) 159 | 160 | 161 | if __name__ == "__main__": 162 | args = PPOTrain.parse_args() 163 | PPOTrain = PPOTrain(args) 164 | PPOTrain.run() -------------------------------------------------------------------------------- /src/reward/annotation_with_gpt.py: -------------------------------------------------------------------------------- 1 | from src.reward.evaluators.evaluator import ProcessRewardEvaluator 2 | from stabletoolbench.toolbench.tooleval.evaluators import load_registered_automatic_evaluator 3 | import os 4 | import json 5 | import random 6 | from concurrent.futures import ThreadPoolExecutor,as_completed 7 | import argparse 8 | from tqdm import tqdm 9 | from stabletoolbench.toolbench.tooleval.utils import get_steps 10 | import backoff 11 | 12 | abs_dir = os.path.split(__file__)[0] 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--converted_answer_path', type=str, default="", required=True, help='converted answer path') 17 | parser.add_argument('--save_path', type=str, default="", required=False, help='result save path') 18 | parser.add_argument('--reference_model', type=str, default="", required=False, help='model predictions path') 19 | parser.add_argument('--reference_path', type=str, default=None, required=False, help='reference path') 20 | parser.add_argument('--task_num', type=int, default=None, required=False, help='task num') 21 | parser.add_argument('--evaluator', type=str, default="gpt-4-turbo-2024-04-09", required=False, help='which evaluator to use.') 22 | parser.add_argument('--max_eval_threads', type=int, default=30, required=False, help='max threads nums') 23 | parser.add_argument('--evaluate_times', type=int, default=4, required=False, help='how many times to predict with the evaluator for each solution path.') 24 | parser.add_argument('--test_set', nargs='+', default=['G1_instruction'], help='test set name') 25 | parser.add_argument('--overwrite', action='store_true', help='whether to overwrite the existing 
result file') 26 | return parser.parse_args() 27 | 28 | if __name__ == "__main__": 29 | args = parse_args() 30 | evaluators = [load_registered_automatic_evaluator(evaluator_name=args.evaluator, evaluators_cfg_path=os.path.join(abs_dir,'evaluators')) for _ in range(args.max_eval_threads)] 31 | @backoff.on_exception(backoff.expo, Exception, max_time=15) 32 | def compute_process_reward(query_id, example, evaluate_time): 33 | global evaluators 34 | evaluator = random.choice(evaluators) 35 | answer_steps, answer_steps_list, final_step = get_steps(example) 36 | succeed_tool_calling_list, contributions, answer_status = evaluator.evaluate_process_reward( 37 | { 38 | 'query':example['query'], 39 | 'available_tools':example['available_tools'], 40 | }, 41 | answer_steps_list[:-1], 42 | example['answer'], 43 | ) 44 | process_reward = { 45 | "succeed_tool_calling": succeed_tool_calling_list, 46 | "contributions": contributions, 47 | } 48 | return query_id, process_reward, answer_status, evaluate_time 49 | 50 | reference_model = args.reference_model 51 | output_list = [] 52 | 53 | for test_set in args.test_set: 54 | 55 | save_file = f"{args.save_path}/{test_set}.json" 56 | if args.task_num: 57 | save_file = f"{args.save_path}/{test_set}_{args.task_num}.json" 58 | 59 | reference_path = f"{args.converted_answer_path}/{test_set}.json" 60 | reference_examples = json.load(open(reference_path, "r")) 61 | if args.task_num: 62 | reference_examples = {k:reference_examples[k] for k in list(reference_examples.keys())[:args.task_num]} 63 | 64 | if os.path.exists(save_file) and not args.overwrite: 65 | old_existed_ids = list(json.load(open(save_file, "r")).keys()) 66 | old_label_cnt = json.load(open(save_file, "r")) 67 | existed_ids = [] 68 | label_cnt = {} 69 | for query_id in old_existed_ids: 70 | ans = old_label_cnt[query_id] 71 | if len(ans['process_reward'].keys()) == args.evaluate_times: 72 | existed_ids.append(query_id) 73 | label_cnt[query_id] = ans 74 | else: 75 | existed_ids = [] 76 | label_cnt = {} 77 | 78 | with ThreadPoolExecutor(args.max_eval_threads) as pool: 79 | future = [] 80 | 81 | for query_id in reference_examples: 82 | if query_id in existed_ids: 83 | continue 84 | for i in range(args.evaluate_times): 85 | example = reference_examples[query_id] 86 | future.append(pool.submit( 87 | compute_process_reward, 88 | query_id, 89 | example, 90 | evaluate_time=i 91 | )) 92 | 93 | for thd in tqdm(as_completed(future),total=len(future),ncols=100): 94 | query_id, process_reward, is_solved, evaluate_time = thd.result() 95 | example = reference_examples[query_id] 96 | query = example["query"] 97 | tool_names = [] 98 | for tool_dict in example["available_tools"]: 99 | tool_name = tool_dict["function"]["name"] 100 | tool_names.append(tool_name) 101 | answer_steps, answer_steps_list, final_step = get_steps(example) 102 | if query_id not in label_cnt: 103 | label_cnt[query_id] = {} 104 | label_cnt[query_id]["query"] = query 105 | label_cnt[query_id]["tool_names"] = tool_names 106 | label_cnt[query_id]["answer_steps"] = answer_steps_list[:-1] 107 | # label_cnt[query_id]["mid_steps_reward"] = mid_steps_reward # parsed 108 | if 'process_reward' not in label_cnt[query_id]: 109 | label_cnt[query_id]["process_reward"] = {} 110 | label_cnt[query_id]["process_reward"][evaluate_time] = process_reward 111 | label_cnt[query_id]["final_step"] = final_step 112 | 113 | if 'is_solved' not in label_cnt[query_id]: 114 | label_cnt[query_id]["is_solved"] = {} 115 | label_cnt[query_id]["is_solved"][evaluate_time] = str(is_solved) 
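            # Each record in label_cnt accumulated here ends up with the shape
            #   { "query": ..., "tool_names": [...], "answer_steps": [...],
            #     "process_reward": { evaluate_time: { "succeed_tool_calling": [0/1 per step],
            #                                          "contributions": [0-5 per step] } },
            #     "final_step": ..., "is_solved": { evaluate_time: "Solved" / "Unsolved" / "Unsure" } }
            # collected over the `evaluate_times` evaluations of the same solution path.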
116 | # print("========== Finish and Dump into json file===========", query_id, is_solved, evaluate_time) 117 | 118 | json.dump(label_cnt, open(save_file, "w"), ensure_ascii=False, indent=4) 119 | 120 | json.dump(label_cnt, open(save_file, "w"), ensure_ascii=False, indent=4) 121 | -------------------------------------------------------------------------------- /src/reward/evaluators/evaluator.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any 2 | 3 | from stabletoolbench.toolbench.tooleval.evaluators.registered_cls.utils import register_evaluator 4 | from stabletoolbench.toolbench.tooleval.evaluators.registered_cls.rtl import ReinforceToolLearningEvaluator 5 | 6 | from enum import Enum 7 | 8 | class AnswerStatus(Enum): 9 | Unsure = "Unsure" 10 | Unsolved = "Unsolved" 11 | Solved = "Solved" 12 | 13 | @register_evaluator 14 | class ProcessRewardEvaluator(ReinforceToolLearningEvaluator): 15 | def evaluate_process_reward(self, 16 | task_description:Dict, 17 | mid_steps, 18 | answer:Dict[Any,Any]): 19 | ret = self.function_call( 20 | 'evaluate_process_reward', 21 | { 22 | 'query': task_description['query'], 23 | 'mid_steps': mid_steps, 24 | 'final_answer':answer['final_answer'], 25 | } 26 | ) 27 | answer_status = AnswerStatus(ret['final_answer_status']) 28 | return ret['succeed_tool_calling'], ret['contribution_to_final_answer'], answer_status 29 | -------------------------------------------------------------------------------- /src/reward/evaluators/gpt-4-turbo-2024-04-09/config.yaml: -------------------------------------------------------------------------------- 1 | evaluator_name: "gpt-4-turbo-2024-04-09" 2 | registered_cls_name: "ProcessRewardEvaluator" 3 | prompt_template: "template.txt" 4 | fn_completions: "normalized_openai_completions" 5 | apis_json: "your/path/to/api_pool.json" 6 | completions_kwargs: 7 | model: "gpt-4-turbo-2024-04-09" 8 | max_tokens: 1000 9 | temperature: 0 10 | timeout: 10 11 | functions: 12 | - name: "evaluate_process_reward" 13 | description: "Evaluate the entire task-solving process, including tool calls, the contribution of each intermediate step to the final answer, and the status of the final answer." 14 | parameters: 15 | type: "object" 16 | properties: 17 | succeed_tool_calling: 18 | type: "array" 19 | description: "Provide a binary score (0 or 1) indicating whether **each intermediate step** successfully called the tool." 20 | items: 21 | type: "number" 22 | description: "0 for unsuccessful tool calls, 1 for successful tool calls" 23 | contribution_to_final_answer: 24 | type: "array" 25 | description: "Provide a score (0 to 5) to assess how much **each intermediate step** contributed to the final answer." 26 | items: 27 | type: "number" 28 | description: "0 indicates no contribution, and 5 indicates maximum contribution." 29 | final_answer_status: 30 | type: "string" 31 | enum: ["Unsure", "Unsolved", "Solved"] 32 | description: "Indicate the status of the final answer. Choose from: 'Unsure', 'Unsolved', or 'Solved'." 
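        # Note: succeed_tool_calling and contribution_to_final_answer are expected to
        # contain one score per intermediate step passed in as mid_steps (see template.txt).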
33 | required: ["succeed_tool_calling", "contribution_to_final_answer", "final_answer_status"] 34 | 35 | - name: "check_answer_status" 36 | description: "Parse the json answer with layerd nodes and return the answer_status about the answer" 37 | parameters: 38 | type: "object" 39 | properties: 40 | answer_status: 41 | type: "string" 42 | enum: ["Unsure","Unsolved","Solved"] 43 | required: ["answer_status"] 44 | fn_completion_parser: "index_parser" 45 | batch_size: 1 46 | -------------------------------------------------------------------------------- /src/reward/evaluators/gpt-4-turbo-2024-04-09/template.txt: -------------------------------------------------------------------------------- 1 | 2 | evaluate_process_reward 3 | 4 | Query: 5 | {query} 6 | 7 | Intermediate Steps: 8 | {mid_steps} 9 | 10 | Final Answer: 11 | {final_answer} 12 | 13 | Based on the query, intermediate steps, and final answer, evaluate the entire task-solving process using the following criteria: 14 | 15 | 1. **Successful Tool Calling**: For each intermediate step, indicate whether a tool was successfully called, with a score of 0 (no) or 1 (yes). 16 | 2. **Contribution to Final Answer**: Rate the contribution of each intermediate step to the final answer on a scale of 0 to 5. 17 | 3. **Final Answer Status**: Determine the final answer status as 'Solved', 'Unsure', or 'Unsolved'. 18 | 19 | Please call the `evaluate_process_reward` function to return your evaluation. 20 | 21 | -------------------------------------------------------------------------------- /src/reward/openai_key.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "api_key": "", 4 | "api_base": "" 5 | } 6 | ] -------------------------------------------------------------------------------- /stabletoolbench/config.yml: -------------------------------------------------------------------------------- 1 | api_key: 2 | api_base: 3 | toolbench_key: 4 | tool_root_dir: server/tools -------------------------------------------------------------------------------- /stabletoolbench/server/config.yml: -------------------------------------------------------------------------------- 1 | api_key: 2 | api_base: 3 | model: 4 | temperature: 0 5 | toolbench_url: 6 | rapidapi_key: 7 | tools_folder: "./tools" 8 | cache_folder: "./tool_response_cache" 9 | is_save: true 10 | port: 8081 11 | -------------------------------------------------------------------------------- /stabletoolbench/server/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.110.0 2 | openai 3 | pydantic==1.10.11 4 | PyYAML==6.0.1 5 | PyYAML==6.0.1 6 | Requests==2.31.0 7 | slowapi==0.1.9 8 | tenacity==8.2.2 9 | uvicorn==0.28.0 10 | -------------------------------------------------------------------------------- /stabletoolbench/server/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | categories = [ 4 | "Sports", 5 | "Finance", 6 | "Data", 7 | "Entertainment", 8 | "Travel", 9 | "Location", 10 | "Science", 11 | "Food", 12 | "Transportation", 13 | "Music", 14 | "Business", 15 | "Visual Recognition", 16 | "Tools", 17 | "Text Analysis", 18 | "Weather", 19 | "Gaming", 20 | "SMS", 21 | "Events", 22 | "Health and Fitness", 23 | "Payments", 24 | "Financial", 25 | "Translation", 26 | "Storage", 27 | "Logistics", 28 | "Database", 29 | "Search", 30 | "Reward", 31 | "Mapping", 32 | "Artificial%20Intelligence%2FMachine%20Learning", 33 | "Email", 34 
| "News, Media", 35 | "Video, Images", 36 | "eCommerce", 37 | "Medical", 38 | "Devices", 39 | "Business Software", 40 | "Advertising", 41 | "Education", 42 | "Media", 43 | "Social", 44 | "Commerce", 45 | "Communication", 46 | "Other", 47 | "Monitoring", 48 | "Energy", 49 | "Jobs", 50 | "Movies", 51 | "Cryptography", 52 | "Cybersecurity" 53 | ] 54 | 55 | def standardize_category(category): 56 | save_category = category.replace(" ", "_").replace(",", "_").replace("/", "_") 57 | while " " in save_category or "," in save_category: 58 | save_category = save_category.replace(" ", "_").replace(",", "_") 59 | save_category = save_category.replace("__", "_") 60 | return save_category 61 | 62 | def standardize(string): 63 | res = re.compile("[^\\u4e00-\\u9fa5^a-z^A-Z^0-9^_]") 64 | string = res.sub("_", string) 65 | string = re.sub(r"(_)\1+","_", string).lower() 66 | while True: 67 | if len(string) == 0: 68 | return string 69 | if string[0] == "_": 70 | string = string[1:] 71 | else: 72 | break 73 | while True: 74 | if len(string) == 0: 75 | return string 76 | if string[-1] == "_": 77 | string = string[:-1] 78 | else: 79 | break 80 | if string[0].isdigit(): 81 | string = "get_" + string 82 | return string 83 | 84 | def change_name(name): 85 | change_list = ["from", "class", "return", "false", "true", "id", "and"] 86 | if name in change_list: 87 | name = "is_" + name 88 | return name 89 | -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G1_category.json: -------------------------------------------------------------------------------- 1 | { 2 | "28": 0, 3 | "29": 0, 4 | "1198": 0, 5 | "1301": 0, 6 | "4153": 0, 7 | "4155": 0, 8 | "4266": 0, 9 | "4273": 0, 10 | "4286": 0, 11 | "4328": 0, 12 | "4343": 0, 13 | "4366": 0, 14 | "4415": 0, 15 | "4424": 0, 16 | "4428": 0, 17 | "4465": 0, 18 | "4471": 0, 19 | "6504": 0, 20 | "6511": 0, 21 | "6521": 0, 22 | "9604": 0, 23 | "9661": 0, 24 | "9679": 0, 25 | "9708": 0, 26 | "9719": 0, 27 | "12535": 0, 28 | "12642": 0, 29 | "12671": 0, 30 | "12688": 0, 31 | "12744": 0, 32 | "12759": 0, 33 | "12770": 0, 34 | "12788": 0, 35 | "12790": 0, 36 | "12805": 0, 37 | "12819": 0, 38 | "12875": 0, 39 | "12884": 0, 40 | "12954": 0, 41 | "15137": 0, 42 | "18268": 0, 43 | "18286": 0, 44 | "18319": 0, 45 | "18337": 0, 46 | "21257": 0, 47 | "21313": 0, 48 | "21400": 0, 49 | "21447": 0, 50 | "21453": 0, 51 | "21477": 0, 52 | "21518": 0, 53 | "23486": 0, 54 | "25341": 0, 55 | "25344": 0, 56 | "25348": 0, 57 | "26577": 0, 58 | "26613": 0, 59 | "26661": 0, 60 | "26698": 0, 61 | "26701": 0, 62 | "29592": 0, 63 | "29647": 0, 64 | "29653": 0, 65 | "29719": 0, 66 | "29724": 0, 67 | "29746": 0, 68 | "29778": 0, 69 | "29816": 0, 70 | "29824": 0, 71 | "29844": 0, 72 | "29859": 0, 73 | "29917": 0, 74 | "34747": 0, 75 | "34773": 0, 76 | "34774": 0, 77 | "34811": 0, 78 | "37847": 0, 79 | "37876": 0, 80 | "38008": 0, 81 | "38021": 0, 82 | "38028": 0, 83 | "38045": 0, 84 | "38099": 0, 85 | "38125": 0, 86 | "41983": 0, 87 | "43312": 0, 88 | "43364": 0, 89 | "43375": 0, 90 | "46424": 0, 91 | "46455": 0, 92 | "46528": 0, 93 | "46662": 0, 94 | "46676": 0, 95 | "46688": 0, 96 | "46760": 0, 97 | "51809": 0, 98 | "51817": 0, 99 | "54484": 0, 100 | "54640": 0, 101 | "54658": 0, 102 | "54697": 0, 103 | "54801": 0, 104 | "54839": 0, 105 | "54844": 0, 106 | "56809": 0, 107 | "59862": 0, 108 | "59890": 0, 109 | "59905": 0, 110 | "62870": 0, 111 | "62960": 0, 112 | "63066": 0, 113 | "63151": 0, 114 | "65185": 0, 115 | "65190": 0, 
116 | "67087": 0, 117 | "67089": 0, 118 | "68407": 0, 119 | "68448": 0, 120 | "68470": 0, 121 | "68553": 0, 122 | "71583": 0, 123 | "71638": 0, 124 | "71685": 0, 125 | "71692": 0, 126 | "71741": 0, 127 | "71801": 0, 128 | "71823": 0, 129 | "77144": 0, 130 | "77171": 0, 131 | "77200": 0, 132 | "77208": 0, 133 | "77247": 0, 134 | "77261": 0, 135 | "80170": 0, 136 | "80175": 0, 137 | "80248": 0, 138 | "80298": 0, 139 | "80491": 0, 140 | "80500": 0, 141 | "80504": 0, 142 | "80519": 0, 143 | "82434": 0, 144 | "84935": 0, 145 | "84974": 0, 146 | "86069": 0, 147 | "86231": 0, 148 | "86251": 0, 149 | "86288": 0, 150 | "86297": 0, 151 | "86335": 0, 152 | "86488": 0, 153 | "86502": 0, 154 | "86535": 0 155 | } -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G1_instruction.json: -------------------------------------------------------------------------------- 1 | { 2 | "588": 0, 3 | "608": 0, 4 | "1073": 0, 5 | "1572": 0, 6 | "1856": 0, 7 | "2121": 0, 8 | "2144": 0, 9 | "2213": 0, 10 | "2354": 0, 11 | "2399": 0, 12 | "3308": 0, 13 | "3510": 0, 14 | "3723": 0, 15 | "3922": 0, 16 | "4505": 0, 17 | "5116": 0, 18 | "5810": 0, 19 | "5965": 0, 20 | "6618": 0, 21 | "6736": 0, 22 | "6959": 0, 23 | "7043": 0, 24 | "7497": 0, 25 | "7658": 0, 26 | "7989": 0, 27 | "8025": 0, 28 | "9921": 0, 29 | "9984": 0, 30 | "10160": 0, 31 | "10770": 0, 32 | "11653": 0, 33 | "11686": 0, 34 | "12204": 0, 35 | "13095": 0, 36 | "14714": 0, 37 | "15511": 0, 38 | "16196": 0, 39 | "16970": 0, 40 | "17038": 0, 41 | "17223": 0, 42 | "17952": 0, 43 | "20704": 0, 44 | "21596": 0, 45 | "22781": 0, 46 | "22937": 0, 47 | "23163": 0, 48 | "23248": 0, 49 | "23982": 0, 50 | "24146": 0, 51 | "24810": 0, 52 | "25052": 0, 53 | "25658": 0, 54 | "26063": 0, 55 | "26752": 0, 56 | "26892": 0, 57 | "27847": 0, 58 | "28751": 0, 59 | "29059": 0, 60 | "29291": 0, 61 | "29322": 0, 62 | "31117": 0, 63 | "31267": 0, 64 | "31402": 0, 65 | "32001": 0, 66 | "32285": 0, 67 | "32309": 0, 68 | "32617": 0, 69 | "32652": 0, 70 | "32807": 0, 71 | "33112": 0, 72 | "33330": 0, 73 | "33889": 0, 74 | "34266": 0, 75 | "34823": 0, 76 | "35112": 0, 77 | "36068": 0, 78 | "36197": 0, 79 | "36717": 0, 80 | "37421": 0, 81 | "38494": 0, 82 | "40019": 0, 83 | "40054": 0, 84 | "40436": 0, 85 | "40699": 0, 86 | "41389": 0, 87 | "41444": 0, 88 | "41806": 0, 89 | "42351": 0, 90 | "43269": 0, 91 | "43821": 0, 92 | "44482": 0, 93 | "44533": 0, 94 | "44619": 0, 95 | "44774": 0, 96 | "45490": 0, 97 | "45775": 0, 98 | "46403": 0, 99 | "47301": 0, 100 | "47838": 0, 101 | "48059": 0, 102 | "49267": 0, 103 | "49991": 0, 104 | "51043": 0, 105 | "52534": 0, 106 | "52734": 0, 107 | "55223": 0, 108 | "55323": 0, 109 | "55489": 0, 110 | "55721": 0, 111 | "56226": 0, 112 | "56236": 0, 113 | "56666": 0, 114 | "58096": 0, 115 | "58949": 0, 116 | "59266": 0, 117 | "59954": 0, 118 | "60837": 0, 119 | "60936": 0, 120 | "61654": 0, 121 | "62012": 0, 122 | "62757": 0, 123 | "63730": 0, 124 | "63962": 0, 125 | "65637": 0, 126 | "66018": 0, 127 | "67007": 0, 128 | "67522": 0, 129 | "67966": 0, 130 | "68221": 0, 131 | "68327": 0, 132 | "68335": 0, 133 | "69206": 0, 134 | "70610": 0, 135 | "71402": 0, 136 | "72373": 0, 137 | "72659": 0, 138 | "73529": 0, 139 | "73762": 0, 140 | "74322": 0, 141 | "75338": 0, 142 | "75390": 0, 143 | "76554": 0, 144 | "76957": 0, 145 | "77471": 0, 146 | "77514": 0, 147 | "77855": 0, 148 | "78406": 0, 149 | "79053": 0, 150 | "79620": 0, 151 | "80884": 0, 152 | "81195": 0, 153 | "81581": 0, 154 | 
"82314": 0, 155 | "82701": 0, 156 | "83742": 0, 157 | "83819": 0, 158 | "83950": 0, 159 | "84845": 0, 160 | "85152": 0, 161 | "86084": 0, 162 | "86143": 0, 163 | "87632": 0, 164 | "88193": 0 165 | } -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G1_tool.json: -------------------------------------------------------------------------------- 1 | { 2 | "394": 0, 3 | "692": 0, 4 | "1617": 0, 5 | "2412": 0, 6 | "2513": 0, 7 | "2701": 0, 8 | "3007": 0, 9 | "3215": 0, 10 | "3221": 0, 11 | "3287": 0, 12 | "5085": 0, 13 | "6677": 0, 14 | "7474": 0, 15 | "7903": 0, 16 | "7971": 0, 17 | "8129": 0, 18 | "8443": 0, 19 | "8655": 0, 20 | "8722": 0, 21 | "9039": 0, 22 | "9238": 0, 23 | "9792": 0, 24 | "9956": 0, 25 | "10221": 0, 26 | "10277": 0, 27 | "11924": 0, 28 | "13495": 0, 29 | "13497": 0, 30 | "13499": 0, 31 | "13537": 0, 32 | "13826": 0, 33 | "14198": 0, 34 | "15058": 0, 35 | "15335": 0, 36 | "15931": 0, 37 | "16133": 0, 38 | "16700": 0, 39 | "17978": 0, 40 | "18761": 0, 41 | "19662": 0, 42 | "19696": 0, 43 | "20358": 0, 44 | "21785": 0, 45 | "22077": 0, 46 | "22514": 0, 47 | "24777": 0, 48 | "25164": 0, 49 | "25483": 0, 50 | "25687": 0, 51 | "26542": 0, 52 | "26820": 0, 53 | "26961": 0, 54 | "27819": 0, 55 | "28028": 0, 56 | "28229": 0, 57 | "28240": 0, 58 | "28788": 0, 59 | "30660": 0, 60 | "31708": 0, 61 | "32177": 0, 62 | "33971": 0, 63 | "34211": 0, 64 | "34696": 0, 65 | "34946": 0, 66 | "35056": 0, 67 | "35382": 0, 68 | "36378": 0, 69 | "36687": 0, 70 | "37553": 0, 71 | "38414": 0, 72 | "38551": 0, 73 | "39392": 0, 74 | "39393": 0, 75 | "42077": 0, 76 | "42348": 0, 77 | "42934": 0, 78 | "43110": 0, 79 | "43557": 0, 80 | "43585": 0, 81 | "43933": 0, 82 | "44066": 0, 83 | "44793": 0, 84 | "44845": 0, 85 | "45370": 0, 86 | "45371": 0, 87 | "45418": 0, 88 | "45422": 0, 89 | "45533": 0, 90 | "46409": 0, 91 | "46413": 0, 92 | "47032": 0, 93 | "48480": 0, 94 | "48483": 0, 95 | "48950": 0, 96 | "49173": 0, 97 | "49529": 0, 98 | "49531": 0, 99 | "49830": 0, 100 | "50984": 0, 101 | "51600": 0, 102 | "52332": 0, 103 | "53120": 0, 104 | "53924": 0, 105 | "53959": 0, 106 | "54421": 0, 107 | "55589": 0, 108 | "56049": 0, 109 | "56495": 0, 110 | "58412": 0, 111 | "58705": 0, 112 | "58826": 0, 113 | "64662": 0, 114 | "65119": 0, 115 | "65125": 0, 116 | "65425": 0, 117 | "65584": 0, 118 | "65624": 0, 119 | "65673": 0, 120 | "66052": 0, 121 | "66927": 0, 122 | "68228": 0, 123 | "69319": 0, 124 | "69540": 0, 125 | "69717": 0, 126 | "69972": 0, 127 | "69973": 0, 128 | "70158": 0, 129 | "70359": 0, 130 | "70672": 0, 131 | "70835": 0, 132 | "72543": 0, 133 | "73151": 0, 134 | "73587": 0, 135 | "73739": 0, 136 | "74709": 0, 137 | "74989": 0, 138 | "75659": 0, 139 | "76706": 0, 140 | "76740": 0, 141 | "76966": 0, 142 | "77375": 0, 143 | "77908": 0, 144 | "78490": 0, 145 | "78791": 0, 146 | "78994": 0, 147 | "79741": 0, 148 | "81549": 0, 149 | "83931": 0, 150 | "85155": 0, 151 | "85562": 0, 152 | "85582": 0, 153 | "85759": 0, 154 | "86105": 0, 155 | "86735": 0, 156 | "87540": 0, 157 | "87616": 0, 158 | "87714": 0, 159 | "88197": 0 160 | } -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G2_category.json: -------------------------------------------------------------------------------- 1 | { 2 | "43": 0, 3 | "61": 0, 4 | "75": 0, 5 | "83": 0, 6 | "3432": 0, 7 | "3442": 0, 8 | "3456": 0, 9 | "3463": 0, 10 | "3482": 0, 11 | "3494": 0, 12 | "3534": 0, 13 | 
"3558": 0, 14 | "3609": 0, 15 | "3640": 0, 16 | "3645": 0, 17 | "3652": 0, 18 | "3672": 0, 19 | "3786": 0, 20 | "3843": 0, 21 | "3929": 0, 22 | "3942": 0, 23 | "3990": 0, 24 | "4006": 0, 25 | "4031": 0, 26 | "4095": 0, 27 | "4176": 0, 28 | "4271": 0, 29 | "13338": 0, 30 | "13354": 0, 31 | "13384": 0, 32 | "13385": 0, 33 | "13487": 0, 34 | "13517": 0, 35 | "13533": 0, 36 | "13555": 0, 37 | "13559": 0, 38 | "13586": 0, 39 | "13592": 0, 40 | "13639": 0, 41 | "13699": 0, 42 | "13745": 0, 43 | "13778": 0, 44 | "13795": 0, 45 | "13838": 0, 46 | "13951": 0, 47 | "14036": 0, 48 | "14117": 0, 49 | "14161": 0, 50 | "14185": 0, 51 | "14333": 0, 52 | "14384": 0, 53 | "14400": 0, 54 | "14533": 0, 55 | "14595": 0, 56 | "14605": 0, 57 | "14628": 0, 58 | "14732": 0, 59 | "14802": 0, 60 | "29606": 0, 61 | "29701": 0, 62 | "33046": 0, 63 | "33055": 0, 64 | "33156": 0, 65 | "33171": 0, 66 | "33255": 0, 67 | "33263": 0, 68 | "33271": 0, 69 | "33295": 0, 70 | "33431": 0, 71 | "33457": 0, 72 | "33481": 0, 73 | "33632": 0, 74 | "33716": 0, 75 | "42534": 0, 76 | "42547": 0, 77 | "42608": 0, 78 | "42635": 0, 79 | "42649": 0, 80 | "42701": 0, 81 | "42708": 0, 82 | "42729": 0, 83 | "42748": 0, 84 | "42882": 0, 85 | "42885": 0, 86 | "42957": 0, 87 | "43070": 0, 88 | "43076": 0, 89 | "43102": 0, 90 | "43200": 0, 91 | "43201": 0, 92 | "43230": 0, 93 | "43258": 0, 94 | "43316": 0, 95 | "43368": 0, 96 | "43505": 0, 97 | "43612": 0, 98 | "43663": 0, 99 | "43713": 0, 100 | "43724": 0, 101 | "43994": 0, 102 | "44010": 0, 103 | "44040": 0, 104 | "50937": 0, 105 | "62159": 0, 106 | "62261": 0, 107 | "71363": 0, 108 | "71501": 0, 109 | "71675": 0, 110 | "71756": 0, 111 | "71980": 0, 112 | "72000": 0, 113 | "72004": 0, 114 | "72040": 0, 115 | "72118": 0, 116 | "72271": 0, 117 | "72274": 0, 118 | "72357": 0, 119 | "72406": 0, 120 | "72458": 0, 121 | "72585": 0, 122 | "72618": 0, 123 | "72827": 0, 124 | "79652": 0, 125 | "79681": 0 126 | } -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G2_instruction.json: -------------------------------------------------------------------------------- 1 | { 2 | "1643": 0, 3 | "4746": 0, 4 | "5744": 0, 5 | "7257": 0, 6 | "9834": 0, 7 | "9957": 0, 8 | "9959": 0, 9 | "10097": 0, 10 | "10941": 0, 11 | "11627": 0, 12 | "11820": 0, 13 | "12034": 0, 14 | "12142": 0, 15 | "12507": 0, 16 | "12509": 0, 17 | "12634": 0, 18 | "12742": 0, 19 | "12773": 0, 20 | "12894": 0, 21 | "12961": 0, 22 | "12974": 0, 23 | "15067": 0, 24 | "15439": 0, 25 | "15929": 0, 26 | "17233": 0, 27 | "17864": 0, 28 | "19186": 0, 29 | "19850": 0, 30 | "22262": 0, 31 | "24131": 0, 32 | "25866": 0, 33 | "26341": 0, 34 | "26837": 0, 35 | "27543": 0, 36 | "29044": 0, 37 | "29499": 0, 38 | "30246": 0, 39 | "30501": 0, 40 | "34056": 0, 41 | "34437": 0, 42 | "34667": 0, 43 | "34980": 0, 44 | "35139": 0, 45 | "36115": 0, 46 | "37074": 0, 47 | "38666": 0, 48 | "44321": 0, 49 | "45688": 0, 50 | "47748": 0, 51 | "48039": 0, 52 | "48770": 0, 53 | "49308": 0, 54 | "50058": 0, 55 | "50406": 0, 56 | "50656": 0, 57 | "50658": 0, 58 | "51289": 0, 59 | "52115": 0, 60 | "54151": 0, 61 | "54246": 0, 62 | "54739": 0, 63 | "54775": 0, 64 | "54793": 0, 65 | "55251": 0, 66 | "55671": 0, 67 | "56101": 0, 68 | "56133": 0, 69 | "56155": 0, 70 | "56266": 0, 71 | "62997": 0, 72 | "63490": 0, 73 | "65457": 0, 74 | "65468": 0, 75 | "65521": 0, 76 | "65607": 0, 77 | "67514": 0, 78 | "67887": 0, 79 | "67969": 0, 80 | "68308": 0, 81 | "69637": 0, 82 | "70369": 0, 83 | "70435": 0, 84 | 
"70543": 0, 85 | "73783": 0, 86 | "73991": 0, 87 | "75279": 0, 88 | "75958": 0, 89 | "76230": 0, 90 | "76512": 0, 91 | "78631": 0, 92 | "78838": 0, 93 | "79476": 0, 94 | "79633": 0, 95 | "79640": 0, 96 | "79644": 0, 97 | "79645": 0, 98 | "81337": 0, 99 | "83220": 0, 100 | "83236": 0, 101 | "84074": 0, 102 | "84585": 0, 103 | "84593": 0, 104 | "85051": 0, 105 | "85129": 0, 106 | "86555": 0, 107 | "87064": 0 108 | } -------------------------------------------------------------------------------- /stabletoolbench/solvable_queries/test_query_ids/G3_instruction.json: -------------------------------------------------------------------------------- 1 | { 2 | "455": 0, 3 | "456": 0, 4 | "457": 0, 5 | "459": 0, 6 | "460": 0, 7 | "1983": 0, 8 | "1984": 0, 9 | "1985": 0, 10 | "1989": 0, 11 | "1991": 0, 12 | "5863": 0, 13 | "5864": 0, 14 | "5865": 0, 15 | "8031": 0, 16 | "8032": 0, 17 | "8034": 0, 18 | "8334": 0, 19 | "8335": 0, 20 | "8337": 0, 21 | "9341": 0, 22 | "9343": 0, 23 | "9344": 0, 24 | "9345": 0, 25 | "9346": 0, 26 | "9349": 0, 27 | "10898": 0, 28 | "11644": 0, 29 | "11645": 0, 30 | "11647": 0, 31 | "11648": 0, 32 | "11649": 0, 33 | "11650": 0, 34 | "13773": 0, 35 | "13774": 0, 36 | "13777": 0, 37 | "13779": 0, 38 | "13780": 0, 39 | "14485": 0, 40 | "14489": 0, 41 | "14938": 0, 42 | "14950": 0, 43 | "18978": 0, 44 | "18979": 0, 45 | "18980": 0, 46 | "18982": 0, 47 | "18984": 0, 48 | "18987": 0, 49 | "18988": 0, 50 | "18990": 0, 51 | "18992": 0, 52 | "19272": 0, 53 | "19274": 0, 54 | "19281": 0, 55 | "20022": 0, 56 | "20024": 0, 57 | "20026": 0, 58 | "20027": 0, 59 | "20028": 0, 60 | "20029": 0, 61 | "20030": 0, 62 | "21682": 0 63 | } -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/inference/Algorithms/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Algorithms/base_search.py: -------------------------------------------------------------------------------- 1 | from Downstream_tasks.base_env import base_env 2 | 3 | class base_search_method: 4 | """For the base tree search method, you need to support the following functions""" 5 | 6 | def __init__(self,llm,io_func: base_env, process_id=0, callbacks = None): 7 | """Args: 8 | llm: The interface of the LLM 9 | io_func(base_env): Interface to the environment, 10 | process_id (int, optional): In multiprocessing annotation, this describes the process id. Defaults to 0. 11 | callbacks (_type_, optional): _description_. Defaults to None. 12 | """ 13 | pass 14 | 15 | def to_json(self,answer=False,process=True): 16 | ''' 17 | return a json object, 18 | If "answer" = True. must have the following field to make answer annotation 19 | If "process" = True. 
You need provide the full information of the tree searching process 20 | 21 | "answer_generation": { 22 | "valid_data": bool, 23 | "final_answer": string, 24 | "finish_type": enum["give_up","give_answer"] 25 | "train_messages": [ [openAI-message] ], 26 | } 27 | ''' 28 | raise NotImplementedError 29 | 30 | def start(self, **args): 31 | """This is the entry point of the searching process""" 32 | raise NotImplementedError 33 | 34 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Downstream_tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/inference/Downstream_tasks/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Downstream_tasks/base_env.py: -------------------------------------------------------------------------------- 1 | class base_env: 2 | 3 | def __init__(self): 4 | self.task_description = "" 5 | self.input_description = "" 6 | self.tool_names = [] 7 | self.functions = [] 8 | 9 | def restart(self): 10 | ''' 11 | Restrat the environment 12 | ''' 13 | raise NotImplementedError 14 | 15 | def get_score(self): 16 | ''' 17 | Get the value of the current state 18 | A fake function, used to search in oracle mode, which is not actually used (and impossible to obtain) 19 | ''' 20 | raise NotImplementedError 21 | 22 | def step(self, action, input_str): 23 | ''' 24 | Perform an interaction in natural language mode 25 | return value (output str, status code) 26 | ''' 27 | raise NotImplementedError 28 | 29 | def check_success(self): 30 | ''' 31 | Returns 1 if successful, otherwise returns 0 32 | ''' 33 | raise NotImplementedError 34 | 35 | def to_json(self): 36 | raise NotImplementedError -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/inference/LLM/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/base_io.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def base_io(input_str): 4 | pass -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM/retriever.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pandas as pd 3 | from sentence_transformers import SentenceTransformer, util 4 | import json 5 | import re 6 | import os, torch 7 | from toolbench.utils import standardize, standardize_category, change_name, process_retrieval_ducoment 8 | 9 | 10 | class ToolRetriever: 11 | def __init__(self, corpus_tsv_path = "", model_path=""): 12 | self.corpus_tsv_path = corpus_tsv_path 13 | self.model_path = model_path 14 | self.model_name = model_path.split('/')[-1] 15 | self.corpus, self.corpus2tool = self.build_retrieval_corpus() 16 | self.embedder = self.build_retrieval_embedder() 17 | self.corpus_embeddings = self.build_corpus_embeddings() 18 | 19 | def build_retrieval_corpus(self): 20 | print("Building corpus...") 21 | documents_df = 
pd.read_csv(self.corpus_tsv_path, sep='\t') 22 | corpus, corpus2tool = process_retrieval_ducoment(documents_df) 23 | corpus_ids = list(corpus.keys()) 24 | corpus = [corpus[cid] for cid in corpus_ids] 25 | return corpus, corpus2tool 26 | 27 | def build_retrieval_embedder(self): 28 | print("Building embedder...") 29 | embedder = SentenceTransformer(self.model_path) 30 | return embedder 31 | 32 | def build_corpus_embeddings(self): 33 | print("Building corpus embeddings with embedder...") 34 | embedding_save_path = self.corpus_tsv_path.replace('.tsv', f'_{self.model_name}_embeddings.pt') 35 | if os.path.exists(embedding_save_path): 36 | print("Loading pre-computed corpus embeddings...") 37 | corpus_embeddings = torch.load(embedding_save_path) 38 | return corpus_embeddings 39 | print("Computing corpus embeddings...") 40 | corpus_embeddings = self.embedder.encode(self.corpus, convert_to_tensor=True) 41 | 42 | torch.save(corpus_embeddings, embedding_save_path) 43 | return corpus_embeddings 44 | 45 | def retrieving(self, query, top_k=5, excluded_tools={}): 46 | print("Retrieving...") 47 | start = time.time() 48 | query_embedding = self.embedder.encode(query, convert_to_tensor=True) 49 | hits = util.semantic_search(query_embedding, self.corpus_embeddings, top_k=10*top_k, score_function=util.cos_sim) 50 | retrieved_tools = [] 51 | for rank, hit in enumerate(hits[0]): 52 | # import pdb; pdb.set_trace() 53 | try: 54 | category, tool_name, api_name = self.corpus2tool[self.corpus[hit['corpus_id']]].split('[SEP]') 55 | except: 56 | print(self.corpus2tool[self.corpus[hit['corpus_id']]]) 57 | import pdb; pdb.set_trace() 58 | category = standardize_category(category) 59 | tool_name = standardize(tool_name) # standardizing 60 | api_name = change_name(standardize(api_name)) # standardizing 61 | if category in excluded_tools: 62 | if tool_name in excluded_tools[category]: 63 | top_k += 1 64 | continue 65 | tmp_dict = { 66 | "category": category, 67 | "tool_name": tool_name, 68 | "api_name": api_name 69 | } 70 | retrieved_tools.append(tmp_dict) 71 | return retrieved_tools -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM_rank/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/inference/LLM_rank/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/LLM_rank/rank_candidate.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Evaluate the score of a query corresponding to different candidates 3 | ''' 4 | 5 | from Prompts.rank_prompts import LLM_PAIRWISE_RANK_SUBFIX_SYSTEM_PROMPT, LLM_PAIRWISE_RANK_USER_PROMPT 6 | import random 7 | from Tree.Tree import tree_node 8 | 9 | 10 | def rank2symmetry(llm_interface, LLM_rank_args, cand1,cand2): 11 | ''' 12 | Use llm to compare the height, due to the sequence, you need to compare each of the two in the front 13 | ''' 14 | single_rank_func = LLM_rank_args["rank_func"] 15 | score = [0,0] 16 | bigger1,query_count1, total_tokens1 = single_rank_func(llm_interface, LLM_rank_args, cand1,cand2) 17 | score[1 - bigger1] += 1 18 | bigger2,query_count2, total_tokens2 = single_rank_func(llm_interface, LLM_rank_args, cand2,cand1) 19 | score[bigger2] += 1 20 | if score[0] > score[1]: 21 | return 1 , query_count1 + query_count2, total_tokens1 + 
total_tokens2 22 | elif score[0] < score[1]: 23 | return -1, query_count1 + query_count2, total_tokens1 + total_tokens2 24 | else: 25 | return 0, query_count1 + query_count2, total_tokens1 + total_tokens2 26 | 27 | 28 | 29 | def rank2_subfix(llm_interface,LLM_rank_args, cand1,cand2): 30 | ''' 31 | Assumed that the two candidates have a long common prefix 32 | ''' 33 | anscestor_interesction = tree_node.find_ancestor_intersection(cand1,cand2) 34 | assert anscestor_interesction != None 35 | intersect_trice = anscestor_interesction.get_former_trice_from_this_node(end_node=None) 36 | trice_1 = cand1.get_former_trice_from_this_node(end_node=anscestor_interesction) 37 | trice_2 = cand2.get_former_trice_from_this_node(end_node=anscestor_interesction) 38 | 39 | system_message = LLM_PAIRWISE_RANK_SUBFIX_SYSTEM_PROMPT 40 | system_message = system_message.replace("{task_description}", LLM_rank_args["task_description"]) 41 | system_message = system_message.replace("{intersect_trice}", intersect_trice) 42 | system_message = system_message.replace("{candidate_A}",trice_1) 43 | system_message = system_message.replace("{candidate_B}",trice_2) 44 | llm_interface.change_messages([{"role":"system","content":system_message}, 45 | {"role":"user","content":LLM_PAIRWISE_RANK_USER_PROMPT}, 46 | ]) 47 | output,error_code, total_tokens = llm_interface.parse(functions=LLM_rank_args["functions"],function_call="none",process_id=LLM_rank_args["process_id"]) 48 | if output["content"].strip().lower()[-1] == "a": 49 | return 1, 1, total_tokens 50 | else: 51 | return 0, 1, total_tokens 52 | 53 | def sum_based_rankn(llm_interface,LLM_rank_args, candidates): 54 | ''' 55 | All pairs are sorted pairwise, sum the total points, and choose the best 56 | ''' 57 | total_querys = 0 58 | total_tokens = 0 59 | scores = [0]*len(candidates) 60 | for i in range(len(candidates)-1): 61 | for j in range(i+1,len(candidates)): 62 | pairwise_rank,query_count,rank2_tokens = rank2symmetry(llm_interface,LLM_rank_args, candidates[i],candidates[j]) 63 | total_querys += query_count 64 | total_tokens += rank2_tokens 65 | if pairwise_rank > 0: 66 | scores[i] += 1 67 | elif pairwise_rank < 0: 68 | scores[j] += 1 69 | else: 70 | scores[i] += 0.5 71 | scores[j] += 0.5 72 | return scores, total_querys, total_tokens 73 | 74 | 75 | 76 | if __name__ == "__main__": 77 | random.seed(42) 78 | # candidates = [ 79 | # "234", 80 | # "66.5", 81 | # "77.1", 82 | # "88.967", 83 | # "pi", 84 | # # "e", 85 | # # "ln(2)" 86 | # ] 87 | candidates = [ 88 | "77.1", 89 | "88.967", 90 | "pi", 91 | "66.5", 92 | "234", 93 | "ln(2)" 94 | ] 95 | ''' 96 | starting_delta: 97 | 50 -> 42.85% 98 | 100 -> 35.99% 99 | 150 -> 29.66% 100 | 200 -> 24.03% 101 | ''' 102 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/ReAct_prompts.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | FORMAT_INSTRUCTIONS_SYSTEM_FUNCTION = """You are AutoGPT, you can use many tools(functions) to do the following task. 5 | First I will give you the task description, and your task start. 6 | At each step, you need to give your thought to analyze the status now and what to do next, with a function call to actually excute your step. 7 | After the call, you will get the call result, and you are now in a new state. 8 | Then you will analyze your status now, then decide what to do next... 9 | After many (Thought-call) pairs, you finally perform the task, then you can give your finial answer. 
10 | Remember: 11 | 1.the state change is irreversible, you can't go back to one of the former state, if you want to restart the task, say "I give up and restart". 12 | 2.All the thought is short, at most in 5 sentence. 13 | 3.You can do more then one trys, so if your plan is to continusly try some conditions, you can do one of the conditions per try. 14 | Let's Begin! 15 | Task description: {task_description}""" 16 | 17 | FORMAT_INSTRUCTIONS_USER_FUNCTION = """ 18 | {input_description} 19 | Begin! 20 | """ 21 | 22 | FORMAT_INSTRUCTIONS_SYSTEM_FUNCTION_ZEROSHOT = """Answer the following questions as best you can. Specifically, you have access to the following APIs: 23 | 24 | {func_str} 25 | 26 | Use the following format: 27 | Thought: you should always think about what to do 28 | Action: the action to take, should be one of {func_list} 29 | Action Input: the input to the action 30 | End Action 31 | 32 | Begin! Remember: (1) Follow the format, i.e, 33 | Thought: 34 | Action: 35 | Action Input: 36 | End Action 37 | (2)The Action: MUST be one of the following:{func_list} 38 | (3)If you believe that you have obtained enough information (which can be judge from the history observations) that can answer the task, please call: 39 | Action: Finish 40 | Action Input: {{"return_type": "give_answer", "final_answer": your answer string}}. 41 | Question: {question} 42 | 43 | Here are the history actions and observations: 44 | """ 45 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/Tree_search_prompts.py: -------------------------------------------------------------------------------- 1 | DIVERSITY_PROMPT='''This is not the first time you try this task, all previous trails failed. 2 | Before you generate my thought for this state, I will first show you your previous actions for this state, and then you must generate actions that is different from all of them. Here are some previous actions candidates: 3 | {previous_candidate} 4 | Remember you are now in the intermediate state of a trail, you will first analyze the now state and previous action candidates, then make actions that is different from all the previous.''' 5 | 6 | 7 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/inference/Prompts/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Prompts/rank_prompts.py: -------------------------------------------------------------------------------- 1 | 2 | LLM_PAIRWISE_RANK_SUBFIX_SYSTEM_PROMPT = ''' 3 | You are value-GPT, which is an expert of defining which trail is better, which trail is more close to solving the task. 
4 | All candidate tries to solve this task with some funciton calls: 5 | ******************************* 6 | {{TASK_DESCRIPTION}} 7 | {task_description} 8 | {{END_TASK_DESCRIPTION}} 9 | ******************************* 10 | First, all candidate do the following things: 11 | {intersect_trice} 12 | After that, there are two candidates A and B, they do different things: 13 | ******************************* 14 | {{CANDIDATE_A_START}} 15 | {candidate_A} 16 | {{CANDIDATE_A_END}} 17 | ******************************* 18 | {{CANDIDATE_B_START}} 19 | {candidate_B} 20 | {{CANDIDATE_B_END}} 21 | Which try do you think is more helpful to solving the task? 22 | ''' 23 | 24 | 25 | 26 | 27 | LLM_PAIRWISE_RANK_USER_PROMPT = ''' 28 | Tell me which candidate is better in ONE Word: "A" or "B":''' -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/Tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/inference/Tree/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/callbacks/ServerEventCallback.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Union 2 | import queue 3 | class ServerEventCallback(): 4 | """Base callback handler""" 5 | 6 | def __init__(self, queue: queue.Queue, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | self.queue = queue 9 | self.llm_block_id = 0 10 | self.tool_block_id = 0 11 | self.tool_descriptions = {} 12 | 13 | def add_to_queue(self, method_name: str, block_id, **kwargs: Any): 14 | data = { 15 | "method_name": method_name, 16 | "block_id": block_id, 17 | } 18 | data.update(kwargs) 19 | self.queue.put(data) 20 | 21 | def on_tool_retrieval_start(self): 22 | # tools should be of the form 23 | # {tool_name, tool_desc} 24 | self.add_to_queue( 25 | "on_tool_retrieval_start", 26 | "recommendation-1", 27 | ) 28 | print("on_tool_retrieval_start method called") 29 | 30 | def on_tool_retrieval_end(self, tools): 31 | # tool should be of the form 32 | # {tool_name, tool_desc} 33 | self.add_to_queue( 34 | "on_tool_retrieval_end", 35 | "recommendation-1", 36 | recommendations=tools 37 | ) 38 | self.tool_descriptions = { 39 | tool["name"]: tool for tool in tools 40 | } 41 | print("on_tool_retrieval_end method called") 42 | def on_request_start(self, user_input: str, method: str) -> Any: 43 | self.tool_block_id = 0 44 | self.llm_block_id = 0 45 | self.add_to_queue( 46 | "on_request_start", 47 | block_id="start", 48 | user_input=user_input, 49 | method=method 50 | ) 51 | def on_request_end(self, outputs: str, chain: List[Any]): 52 | self.add_to_queue( 53 | "on_request_end", 54 | block_id="end", 55 | output=outputs, 56 | chain=chain 57 | ) 58 | def on_request_error(self, error: str): 59 | self.add_to_queue( 60 | "on_request_error", 61 | block_id="error", 62 | error=error 63 | ) 64 | 65 | # keep 66 | def on_chain_start(self, inputs: str, depth: int) -> Any: 67 | """Run when chain starts running.""" 68 | print("on_chain_start method called") 69 | self.llm_block_id += 1 70 | block_id = "llm-" + str(self.llm_block_id) 71 | self.add_to_queue( 72 | "on_chain_start", 73 | block_id=block_id, 74 | messages=inputs, 75 | depth=depth 76 | ) 77 | return block_id 78 | 79 | # this one needs the block_id memorized 80 | def 
on_chain_end(self, block_id: str, depth: int) -> Any: 81 | self.add_to_queue( 82 | "on_chain_end", 83 | block_id=block_id, 84 | # output=output, 85 | depth=depth 86 | ) 87 | print("on_chain_end method called") 88 | 89 | def on_chain_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> Any: 90 | method_name = "on_chain_error" 91 | self.add_to_queue(method_name, error=error, **kwargs) 92 | print("on_chain_error method called") 93 | 94 | def on_llm_start( 95 | self, messages: str, depth: int 96 | ) -> Any: 97 | """Run when LLM starts running.""" 98 | self.add_to_queue( 99 | "on_llm_start", 100 | block_id="llm-" + str(self.llm_block_id), 101 | messages=messages, 102 | depth=depth 103 | ) 104 | print("on_llm_start method called") 105 | 106 | def on_llm_new_token(self, token: str, **kwargs: Any) -> Any: 107 | """Run on new LLM token. Only available when streaming is enabled.""" 108 | method_name = "on_llm_new_token" 109 | self.add_to_queue(method_name, token=token, **kwargs) 110 | print("on_llm_new_token method called") 111 | 112 | def on_llm_end(self, response: str, depth: int) -> Any: 113 | """Run when LLM ends running.""" 114 | self.add_to_queue( 115 | "on_llm_end", 116 | block_id="llm-" + str(self.llm_block_id), 117 | response=response, 118 | depth=depth 119 | ) 120 | print("on_llm_end method called") 121 | 122 | def on_llm_error(self, error: Union[Exception, KeyboardInterrupt]) -> Any: 123 | """Run when LLM errors.""" 124 | self.add_to_queue( 125 | "on_llm_error", 126 | block_id="llm-" + str(self.llm_block_id), 127 | message=str(error), 128 | error=error 129 | ) 130 | print("on_llm_error method called") 131 | 132 | def on_agent_action(self, action, action_input, depth: int) -> str: 133 | self.tool_block_id += 1 134 | block_id="tool-" + str(self.tool_block_id) 135 | self.add_to_queue( 136 | "on_agent_action", 137 | block_id=block_id, 138 | action=action, 139 | action_input = action_input, 140 | depth=depth 141 | ) 142 | print("on_agent_action method called") 143 | return block_id 144 | 145 | def on_tool_start(self, tool_name: str, tool_input: str, depth: int) -> Any: 146 | method_name = "on_tool_start" 147 | tool_description = "Tool not found in tool descriptions" 148 | if tool_name in self.tool_descriptions: 149 | tool_description = self.tool_descriptions[tool_name] 150 | else: 151 | print(self.tool_descriptions) 152 | print("Key", tool_name, "not found in tool descriptions") 153 | self.add_to_queue( 154 | method_name, 155 | block_id="tool-" + str(self.tool_block_id), 156 | tool_name=tool_name, 157 | tool_description=tool_description, 158 | tool_input=tool_input, 159 | depth=depth 160 | ) 161 | print("on_tool_start method called") 162 | 163 | def on_tool_end(self, output: str, status:int, depth: int) -> Any: 164 | method_name = "on_tool_end" 165 | self.add_to_queue( 166 | method_name, 167 | block_id="tool-" + str(self.tool_block_id), 168 | output=output, 169 | status= status, 170 | depth=depth 171 | ) 172 | print("on_tool_end method called") 173 | 174 | def on_tool_error(self, error: Union[Exception, KeyboardInterrupt]) -> Any: 175 | method_name = "on_tool_error" 176 | self.add_to_queue( 177 | method_name, 178 | error=error 179 | ) 180 | print("on_tool_error method called") 181 | 182 | def on_agent_end(self, block_id:str, depth: int): 183 | self.add_to_queue( 184 | "on_agent_end", 185 | block_id=block_id, 186 | depth=depth 187 | ) 188 | print("on_agent_end method called") -------------------------------------------------------------------------------- 
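ServerEventCallback only enqueues event dicts; something on the serving side still has to drain the queue. The sketch below is a minimal, hypothetical consumer illustrating that event protocol — the import path and the `run_pipeline` callable (assumed to accept a `callbacks=[...]` argument) are assumptions for illustration, not the repository's actual server entry point.

```python
import queue
import threading

from toolbench.inference.callbacks.ServerEventCallback import ServerEventCallback


def stream_events(run_pipeline):
    """Drain callback events while the pipeline runs in a worker thread.

    `run_pipeline` is a hypothetical callable taking `callbacks=[...]`;
    substitute the real inference entry point.
    """
    event_queue = queue.Queue()
    callback = ServerEventCallback(event_queue)

    worker = threading.Thread(target=run_pipeline, kwargs={"callbacks": [callback]})
    worker.start()

    while worker.is_alive() or not event_queue.empty():
        try:
            # Every event carries "method_name" and "block_id", plus
            # method-specific fields such as "messages", "output", "depth".
            event = event_queue.get(timeout=0.5)
        except queue.Empty:
            continue
        yield event
        if event["method_name"] in ("on_request_end", "on_request_error"):
            break
```

A web server could forward each yielded dict to the client (for example as a server-sent event), using `block_id` to group streamed LLM and tool blocks in the UI.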
/stabletoolbench/toolbench/inference/qa_pipeline.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Close-domain QA Pipeline 3 | ''' 4 | 5 | import argparse, os 6 | import yaml 7 | from toolbench.inference.Downstream_tasks.rapidapi import pipeline_runner 8 | 9 | 10 | if __name__ == "__main__": 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--backbone_model', type=str, default="toolllama", required=False, help='chatgpt_function or davinci or toolllama') 14 | parser.add_argument('--chatgpt_model', type=str, default="gpt-4-turbo-2024-04-09", required=False, help='gpt-3.5-turbo or gpt-4') 15 | # parser.add_argument('--base_url', type=str, default="https://api.openai.com/v1", required=False, help='openai api url') 16 | # parser.add_argument('--openai_key', type=str, default="", required=False, help='openai key for chatgpt_function or davinci model') 17 | parser.add_argument('--config_file', type=str, default='config.yml', help='Api configuration file') 18 | parser.add_argument('--model_path', type=str, default="your_model_path/", required=False, help='') 19 | # parser.add_argument('--tool_root_dir', type=str, default="your_tools_path/", required=True, help='') 20 | parser.add_argument("--lora", action="store_true", help="Load lora model or not.") 21 | parser.add_argument('--lora_path', type=str, default="your_lora_path if lora", required=False, help='') 22 | parser.add_argument('--max_observation_length', type=int, default=1024, required=False, help='maximum observation length') 23 | parser.add_argument('--max_source_sequence_length', type=int, default=4096, required=False, help='original maximum model sequence length') 24 | parser.add_argument('--max_sequence_length', type=int, default=8192, required=False, help='maximum model sequence length') 25 | parser.add_argument('--observ_compress_method', type=str, default="truncate", choices=["truncate", "filter", "random"], required=False, help='observation compress method') 26 | parser.add_argument('--method', type=str, default="CoT@1", required=False, help='method for answer generation: CoT@n,Reflexion@n,BFS,DFS,UCT_vote') 27 | parser.add_argument('--input_query_file', type=str, default="", required=False, help='input path') 28 | parser.add_argument('--output_answer_file', type=str, default="",required=False, help='output path') 29 | # parser.add_argument('--toolbench_key', type=str, default="",required=False, help='your toolbench key to request rapidapi service') 30 | parser.add_argument('--rapidapi_key', type=str, default="",required=False, help='your rapidapi key to request rapidapi service') 31 | parser.add_argument('--use_rapidapi_key', action="store_true", help="To use customized rapidapi service or not.") 32 | parser.add_argument('--api_customization', action="store_true", help="To use customized api or not.") 33 | 34 | args = parser.parse_args() 35 | 36 | CONFIG = yaml.load(open(args.config_file, 'r'), Loader=yaml.FullLoader) 37 | os.environ["OPENAI_API_BASE"] = CONFIG['api_base'] 38 | os.environ["OPENAI_KEY"] = CONFIG['api_key'] 39 | os.environ["TOOLBENCH_KEY"] = CONFIG['toolbench_key'] 40 | os.environ["TOOL_ROOT_DIR"] = CONFIG['tool_root_dir'] 41 | 42 | pipeline_runner = pipeline_runner(args) 43 | pipeline_runner.run() 44 | 45 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/qa_pipeline_multithread.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 
Close-domain QA Pipeline 3 | ''' 4 | 5 | import argparse, os 6 | import yaml 7 | from toolbench.inference.Downstream_tasks.rapidapi_multithread import pipeline_runner 8 | 9 | 10 | if __name__ == "__main__": 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--backbone_model', type=str, default="toolllama", required=False, help='chatgpt_function or davinci or toolllama') 14 | parser.add_argument('--chatgpt_model', type=str, default="gpt-4-turbo-2024-04-09", required=False, help='gpt-3.5-turbo or gpt-4') 15 | # parser.add_argument('--base_url', type=str, default="https://api.openai.com/v1", required=False, help='openai api url') 16 | # parser.add_argument('--openai_key', type=str, default="", required=False, help='openai key for chatgpt_function or davinci model') 17 | parser.add_argument('--config_file', type=str, default='config.yml', help='Api configuration file') 18 | parser.add_argument('--model_path', type=str, default="your_model_path/", required=False, help='') 19 | # parser.add_argument('--tool_root_dir', type=str, default="your_tools_path/", required=True, help='') 20 | parser.add_argument("--lora", action="store_true", help="Load lora model or not.") 21 | parser.add_argument('--lora_path', type=str, default="your_lora_path if lora", required=False, help='') 22 | parser.add_argument('--max_observation_length', type=int, default=1024, required=False, help='maximum observation length') 23 | parser.add_argument('--max_source_sequence_length', type=int, default=4096, required=False, help='original maximum model sequence length') 24 | parser.add_argument('--max_sequence_length', type=int, default=8192, required=False, help='maximum model sequence length') 25 | parser.add_argument('--single_chain_max_step', type=int, default=12, required=False, help='maximum step for single chain') 26 | parser.add_argument('--max_query_count', type=int, default=30, required=False, help='maximum query count') 27 | parser.add_argument('--observ_compress_method', type=str, default="truncate", choices=["truncate", "filter", "random"], required=False, help='observation compress method') 28 | parser.add_argument('--method', type=str, default="CoT@1", required=False, help='method for answer generation: CoT@n,Reflexion@n,BFS,DFS,UCT_vote') 29 | parser.add_argument('--input_query_file', type=str, default="", required=False, help='input path') 30 | parser.add_argument('--output_answer_file', type=str, default="",required=False, help='output path') 31 | # parser.add_argument('--toolbench_key', type=str, default="",required=False, help='your toolbench key to request rapidapi service') 32 | parser.add_argument('--rapidapi_key', type=str, default="",required=False, help='your rapidapi key to request rapidapi service') 33 | parser.add_argument('--use_rapidapi_key', action="store_true", help="To use customized rapidapi service or not.") 34 | parser.add_argument('--api_customization', action="store_true", help="To use customized api or not.") 35 | parser.add_argument('--num_thread', type=int, default=1, required=False, help='number of threads') 36 | parser.add_argument('--disable_tqdm', action="store_true", help="disable tqdm or not.") 37 | parser.add_argument('--overwrite', action='store_true', help='overwrite existing runs') 38 | parser.add_argument('--easy_tool', action='store_true', help='use easy tool baseline or not') 39 | 40 | args = parser.parse_args() 41 | if args.overwrite: 42 | os.system(f"rm -rf {args.output_answer_file}") 43 | 44 | CONFIG = yaml.load(open(args.config_file, 'r'), 
Loader=yaml.FullLoader) 45 | os.environ["OPENAI_API_BASE"] = CONFIG['api_base'] 46 | os.environ["OPENAI_KEY"] = CONFIG['api_key'] 47 | os.environ["TOOLBENCH_KEY"] = CONFIG['toolbench_key'] 48 | os.environ["TOOL_ROOT_DIR"] = CONFIG['tool_root_dir'] 49 | 50 | pipeline_runner = pipeline_runner(args) 51 | pipeline_runner.run() 52 | 53 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/inference/qa_pipeline_open_domain.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Open-domain QA Pipeline 3 | ''' 4 | import argparse 5 | from toolbench.inference.Downstream_tasks.rapidapi import pipeline_runner 6 | 7 | 8 | if __name__ == "__main__": 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--corpus_tsv_path', type=str, default="your_retrival_corpus_path/", required=False, help='') 12 | parser.add_argument('--retrieval_model_path', type=str, default="your_model_path/", required=False, help='') 13 | parser.add_argument('--retrieved_api_nums', type=int, default=5, required=False, help='') 14 | parser.add_argument('--backbone_model', type=str, default="toolllama", required=False, help='chatgpt_function or davinci or toolllama') 15 | parser.add_argument('--chatgpt_model', type=str, default="gpt-4-turbo-2024-04-09", required=False, help='gpt-3.5-turbo or gpt-4') 16 | parser.add_argument('--base_url', type=str, default="https://api.openai.com/v1", required=False, help='openai api url') 17 | parser.add_argument('--openai_key', type=str, default="", required=False, help='openai key for chatgpt_function or davinci model') 18 | parser.add_argument('--model_path', type=str, default="your_model_path/", required=False, help='') 19 | parser.add_argument('--tool_root_dir', type=str, default="your_tools_path/", required=True, help='') 20 | parser.add_argument("--lora", action="store_true", help="Load lora model or not.") 21 | parser.add_argument('--lora_path', type=str, default="your_lora_path if lora", required=False, help='') 22 | parser.add_argument('--max_observation_length', type=int, default=1024, required=False, help='maximum observation length') 23 | parser.add_argument('--max_source_sequence_length', type=int, default=4096, required=False, help='original maximum model sequence length') 24 | parser.add_argument('--max_sequence_length', type=int, default=8192, required=False, help='maximum model sequence length') 25 | parser.add_argument('--observ_compress_method', type=str, default="truncate", choices=["truncate", "filter", "random"], required=False, help='maximum observation length') 26 | parser.add_argument('--method', type=str, default="CoT@1", required=False, help='method for answer generation: CoT@n,Reflexion@n,BFS,DFS,UCT_vote') 27 | parser.add_argument('--input_query_file', type=str, default="", required=False, help='input path') 28 | parser.add_argument('--output_answer_file', type=str, default="",required=False, help='output path') 29 | parser.add_argument('--toolbench_key', type=str, default="",required=False, help='your toolbench key to request rapidapi service') 30 | parser.add_argument('--rapidapi_key', type=str, default="",required=False, help='your rapidapi key to request rapidapi service') 31 | parser.add_argument('--use_rapidapi_key', action="store_true", help="To use customized rapidapi service or not.") 32 | parser.add_argument('--api_customization', action="store_true", help="To use customized api or not. 
NOT SUPPORTED currently under open domain setting.") 33 | 34 | args = parser.parse_args() 35 | 36 | pipeline_runner = pipeline_runner(args, add_retrieval=True) 37 | pipeline_runner.run() 38 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/__init__.py: -------------------------------------------------------------------------------- 1 | from toolbench.model.model_adapter import ( 2 | load_model, 3 | get_conversation_template, 4 | add_model_args, 5 | ) 6 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/apply_delta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Apply the delta weights on top of a base model. 3 | 4 | Usage: 5 | python3 -m fastchat.model.apply_delta --base ~/model_weights/llama-7b --target ~/model_weights/vicuna-7b --delta lmsys/vicuna-7b-delta-v1.1 6 | """ 7 | import argparse 8 | import gc 9 | import glob 10 | import json 11 | import os 12 | import shutil 13 | import tempfile 14 | 15 | from huggingface_hub import snapshot_download 16 | import torch 17 | from torch import nn 18 | from tqdm import tqdm 19 | from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig 20 | 21 | 22 | GB = 1 << 30 23 | 24 | 25 | def split_files(model_path, tmp_path, split_size): 26 | if not os.path.exists(model_path): 27 | model_path = snapshot_download(repo_id=model_path) 28 | if not os.path.exists(tmp_path): 29 | os.makedirs(tmp_path) 30 | 31 | file_pattern = os.path.join(model_path, "pytorch_model-*.bin") 32 | files = glob.glob(file_pattern) 33 | 34 | part = 0 35 | try: 36 | for file_path in tqdm(files): 37 | state_dict = torch.load(file_path) 38 | new_state_dict = {} 39 | 40 | current_size = 0 41 | for name, param in state_dict.items(): 42 | param_size = param.numel() * param.element_size() 43 | 44 | if current_size + param_size > split_size: 45 | new_file_name = f"pytorch_model-{part}.bin" 46 | new_file_path = os.path.join(tmp_path, new_file_name) 47 | torch.save(new_state_dict, new_file_path) 48 | current_size = 0 49 | new_state_dict = None 50 | gc.collect() 51 | new_state_dict = {} 52 | part += 1 53 | 54 | new_state_dict[name] = param 55 | current_size += param_size 56 | 57 | new_file_name = f"pytorch_model-{part}.bin" 58 | new_file_path = os.path.join(tmp_path, new_file_name) 59 | torch.save(new_state_dict, new_file_path) 60 | new_state_dict = None 61 | gc.collect() 62 | new_state_dict = {} 63 | part += 1 64 | except Exception as e: 65 | print(f"An error occurred during split_files: {e}") 66 | shutil.rmtree(tmp_path) 67 | raise 68 | 69 | 70 | def apply_delta_low_cpu_mem(base_model_path, target_model_path, delta_path): 71 | delta_tokenizer = AutoTokenizer.from_pretrained(delta_path, use_fast=False) 72 | delta_config = AutoConfig.from_pretrained(delta_path) 73 | 74 | if os.path.exists(target_model_path): 75 | shutil.rmtree(target_model_path) 76 | os.makedirs(target_model_path) 77 | 78 | split_size = 4 * GB 79 | 80 | with tempfile.TemporaryDirectory() as tmp_base_path, tempfile.TemporaryDirectory() as tmp_delta_path: 81 | print(f"Split files for the base model to {tmp_base_path}") 82 | split_files(base_model_path, tmp_base_path, split_size) 83 | print(f"Split files for the delta weights to {tmp_delta_path}") 84 | split_files(delta_path, tmp_delta_path, split_size) 85 | 86 | base_pattern = os.path.join(tmp_base_path, "pytorch_model-*.bin") 87 | base_files = glob.glob(base_pattern) 88 | delta_pattern = 
os.path.join(tmp_delta_path, "pytorch_model-*.bin") 89 | delta_files = glob.glob(delta_pattern) 90 | delta_state_dict = torch.load(delta_files[0]) 91 | 92 | print("Applying the delta") 93 | weight_map = {} 94 | total_size = 0 95 | 96 | for i, base_file in tqdm(enumerate(base_files)): 97 | state_dict = torch.load(base_file) 98 | file_name = f"pytorch_model-{i}.bin" 99 | for name, param in state_dict.items(): 100 | if name not in delta_state_dict: 101 | for delta_file in delta_files: 102 | delta_state_dict = torch.load(delta_file) 103 | gc.collect() 104 | if name in delta_state_dict: 105 | break 106 | 107 | state_dict[name] += delta_state_dict[name] 108 | weight_map[name] = file_name 109 | total_size += param.numel() * param.element_size() 110 | gc.collect() 111 | torch.save(state_dict, os.path.join(target_model_path, file_name)) 112 | 113 | with open( 114 | os.path.join(target_model_path, "pytorch_model.bin.index.json"), "w" 115 | ) as f: 116 | json.dump( 117 | {"weight_map": weight_map, "metadata": {"total_size": total_size}}, f 118 | ) 119 | 120 | print(f"Saving the target model to {target_model_path}") 121 | delta_tokenizer.save_pretrained(target_model_path) 122 | delta_config.save_pretrained(target_model_path) 123 | 124 | 125 | def apply_delta(base_model_path, target_model_path, delta_path): 126 | print(f"Loading the delta weights from {delta_path}") 127 | delta_tokenizer = AutoTokenizer.from_pretrained(delta_path, use_fast=False) 128 | delta = AutoModelForCausalLM.from_pretrained( 129 | delta_path, torch_dtype=torch.float16, low_cpu_mem_usage=True 130 | ) 131 | 132 | print(f"Loading the base model from {base_model_path}") 133 | base = AutoModelForCausalLM.from_pretrained( 134 | base_model_path, torch_dtype=torch.float16, low_cpu_mem_usage=True 135 | ) 136 | 137 | print("Applying the delta") 138 | for name, param in tqdm(base.state_dict().items(), desc="Applying delta"): 139 | assert name in delta.state_dict() 140 | param.data += delta.state_dict()[name] 141 | 142 | print(f"Saving the target model to {target_model_path}") 143 | base.save_pretrained(target_model_path) 144 | delta_tokenizer.save_pretrained(target_model_path) 145 | 146 | 147 | if __name__ == "__main__": 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument("--base-model-path", type=str, required=True) 150 | parser.add_argument("--target-model-path", type=str, required=True) 151 | parser.add_argument("--delta-path", type=str, required=True) 152 | parser.add_argument( 153 | "--low-cpu-mem", 154 | action="store_true", 155 | help="Lower the cpu memory usage. 
This will split large files and use " 156 | "disk as swap to reduce the memory usage below 10GB.", 157 | ) 158 | args = parser.parse_args() 159 | 160 | if args.low_cpu_mem: 161 | apply_delta_low_cpu_mem( 162 | args.base_model_path, args.target_model_path, args.delta_path 163 | ) 164 | else: 165 | apply_delta(args.base_model_path, args.target_model_path, args.delta_path) 166 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/compression.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import os 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.nn import functional as F 7 | from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig 8 | 9 | 10 | @dataclasses.dataclass 11 | class CompressionConfig: 12 | """Group-wise quantization.""" 13 | 14 | num_bits: int 15 | group_size: int 16 | group_dim: int 17 | symmetric: bool 18 | enabled: bool = True 19 | 20 | 21 | default_compression_config = CompressionConfig( 22 | num_bits=8, group_size=256, group_dim=1, symmetric=True, enabled=True 23 | ) 24 | 25 | 26 | class CLinear(nn.Module): 27 | """Compressed Linear Layer.""" 28 | 29 | def __init__(self, weight=None, bias=None, device=None): 30 | super().__init__() 31 | self.weight = weight 32 | self.bias = bias 33 | 34 | def forward(self, input): 35 | return F.linear(input.to(self.weight.dtype), self.weight, self.bias) 36 | 37 | 38 | def compress_module(module, target_device): 39 | for name, child in module.named_children(): 40 | if isinstance(child, nn.Linear): 41 | setattr( 42 | module, 43 | name, 44 | CLinear(child.weight, child.bias, target_device), 45 | ) 46 | compress_module(child, target_device) 47 | 48 | 49 | def get_compressed_list(module, prefix=""): 50 | compressed_list = [] 51 | for name, child in module.named_children(): 52 | if isinstance(child, nn.Linear): 53 | full_name = f"{prefix}.{name}.weight" if prefix else f"{name}.weight" 54 | compressed_list.append(full_name) 55 | compressed_list.extend( 56 | get_compressed_list(child, full_name) 57 | ) 58 | return compressed_list 59 | 60 | 61 | def apply_compressed_weight(module, compressed_state_dict, target_device, prefix=""): 62 | for name, child in module.named_children(): 63 | if isinstance(child, nn.Linear): 64 | full_name = f"{prefix}.{name}.weight" if prefix else f"{name}.weight" 65 | setattr( 66 | module, 67 | name, 68 | CLinear( 69 | compressed_state_dict[full_name], child.bias, target_device 70 | ), 71 | ) 72 | apply_compressed_weight(child, compressed_state_dict, target_device, full_name) 73 | 74 | 75 | def load_compress_model(model_path, device, torch_dtype): 76 | # partially load model 77 | tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False) 78 | base_pattern = os.path.join(model_path, "pytorch_model-*.bin") 79 | files = glob.glob(base_pattern) 80 | 81 | config = AutoConfig.from_pretrained( 82 | model_path, low_cpu_mem_usage=True, torch_dtype=torch_dtype 83 | ) 84 | model = AutoModelForCausalLM.from_config(config) 85 | linear_weights = get_compressed_list(model) 86 | 87 | compressed_state_dict = {} 88 | 89 | for filename in files: 90 | tmp_state_dict = torch.load(filename) 91 | for name in tmp_state_dict: 92 | if name in linear_weights: 93 | tensor = tmp_state_dict[name].to(device).data.to(torch_dtype) 94 | compressed_state_dict[name] = compress( 95 | tensor, default_compression_config 96 | ) 97 | else: 98 | compressed_state_dict[name] = 
tmp_state_dict[name].to(device) 99 | tmp_state_dict[name] = None 100 | tensor = None 101 | torch.cuda.empty_cache() 102 | 103 | for name, param in model.named_parameters(): 104 | if name not in linear_weights: 105 | param.data = compressed_state_dict[name] 106 | apply_compressed_weight(model, compressed_state_dict, device) 107 | 108 | model.to(device) 109 | 110 | return model, tokenizer 111 | 112 | 113 | def compress(tensor, config): 114 | """Simulate group-wise quantization.""" 115 | if not config.enabled: 116 | return tensor 117 | 118 | group_size, num_bits, group_dim, symmetric = ( 119 | config.group_size, 120 | config.num_bits, 121 | config.group_dim, 122 | config.symmetric, 123 | ) 124 | assert num_bits <= 8 125 | 126 | original_shape = tensor.shape 127 | num_groups = (original_shape[group_dim] + group_size - 1) // group_size 128 | new_shape = ( 129 | original_shape[:group_dim] 130 | + (num_groups, group_size) 131 | + original_shape[group_dim + 1 :] 132 | ) 133 | 134 | # Pad 135 | pad_len = group_size - original_shape[group_dim] % group_size 136 | if pad_len != 0: 137 | pad_shape = ( 138 | original_shape[:group_dim] + (pad_len,) + original_shape[group_dim + 1 :] 139 | ) 140 | tensor = torch.cat( 141 | [tensor, torch.zeros(pad_shape, dtype=tensor.dtype, device=tensor.device)], 142 | dim=group_dim, 143 | ) 144 | data = tensor.view(new_shape) 145 | 146 | # Quantize 147 | if symmetric: 148 | B = 2 ** (num_bits - 1) - 1 149 | scale = B / torch.max(data.abs(), dim=group_dim + 1, keepdim=True)[0] 150 | data = data * scale 151 | data = data.clamp_(-B, B).round_().to(torch.int8) 152 | return data, scale, original_shape 153 | else: 154 | B = 2**num_bits - 1 155 | mn = torch.min(data, dim=group_dim + 1, keepdim=True)[0] 156 | mx = torch.max(data, dim=group_dim + 1, keepdim=True)[0] 157 | 158 | scale = B / (mx - mn) 159 | data = data - mn 160 | data *= scale 161 | 162 | data = data.clamp_(0, B).round_().to(torch.uint8) 163 | return data, mn, scale, original_shape 164 | 165 | 166 | def decompress(packed_data, config): 167 | """Simulate group-wise dequantization.""" 168 | if not config.enabled: 169 | return packed_data 170 | 171 | group_size, num_bits, group_dim, symmetric = ( 172 | config.group_size, 173 | config.num_bits, 174 | config.group_dim, 175 | config.symmetric, 176 | ) 177 | 178 | # Dequantize 179 | if symmetric: 180 | data, scale, original_shape = packed_data 181 | data = data / scale 182 | else: 183 | data, mn, scale, original_shape = packed_data 184 | data = data / scale 185 | data += mn 186 | 187 | # Unpad 188 | pad_len = group_size - original_shape[group_dim] % group_size 189 | if pad_len: 190 | padded_original_shape = ( 191 | original_shape[:group_dim] 192 | + (original_shape[group_dim] + pad_len,) 193 | + original_shape[group_dim + 1 :] 194 | ) 195 | data = data.reshape(padded_original_shape) 196 | indices = [slice(0, x) for x in original_shape] 197 | return data[indices].contiguous() 198 | else: 199 | return data.view(original_shape) 200 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/model/make_delta.py: -------------------------------------------------------------------------------- 1 | """ 2 | Make the delta weights by subtracting base weights. 
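(Such a delta can later be re-applied on top of the base weights with apply_delta.py to reconstruct the target model.)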
3 | 4 | Usage: 5 | python3 -m fastchat.model.make_delta --base ~/model_weights/llama-13b --target ~/model_weights/vicuna-13b --delta ~/model_weights/vicuna-13b-delta --hub-repo-id lmsys/vicuna-13b-delta-v1.1 6 | """ 7 | import argparse 8 | 9 | import torch 10 | from tqdm import tqdm 11 | from transformers import AutoTokenizer, AutoModelForCausalLM 12 | 13 | 14 | def make_delta(base_model_path, target_model_path, delta_path): 15 | print(f"Loading the base model from {base_model_path}") 16 | base = AutoModelForCausalLM.from_pretrained( 17 | base_model_path, torch_dtype=torch.float16, low_cpu_mem_usage=True 18 | ) 19 | 20 | print(f"Loading the target model from {target_model_path}") 21 | target = AutoModelForCausalLM.from_pretrained( 22 | target_model_path, torch_dtype=torch.float16, low_cpu_mem_usage=True 23 | ) 24 | target_tokenizer = AutoTokenizer.from_pretrained(target_model_path, use_fast=False) 25 | 26 | print("Calculating the delta") 27 | for name, param in tqdm(target.state_dict().items(), desc="Calculating delta"): 28 | assert name in base.state_dict() 29 | param.data -= base.state_dict()[name] 30 | 31 | print(f"Saving the delta to {delta_path}") 32 | if args.hub_repo_id: 33 | kwargs = {"push_to_hub": True, "repo_id": args.hub_repo_id} 34 | else: 35 | kwargs = {} 36 | target.save_pretrained(delta_path, **kwargs) 37 | target_tokenizer.save_pretrained(delta_path, **kwargs) 38 | 39 | 40 | if __name__ == "__main__": 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument("--base-model-path", type=str, required=True) 43 | parser.add_argument("--target-model-path", type=str, required=True) 44 | parser.add_argument("--delta-path", type=str, required=True) 45 | parser.add_argument("--hub-repo-id", type=str) 46 | args = parser.parse_args() 47 | 48 | make_delta(args.base_model_path, args.target_model_path, args.delta_path) 49 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/README_ZH.md: -------------------------------------------------------------------------------- 1 |
2 | 🛠️Tool Eval🤖 3 |
4 | 5 | 通过在ToolBench上对LLaMA进行微调,我们得到了**ToolLLaMA**。考虑到人工评估非常耗时,我们借鉴[AlpacaEval](https://tatsu-lab.github.io/alpaca_eval/)开发了一个高效的机器自动评估**ToolEval**,其中包含两个评估指标: 6 | 7 | - **通过率**:计算在有限的OpenAI API调用次数内成功完成指令的比例。 8 | 9 | - **偏好**:通过比较给定指令的两个答案(动作序列)来衡量。我们预先定义了一组更好答案的标准,这些标准被组织成ChatGPT的提示。我们向评估器提供测试指令和两个候选答案,并获得其偏好。我们对每个答案对进行多次评估以提高系统的可靠性。然后,我们计算**优胜率**(被评估器选择为更优的百分比。有关详细信息,请参阅我们的论文。 10 | 11 | 为了验证ChatGPT评估器在通过率和胜率方面的可靠性,我们从四种不同的方法(ChatGPT+ReACT,ChatGPT+DFSDT,ToolLLaMA+DFSDT和GPT4+DFSDT)中进行采样,为每种方法的300个测试指令获取解决方案对。然后,我们请人类标注ChatGPT+DFSDT,ToolLLaMA+DFSDT和GPT4+DFSDT的通过率,以及ChatGPT+ReACT和ChatGPT+DFSDT之间的胜率。 12 | 13 | 我们的ChatGPT评估器在通过率方面与人类标注者具有高达**87.1%**的一致性,在胜率方面具有**80.3%**的一致性。这个结果表明,我们的评估器生成的评估结果与人类非常相似,并且可以视为在通过率和胜率上模拟人类评估的可靠评估器。 14 | 有关ToolEval的更多细节,请参阅我们的论文。 15 | 16 | ## 🚀用法 17 | 18 | ### Install 19 | Install Package (python>=3.9) 20 | ```bash 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | ### Evaluation 25 | *若要复现结果,直接通过[Google Drive](https://drive.google.com/drive/folders/1yBUQ732mPu-KclJnuQELEhtKakdXFc3J)下载我们的`reproduction_data.zip`,解压后置`reproduction_data`于`ToolBench/data/`下即可,可以跳过数据准备流程。* 26 | - 数据准备。若要使用 ToolEval 评估您自己的模型和方法,首先需要为六个测试子集准备所有的模型预测。创建一个以您的模型和方法命名的目录,例如 `chatgpt_cot`,然后将每个测试集的预测放在该目录下。目录的文件结构应如下: 27 | ``` 28 | ├── /chatgpt_cot/ 29 | │ ├── /G1_instruction/ 30 | │ │ ├── /10160_CoT@1.json 31 | │ │ └── ... 32 | │ ├── /G1_tool/ 33 | │ │ ├── /10221_CoT@1.json 34 | │ │ └── ... 35 | │ ├── ... 36 | │ ├── /G3_instruction/ 37 | │ │ ├── /10221_CoT@1.json 38 | │ │ └── ... 39 | ``` 40 | 41 | 然后对模型预测进行预处理: 42 | 43 | ```bash 44 | export RAW_ANSWER_PATH=../../data/reproduction_data/model_predictions/ 45 | export CONVERTED_ANSWER_PATH=../../data/reproduction_data/model_predictions_converted/ 46 | export MODEL_NAME=chatgpt_cot 47 | export METHOD=CoT 48 | mkdir ${CONVERTED_ANSWER_PATH}/${MODEL_NAME} 49 | for test_set in G1_instruction G1_category G1_tool G2_category G2_instruction G3_instruction 50 | do 51 | answer_dir=${RAW_ANSWER_PATH}/${MODEL_NAME}/${test_set} 52 | output_file=${CONVERTED_ANSWER_PATH}/${MODEL_NAME}/${test_set}.json 53 | python convert_to_answer_format.py\ 54 | --answer_dir ${answer_dir} \ 55 | --method ${METHOD} \ 56 | --output ${output_file} 57 | done 58 | ``` 59 | 之后,检查`${CONVERTED_ANSWER_PATH}/${MODEL_NAME}`下是否有测试集的预处理JSON文件。如果有,你就可以准备运行以下评估过程了。如果没有,请检查模型的预测是否有问题。 60 | 61 | - OpenAI Key 62 | 准备您的OpenAI Key来搭建我们的evaluator。Key需要被存储到一个json file中,如`path/to/your/openai_key_json_file.json`: 63 | ```bash 64 | [ 65 | { 66 | "username": "your_user_name", 67 | "passwd": "your_password", 68 | "api_key": "your_openai_key", 69 | "organization": "your_organization" 70 | }, 71 | ... 72 | ] 73 | ``` 74 | - Pass rate. 75 | ```bash 76 | export CONVERTED_ANSWER_PATH=../../data/reproduction_data/model_predictions_converted/ 77 | export SAVE_PATH=pass_rate_results 78 | export CANDIDATE_MODEL=chatgpt_cot 79 | export API_POOL_FILE=path/to/your/openai_key_json_file.json 80 | 81 | python eval_pass_rate.py \ 82 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 83 | --save_path ${SAVE_PATH} \ 84 | --reference_model ${CANDIDATE_MODEL} \ 85 | --test_ids ../../data/test_query_ids/ \ 86 | --max_eval_threads 20 \ 87 | --evaluate_times 4 88 | 89 | ``` 90 | 91 | 结果文件会被存储至${SAVE_PATH}中。 92 | 93 | - Win rate. 
以下示例以ChatGPT-ReACT作为参考模型,GPT4-ReACT作为候选模型。请注意,您首先需要获取两个模型的pass rate结果,然后运行以下命令来评估GPT4-ReACT的win rate结果: 94 | ```bash 95 | export CONVERTED_ANSWER_PATH=../../data/reproduction_data/model_predictions_converted/ 96 | export SAVE_PATH=preference_results 97 | export PASS_TARE_PATH=pass_rate_results 98 | export REFERENCE_MODEL=chatgpt_cot 99 | export CANDIDATE_MODEL=gpt-4-0613_cot 100 | export API_POOL_FILE=path/to/your/openai_key_json_file.json 101 | 102 | python eval_preference.py \ 103 | --converted_answer_path ${CONVERTED_ANSWER_PATH} \ 104 | --reference_model ${REFERENCE_MODEL} \ 105 | --output_model ${CANDIDATE_MODEL} \ 106 | --test_ids ../../data/test_query_ids/ \ 107 | --save_path ${SAVE_PATH} \ 108 | --pass_rate_result_path ${PASS_TARE_PATH} \ 109 | --max_eval_threads 20 \ 110 | --use_pass_rate true \ 111 | --evaluate_times 4 112 | ``` 113 | 114 | 结果文件会被存储至${SAVE_PATH}中。 115 | 116 | ### 评估新方法 117 | 要评估除了ReACT和DFSDT之外的方法,您需要遵循以上Data preparation的步骤准备您的预处理好的answer数据。预处理好的answer数据需遵循以下json格式: 118 | 119 | ```json 120 | [ 121 | { 122 | "method":"method name", 123 | "total_steps": int, // a integer count total steps in answer details 124 | "final_answer": "final answer from the method", 125 | "answer_details":[{ 126 | "role":"node role, can be system, user, assistant and tool", 127 | "message":"message for the node", 128 | "next":[//next steps, can have multiple elements if the node have multiple candidates. 129 | { 130 | "role":"", 131 | "message":"", 132 | "next":[...] 133 | }, 134 | ...//more candidates 135 | ] 136 | }] 137 | } 138 | ... // more answers for the give query in the testdata 139 | ] 140 | ``` 141 | 142 | 143 | ### 更新排行榜 144 | 145 | 如果您想将您的模型的结果上传到[ToolEval Leaderboard](https://openbmb.github.io/ToolBench/),请您将您的结果文件整理成上述格式发送给我们(urtoolbench@gmail.com)或者开一个pull request。 146 | 我们将运行评测脚本更新结果并将您的模型添加到排行榜中。 147 | 148 | 149 | ### 创建新的自动评估器 150 | 如果您想创建新的自动评估器,您需要按下列步骤进行: 151 | 1. 在路径`toolbench/tooleval/evaluators`下创建一个评测器配置文件目录,命名与你的评测器名一致。在其中添加`config.yaml`文件与`template.txt`文件。具体配置方式可参考`toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized`中的实现。 152 | 2. 创建你的evaluator类并实现`fn_completions`函数在文件夹`toolbench/tooleval/evaluators/registered_cls`中,或者你可以使用我们预先定义好的类例如`OpenAINormalizedEvaluator`。 153 | 完成后将配置文件中`registered_cls_name`字段填写为该类的名称。 154 | 这里给出一个例子: 155 | ```Python 156 | from evaluators import register_evaluator,BaseEvaluator 157 | from typing import Dict,List 158 | 159 | @register_evaluator 160 | class MyEvaluator(BaseEvaluator): 161 | def __init__(self,config): 162 | super().__init__( 163 | fn_completions=self.fn_completions, 164 | ) 165 | # set your configures here 166 | 167 | def fn_completions(self,query:Dict,answers:List[Dict])->int: 168 | # implement your evaluator here 169 | # return the index of the preferred answer 170 | return 0 171 | ``` 172 | 其中register_evaluator是一个装饰器,用于注册评估器,BaseEvaluator是一个基类,用于实现评估器的基本功能。 173 | 3. 测试评估器的性能,运行脚本`evaluators_comparison.py`。 174 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/ToolBench.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "../../../ToolBench" 5 | }, 6 | { 7 | "path": "../.." 
8 | }, 9 | { 10 | "path": "../../../STC/RapidAPI-Server" 11 | } 12 | ], 13 | "settings": { 14 | "git.ignoreLimitWarning": true 15 | } 16 | } -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/tooleval/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/automatic_eval_sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import time 4 | from concurrent.futures import ThreadPoolExecutor,as_completed 5 | from tqdm import tqdm 6 | import numpy as np 7 | import argparse 8 | import random 9 | from evaluation import UserEvaluation,BaseToolMethod 10 | from evaluators import load_registered_automatic_evaluator 11 | from typing import List,Dict,Callable 12 | import pandas as pd 13 | 14 | abs_dir = os.path.split(__file__)[0] 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--output',default=os.path.join(abs_dir,'dataset','test.json'),help='where to store the method output.') 20 | parser.add_argument('--method',default='unknown',help='what the name of the method.') 21 | parser.add_argument('--ref_method',default='gpt-3.5-turbo_CoT',help='what the reference method is') 22 | parser.add_argument('--ref_output',default=os.path.join(abs_dir,'dataset','ref_sample.json'),help='where the reference answer stored') 23 | parser.add_argument('--evaluators_cfg_path',default=os.path.join(abs_dir,'evaluators'),help='where the evaluators config files are stored') 24 | parser.add_argument('--evaluator',default='tooleval_gpt-3.5-turbo_normalized',help='which evaluator to use') 25 | parser.add_argument('--max_eval_threads',default=16,type=int,help='how many threads to use for evaluation') 26 | parser.add_argument('--evalset',default='default_evalset',help='which the evaluation dataset to use') 27 | parser.add_argument('--eval_server_address',default='http://localhost:8000',help='the address of the evaluation server') 28 | parser.add_argument('--use_existed_output',default=False,action='store_true',help='whether to use the existed output') 29 | 30 | return parser.parse_args() 31 | 32 | 33 | ## !!define your method here !! 34 | class SampleMethod(BaseToolMethod): 35 | def __init__(self): 36 | super().__init__() 37 | def forward(self,query:str,tools:List[Dict],tool_func:Callable)->Dict: 38 | return {} 39 | def convert_result_to_dict(self,result): 40 | return { 41 | 'method': 'sample', 42 | 'total_steps': 0, 43 | 'final_answer': '', 44 | 'answer_details': [] 45 | } 46 | 47 | if __name__=='__main__': 48 | args = parse_args() 49 | 50 | exec_generating_method_outputs = True 51 | if os.path.exists(args.output): 52 | print('Output file {} already exists!'.format(args.output)) 53 | if args.use_existed_output: 54 | exec_generating_method_outputs = False 55 | else: 56 | print('Overwrite? 
(y/n)') 57 | exec_generating_method_outputs = input()=='y' 58 | 59 | if exec_generating_method_outputs: 60 | ## change the SampleMethod to your method 61 | usereval = UserEvaluation(SampleMethod(),args.eval_server_address,args.evalset) 62 | print('Generating method outputs...') 63 | results = usereval.run() 64 | print('Saving method outputs...') 65 | with open(args.output,'w') as f: 66 | json.dump(results,f) 67 | else: 68 | print('Use existed output.') 69 | results = json.load(open(args.output)) 70 | 71 | print('Loading reference answer for evaluation...') 72 | try: 73 | ref_output = json.load(open(args.ref_output)) 74 | except: 75 | raise Exception('Cannot load reference answer from {}\n Please Download before evaluation!'.format(args.ref_output)) 76 | 77 | print('Loading automatic evaluators...') 78 | evaluators = [load_registered_automatic_evaluator(vars(args)) for _ in range(args.max_eval_threads)] 79 | 80 | def get_preference(qid,query,tools,ref_ans,ans,): 81 | global evaluators 82 | evaluator = random.choice(evaluators) 83 | ret = evaluator.annotate_preference( 84 | query, 85 | tools, 86 | [ref_ans,ans]) 87 | return qid,ret 88 | def get_most_preferred(d:list)->np.ndarray: 89 | if np.iterable(d): 90 | d = np.asanyarray(d) 91 | bins = np.bincount(d) 92 | max_val = np.max(bins) 93 | argmax = np.where(max_val==bins)[0] 94 | return argmax 95 | else: 96 | return np.asarray([d]) 97 | 98 | print('Evaluating...') 99 | prefer_dict = {} 100 | with ThreadPoolExecutor(args.max_eval_threads) as pool: 101 | future = [] 102 | for qid in ref_output.keys(): 103 | try: 104 | future.append(pool.submit( 105 | get_preference, 106 | qid, 107 | ref_output[qid]['query'], 108 | ref_output[qid]['available_tools'], 109 | ref_output[qid]['answer'], 110 | results[qid]['answer'] 111 | )) 112 | except KeyError as e: 113 | print('Warning : Missing answer for query {} in answer file! 
'.format(e)) 114 | 115 | for thd in tqdm(as_completed(future),total=len(future),ncols=100): 116 | qid,preference = thd.result() 117 | prefer_dict[qid] = get_most_preferred(preference)[0] 118 | 119 | prefer = list(prefer_dict.values()) 120 | 121 | prefer = np.array(prefer) 122 | df = pd.DataFrame.from_dict([{ 123 | 'Method':args.method, 124 | 'Win Rate':prefer.mean(), 125 | 'Std Error':np.std(prefer)/np.sqrt(len(prefer)) 126 | }]) 127 | print('###### Leaderboard vs {} ######'.format(args.ref_method)) 128 | print(df) 129 | save_file = os.path.join(abs_dir,'results',args.evalset,args.method) 130 | os.makedirs(save_file,exist_ok=True) 131 | df.to_csv(os.path.join(save_file,'win.csv')) 132 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/convert_answers.py: -------------------------------------------------------------------------------- 1 | from convert_to_answer_format import process_invalid_data,process_valid_data 2 | import json 3 | from glob import glob 4 | import os 5 | 6 | save_dir = 'path/to/save/dir' 7 | 8 | groups_dirs = ['path/to/dataset/eval/result/folders'] 9 | 10 | for groups_dir in groups_dirs: 11 | method = os.path.split(groups_dir)[1] 12 | print(method) 13 | groups_save_dir = os.path.join(save_dir,method) 14 | os.makedirs(groups_save_dir,exist_ok=True) 15 | groups = [os.path.split(g)[1] for g in glob(groups_dir+'/*')] 16 | full_answer = {} 17 | for g in groups: 18 | print(g) 19 | answer_dict = {} 20 | files = glob(os.path.join(groups_dir,g,'*.json')) 21 | for file in files: 22 | qid = os.path.split(file)[1].split('_')[0] 23 | try: 24 | data = json.load(open(file)) 25 | except: 26 | print('Read error: ',file) 27 | continue 28 | if not data['answer_generation']['valid_data']: 29 | answer_dict[qid] = process_invalid_data(method,data) 30 | else: 31 | answer_dict[qid] = process_valid_data(method,data['answer_generation']) 32 | json.dump(answer_dict,open(os.path.join(groups_save_dir,f'{g}.json'),'w')) 33 | full_answer.update(answer_dict) 34 | # json.dump(full_answer,open(os.path.join(groups_save_dir,f'fullanswer.json'),'w')) -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuyq18/StepTool/121a6acd11a899d947a5d3fa7269cf76624c1df3/stabletoolbench/toolbench/tooleval/dataset/__init__.py -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/eval_process_reward.py: -------------------------------------------------------------------------------- 1 | from evaluators import load_registered_automatic_evaluator 2 | import os 3 | import json 4 | import csv 5 | from evaluators.registered_cls.rtl import AnswerStatus, TaskStatus, AnswerPass 6 | import random 7 | from concurrent.futures import ThreadPoolExecutor,as_completed 8 | import argparse 9 | from tqdm import tqdm 10 | import numpy as np 11 | from utils import test_sets, get_steps 12 | import backoff 13 | 14 | abs_dir = os.path.split(__file__)[0] 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--converted_answer_path', type=str, default="", required=True, help='converted answer path') 19 | parser.add_argument('--save_path', type=str, default="", required=False, help='result save path') 20 | parser.add_argument('--reference_model', type=str, default="", required=False, 
help='model predictions path') 21 | parser.add_argument('--reference_path', type=str, default=None, required=False, help='reference path') 22 | parser.add_argument('--test_ids', type=str, default="", required=True, help='model predictions path') 23 | parser.add_argument('--task_num', type=int, default=None, required=False, help='task num') 24 | parser.add_argument('--evaluator', type=str, default="tooleval_gpt-3.5-turbo_default", required=False, help='which evaluator to use.') 25 | parser.add_argument('--max_eval_threads', type=int, default=30, required=False, help='max threads nums') 26 | parser.add_argument('--evaluate_times', type=int, default=4, required=False, help='how many times to predict with the evaluator for each solution path.') 27 | parser.add_argument('--test_set', nargs='+', default=['G1_instruction'], help='test set name') 28 | parser.add_argument('--overwrite', action='store_true', help='whether to overwrite the existing result file') 29 | return parser.parse_args() 30 | 31 | if __name__ == "__main__": 32 | args = parse_args() 33 | evaluators = [load_registered_automatic_evaluator(evaluator_name=args.evaluator, evaluators_cfg_path=os.path.join(abs_dir,'evaluators')) for _ in range(args.max_eval_threads)] 34 | 35 | @backoff.on_exception(backoff.expo, Exception, max_time=15) 36 | def compute_process_reward(query_id, example, evaluate_time): 37 | global evaluators 38 | evaluator = random.choice(evaluators) 39 | answer_steps, answer_steps_list, final_step = get_steps(example) 40 | 41 | succeed_tool_calling_list, contributions, answer_status = evaluator.evaluate_process_reward( 42 | { 43 | 'query':example['query'], 44 | 'available_tools':example['available_tools'], 45 | }, 46 | answer_steps_list[:-1], 47 | example['answer'], 48 | ) 49 | process_reward = { 50 | "succeed_tool_calling": succeed_tool_calling_list, 51 | "contributions": contributions, 52 | } 53 | return query_id, process_reward, answer_status, evaluate_time 54 | 55 | reference_model = args.reference_model 56 | output_list = [] 57 | 58 | for test_set in args.test_set: 59 | 60 | save_file = f"{args.save_path}/{test_set}.json" 61 | if args.task_num: 62 | save_file = f"{args.save_path}/{test_set}_{args.task_num}.json" 63 | 64 | reference_path = f"{args.converted_answer_path}/{test_set}.json" 65 | reference_examples = json.load(open(reference_path, "r")) 66 | if args.task_num: 67 | reference_examples = {k:reference_examples[k] for k in list(reference_examples.keys())[:args.task_num]} 68 | if os.path.exists(save_file) and not args.overwrite: 69 | old_existed_ids = list(json.load(open(save_file, "r")).keys()) 70 | old_label_cnt = json.load(open(save_file, "r")) 71 | existed_ids = [] 72 | label_cnt = {} 73 | for query_id in old_existed_ids: 74 | ans = old_label_cnt[query_id] 75 | if len(ans['process_reward'].keys()) == args.evaluate_times: 76 | existed_ids.append(query_id) 77 | label_cnt[query_id] = ans 78 | else: 79 | existed_ids = [] 80 | label_cnt = {} 81 | 82 | with ThreadPoolExecutor(args.max_eval_threads) as pool: 83 | future = [] 84 | 85 | for query_id in reference_examples: 86 | if query_id in existed_ids: 87 | continue 88 | for i in range(args.evaluate_times): 89 | example = reference_examples[query_id] 90 | future.append(pool.submit( 91 | compute_process_reward, 92 | query_id, 93 | example, 94 | evaluate_time=i 95 | )) 96 | 97 | for thd in tqdm(as_completed(future),total=len(future),ncols=100): 98 | query_id, process_reward, is_solved, evaluate_time = thd.result() 99 | example = reference_examples[query_id] 100 | 
query = example["query"] 101 | tool_names = [] 102 | for tool_dict in example["available_tools"]: 103 | tool_name = tool_dict["function"]["name"] 104 | tool_names.append(tool_name) 105 | answer_steps, answer_steps_list, final_step = get_steps(example) 106 | if query_id not in label_cnt: 107 | label_cnt[query_id] = {} 108 | label_cnt[query_id]["query"] = query 109 | label_cnt[query_id]["tool_names"] = tool_names 110 | label_cnt[query_id]["answer_steps"] = answer_steps_list[:-1] 111 | # label_cnt[query_id]["mid_steps_reward"] = mid_steps_reward # parsed 112 | if 'process_reward' not in label_cnt[query_id]: 113 | label_cnt[query_id]["process_reward"] = {} 114 | label_cnt[query_id]["process_reward"][evaluate_time] = process_reward 115 | label_cnt[query_id]["final_step"] = final_step 116 | 117 | if 'is_solved' not in label_cnt[query_id]: 118 | label_cnt[query_id]["is_solved"] = {} 119 | label_cnt[query_id]["is_solved"][evaluate_time] = str(is_solved) 120 | # print("========== Finish and Dump into json file===========", query_id, is_solved, evaluate_time) 121 | 122 | json.dump(label_cnt, open(save_file, "w"), ensure_ascii=False, indent=4) 123 | 124 | json.dump(label_cnt, open(save_file, "w"), ensure_ascii=False, indent=4) 125 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .usereval import UserEvaluation 2 | from .methodcls import BaseToolMethod 3 | from .dataclass import ExecutionGraph,ExecutionNode,DirectedEdge -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluation/methodcls.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List,Callable 2 | 3 | class BaseToolMethod: 4 | def __init__(self): 5 | pass 6 | def convert_result_to_dict(self,result): 7 | '''Return Format 8 | -------- 9 | { 10 | 'method': 'method name', 11 | 'total_steps': int, 12 | 'final_answer': 'answer', 13 | 'answer_details': [{ 14 | "role": "system", 15 | "message": "", 16 | "next": [ 17 | { 18 | "role": "user", 19 | "message": "I am planning ...", 20 | "next": [ 21 | { 22 | "role": "tool", 23 | "message": "{'name': 'Finish', 'arguments': '{\\n \"return_type\": \"give_answer\",\\n \"final_answer\": \"I encountere...", 24 | "next": [] 25 | } 26 | ] 27 | } 28 | ] 29 | }] 30 | } 31 | 32 | ''' 33 | pass 34 | def forward(self,query:str,tools:List[Dict],tool_func:Callable)->Dict: 35 | pass 36 | 37 | def __call__(self,query:str,tools:List[Dict],tool_func:Callable)->Dict: 38 | result = self.forward(query,tools,tool_func) 39 | return self.convert_result_to_dict(result) 40 | 41 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluation/usereval.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from tqdm import tqdm 3 | from typing import Union, Dict, List, Optional,Tuple 4 | from .methodcls import BaseToolMethod 5 | from .dataclass import * 6 | import json 7 | 8 | class UserEvaluation: 9 | def __init__(self, 10 | method:BaseToolMethod, 11 | eval_server_addr='http://localhost:8000', 12 | evalset='eval20230718'): 13 | self.eval_server_addr = eval_server_addr 14 | self.evalset = evalset 15 | self.method = method 16 | res = requests.post(self.eval_server_addr+'/neweval',json=self.evalset) 17 | if 
res.status_code != 200: 18 | raise Exception('Failed to obtain new evaluation id! Error: '+res.text) 19 | ret = res.json() 20 | self.eval_id = ret['evaluation_id'] 21 | self.len = ret['len'] 22 | 23 | def get_new_question(self)->Tuple[str,List]: 24 | res = requests.post(self.eval_server_addr+'/next_question',json=self.eval_id) 25 | if res.status_code == 204: 26 | raise EvalCompleted() 27 | if res.status_code != 200: 28 | raise Exception('Failed to obtain new question!') 29 | 30 | self.question = Question(**res.json()) 31 | self.tool_name_to_id = {} 32 | tools = [tool.model_dump() for tool in self.question.available_tools] 33 | for tool in tools: 34 | self.tool_name_to_id[tool['name']] = tool.pop('tid') 35 | 36 | 37 | return self.question.query,tools 38 | def tool_func(self,tool_name:str,tool_args:str)->requests.Response: 39 | tid = self.tool_name_to_id[tool_name] 40 | # res = requests.post(self.eval_server_addr+'/api',json={ 41 | # 'evaluation_id':self.eval_id, 42 | # 'tool_id':tid, 43 | # 'tool_args':tool_args 44 | # }) 45 | res = requests.post(self.eval_server_addr+'/rapidapi',json={ 46 | 'evaluation_id':self.eval_id, 47 | 'tool_id':tid, 48 | 'tool_args':tool_args 49 | }) 50 | 51 | return res 52 | def _forward(self,query:str,tools:List[Dict])->Dict: 53 | method_ret = self.method(query,tools,self.tool_func) 54 | 55 | return self.question.qid,{ 56 | 'query':query, 57 | 'available_tools':tools, 58 | 'answer':method_ret 59 | } 60 | 61 | 62 | def run(self)->Dict: 63 | results = {} 64 | for _ in tqdm(range(self.len),ncols=100): 65 | try: 66 | qid,ret = self._forward(*self.get_new_question()) 67 | except EvalCompleted: 68 | return results 69 | results[qid] = ret 70 | return results 71 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | from .registered_cls import BaseEvaluator,register_evaluator,get_evaluator_cls 2 | 3 | __all__=['register_evaluator','get_evaluator_cls','BaseEvaluator','load_registered_automatic_evaluator'] 4 | 5 | 6 | 7 | def load_registered_automatic_evaluator(config:dict={},evaluator_name=None,evaluators_cfg_path=None)->BaseEvaluator: 8 | import os 9 | import yaml 10 | 11 | evaluator_name = config['evaluator'] if evaluator_name is None else evaluator_name 12 | cfg_path = config['evaluators_cfg_path'] if evaluators_cfg_path is None else evaluators_cfg_path 13 | cfg_path = os.path.join(cfg_path,evaluator_name) 14 | 15 | cls_name = yaml.load(open(os.path.join(cfg_path,'config.yaml')),Loader=yaml.FullLoader)['registered_cls_name'] 16 | 17 | evaluator:BaseEvaluator = get_evaluator_cls(cls_name)(cfg_path) 18 | return evaluator -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseEvaluator 2 | from .utils import register_evaluator,get_evaluator_cls 3 | 4 | __all__ = ['register_evaluator','get_evaluator_cls','BaseEvaluator'] 5 | 6 | import os 7 | import importlib 8 | current_dir = os.path.dirname(__file__) 9 | 10 | for item in os.listdir(current_dir): 11 | item_path = os.path.join(current_dir, item) 12 | 13 | if os.path.isfile(item_path) and item != '__init__.py' and item.endswith('.py'): 14 | module_name = item[:-3] 15 | 16 | full_module_path = f"{__name__}.{module_name}" 17 | 18 | imported_module = 
importlib.import_module(full_module_path) 19 | 20 | globals()[module_name] = imported_module 21 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/base.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List, Union, Dict, Any, Callable 3 | import os 4 | import yaml 5 | from .utils import register_evaluator 6 | 7 | def process_answer(answer: Dict): 8 | answer['final_answer'] = answer['final_answer'][:1000] 9 | answer['answer_details'] = answer['answer_details'][:3000] 10 | # breakpoint() 11 | answer.pop('method', None) 12 | return answer 13 | 14 | 15 | def process_tools(tools: List[Dict]): 16 | for tool in tools: 17 | tool.pop('description', None) 18 | tool.pop('parameters', None) 19 | return tools 20 | 21 | @register_evaluator 22 | class BaseEvaluator: 23 | """Base class for evaluators. 24 | 25 | Attributes: 26 | ---------- 27 | fn_completions : Callable[[Dict,List[Dict]],int] 28 | The completion function of the evaluator, used to get annotated results. 29 | This function should take two arguments: `task_description`:Dict and `answers`:List[Dict], return a int stand for the index of best answer. 30 | 31 | Functions: 32 | --------- 33 | annotate_preference : Callable 34 | Annotate and return the index of the preferred answer. 35 | 36 | """ 37 | def __init__(self, 38 | fn_completions: Callable[[Dict,List[Dict]],int] = None, 39 | *args, 40 | **kwargs): 41 | self.fn_completions = fn_completions 42 | def annotate_preference(self, 43 | query: str, 44 | available_tools: List[Dict[Any, Any]], 45 | answers:List[Dict], 46 | multisample=False, 47 | sample_n=4, 48 | task_status=None, 49 | answer_statuss=[None, None]) -> Union[List[int], int]: 50 | """Annotate and return the index of the preferred answer. 51 | 52 | For given query, available tools, and two answers, return the index of the preferred answer by calling function `fn_completions` of the evaluator. 53 | 54 | Parameters: 55 | ---------- 56 | query : str 57 | The query of the task. 58 | available_tools : List[Dict[Any, Any]] 59 | The list of available tools for the task. The specific format of the tool is defined in `tooleval/evaluation/dataclass.py` 60 | answers : List[Dict] 61 | The list of answers for comparison. 62 | multisample : bool, optional 63 | Whether to use multisample to get the preference. If True, the function will return a list of preferences, otherwise return a single preference. 64 | sample_n : int, optional 65 | The number of samples to get the preference. 66 | 67 | Returns: 68 | ------- 69 | preference : Union[List[int], int] 70 | The index of the preferred answer. If `multisample` is True, return a list of preferences, otherwise return a single preference. 
71 | 72 | Raise: 73 | ----- 74 | 75 | """ 76 | answers_processed = [process_answer(ans) for ans in answers] 77 | available_tools = process_tools(available_tools) 78 | 79 | def shuffle_run() -> int: 80 | indexs = list(range(len(answers_processed))) 81 | random.shuffle(indexs) 82 | 83 | answers_projected = [answers_processed[idx] for idx in indexs] 84 | # breakpoint() 85 | preferred_index = self.fn_completions( 86 | { 87 | 'query':query, 88 | 'available_tools':available_tools, 89 | }, 90 | answers_projected, 91 | task_status, 92 | answer_statuss 93 | ) 94 | if preferred_index in indexs: 95 | return indexs.index(preferred_index) 96 | raise ValueError(f'Preferred index {preferred_index} is invalid!') 97 | 98 | if not multisample: 99 | return shuffle_run() 100 | else: 101 | prefers = [shuffle_run() for _ in range(sample_n)] 102 | return prefers 103 | 104 | @register_evaluator 105 | class ToolEvalEvaluator(BaseEvaluator): 106 | """ToolEval common evaluator class. 107 | 108 | Attributes: 109 | ---------- 110 | cfg_path : str 111 | A path store the configuration of the evaluator. 112 | 113 | 114 | """ 115 | def __init__(self, 116 | cfg_path: str = None, 117 | ): 118 | eval_config = yaml.load(open(os.path.join(cfg_path,'config.yaml')),Loader=yaml.FullLoader) 119 | template = open(os.path.join(cfg_path,eval_config['prompt_template'])).read() 120 | 121 | super().__init__( 122 | fn_completions=getattr(self,eval_config['fn_completions']) 123 | ) 124 | self.eval_config = eval_config 125 | self.template = template -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/registered_cls/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from typing import List,Dict 4 | import requests 5 | from tenacity import retry, wait_random_exponential, stop_after_attempt 6 | 7 | from openai import OpenAI 8 | import random 9 | 10 | __registered_evaluators__ = {} 11 | 12 | def register_evaluator(cls): 13 | """ 14 | Decorator function to register classes with the registered_evaluators list. 15 | """ 16 | __registered_evaluators__[cls.__name__] = cls 17 | return cls 18 | 19 | def get_evaluator_cls(clsname): 20 | """ 21 | Return the evaluator class with the given name. 
22 | """ 23 | try: 24 | return __registered_evaluators__.get(clsname) 25 | except: 26 | raise ModuleNotFoundError('Cannot find evaluator class {}'.format(clsname)) 27 | 28 | 29 | class OpenaiPoolRequest: 30 | def __init__(self, pool_json_file=None): 31 | self.pool:List[Dict] = [] 32 | __pool_file = pool_json_file 33 | if os.environ.get('API_POOL_FILE',None) is not None: 34 | __pool_file = os.environ.get('API_POOL_FILE') 35 | self.now_pos = random.randint(-1, len(self.pool)) 36 | if os.path.exists(__pool_file): 37 | self.pool = json.load(open(__pool_file)) 38 | self.now_pos = random.randint(-1, len(self.pool)) 39 | # print(__pool_file) 40 | if os.environ.get('OPENAI_KEY',None) is not None: 41 | self.pool.append({ 42 | 'api_key':os.environ.get('OPENAI_KEY'), 43 | 'api_base':os.environ.get('OPENAI_API_BASE',None), 44 | 'organization':os.environ.get('OPENAI_ORG',None), 45 | 'api_type':os.environ.get('OPENAI_TYPE',None), 46 | 'api_version':os.environ.get('OPENAI_VER',None) 47 | }) 48 | 49 | # @retry(wait=wait_random_exponential(multiplier=1, max=30), stop=stop_after_attempt(10),reraise=True) 50 | def request(self,messages,**kwargs): 51 | self.now_pos = (self.now_pos + 1) % len(self.pool) 52 | key_pos = self.now_pos 53 | item = self.pool[key_pos] 54 | # print(len(self.pool)) 55 | api_key = item['api_key'] 56 | api_base = item.get('api_base', None) 57 | client = OpenAI(api_key=api_key,base_url=api_base) 58 | response = client.chat.completions.create(messages=messages,**kwargs) 59 | return response 60 | 61 | def __call__(self,messages,**kwargs): 62 | return self.request(messages,**kwargs) 63 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/config.yaml: -------------------------------------------------------------------------------- 1 | evaluator_name: "tooleval_gpt-3.5-turbo_default" 2 | registered_cls_name: "ReinforceToolLearningEvaluator" 3 | prompt_template: "template.txt" 4 | fn_completions: "normalized_openai_completions" 5 | apis_json: "your/path/to/api_pool.json" 6 | completions_kwargs: 7 | model: "gpt-3.5-turbo-16k" 8 | max_tokens: 1000 9 | temperature: 0 10 | timeout: 10 11 | functions: 12 | - name: "evaluate_process_reward" 13 | description: "Evaluate the entire task-solving process, including tool calls, the contribution of each intermediate step to the final answer, and the status of the final answer." 14 | parameters: 15 | type: "object" 16 | properties: 17 | succeed_tool_calling: 18 | type: "array" 19 | description: "Provide a binary score (0 or 1) indicating whether **each intermediate step** successfully called the tool." 20 | items: 21 | type: "number" 22 | description: "0 for unsuccessful tool calls, 1 for successful tool calls" 23 | contribution_to_final_answer: 24 | type: "array" 25 | description: "Provide a score (0 to 5) to assess how much **each intermediate step** contributed to the final answer." 26 | items: 27 | type: "number" 28 | description: "0 indicates no contribution, and 5 indicates maximum contribution." 29 | final_answer_status: 30 | type: "string" 31 | enum: ["Unsure", "Unsolved", "Solved"] 32 | description: "Indicate the status of the final answer. Choose from: 'Unsure', 'Unsolved', or 'Solved'." 
33 | required: ["succeed_tool_calling", "contribution_to_final_answer", "final_answer_status"] 34 | 35 | - name: "check_answer_status" 36 | description: "Parse the json answer with layerd nodes and return the answer_status about the answer" 37 | parameters: 38 | type: "object" 39 | properties: 40 | answer_status: 41 | type: "string" 42 | enum: ["Unsure","Unsolved","Solved"] 43 | required: ["answer_status"] 44 | - name: "parse_answer_status" 45 | description: "Parse the json answer with layerd nodes and return the answer_status about the answer" 46 | parameters: 47 | type: "object" 48 | properties: 49 | answer_status: 50 | type: "string" 51 | enum: ["Unsure","Unsolved","Solved"] 52 | required: ["answer_status"] 53 | - name: "check_task_solvable" 54 | description: "Parse the task description and return the task_status about the task" 55 | parameters: 56 | type: "object" 57 | properties: 58 | task_status: 59 | type: "string" 60 | enum: ["Unsure","Unsolvable","Solvable"] 61 | required: ["task_status"] 62 | - name: "select_better_answer" 63 | description: "Select the better answer with a comprehensive investigation on given aspects. You should ignore the impact of the order of candidate answers." 64 | parameters: 65 | type: "object" 66 | properties: 67 | index: 68 | type: "number" 69 | description: "The `index` value in the selected better answer." 70 | required: ["index"] 71 | fn_completion_parser: "index_parser" 72 | batch_size: 1 73 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/template.txt: -------------------------------------------------------------------------------- 1 | 2 | evaluate_process_reward 3 | 4 | Query: 5 | {query} 6 | 7 | Intermediate Steps: 8 | {mid_steps} 9 | 10 | Final Answer: 11 | {final_answer} 12 | 13 | Based on the query, intermediate steps, and final answer, evaluate the entire task-solving process using the following criteria: 14 | 15 | 1. **Successful Tool Calling**: For each intermediate step, indicate whether a tool was successfully called, with a score of 0 (no) or 1 (yes). 16 | 2. **Contribution to Final Answer**: Rate the contribution of each intermediate step to the final answer on a scale of 0 to 5. 17 | 3. **Final Answer Status**: Determine the final answer status as 'Solved', 'Unsure', or 'Unsolved'. 18 | 19 | Please call the `evaluate_process_reward` function to return your evaluation. 20 | 21 | 22 | 23 | 24 | 25 | check_answer_status 26 | 27 | Giving the query and answer, you need give `answer_status` of the answer by following rules: 28 | 1. If the answer is a sorry message or not a positive/straight response for the given query, return "Unsolved". 29 | 2. If the answer is a positive/straight response for the given query, you have to further check. 30 | 2.1 If the answer is not sufficient to determine whether the solve the query or not, return "Unsure". 31 | 2.2 If you are confident that the answer is sufficient to determine whether the solve the query or not, return "Solved" or "Unsolved". 32 | 33 | Query: 34 | {query} 35 | Answer: 36 | {answer} 37 | 38 | Now give your reason in "content" and `answer_status` of JSON to `check_answer_status`. 39 | 40 | 41 | 42 | 43 | parse_answer_status 44 | 45 | Giving the query and the correspond execution detail of an answer, you need give `answer_status` of the answer by following rules: 46 | 1. If all 'tool' nodes' message indicate that there are errors happened, return "Unsolved" 47 | 2. 
If you find the information in the "final_answer" is not true/valid according to the messages in 'tool' nodes, return "Unsolved" 48 | 3. If you are unable to verify the authenticity and validity of the information, return "Unsure" 49 | 4. If there are 'tool' node in the chain contains successful func calling and those calling indeed solve the query, return "Solved" 50 | 51 | Query: 52 | {query} 53 | Answer: 54 | {answer} 55 | 56 | Now you are requested to give reason in "content" and `answer_status` of JSON to `parse_answer_status`. 57 | 58 | 59 | 60 | 61 | check_task_solvable 62 | 63 | Please check whether the given task solvable with following rules: 64 | 1. If the `query` provide invalid information (e.g. invalid email address or phone number), return "Unsolvable" 65 | 2. If the `query` needs more information to solve (e.g. the target restaurant name in a navigation task), return "Unsolvable" 66 | 3. If you are unable to draw a conclusion, return "Unsure" 67 | 4. If the currently `available_tools` are enough to solve the query, return "Solvable" 68 | 69 | Task: 70 | {task} 71 | 72 | Now give your reason in "content" and `task_status` of JSON to `check_task_solvable`. 73 | 74 | 75 | 76 | 77 | 78 | 79 | select_better_answer 80 | 81 | Query: 82 | {query} 83 | 84 | Answer_0: 85 | {answer_0} 86 | 87 | Answer_1: 88 | {answer_1} 89 | 90 | Given above query and answers in JSON format, you must follow the rules to select the relatively better answer and give the index of the answer **(0 for Answer_0, 1 for Answer_1)**: 91 | 1. Compare the value of "final_answer" in following aspects: 92 | - Informative: whether it contains all necessary information to reply to the query. 93 | - Factuality: whether it accurately describes what has been done, and what failed in the end. 94 | - Reasoning: If answer does not solve the query, whether gives a detailed and accurate reason for failure. 95 | 2. If you cannot determine yet, compare the value of "answer_details" in following aspects: 96 | - Tool calling costs: calculating the percentage of failed and replicated tools calling. 97 | - Running costs: calculating the total tokens T used in execution. 98 | - Milestone: calculating the milestone(fixed subtasks) reached in execution. 99 | - Exploration: whether tries potential useful tools in execution. Just count times of successful tool calling with different tools/arguments in execution. 100 | 101 | If you have made your decision, calling `select_better_answer`, else if you cannot determine, select a random answer. 102 | 103 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/config.yaml: -------------------------------------------------------------------------------- 1 | evaluator_name: "tooleval_gpt-3.5-turbo_fn" 2 | registered_cls_name: "OpenAIEvaluator" 3 | prompt_template: "template.txt" 4 | fn_completions: "openai_completions" 5 | apis_json: "your/path/to/api_pool.json" 6 | completions_kwargs: 7 | model: "gpt-3.5-turbo-16k" 8 | max_tokens: 100 9 | temperature: 0 10 | timeout: 10 11 | function_call: 12 | name: "choose_preference" 13 | functions: 14 | - name: "choose_preference" 15 | description: "Choose the preferred answer for the query within all given answers." 16 | parameters: 17 | type: "object" 18 | properties: 19 | preference: 20 | type: "number" 21 | description: "The index of the preferred answer in all given answers." 
22 | required: [ "preference" ] 23 | fn_completion_parser: "index_parser" 24 | batch_size: 1 25 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/template.txt: -------------------------------------------------------------------------------- 1 | 2 | system 3 | You are a helpful annotator, that help user to annotate data. 4 | 5 | 6 | user 7 | Giving task description and candidate answers, I want you to choose one preferred answer based on the rules. To do so, I will give you the task description that given to the models, and the candidate answers in a list for chosen. To choose the one preferred answer, you need to first analyse answers based on rules, then give the index number of the preferred answer of JSON to `choose_preference`. 8 | 9 | Here are the preference rules: 10 | 1. if both answers give the none empty `final_answer`, check whether the given `final_answer` solves the given query. 11 | 1.1 if both answers solve the query, choose one with smaller `total_steps`. 12 | 1.1.1 if `total_steps` are same, choose one answer with better `final_answer` quality. 13 | 1.2 if one answer solve while the other not, chose the answer that solve query. 14 | 1.3 if both answers failed, check the `answer_details` to choose one with considering following preference: 15 | 1.3.1 check `response` and prefer more successful tool calling. 16 | 1.3.2 check `name` and prefer using more various tool usage. 17 | 1.3.3 prefer smaller `total_steps`. 18 | 2. if one give none empty `final_answer` while other not, choose the one give `final_answer`. 19 | 3. if both failed to give none empty `final_answer`, following 1.3 to choose one with better `answer_details`. 20 | 21 | Here is the task description in JSON format: 22 | {task_description} 23 | 24 | Here are the candidate answers in JSON format: 25 | {answers} 26 | 27 | Now choose the preferred answer by analysing results and the rules given, return the index in range [0,1]. 
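The config.yaml and template.txt above define the `tooleval_gpt-3.5-turbo_fn` evaluator: the config forces the annotator model to answer through the `choose_preference` function, and the template carries the comparison rules. As a rough illustration of that round trip (a hedged sketch, not the repository's `OpenAIEvaluator` or `index_parser` code, which is not reproduced in this listing), assuming `OPENAI_API_KEY` is set and `filled_prompt` is the template with `{task_description}` and `{answers}` already substituted:

import json
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def annotate_once(filled_prompt: str) -> int:
    """Ask gpt-3.5-turbo-16k to pick an answer via `choose_preference`
    and return the index it chose (0 for Answer_0, 1 for Answer_1)."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",   # same model as completions_kwargs above
        temperature=0,
        max_tokens=100,
        messages=[{"role": "user", "content": filled_prompt}],
        functions=[{
            "name": "choose_preference",
            "description": "Choose the preferred answer for the query "
                           "within all given answers.",
            "parameters": {
                "type": "object",
                "properties": {"preference": {"type": "number"}},
                "required": ["preference"],
            },
        }],
        function_call={"name": "choose_preference"},  # force the function call
    )
    arguments = response.choices[0].message.function_call.arguments
    return int(json.loads(arguments)["preference"])

The real evaluator also splits the template into system and user messages and retries on parse failures; the sketch collapses that into a single user message for brevity.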
28 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/config.yaml: -------------------------------------------------------------------------------- 1 | evaluator_name: "tooleval_gpt-3.5-turbo_normalized" 2 | registered_cls_name: "OpenAINormalizedEvaluator" 3 | prompt_template: "template.txt" 4 | fn_completions: "normalized_openai_completions" 5 | apis_json: "your/path/to/api_pool.json" 6 | completions_kwargs: 7 | model: "gpt-3.5-turbo-16k" 8 | max_tokens: 100 9 | temperature: 0 10 | timeout: 10 11 | functions: 12 | - name: "parse_answer_details" 13 | description: "Parse the json answer with layerd nodes and return the informations about the answer" 14 | parameters: 15 | type: "object" 16 | properties: 17 | succeed_tool_calling: 18 | type: "number" 19 | description: "Give the number of times that the 'tool' nodes' message is called successfully without any errors in the response" 20 | used_tool_types: 21 | type: "number" 22 | description: "Give the number of different 'name' in 'tool' nodes' message" 23 | required: [ "succeed_tool_calling", "used_tool_types"] 24 | - name: "select_best_final_answer" 25 | description: "For given query, select the best answer in answers list and return the index of the best answer" 26 | parameters: 27 | type: "object" 28 | properties: 29 | best_answer_index: 30 | type: "number" 31 | description: "The index of the best answer in the answer list, start from 0" 32 | required: [ "best_answer_index"] 33 | - name: "check_solve_query" 34 | description: "Check whether the given answer solve the given query, return true or false" 35 | parameters: 36 | type: "object" 37 | properties: 38 | is_solved: 39 | type: "boolean" 40 | description: "true if solved and false if not" 41 | required: ["is_solved"] 42 | fn_completion_parser: "index_parser" 43 | batch_size: 1 44 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/template.txt: -------------------------------------------------------------------------------- 1 | 2 | parse_answer_details 3 | 4 | Giving answer details in the following JSON format: 5 | {answer_details} 6 | 7 | I want you to parse the answer details and give the information of JSON to `parse_answer_details`. Now parse the answer. 8 | 9 | 10 | 11 | select_best_final_answer 12 | 13 | For query {query}, you have the following answers in JSON format: 14 | {final_answers} 15 | 16 | I want you to select the best answer from the above answers and give the index of the answer of JSON to `select_best_final_answer`. Now select the best answer. 17 | 18 | 19 | 20 | check_solve_query 21 | 22 | Please check whether the answer solve the query or not. 23 | Query: 24 | {query} 25 | 26 | Answer: 27 | {final_answer} 28 | 29 | Now give your judgment of JSON to `check_solve_query`, remember do not be too strict. 
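The `{answer_details}`, `{final_answers}` and `{final_answer}` placeholders in this template receive answers that `BaseEvaluator.annotate_preference` (in `registered_cls/base.py`, shown earlier in this listing) has already passed through `process_answer()`. A short, self-contained usage example of that pre-processing step; the answer dict is made up, and the import assumes the snippet is run from the `tooleval/` directory with requirements.txt installed:

# Run from stabletoolbench/toolbench/tooleval/ so the package imports resolve.
from evaluators.registered_cls.base import process_answer

example_answer = {
    "method": "steptool_cot",          # process_answer() removes this field
    "total_steps": 3,
    "final_answer": "x" * 5000,        # truncated to 1000 characters
    "answer_details": "[{'role': 'system', 'message': '', 'next': []}]",
}

processed = process_answer(example_answer)
assert "method" not in processed
assert len(processed["final_answer"]) == 1000
print(processed.keys())  # dict_keys(['total_steps', 'final_answer', 'answer_details'])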
30 | 31 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/evaluators_comparison.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | from concurrent.futures import ThreadPoolExecutor,as_completed 4 | from tqdm import tqdm 5 | from evaluators import load_registered_automatic_evaluator 6 | import os 7 | import numpy as np 8 | import copy 9 | from typing import List 10 | from scipy.stats import pearsonr,spearmanr 11 | import random 12 | random.seed(42) 13 | 14 | abs_dir = os.path.split(__file__)[0] 15 | annotated_data = json.load(open(os.path.join(abs_dir,'dataset/human_cross_annotated_data.json'))) 16 | NUM_WORKERS=16 17 | 18 | def get_most_preferred(d:list)->np.ndarray: 19 | if np.iterable(d): 20 | d = np.asanyarray(d) 21 | bins = np.bincount(d) 22 | max_val = np.max(bins) 23 | argmax = np.where(max_val==bins)[0] 24 | return argmax 25 | else: 26 | return np.asarray([d]) 27 | 28 | def agreement_score(x,ref:list)->float: 29 | majority_x = get_most_preferred(x) 30 | majority_ref = get_most_preferred(ref) 31 | score_unit = 1/len(majority_x)/len(majority_ref) 32 | score = 0.0 33 | for x in majority_x: 34 | if x in majority_ref: 35 | score += score_unit 36 | return score 37 | def get_correlation(x,y): 38 | x= np.asarray(x) 39 | y = np.asarray(y) 40 | x = x+1 41 | y = y+1 42 | if np.var(x)==0 or np.var(y)==0: 43 | return float(random.choice(get_most_preferred(x))==random.choice(get_most_preferred(y))) 44 | return pearsonr(x,y)[0] 45 | 46 | def test_on_annotated_data(evaluator_cfg)->List[List[int]]: 47 | evaluators = [load_registered_automatic_evaluator(evaluator_cfg) for _ in range(NUM_WORKERS)] 48 | def get_preference(idx): 49 | data = annotated_data[idx] 50 | def process_tools(tools:list): 51 | for tool in tools: 52 | tool.pop('description',None) 53 | tool.pop('parameters',None) 54 | return tools 55 | 56 | tools = process_tools(data['available_tools']) 57 | ret = evaluators[idx%NUM_WORKERS].annotate_preference( 58 | data['query'], 59 | tools, 60 | data['answers'],multisample=True) 61 | return idx,ret 62 | prefer_dict = {} 63 | with ThreadPoolExecutor(NUM_WORKERS) as pool: 64 | # future = [pool.submit(get_preference,idx) for idx in range(100)] 65 | future = [pool.submit(get_preference,idx) for idx in range(len(annotated_data))] 66 | for thd in tqdm(as_completed(future),total=len(future),ncols=100): 67 | if thd.exception() is not None: 68 | pool.shutdown(cancel_futures=True) 69 | raise thd.exception() 70 | exit(-1) 71 | idx,preference = thd.result() 72 | prefer_dict[idx] = preference 73 | prefer = [prefer_dict[idx] for idx in range(len(future))] 74 | return prefer 75 | 76 | def get_popped_and_rest(d:list,index:int): 77 | l = copy.deepcopy(d) 78 | popped = l.pop(index) 79 | return popped,l 80 | 81 | def calculate_human_performance(): 82 | human_agreement = [] 83 | variance = [] 84 | for data in annotated_data: 85 | agreement_scores = [ 86 | agreement_score(*get_popped_and_rest(data['preference'],idx)) 87 | for idx in range(len(data['preference'])) 88 | ] 89 | human_agreement.append(np.mean(agreement_scores)) 90 | variance.append(np.var([1-agreement_scores[idx] for idx in range(len(agreement_scores))])) 91 | 92 | 93 | return { 94 | 'human_agreement':np.mean(human_agreement), 95 | 'bias':0, 96 | 'variance':np.mean(variance) 97 | } 98 | 99 | 100 | 101 | def calculate_evaluator_performance(evaluator_preference,human_preference): 102 | human_agreement = [] 103 
| bias = [] 104 | variance = [] 105 | assert len(evaluator_preference)==len(human_preference),'length of evaluator_preference and human_preference should be the same!' 106 | correlation = [] 107 | for idx in range(len(evaluator_preference)): 108 | human_pref = human_preference[idx] 109 | evaluator_pref = evaluator_preference[idx] 110 | 111 | human_agreement.append([ 112 | agreement_score(pref,human_pref) for pref in evaluator_pref 113 | ]) 114 | bias.append( 115 | 1 - agreement_score(human_pref,evaluator_pref) 116 | ) 117 | variance.append( 118 | np.var([1-score for score in human_agreement[-1]]) 119 | ) 120 | correlation.append(get_correlation(human_pref,evaluator_pref)) 121 | 122 | return{ 123 | 'correlation': np.mean(correlation), 124 | 'human_agreement':np.mean(np.mean(human_agreement,axis=1)), 125 | 'bias':np.mean(bias), 126 | 'variance':np.mean(variance) 127 | } 128 | 129 | if __name__=='__main__': 130 | evaluators = ['tooleval_gpt-3.5-turbo_normalized',] 131 | human_perference = [ 132 | data['preference'] for data in annotated_data 133 | ] 134 | 135 | evaluator_performance = [calculate_human_performance()] 136 | for evaluator in evaluators: 137 | if not os.path.exists(os.path.join(abs_dir,'dataset',f'performance_{evaluator}.npy')): 138 | evaluator_cfg = { 139 | 'evaluators_cfg_path':os.path.join(abs_dir,'evaluators'), 140 | 'evaluator':evaluator 141 | } 142 | evaluator_perference = test_on_annotated_data(evaluator_cfg) 143 | np.save(os.path.join(abs_dir,'dataset',f'performance_{evaluator}.npy'),evaluator_perference) 144 | 145 | evaluator_perference = np.load(os.path.join(abs_dir,'dataset',f'performance_{evaluator}.npy'),allow_pickle=True) 146 | performance = calculate_evaluator_performance(evaluator_perference,human_perference) 147 | print(performance) 148 | evaluator_performance.append(performance) 149 | 150 | df = pd.DataFrame(evaluator_performance,index=['human']+evaluators) 151 | df.to_csv(os.path.join(abs_dir,'dataset','evaluator_performance.csv')) 152 | print(df) -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy 3 | pandas 4 | pydantic 5 | tenacity 6 | openai 7 | pyyaml -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/default_evalset/DFS/win.csv: -------------------------------------------------------------------------------- 1 | ,Method,Win Rate,Std Error 2 | 0,DFS,, 3 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv: -------------------------------------------------------------------------------- 1 | Method,WinRate,G1_instruction_WinRate,G1_tool_WinRate,G1_category_WinRate,G2_instruction_WinRate,G2_category_WinRate,G3_instruction_WinRate 2 | GPT4-DFSDT,70.4,60,71.5,67,79.5,77.5,71 3 | GPT4-ReACT,64.4,53.5,50,53.5,67,72,47 4 | ChatGPT-DFSDT,64.3,54.5,65,60.5,75,71.5,62 5 | ToolLLaMA-DFSDT-Retriever,63.1,64,64,60.5,81.5,68.5,65 6 | ToolLLaMA-DFSDT,60,57,61,62,77,77,66 7 | ChatGPT-ReACT,50,41.5,44,44.5,42.5,46.5,22 8 | Text-Davinci-003-DFSDT,46.3,43.5,44,46,37,42,46 9 | Claude-2-DFSDT,43.5,20.5,31,18.5,17,20.5,28 10 | Claude-2-ReACT,34.4,5.5,3.5,5.5,6,6,14 11 | Text-Davinci-003-ReACT,33.2,12,20,20,8.5,14.5,24 
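The `win.csv` files under `results/` hold the Method, Win Rate and Std Error columns written by `automatic_eval_sample.py` (earlier in this listing); the two summary columns are simply the mean of the per-query preference votes and the standard error of that mean. A minimal re-computation with made-up votes:

import numpy as np
import pandas as pd

# Hypothetical per-query outcomes: 1 = the evaluated method was preferred
# over the reference answer, 0 = the reference was preferred.
prefer = np.array([1, 0, 1, 1, 0, 1, 1, 1])

df = pd.DataFrame.from_dict([{
    "Method": "my_method",                               # made-up name
    "Win Rate": prefer.mean(),                           # 0.75 for these votes
    "Std Error": np.std(prefer) / np.sqrt(len(prefer)),  # about 0.153
}])
print(df)  # same three columns as win.csv above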
-------------------------------------------------------------------------------- /stabletoolbench/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv: -------------------------------------------------------------------------------- 1 | Method,WinRate,StdError,G1_tool_WinRate,G2_instruction_WinRate,G1_category_WinRate,G1_instruction_WinRate,G2_category_WinRate,G3_instruction_WinRate,G1_tool_StdError,G2_instruction_StdError,G1_category_StdError,G1_instruction_StdError,G2_category_StdError,G3_instruction_StdError 2 | llama-65B-finetuned-5k_CoT,0.675,0.0191213231759729,0.55,0.74,0.55,0.67,0.8,0.74,0.049749371855331,0.0438634243989226,0.049749371855331,0.0470212717820349,0.04,0.0438634243989226 3 | llama-65B-finetuned-1k_CoT,0.666110183639399,0.0192690903060015,0.49,0.696969696969697,0.53,0.66,0.86,0.76,0.0499899989997999,0.0461883428464987,0.0499099188538711,0.047370877129308,0.0346987031457949,0.0427083130081252 4 | llama-65B-finetuned-300_CoT,0.5383333333333333,0.0203523362932267,0.41,0.66,0.43,0.51,0.65,0.57,0.0491833305094317,0.047370877129308,0.0495075751779462,0.0499899989997999,0.0476969600708472,0.0495075751779462 5 | gpt-3.5-turbo_CoT,0.5,0.0,0.5,0.5,0.5,0.5,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0 6 | -------------------------------------------------------------------------------- /stabletoolbench/toolbench/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import torch 4 | import transformers 5 | import transformers.models.llama.modeling_llama 6 | from functools import partial 7 | 8 | 9 | def process_system_message(system_message, functions): 10 | assert "with a function call to actually excute your step." in system_message 11 | # we find that following ReACT format and merging the thought node and function call node is easier for model to learn to integrate the action input json string in its prediction than learn to predict a json string directly. 12 | system_message = system_message.replace("with a function call to actually excute your step.", "with a function call to actually excute your step. Your output should follow this format:\nThought:\nAction\nAction Input:\n") 13 | # add all the function dicts in the prompt. 
14 | system_message = system_message + "\nSpecifically, you have access to the following APIs: " + str(functions) 15 | return system_message 16 | 17 | def get_gpu_memory(max_gpus=None): 18 | """Get available memory for each GPU.""" 19 | gpu_memory = [] 20 | num_gpus = ( 21 | torch.cuda.device_count() 22 | if max_gpus is None 23 | else min(max_gpus, torch.cuda.device_count()) 24 | ) 25 | 26 | for gpu_id in range(num_gpus): 27 | with torch.cuda.device(gpu_id): 28 | device = torch.cuda.current_device() 29 | gpu_properties = torch.cuda.get_device_properties(device) 30 | total_memory = gpu_properties.total_memory / (1024**3) 31 | allocated_memory = torch.cuda.memory_allocated() / (1024**3) 32 | available_memory = total_memory - allocated_memory 33 | gpu_memory.append(available_memory) 34 | return gpu_memory 35 | 36 | 37 | def standardize_category(category): 38 | save_category = category.replace(" ", "_").replace(",", "_").replace("/", "_") 39 | while " " in save_category or "," in save_category: 40 | save_category = save_category.replace(" ", "_").replace(",", "_") 41 | save_category = save_category.replace("__", "_") 42 | return save_category 43 | 44 | def standardize(string): 45 | res = re.compile("[^\\u4e00-\\u9fa5^a-z^A-Z^0-9^_]") 46 | string = res.sub("_", string) 47 | string = re.sub(r"(_)\1+","_", string).lower() 48 | while True: 49 | if len(string) == 0: 50 | return string 51 | if string[0] == "_": 52 | string = string[1:] 53 | else: 54 | break 55 | while True: 56 | if len(string) == 0: 57 | return string 58 | if string[-1] == "_": 59 | string = string[:-1] 60 | else: 61 | break 62 | if string[0].isdigit(): 63 | string = "get_" + string 64 | return string 65 | 66 | def change_name(name): 67 | change_list = ["from", "class", "return", "false", "true", "id", "and"] 68 | if name in change_list: 69 | name = "is_" + name 70 | return name 71 | 72 | # code adapted from https://huggingface.co/kaiokendev/superhot-13b-8k-no-rlhf-test/blob/main/llama_rope_scaled_monkey_patch.py 73 | class CondenseRotaryEmbedding(torch.nn.Module): 74 | def __init__(self, dim, ratio, max_position_embeddings=2048, base=10000, device=None): 75 | super().__init__() 76 | inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float().to(device) / dim)) 77 | self.register_buffer("inv_freq", inv_freq) 78 | 79 | # Build here to make `torch.jit.trace` work. 80 | self.ratio = ratio 81 | max_position_embeddings *= ratio 82 | print(f"Condensing Positional embeddings from {max_position_embeddings} to {max_position_embeddings // ratio}") 83 | self.max_seq_len_cached = max_position_embeddings 84 | t = torch.arange(self.max_seq_len_cached, device=self.inv_freq.device, dtype=self.inv_freq.dtype) / ratio 85 | freqs = torch.einsum("i,j->ij", t, self.inv_freq) 86 | # Different from paper, but it uses a different permutation in order to obtain the same calculation 87 | emb = torch.cat((freqs, freqs), dim=-1) 88 | dtype = torch.get_default_dtype() 89 | self.register_buffer("cos_cached", emb.cos()[None, None, :, :].to(dtype), persistent=False) 90 | self.register_buffer("sin_cached", emb.sin()[None, None, :, :].to(dtype), persistent=False) 91 | 92 | def forward(self, x, seq_len=None): 93 | # x: [bs, num_attention_heads, seq_len, head_size] 94 | # This `if` block is unlikely to be run after we build sin/cos in `__init__`. Keep the logic here just in case. 
95 | if seq_len > self.max_seq_len_cached: 96 | self.max_seq_len_cached = seq_len 97 | t = torch.arange(self.max_seq_len_cached, device=x.device, dtype=self.inv_freq.dtype) / self.ratio 98 | freqs = torch.einsum("i,j->ij", t, self.inv_freq) 99 | # Different from paper, but it uses a different permutation in order to obtain the same calculation 100 | emb = torch.cat((freqs, freqs), dim=-1).to(x.device) 101 | self.register_buffer("cos_cached", emb.cos()[None, None, :, :].to(x.dtype), persistent=False) 102 | self.register_buffer("sin_cached", emb.sin()[None, None, :, :].to(x.dtype), persistent=False) 103 | return ( 104 | self.cos_cached[:, :, :seq_len, ...].to(dtype=x.dtype), 105 | self.sin_cached[:, :, :seq_len, ...].to(dtype=x.dtype), 106 | ) 107 | 108 | def replace_llama_with_condense(ratio): 109 | transformers.models.llama.modeling_llama.LlamaRotaryEmbedding = partial(CondenseRotaryEmbedding, ratio=ratio) 110 | 111 | 112 | def process_retrieval_ducoment(documents_df): 113 | ir_corpus = {} 114 | corpus2tool = {} 115 | for row in documents_df.itertuples(): 116 | doc = json.loads(row.document_content) 117 | ir_corpus[row.docid] = (doc.get('category_name', '') or '') + ', ' + \ 118 | (doc.get('tool_name', '') or '') + ', ' + \ 119 | (doc.get('api_name', '') or '') + ', ' + \ 120 | (doc.get('api_description', '') or '') + \ 121 | ', required_params: ' + json.dumps(doc.get('required_parameters', '')) + \ 122 | ', optional_params: ' + json.dumps(doc.get('optional_parameters', '')) + \ 123 | ', return_schema: ' + json.dumps(doc.get('template_response', '')) 124 | corpus2tool[(doc.get('category_name', '') or '') + ', ' + \ 125 | (doc.get('tool_name', '') or '') + ', ' + \ 126 | (doc.get('api_name', '') or '') + ', ' + \ 127 | (doc.get('api_description', '') or '') + \ 128 | ', required_params: ' + json.dumps(doc.get('required_parameters', '')) + \ 129 | ', optional_params: ' + json.dumps(doc.get('optional_parameters', '')) + \ 130 | ', return_schema: ' + json.dumps(doc.get('template_response', ''))] = doc['category_name'] + '[SEP]' + doc['tool_name'] + '[SEP]' + doc['api_name'] 131 | return ir_corpus, corpus2tool 132 | --------------------------------------------------------------------------------
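The name-sanitisation helpers in `toolbench/utils.py` above (`standardize`, `change_name`) are pure string functions, so their behaviour is easy to check directly. A short self-contained example with made-up input strings; it assumes the snippet is run from the `stabletoolbench/` directory so that `toolbench` is importable:

from toolbench.utils import standardize, change_name

# Separators are collapsed to "_" and the result is lower-cased.
assert standardize("YouTube Search_v3") == "youtube_search_v3"
# Names that start with a digit get a "get_" prefix.
assert standardize("2021-stats") == "get_2021_stats"
# Names that clash with common keywords or builtins get an "is_" prefix.
assert change_name("from") == "is_from"
print("toolbench.utils name sanitisation behaves as expected")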