├── .gitignore ├── LICENSE ├── README.md ├── data_example ├── answer │ ├── virtual_chatgpt_cot │ │ └── G1_instruction │ │ │ ├── 1073_CoT@1.json │ │ │ ├── 588_CoT@1.json │ │ │ └── 608_CoT@1.json │ └── virtual_chatgpt_dfs │ │ └── G1_instruction │ │ ├── 1073_DFS_woFilter_w2.json │ │ ├── 588_DFS_woFilter_w2.json │ │ └── 608_DFS_woFilter_w2.json ├── model_predictions_converted │ ├── virtual_chatgpt_cot │ │ └── G1_instruction.json │ └── virtual_chatgpt_dfs │ │ └── G1_instruction.json ├── pass_rate_results │ ├── virtual_chatgpt_cot │ │ ├── G1_instruction_virtual_chatgpt_cot.csv │ │ └── G1_instruction_virtual_chatgpt_cot.json │ └── virtual_chatgpt_dfs │ │ ├── G1_instruction_virtual_chatgpt_dfs.csv │ │ └── G1_instruction_virtual_chatgpt_dfs.json └── preference_results │ ├── G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.csv │ └── G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.json ├── inference_chatgpt_pipeline_virtual.sh ├── legacy_results.md ├── openai_key.json ├── requirements.txt ├── run_convert_answer.sh ├── run_fac_eval.sh ├── run_pass_rate.sh ├── run_preference.sh ├── server ├── Dockerfile ├── config.yml ├── config_mirrorapi.yml ├── config_mirrorapi_cache.yml ├── main.py ├── main_mirrorapi.py ├── main_mirrorapi_cache.py ├── requirements.txt ├── system_prompts.py └── utils.py ├── solvable_queries ├── test_instruction │ ├── G1_category.json │ ├── G1_instruction.json │ ├── G1_tool.json │ ├── G2_category.json │ ├── G2_instruction.json │ └── G3_instruction.json └── test_query_ids │ ├── G1_category.json │ ├── G1_instruction.json │ ├── G1_tool.json │ ├── G2_category.json │ ├── G2_instruction.json │ └── G3_instruction.json ├── solvable_queries_example ├── test_instruction │ └── G1_instruction.json └── test_query_ids │ └── G1_instruction.json ├── stbicon.svg └── toolbench ├── inference ├── Algorithms │ ├── DFS.py │ ├── __init__.py │ ├── base_search.py │ └── single_chain.py ├── Downstream_tasks │ ├── __init__.py │ ├── base_env.py │ ├── rapidapi.py │ └── rapidapi_multithread.py ├── LLM │ ├── __init__.py │ ├── base_io.py │ ├── chatgpt_function_model.py │ ├── davinci_model.py │ ├── llama_model.py │ ├── retriever.py │ ├── tool_llama_lora_model.py │ ├── tool_llama_model.py │ └── tool_llama_vllm_model.py ├── LLM_rank │ ├── __init__.py │ └── rank_candidate.py ├── Prompts │ ├── ReAct_prompts.py │ ├── Tree_search_prompts.py │ ├── __init__.py │ └── rank_prompts.py ├── Tree │ ├── Tree.py │ └── __init__.py ├── callbacks │ └── ServerEventCallback.py ├── qa_pipeline.py ├── qa_pipeline_multithread.py ├── qa_pipeline_open_domain.py ├── server.py ├── toolbench_server.py └── utils.py ├── model ├── __init__.py ├── apply_delta.py ├── compression.py ├── make_delta.py └── model_adapter.py ├── tool_conversation.py ├── tooleval ├── README.md ├── README_ZH.md ├── ToolBench.code-workspace ├── __init__.py ├── automatic_eval_sample.py ├── convert_answers.py ├── convert_to_answer_format.py ├── dataset │ └── __init__.py ├── eval_and_update_leaderboard.py ├── eval_pass_rate.py ├── eval_preference.py ├── evaluation │ ├── __init__.py │ ├── dataclass.py │ ├── methodcls.py │ └── usereval.py ├── evaluators │ ├── __init__.py │ ├── registered_cls │ │ ├── __init__.py │ │ ├── base.py │ │ ├── rtl.py │ │ ├── tooleval.py │ │ └── utils.py │ ├── tooleval_gpt-3.5-turbo_default │ │ ├── config.yaml │ │ └── template.txt │ ├── tooleval_gpt-3.5-turbo_fn │ │ ├── config.yaml │ │ └── template.txt │ └── tooleval_gpt-3.5-turbo_normalized │ │ ├── config.yaml │ │ └── template.txt ├── evaluators_comparison.py ├── fac_eval.py ├── requirements.txt ├── results │ ├── default_evalset │ │ ├── DFS │ │ │ └── win.csv │ │ └── gpt-3.5-turbo_CoT │ │ │ ├── G1_category.json │ │ │ ├── G1_instruction.json │ │ │ ├── G1_tool.json │ │ │ ├── G2_category.json │ │ │ ├── G2_instruction.json │ │ │ └── G3_instruction.json │ ├── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv │ └── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv └── utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/README.md -------------------------------------------------------------------------------- /data_example/answer/virtual_chatgpt_cot/G1_instruction/1073_CoT@1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_cot/G1_instruction/1073_CoT@1.json -------------------------------------------------------------------------------- /data_example/answer/virtual_chatgpt_cot/G1_instruction/588_CoT@1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_cot/G1_instruction/588_CoT@1.json -------------------------------------------------------------------------------- /data_example/answer/virtual_chatgpt_cot/G1_instruction/608_CoT@1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_cot/G1_instruction/608_CoT@1.json -------------------------------------------------------------------------------- /data_example/answer/virtual_chatgpt_dfs/G1_instruction/1073_DFS_woFilter_w2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_dfs/G1_instruction/1073_DFS_woFilter_w2.json -------------------------------------------------------------------------------- /data_example/answer/virtual_chatgpt_dfs/G1_instruction/588_DFS_woFilter_w2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_dfs/G1_instruction/588_DFS_woFilter_w2.json -------------------------------------------------------------------------------- /data_example/answer/virtual_chatgpt_dfs/G1_instruction/608_DFS_woFilter_w2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_dfs/G1_instruction/608_DFS_woFilter_w2.json -------------------------------------------------------------------------------- /data_example/model_predictions_converted/virtual_chatgpt_cot/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/model_predictions_converted/virtual_chatgpt_cot/G1_instruction.json -------------------------------------------------------------------------------- /data_example/model_predictions_converted/virtual_chatgpt_dfs/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/model_predictions_converted/virtual_chatgpt_dfs/G1_instruction.json -------------------------------------------------------------------------------- /data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.csv -------------------------------------------------------------------------------- /data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.json -------------------------------------------------------------------------------- /data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.csv -------------------------------------------------------------------------------- /data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.json -------------------------------------------------------------------------------- /data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.csv -------------------------------------------------------------------------------- /data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.json -------------------------------------------------------------------------------- /inference_chatgpt_pipeline_virtual.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/inference_chatgpt_pipeline_virtual.sh -------------------------------------------------------------------------------- /legacy_results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/legacy_results.md -------------------------------------------------------------------------------- /openai_key.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/openai_key.json -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/requirements.txt -------------------------------------------------------------------------------- /run_convert_answer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_convert_answer.sh -------------------------------------------------------------------------------- /run_fac_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_fac_eval.sh -------------------------------------------------------------------------------- /run_pass_rate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_pass_rate.sh -------------------------------------------------------------------------------- /run_preference.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_preference.sh -------------------------------------------------------------------------------- /server/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/Dockerfile -------------------------------------------------------------------------------- /server/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/config.yml -------------------------------------------------------------------------------- /server/config_mirrorapi.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/config_mirrorapi.yml -------------------------------------------------------------------------------- /server/config_mirrorapi_cache.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/config_mirrorapi_cache.yml -------------------------------------------------------------------------------- /server/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/main.py -------------------------------------------------------------------------------- /server/main_mirrorapi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/main_mirrorapi.py -------------------------------------------------------------------------------- /server/main_mirrorapi_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/main_mirrorapi_cache.py -------------------------------------------------------------------------------- /server/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/requirements.txt -------------------------------------------------------------------------------- /server/system_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/system_prompts.py -------------------------------------------------------------------------------- /server/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/utils.py -------------------------------------------------------------------------------- /solvable_queries/test_instruction/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G1_category.json -------------------------------------------------------------------------------- /solvable_queries/test_instruction/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G1_instruction.json -------------------------------------------------------------------------------- /solvable_queries/test_instruction/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G1_tool.json -------------------------------------------------------------------------------- /solvable_queries/test_instruction/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G2_category.json -------------------------------------------------------------------------------- /solvable_queries/test_instruction/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G2_instruction.json -------------------------------------------------------------------------------- /solvable_queries/test_instruction/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G3_instruction.json -------------------------------------------------------------------------------- /solvable_queries/test_query_ids/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G1_category.json -------------------------------------------------------------------------------- /solvable_queries/test_query_ids/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G1_instruction.json -------------------------------------------------------------------------------- /solvable_queries/test_query_ids/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G1_tool.json -------------------------------------------------------------------------------- /solvable_queries/test_query_ids/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G2_category.json -------------------------------------------------------------------------------- /solvable_queries/test_query_ids/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G2_instruction.json -------------------------------------------------------------------------------- /solvable_queries/test_query_ids/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G3_instruction.json -------------------------------------------------------------------------------- /solvable_queries_example/test_instruction/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries_example/test_instruction/G1_instruction.json -------------------------------------------------------------------------------- /solvable_queries_example/test_query_ids/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries_example/test_query_ids/G1_instruction.json -------------------------------------------------------------------------------- /stbicon.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/stbicon.svg -------------------------------------------------------------------------------- /toolbench/inference/Algorithms/DFS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Algorithms/DFS.py -------------------------------------------------------------------------------- /toolbench/inference/Algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/inference/Algorithms/base_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Algorithms/base_search.py -------------------------------------------------------------------------------- /toolbench/inference/Algorithms/single_chain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Algorithms/single_chain.py -------------------------------------------------------------------------------- /toolbench/inference/Downstream_tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/inference/Downstream_tasks/base_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Downstream_tasks/base_env.py -------------------------------------------------------------------------------- /toolbench/inference/Downstream_tasks/rapidapi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Downstream_tasks/rapidapi.py -------------------------------------------------------------------------------- /toolbench/inference/Downstream_tasks/rapidapi_multithread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Downstream_tasks/rapidapi_multithread.py -------------------------------------------------------------------------------- /toolbench/inference/LLM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/inference/LLM/base_io.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def base_io(input_str): 4 | pass -------------------------------------------------------------------------------- /toolbench/inference/LLM/chatgpt_function_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/chatgpt_function_model.py -------------------------------------------------------------------------------- /toolbench/inference/LLM/davinci_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/davinci_model.py -------------------------------------------------------------------------------- /toolbench/inference/LLM/llama_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/llama_model.py -------------------------------------------------------------------------------- /toolbench/inference/LLM/retriever.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/retriever.py -------------------------------------------------------------------------------- /toolbench/inference/LLM/tool_llama_lora_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/tool_llama_lora_model.py -------------------------------------------------------------------------------- /toolbench/inference/LLM/tool_llama_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/tool_llama_model.py -------------------------------------------------------------------------------- /toolbench/inference/LLM/tool_llama_vllm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/tool_llama_vllm_model.py -------------------------------------------------------------------------------- /toolbench/inference/LLM_rank/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/inference/LLM_rank/rank_candidate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM_rank/rank_candidate.py -------------------------------------------------------------------------------- /toolbench/inference/Prompts/ReAct_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Prompts/ReAct_prompts.py -------------------------------------------------------------------------------- /toolbench/inference/Prompts/Tree_search_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Prompts/Tree_search_prompts.py -------------------------------------------------------------------------------- /toolbench/inference/Prompts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/inference/Prompts/rank_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Prompts/rank_prompts.py -------------------------------------------------------------------------------- /toolbench/inference/Tree/Tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Tree/Tree.py -------------------------------------------------------------------------------- /toolbench/inference/Tree/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/inference/callbacks/ServerEventCallback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/callbacks/ServerEventCallback.py -------------------------------------------------------------------------------- /toolbench/inference/qa_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/qa_pipeline.py -------------------------------------------------------------------------------- /toolbench/inference/qa_pipeline_multithread.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/qa_pipeline_multithread.py -------------------------------------------------------------------------------- /toolbench/inference/qa_pipeline_open_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/qa_pipeline_open_domain.py -------------------------------------------------------------------------------- /toolbench/inference/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/server.py -------------------------------------------------------------------------------- /toolbench/inference/toolbench_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/toolbench_server.py -------------------------------------------------------------------------------- /toolbench/inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/utils.py -------------------------------------------------------------------------------- /toolbench/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/__init__.py -------------------------------------------------------------------------------- /toolbench/model/apply_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/apply_delta.py -------------------------------------------------------------------------------- /toolbench/model/compression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/compression.py -------------------------------------------------------------------------------- /toolbench/model/make_delta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/make_delta.py -------------------------------------------------------------------------------- /toolbench/model/model_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/model_adapter.py -------------------------------------------------------------------------------- /toolbench/tool_conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tool_conversation.py -------------------------------------------------------------------------------- /toolbench/tooleval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/README.md -------------------------------------------------------------------------------- /toolbench/tooleval/README_ZH.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/README_ZH.md -------------------------------------------------------------------------------- /toolbench/tooleval/ToolBench.code-workspace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/ToolBench.code-workspace -------------------------------------------------------------------------------- /toolbench/tooleval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/tooleval/automatic_eval_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/automatic_eval_sample.py -------------------------------------------------------------------------------- /toolbench/tooleval/convert_answers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/convert_answers.py -------------------------------------------------------------------------------- /toolbench/tooleval/convert_to_answer_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/convert_to_answer_format.py -------------------------------------------------------------------------------- /toolbench/tooleval/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolbench/tooleval/eval_and_update_leaderboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/eval_and_update_leaderboard.py -------------------------------------------------------------------------------- /toolbench/tooleval/eval_pass_rate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/eval_pass_rate.py -------------------------------------------------------------------------------- /toolbench/tooleval/eval_preference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/eval_preference.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/__init__.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluation/dataclass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/dataclass.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluation/methodcls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/methodcls.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluation/usereval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/usereval.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/__init__.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/registered_cls/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/__init__.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/registered_cls/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/base.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/registered_cls/rtl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/rtl.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/registered_cls/tooleval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/tooleval.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/registered_cls/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/utils.py -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/config.yaml -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/template.txt -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/config.yaml -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/template.txt -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/config.yaml -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/template.txt -------------------------------------------------------------------------------- /toolbench/tooleval/evaluators_comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators_comparison.py -------------------------------------------------------------------------------- /toolbench/tooleval/fac_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/fac_eval.py -------------------------------------------------------------------------------- /toolbench/tooleval/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/requirements.txt -------------------------------------------------------------------------------- /toolbench/tooleval/results/default_evalset/DFS/win.csv: -------------------------------------------------------------------------------- 1 | ,Method,Win Rate,Std Error 2 | 0,DFS,, 3 | -------------------------------------------------------------------------------- /toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_category.json -------------------------------------------------------------------------------- /toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_instruction.json -------------------------------------------------------------------------------- /toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_tool.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_tool.json -------------------------------------------------------------------------------- /toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_category.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_category.json -------------------------------------------------------------------------------- /toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_instruction.json -------------------------------------------------------------------------------- /toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G3_instruction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G3_instruction.json -------------------------------------------------------------------------------- /toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv -------------------------------------------------------------------------------- /toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv -------------------------------------------------------------------------------- /toolbench/tooleval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/utils.py -------------------------------------------------------------------------------- /toolbench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/utils.py --------------------------------------------------------------------------------