├── .gitignore
├── LICENSE
├── README.md
├── data_example
    ├── answer
    │   ├── virtual_chatgpt_cot
    │   │   └── G1_instruction
    │   │   │   ├── 1073_CoT@1.json
    │   │   │   ├── 588_CoT@1.json
    │   │   │   └── 608_CoT@1.json
    │   └── virtual_chatgpt_dfs
    │   │   └── G1_instruction
    │   │       ├── 1073_DFS_woFilter_w2.json
    │   │       ├── 588_DFS_woFilter_w2.json
    │   │       └── 608_DFS_woFilter_w2.json
    ├── model_predictions_converted
    │   ├── virtual_chatgpt_cot
    │   │   └── G1_instruction.json
    │   └── virtual_chatgpt_dfs
    │   │   └── G1_instruction.json
    ├── pass_rate_results
    │   ├── virtual_chatgpt_cot
    │   │   ├── G1_instruction_virtual_chatgpt_cot.csv
    │   │   └── G1_instruction_virtual_chatgpt_cot.json
    │   └── virtual_chatgpt_dfs
    │   │   ├── G1_instruction_virtual_chatgpt_dfs.csv
    │   │   └── G1_instruction_virtual_chatgpt_dfs.json
    └── preference_results
    │   ├── G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.csv
    │   └── G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.json
├── inference_chatgpt_pipeline_virtual.sh
├── legacy_results.md
├── openai_key.json
├── requirements.txt
├── run_convert_answer.sh
├── run_fac_eval.sh
├── run_pass_rate.sh
├── run_preference.sh
├── server
    ├── Dockerfile
    ├── config.yml
    ├── config_mirrorapi.yml
    ├── config_mirrorapi_cache.yml
    ├── main.py
    ├── main_mirrorapi.py
    ├── main_mirrorapi_cache.py
    ├── requirements.txt
    ├── system_prompts.py
    └── utils.py
├── solvable_queries
    ├── test_instruction
    │   ├── G1_category.json
    │   ├── G1_instruction.json
    │   ├── G1_tool.json
    │   ├── G2_category.json
    │   ├── G2_instruction.json
    │   └── G3_instruction.json
    └── test_query_ids
    │   ├── G1_category.json
    │   ├── G1_instruction.json
    │   ├── G1_tool.json
    │   ├── G2_category.json
    │   ├── G2_instruction.json
    │   └── G3_instruction.json
├── solvable_queries_example
    ├── test_instruction
    │   └── G1_instruction.json
    └── test_query_ids
    │   └── G1_instruction.json
├── stbicon.svg
└── toolbench
    ├── inference
        ├── Algorithms
        │   ├── DFS.py
        │   ├── __init__.py
        │   ├── base_search.py
        │   └── single_chain.py
        ├── Downstream_tasks
        │   ├── __init__.py
        │   ├── base_env.py
        │   ├── rapidapi.py
        │   └── rapidapi_multithread.py
        ├── LLM
        │   ├── __init__.py
        │   ├── base_io.py
        │   ├── chatgpt_function_model.py
        │   ├── davinci_model.py
        │   ├── llama_model.py
        │   ├── retriever.py
        │   ├── tool_llama_lora_model.py
        │   ├── tool_llama_model.py
        │   └── tool_llama_vllm_model.py
        ├── LLM_rank
        │   ├── __init__.py
        │   └── rank_candidate.py
        ├── Prompts
        │   ├── ReAct_prompts.py
        │   ├── Tree_search_prompts.py
        │   ├── __init__.py
        │   └── rank_prompts.py
        ├── Tree
        │   ├── Tree.py
        │   └── __init__.py
        ├── callbacks
        │   └── ServerEventCallback.py
        ├── qa_pipeline.py
        ├── qa_pipeline_multithread.py
        ├── qa_pipeline_open_domain.py
        ├── server.py
        ├── toolbench_server.py
        └── utils.py
    ├── model
        ├── __init__.py
        ├── apply_delta.py
        ├── compression.py
        ├── make_delta.py
        └── model_adapter.py
    ├── tool_conversation.py
    ├── tooleval
        ├── README.md
        ├── README_ZH.md
        ├── ToolBench.code-workspace
        ├── __init__.py
        ├── automatic_eval_sample.py
        ├── convert_answers.py
        ├── convert_to_answer_format.py
        ├── dataset
        │   └── __init__.py
        ├── eval_and_update_leaderboard.py
        ├── eval_pass_rate.py
        ├── eval_preference.py
        ├── evaluation
        │   ├── __init__.py
        │   ├── dataclass.py
        │   ├── methodcls.py
        │   └── usereval.py
        ├── evaluators
        │   ├── __init__.py
        │   ├── registered_cls
        │   │   ├── __init__.py
        │   │   ├── base.py
        │   │   ├── rtl.py
        │   │   ├── tooleval.py
        │   │   └── utils.py
        │   ├── tooleval_gpt-3.5-turbo_default
        │   │   ├── config.yaml
        │   │   └── template.txt
        │   ├── tooleval_gpt-3.5-turbo_fn
        │   │   ├── config.yaml
        │   │   └── template.txt
        │   └── tooleval_gpt-3.5-turbo_normalized
        │   │   ├── config.yaml
        │   │   └── template.txt
        ├── evaluators_comparison.py
        ├── fac_eval.py
        ├── requirements.txt
        ├── results
        │   ├── default_evalset
        │   │   ├── DFS
        │   │   │   └── win.csv
        │   │   └── gpt-3.5-turbo_CoT
        │   │   │   ├── G1_category.json
        │   │   │   ├── G1_instruction.json
        │   │   │   ├── G1_tool.json
        │   │   │   ├── G2_category.json
        │   │   │   ├── G2_instruction.json
        │   │   │   └── G3_instruction.json
        │   ├── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv
        │   └── leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv
        └── utils.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/README.md


--------------------------------------------------------------------------------
/data_example/answer/virtual_chatgpt_cot/G1_instruction/1073_CoT@1.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_cot/G1_instruction/1073_CoT@1.json


--------------------------------------------------------------------------------
/data_example/answer/virtual_chatgpt_cot/G1_instruction/588_CoT@1.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_cot/G1_instruction/588_CoT@1.json


--------------------------------------------------------------------------------
/data_example/answer/virtual_chatgpt_cot/G1_instruction/608_CoT@1.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_cot/G1_instruction/608_CoT@1.json


--------------------------------------------------------------------------------
/data_example/answer/virtual_chatgpt_dfs/G1_instruction/1073_DFS_woFilter_w2.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_dfs/G1_instruction/1073_DFS_woFilter_w2.json


--------------------------------------------------------------------------------
/data_example/answer/virtual_chatgpt_dfs/G1_instruction/588_DFS_woFilter_w2.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_dfs/G1_instruction/588_DFS_woFilter_w2.json


--------------------------------------------------------------------------------
/data_example/answer/virtual_chatgpt_dfs/G1_instruction/608_DFS_woFilter_w2.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/answer/virtual_chatgpt_dfs/G1_instruction/608_DFS_woFilter_w2.json


--------------------------------------------------------------------------------
/data_example/model_predictions_converted/virtual_chatgpt_cot/G1_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/model_predictions_converted/virtual_chatgpt_cot/G1_instruction.json


--------------------------------------------------------------------------------
/data_example/model_predictions_converted/virtual_chatgpt_dfs/G1_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/model_predictions_converted/virtual_chatgpt_dfs/G1_instruction.json


--------------------------------------------------------------------------------
/data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.csv


--------------------------------------------------------------------------------
/data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_cot/G1_instruction_virtual_chatgpt_cot.json


--------------------------------------------------------------------------------
/data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.csv


--------------------------------------------------------------------------------
/data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/pass_rate_results/virtual_chatgpt_dfs/G1_instruction_virtual_chatgpt_dfs.json


--------------------------------------------------------------------------------
/data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.csv


--------------------------------------------------------------------------------
/data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/data_example/preference_results/G1_instruction_virtual_chatgpt_cot_virtual_chatgpt_dfs.json


--------------------------------------------------------------------------------
/inference_chatgpt_pipeline_virtual.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/inference_chatgpt_pipeline_virtual.sh


--------------------------------------------------------------------------------
/legacy_results.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/legacy_results.md


--------------------------------------------------------------------------------
/openai_key.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/openai_key.json


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/requirements.txt


--------------------------------------------------------------------------------
/run_convert_answer.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_convert_answer.sh


--------------------------------------------------------------------------------
/run_fac_eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_fac_eval.sh


--------------------------------------------------------------------------------
/run_pass_rate.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_pass_rate.sh


--------------------------------------------------------------------------------
/run_preference.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/run_preference.sh


--------------------------------------------------------------------------------
/server/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/Dockerfile


--------------------------------------------------------------------------------
/server/config.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/config.yml


--------------------------------------------------------------------------------
/server/config_mirrorapi.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/config_mirrorapi.yml


--------------------------------------------------------------------------------
/server/config_mirrorapi_cache.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/config_mirrorapi_cache.yml


--------------------------------------------------------------------------------
/server/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/main.py


--------------------------------------------------------------------------------
/server/main_mirrorapi.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/main_mirrorapi.py


--------------------------------------------------------------------------------
/server/main_mirrorapi_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/main_mirrorapi_cache.py


--------------------------------------------------------------------------------
/server/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/requirements.txt


--------------------------------------------------------------------------------
/server/system_prompts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/system_prompts.py


--------------------------------------------------------------------------------
/server/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/server/utils.py


--------------------------------------------------------------------------------
/solvable_queries/test_instruction/G1_category.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G1_category.json


--------------------------------------------------------------------------------
/solvable_queries/test_instruction/G1_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G1_instruction.json


--------------------------------------------------------------------------------
/solvable_queries/test_instruction/G1_tool.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G1_tool.json


--------------------------------------------------------------------------------
/solvable_queries/test_instruction/G2_category.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G2_category.json


--------------------------------------------------------------------------------
/solvable_queries/test_instruction/G2_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G2_instruction.json


--------------------------------------------------------------------------------
/solvable_queries/test_instruction/G3_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_instruction/G3_instruction.json


--------------------------------------------------------------------------------
/solvable_queries/test_query_ids/G1_category.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G1_category.json


--------------------------------------------------------------------------------
/solvable_queries/test_query_ids/G1_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G1_instruction.json


--------------------------------------------------------------------------------
/solvable_queries/test_query_ids/G1_tool.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G1_tool.json


--------------------------------------------------------------------------------
/solvable_queries/test_query_ids/G2_category.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G2_category.json


--------------------------------------------------------------------------------
/solvable_queries/test_query_ids/G2_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G2_instruction.json


--------------------------------------------------------------------------------
/solvable_queries/test_query_ids/G3_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries/test_query_ids/G3_instruction.json


--------------------------------------------------------------------------------
/solvable_queries_example/test_instruction/G1_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries_example/test_instruction/G1_instruction.json


--------------------------------------------------------------------------------
/solvable_queries_example/test_query_ids/G1_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/solvable_queries_example/test_query_ids/G1_instruction.json


--------------------------------------------------------------------------------
/stbicon.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/stbicon.svg


--------------------------------------------------------------------------------
/toolbench/inference/Algorithms/DFS.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Algorithms/DFS.py


--------------------------------------------------------------------------------
/toolbench/inference/Algorithms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/inference/Algorithms/base_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Algorithms/base_search.py


--------------------------------------------------------------------------------
/toolbench/inference/Algorithms/single_chain.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Algorithms/single_chain.py


--------------------------------------------------------------------------------
/toolbench/inference/Downstream_tasks/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/inference/Downstream_tasks/base_env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Downstream_tasks/base_env.py


--------------------------------------------------------------------------------
/toolbench/inference/Downstream_tasks/rapidapi.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Downstream_tasks/rapidapi.py


--------------------------------------------------------------------------------
/toolbench/inference/Downstream_tasks/rapidapi_multithread.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Downstream_tasks/rapidapi_multithread.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/inference/LLM/base_io.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | def base_io(input_str):
4 |     pass


--------------------------------------------------------------------------------
/toolbench/inference/LLM/chatgpt_function_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/chatgpt_function_model.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM/davinci_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/davinci_model.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM/llama_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/llama_model.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM/retriever.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/retriever.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM/tool_llama_lora_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/tool_llama_lora_model.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM/tool_llama_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/tool_llama_model.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM/tool_llama_vllm_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM/tool_llama_vllm_model.py


--------------------------------------------------------------------------------
/toolbench/inference/LLM_rank/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/inference/LLM_rank/rank_candidate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/LLM_rank/rank_candidate.py


--------------------------------------------------------------------------------
/toolbench/inference/Prompts/ReAct_prompts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Prompts/ReAct_prompts.py


--------------------------------------------------------------------------------
/toolbench/inference/Prompts/Tree_search_prompts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Prompts/Tree_search_prompts.py


--------------------------------------------------------------------------------
/toolbench/inference/Prompts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/inference/Prompts/rank_prompts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Prompts/rank_prompts.py


--------------------------------------------------------------------------------
/toolbench/inference/Tree/Tree.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/Tree/Tree.py


--------------------------------------------------------------------------------
/toolbench/inference/Tree/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/inference/callbacks/ServerEventCallback.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/callbacks/ServerEventCallback.py


--------------------------------------------------------------------------------
/toolbench/inference/qa_pipeline.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/qa_pipeline.py


--------------------------------------------------------------------------------
/toolbench/inference/qa_pipeline_multithread.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/qa_pipeline_multithread.py


--------------------------------------------------------------------------------
/toolbench/inference/qa_pipeline_open_domain.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/qa_pipeline_open_domain.py


--------------------------------------------------------------------------------
/toolbench/inference/server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/server.py


--------------------------------------------------------------------------------
/toolbench/inference/toolbench_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/toolbench_server.py


--------------------------------------------------------------------------------
/toolbench/inference/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/inference/utils.py


--------------------------------------------------------------------------------
/toolbench/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/__init__.py


--------------------------------------------------------------------------------
/toolbench/model/apply_delta.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/apply_delta.py


--------------------------------------------------------------------------------
/toolbench/model/compression.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/compression.py


--------------------------------------------------------------------------------
/toolbench/model/make_delta.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/make_delta.py


--------------------------------------------------------------------------------
/toolbench/model/model_adapter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/model/model_adapter.py


--------------------------------------------------------------------------------
/toolbench/tool_conversation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tool_conversation.py


--------------------------------------------------------------------------------
/toolbench/tooleval/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/README.md


--------------------------------------------------------------------------------
/toolbench/tooleval/README_ZH.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/README_ZH.md


--------------------------------------------------------------------------------
/toolbench/tooleval/ToolBench.code-workspace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/ToolBench.code-workspace


--------------------------------------------------------------------------------
/toolbench/tooleval/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/tooleval/automatic_eval_sample.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/automatic_eval_sample.py


--------------------------------------------------------------------------------
/toolbench/tooleval/convert_answers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/convert_answers.py


--------------------------------------------------------------------------------
/toolbench/tooleval/convert_to_answer_format.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/convert_to_answer_format.py


--------------------------------------------------------------------------------
/toolbench/tooleval/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolbench/tooleval/eval_and_update_leaderboard.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/eval_and_update_leaderboard.py


--------------------------------------------------------------------------------
/toolbench/tooleval/eval_pass_rate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/eval_pass_rate.py


--------------------------------------------------------------------------------
/toolbench/tooleval/eval_preference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/eval_preference.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/__init__.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluation/dataclass.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/dataclass.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluation/methodcls.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/methodcls.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluation/usereval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluation/usereval.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/__init__.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/registered_cls/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/__init__.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/registered_cls/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/base.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/registered_cls/rtl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/rtl.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/registered_cls/tooleval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/tooleval.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/registered_cls/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/registered_cls/utils.py


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/config.yaml


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/template.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_default/template.txt


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/config.yaml


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/template.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_fn/template.txt


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/config.yaml


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/template.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators/tooleval_gpt-3.5-turbo_normalized/template.txt


--------------------------------------------------------------------------------
/toolbench/tooleval/evaluators_comparison.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/evaluators_comparison.py


--------------------------------------------------------------------------------
/toolbench/tooleval/fac_eval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/fac_eval.py


--------------------------------------------------------------------------------
/toolbench/tooleval/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/requirements.txt


--------------------------------------------------------------------------------
/toolbench/tooleval/results/default_evalset/DFS/win.csv:
--------------------------------------------------------------------------------
1 | ,Method,Win Rate,Std Error
2 | 0,DFS,,
3 | 


--------------------------------------------------------------------------------
/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_category.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_category.json


--------------------------------------------------------------------------------
/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_instruction.json


--------------------------------------------------------------------------------
/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_tool.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G1_tool.json


--------------------------------------------------------------------------------
/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_category.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_category.json


--------------------------------------------------------------------------------
/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G2_instruction.json


--------------------------------------------------------------------------------
/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G3_instruction.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/default_evalset/gpt-3.5-turbo_CoT/G3_instruction.json


--------------------------------------------------------------------------------
/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###ChatGPT-DFSDT.csv


--------------------------------------------------------------------------------
/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/results/leaderboard###default_evalset###tooleval_gpt-3.5-turbo_normalized###gpt-3.5-turbo_CoT.csv


--------------------------------------------------------------------------------
/toolbench/tooleval/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/tooleval/utils.py


--------------------------------------------------------------------------------
/toolbench/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/THUNLP-MT/StableToolBench/HEAD/toolbench/utils.py


--------------------------------------------------------------------------------