├── .gitignore
├── LICENSE
├── README.md
├── README_ZH.md
├── c3_bench
    ├── __init__.py
    ├── bench_test
    │   ├── __init__.py
    │   ├── analysis_result.py
    │   ├── data
    │   │   ├── C3-Bench.csv
    │   │   └── C3-Bench.jsonl
    │   ├── handle
    │   │   ├── __init__.py
    │   │   ├── api_handle.py
    │   │   ├── basic_handle.py
    │   │   ├── chatglm_handle.py
    │   │   ├── fcm_handle.py
    │   │   ├── gorilla_handle.py
    │   │   ├── hammer_handle.py
    │   │   ├── handles.py
    │   │   ├── hunyuan_handle.py
    │   │   ├── llama_handle.py
    │   │   ├── toolace_handle.py
    │   │   ├── tools.py
    │   │   ├── watt_handle.py
    │   │   ├── xlam2_handle.py
    │   │   └── xlam_handle.py
    │   ├── request_pipeline.py
    │   ├── request_pipeline_upta.py
    │   ├── result
    │   │   └── upta
    │   │   │   └── 2025-06-25-15:50:37_b3b8be_hunyuan-a13b_en_remove_role_contain_context_history_with_planner_tool_.jsonl
    │   ├── tool_call_graph.py
    │   ├── tool_class
    │   │   ├── __init__.py
    │   │   ├── chatglm.py
    │   │   ├── deepseek.py
    │   │   ├── fc_medium.py
    │   │   ├── gorilla.py
    │   │   ├── hammer.py
    │   │   ├── llama.py
    │   │   ├── tool_ace.py
    │   │   ├── tool_class_base.py
    │   │   ├── tool_model_map.py
    │   │   ├── watt.py
    │   │   ├── xlam.py
    │   │   └── xlam2.py
    │   ├── tool_parser
    │   │   ├── __init__.py
    │   │   └── hunyuan_tool_parser.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── date.py
    │   │   ├── parse_res.py
    │   │   ├── readnwrite.py
    │   │   └── tools.py
    │   └── web_server.py
    ├── multi_agent
    │   ├── agent
    │   │   ├── __init__.py
    │   │   ├── agent_answer.py
    │   │   ├── agent_answer_chat.py
    │   │   ├── agent_ask.py
    │   │   ├── checker_planner.py
    │   │   ├── checker_tool.py
    │   │   ├── planner.py
    │   │   ├── tool.py
    │   │   ├── user_answer_ask.py
    │   │   ├── user_ask.py
    │   │   ├── user_chat.py
    │   │   ├── user_continue_question.py
    │   │   ├── user_multi_tool.py
    │   │   ├── user_multi_tool_parallel.py
    │   │   ├── user_multi_tool_serial_parallel.py
    │   │   ├── user_single_tool.py
    │   │   └── user_vague_answer_ask.py
    │   ├── generate.py
    │   ├── handle
    │   │   ├── __init__.py
    │   │   ├── api_handle.py
    │   │   └── handles.py
    │   ├── tools
    │   │   ├── tools_en.jsonl
    │   │   └── tools_zh.jsonl
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── agent_utils.py
    │   │   ├── data_process_utils.py
    │   │   ├── file_utils.py
    │   │   ├── log_utils.py
    │   │   ├── time_utils.py
    │   │   └── tool_utils.py
    └── requirements.txt
└── picture
    ├── agent_family.png
    ├── compare.png
    ├── example.png
    ├── example_zh.png
    ├── first.png
    ├── framework.png
    ├── multi_agent.png
    ├── multi_agent2.png
    └── overall.png


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/*
2 | 


--------------------------------------------------------------------------------
/c3_bench/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/__init__.py


--------------------------------------------------------------------------------
/c3_bench/bench_test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/__init__.py


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/handle/__init__.py


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/api_handle.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from .basic_handle import SimulateMultiTurnMessages
 5 | from c3_bench.bench_test.utils import functions_uniform
 6 | from openai import OpenAI
 7 | 
 8 | 
 9 | class APIMultiTurnMessages(SimulateMultiTurnMessages):
10 |     def __init__(self, model_url, is_english=False):
11 |         super().__init__(model_url, is_english)
12 |         self.model_messages = []
13 |         self.client = OpenAI(
14 |             api_key=os.getenv("API_KEY"),
15 |             base_url=os.getenv("BASE_URL"),
16 |         )
17 | 
18 |     def request_funcall(self, messages, tools, env_info=None):
19 |         messages = self.add_date_to_message(messages, env_info)
20 |         tools = [functions_uniform(tool) for tool in tools]
21 |         kwargs = {
22 |             "messages": messages,
23 |             "tools": tools,
24 |             "temperature": 0.1,
25 |             "timeout": 300,
26 |             "model": os.getenv("MODEL")
27 |         }
28 |         api_response = self.client.chat.completions.create(**kwargs)
29 |         api_response = json.loads(api_response.json())
30 |         choice = api_response["choices"][0]
31 |         message = choice["message"]
32 |         text = message["content"]
33 |         tool_calls = message.get("tool_calls", None)
34 |         return text, tool_calls
35 | 
36 | 
37 | def main():
38 |     handle = APIMultiTurnMessages("")
39 |     tools = [
40 |         {
41 |             "type": "function",
42 |             "function": {
43 |                 "name": "get_current_weather",
44 |                 "description": "Get the current weather in a given location",
45 |                 "parameters": {
46 |                     "type": "object",
47 |                     "properties": {
48 |                         "location": {
49 |                             "type": "string",
50 |                             "description": "The city and state, e.g. San Francisco, CA"
51 |                         },
52 |                         "unit": {
53 |                             "type": "string",
54 |                             "enum": [
55 |                                 "celsius",
56 |                                 "fahrenheit"
57 |                             ]
58 |                         }
59 |                     },
60 |                     "required": [
61 |                         "location"
62 |                     ]
63 |                 }
64 |             }
65 |         }
66 |     ]
67 |     messages = [
68 |         {
69 |             "role": "user",
70 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
71 |         }
72 |     ]
73 |     content, tool_calls = handle.request_funcall(messages, tools)
74 |     print(content)
75 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
76 | 
77 | 
78 | if __name__ == "__main__":
79 |     main()
80 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/basic_handle.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | from datetime import datetime
 4 | 
 5 | 
 6 | class SimulateMultiTurnMessages:
 7 |     def __init__(self, model_url, is_english):
 8 |         self.model_url = model_url
 9 |         self.is_english = is_english
10 |         self.model_messages = []
11 |         self.timeout = 90
12 |         self.add_date = True
13 | 
14 |     def preprocess_to_simple(self, messages):
15 |         pass
16 | 
17 |     def post_process_tool_call(self, answer):
18 |         pass
19 | 
20 |     def add_weekday_date(self, date):
21 |         date = date.replace("当前时间：", "").replace("环境：", "")
22 |         date_obj = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
23 |         weekday_num = date_obj.weekday()
24 |         if self.is_english:
25 |             weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
26 |         else:
27 |             weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
28 |         weekday = weekdays[weekday_num]
29 |         date = date + " " + weekday
30 |         return date
31 | 
32 |     def add_date_to_message(self, message, env_info=None):
33 |         if env_info is not None and self.add_date:
34 |             system_content = message[0]["content"] if message[0]["role"] == "system" else ""
35 |             if self.is_english:
36 |                 system_content = system_content[:system_content.rfind("Current Date:")] + "\n\nCurrent Date:" + self.add_weekday_date(env_info)
37 |             else:
38 |                 system_content = system_content[:system_content.rfind("当前日期：")] + "当前日期：" + self.add_weekday_date(env_info)
39 |             if message[0]["role"] == "system":
40 |                 message[0]["content"] = system_content.strip()
41 |             else:
42 |                 message.insert(0, {"role": "system", "content": system_content.strip()})
43 |             return message
44 |         else:
45 |             return message
46 | 
47 |     def add_date_to_messsage_user(self, message, env_info=None):
48 |         if env_info is not None and self.add_date:
49 |             if self.is_english:
50 |                 system_content = "Current Date:" + self.add_weekday_date(env_info)
51 |             else:
52 |                 system_content = "当前日期：" + self.add_weekday_date(env_info)
53 |             idx = 0
54 |             date_flag = False
55 |             for idx_, item in enumerate(message):
56 |                 if item["role"] == "user":
57 |                     if "Current Date:" in item["content"] or "当前日期：" in item["content"]:
58 |                         date_flag = True
59 |                     idx = idx_
60 |             if not date_flag:
61 |                 message[idx]["content"] += "\n\n" + system_content
62 |             return message
63 |         else:
64 |             return message
65 | 
66 |     def request_funcall(self, messages, tools, env_info=None):
67 |         url = self.model_url
68 |         headers = {"Content-Type": "application/json"}
69 |         data = {
70 |             "messages": self.add_date_to_message(self.preprocess_to_simple(messages), env_info),
71 |             "tools": tools,
72 |             "date": self.add_weekday_date(env_info)
73 |         }
74 | 
75 |         text = None
76 |         tool_calls = None
77 |         try:
78 |             response = requests.post(url, headers=headers, json=data, timeout=self.timeout)
79 |             if response.status_code == 200:
80 |                 result = response.json()
81 |                 answer = result["answer"]
82 |                 text, tool_calls = self.post_process_tool_call(answer)
83 |         except Exception as e:
84 |             print(f"error: {e}")
85 |             text = None
86 |             tool_calls = None
87 | 
88 |         return text, tool_calls
89 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/chatglm_handle.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import uuid
 3 | 
 4 | from .basic_handle import SimulateMultiTurnMessages
 5 | from .tools import remove_messages
 6 | 
 7 | 
 8 | class ChatGLMMultiTurnMessages(SimulateMultiTurnMessages):
 9 |     def __init__(self, model_url, is_english=False):
10 |         super().__init__(model_url, is_english)
11 |         self.model_messages = []
12 | 
13 |     def preprocess_to_simple(self, messages):
14 |         if len(self.model_messages) == 0:
15 |             self.model_messages = remove_messages(messages, is_english=True)
16 |         else:
17 |             if messages[-1]["role"] == "user":
18 |                 self.model_messages.append({"role": "user",
19 |                                             "content": messages[-1]["content"].replace("用户：", "").replace("User:",
20 |                                                                                                           "").strip()})
21 |             elif messages[-1]["role"] == "tool":
22 |                 observations = json.loads(messages[-1]["content"])
23 |                 functions = messages[-2]["tool_calls"]
24 |                 assert len(observations) == len(functions)
25 |                 ret_observation = []
26 |                 for function, observation in zip(functions, observations):
27 |                     ret_observation.append({
28 |                         "name": function["function"]["name"],
29 |                         "results": observation
30 |                     })
31 |                 self.model_messages.append({"role": "observation", "content": json.dumps(ret_observation)})
32 |         return self.model_messages
33 | 
34 |     def post_process_tool_call(self, answer):
35 |         try:
36 |             self.model_messages.append({"role": "assistant", "content": answer})
37 |             answer_split = answer.split("\n")
38 |             if len(answer_split) >= 2:
39 |                 text = f"use {answer_split[0]} to solve user problem"
40 |                 tool_calls = [{"id": str(uuid.uuid4()), "function": {
41 |                     "name": answer_split[0],
42 |                     "arguments": json.loads(answer_split[1])
43 |                 }}]
44 |             else:
45 |                 text = answer
46 |                 tool_calls = None
47 |             return text, tool_calls
48 |         except Exception as e:
49 |             print(f"error: {e}")
50 |             return answer, None
51 | 
52 | 
53 | def main():
54 |     handle = ChatGLMMultiTurnMessages("http://111.111.111.111:12345")
55 |     tools = [
56 |         {
57 |             "type": "function",
58 |             "function": {
59 |                 "name": "get_current_weather",
60 |                 "description": "Get the current weather in a given location",
61 |                 "parameters": {
62 |                     "type": "object",
63 |                     "properties": {
64 |                         "location": {
65 |                             "type": "string",
66 |                             "description": "The city and state, e.g. San Francisco, CA"
67 |                         },
68 |                         "unit": {
69 |                             "type": "string",
70 |                             "enum": [
71 |                                 "celsius",
72 |                                 "fahrenheit"
73 |                             ]
74 |                         }
75 |                     },
76 |                     "required": [
77 |                         "location"
78 |                     ]
79 |                 }
80 |             }
81 |         }
82 |     ]
83 |     messages = [
84 |         {
85 |             "role": "user",
86 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
87 |         }
88 |     ]
89 |     content, tool_calls = handle.request_funcall(messages, tools)
90 |     print(content)
91 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     main()
96 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/fcm_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import uuid
  3 | 
  4 | from .basic_handle  import SimulateMultiTurnMessages
  5 | from .tools import remove_messages
  6 | 
  7 | 
  8 | class FCMMultiTurnMessages(SimulateMultiTurnMessages):
  9 |     def __init__(self, model_url, is_english=False):
 10 |         super().__init__(model_url, is_english)
 11 |         self.model_messages = []
 12 |         self.timeout = 300
 13 |     
 14 |     def preprocess_to_simple(self, messages):
 15 |         if len(self.model_messages) == 0:
 16 |             self.model_messages = remove_messages(messages, is_english=self.is_english)
 17 |         else:
 18 |             if messages[-1]["role"] == "user":
 19 |                 self.model_messages += remove_messages(
 20 |                     [{"role":"user", "content": messages[-1]["content"]}],
 21 |                     is_english=self.is_english
 22 |                 )
 23 |             elif messages[-1]["role"] == "tool":
 24 |                 # messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"})
 25 |                 assistant = None
 26 |                 observation = []
 27 |                 idx = -1
 28 |                 while True or idx > -len(messages):
 29 |                     if messages[idx]["role"] == "assistant":
 30 |                         assistant = messages[idx]
 31 |                         break
 32 |                     if messages[idx]["role"] == "tool":
 33 |                         observation.append(messages[idx])
 34 |                     idx -= 1
 35 |                 idmap_observation = {}
 36 |                 assert len(observation) == len(assistant["tool_calls"])
 37 |                 for tool_call in assistant["tool_calls"]:
 38 |                     idmap_observation[tool_call["id"]] = tool_call["function"]["name"]
 39 |                 for obser in observation:
 40 |                     assert obser["tool_call_id"] in idmap_observation
 41 |                     self.model_messages.append({
 42 |                         "role": "tool", "name": idmap_observation[obser["tool_call_id"]],
 43 |                         "content": obser["content"]
 44 |                     })
 45 |         return self.model_messages
 46 | 
 47 |     def post_process_tool_call(self, answer):
 48 |         text = None
 49 |         tool_calls = None
 50 |         try:
 51 |             if "<function=" in answer and "</function>" in answer:
 52 |                 try:
 53 |                     # <function=get_current_weather>{"location": "Boston, MA"}</function><function=get_current_weather>{"location": "San Francisco, CA"}</function>
 54 |                     self.model_messages.append({"role": "assistant", "content": answer})
 55 |                     text = answer
 56 |                     tool_calls = None
 57 |                     assert answer.count("<function=") == answer.count("</function>")
 58 |                     answer = answer.split("</function>")
 59 |                     for tc in answer:
 60 |                         if not tc.startswith("<function="):
 61 |                             continue
 62 |                         name = tc[:tc.find(">{")].replace("<function=", "")
 63 |                         argument = json.loads(tc[tc.find(">{")+1:])
 64 |                         if tool_calls is None:
 65 |                             tool_calls = []
 66 |                         tool_calls.append({"id": str(uuid.uuid4()), "function": {
 67 |                             "name": name, "arguments": argument
 68 |                         }})
 69 |                 except:
 70 |                     pass
 71 |             else:
 72 |                 self.model_messages.append({"role": "assistant", "content": answer})
 73 |                 text = answer
 74 |                 tool_calls = None
 75 |             return text, tool_calls
 76 |         except Exception as e:
 77 |             print(f"error: {e}")
 78 |             return None, None
 79 | 
 80 | 
 81 | def main():
 82 |     handle = FCMMultiTurnMessages("http://111.111.111.111:12345")
 83 |     tools = [
 84 |         {
 85 |             "type": "function",
 86 |             "function": {
 87 |                 "name": "get_current_weather",
 88 |                 "description": "Get the current weather in a given location",
 89 |                 "parameters": {
 90 |                     "type": "object",
 91 |                     "properties": {
 92 |                         "location": {
 93 |                             "type": "string",
 94 |                             "description": "The city and state, e.g. San Francisco, CA"
 95 |                         },
 96 |                         "unit": {
 97 |                             "type": "string",
 98 |                             "enum": [
 99 |                                 "celsius",
100 |                                 "fahrenheit"
101 |                             ]
102 |                         }
103 |                     },
104 |                     "required": [
105 |                         "location"
106 |                     ]
107 |                 }
108 |             }
109 |         }
110 |     ]
111 |     messages = [
112 |         {
113 |             "role": "user",
114 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
115 |         }
116 |     ]
117 |     content, tool_calls = handle.request_funcall(messages, tools)
118 |     print(content)
119 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
120 | 
121 | 
122 | 
123 | if __name__ == "__main__":
124 |     main()


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/gorilla_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import uuid
  3 | import ast
  4 | import requests
  5 | 
  6 | from .basic_handle  import SimulateMultiTurnMessages
  7 | from c3_bench.bench_test.utils import get_keywords
  8 | 
  9 |         
 10 | def parse_python_function_call(call_str):
 11 |     tree = ast.parse(call_str)
 12 |     expr = tree.body[0]
 13 | 
 14 |     call_node = expr.value
 15 |     function_name = (
 16 |         call_node.func.id
 17 |         if isinstance(call_node.func, ast.Name)
 18 |         else str(call_node.func)
 19 |     )
 20 | 
 21 |     parameters = {}
 22 |     noNameParam = []
 23 | 
 24 |     # Process positional arguments
 25 |     for arg in call_node.args:
 26 |         noNameParam.append(get_keywords(arg))
 27 | 
 28 |     # Process keyword arguments
 29 |     for kw in call_node.keywords:
 30 |         parameters[kw.arg] = get_keywords(kw.value)
 31 | 
 32 |     if noNameParam:
 33 |         parameters["None"] = noNameParam
 34 |         
 35 |     function_dict = {"name": function_name, "arguments": parameters}
 36 |     return function_dict
 37 | 
 38 | 
 39 | FN_CALL_DELIMITER = "<<function>>"
 40 | 
 41 | 
 42 | def strip_function_calls(content):
 43 |     """
 44 |     Split the content by the function call delimiter and remove empty strings
 45 |     """
 46 |     return [element.strip() for element in content.split(FN_CALL_DELIMITER)[1:] if element.strip()]
 47 | 
 48 | 
 49 | def parse_function_call(call):
 50 |     """
 51 |     This is temporary. The long term solution is to union all the 
 52 |     types of the parameters from the user's input function definition,
 53 |     and check which language is a proper super set of the union type.
 54 |     """
 55 |     try:
 56 |         return parse_python_function_call(call)
 57 |     except Exception as e:
 58 |         print(f"error: {e}")
 59 |         return None
 60 | 
 61 | 
 62 | def format_response(response):
 63 |     """
 64 |     Formats the response from the OpenFunctions model.
 65 | 
 66 |     Parameters:
 67 |     - response (str): The response generated by the LLM.
 68 | 
 69 |     Returns:
 70 |     - str: The formatted response.
 71 |     - dict: The function call(s) extracted from the response.
 72 | 
 73 |     """
 74 |     function_call_dicts = None
 75 |     try:
 76 |         response = strip_function_calls(response)
 77 |         # Parallel function calls returned as a str, list[dict]
 78 |         if len(response) > 1:
 79 |             function_call_dicts = []
 80 |             for function_call in response:
 81 |                 parse_function_call_dict = parse_function_call(function_call)
 82 |                 if parse_function_call_dict is not None:
 83 |                     function_call_dicts.append(parse_function_call_dict)
 84 |             response = ", ".join(response)
 85 |         # Single function call returned as a str, dict
 86 |         else:
 87 |             function_call_dicts = [parse_function_call(response[0])]
 88 |             response = response[0]
 89 |     except Exception as e:
 90 |         # Just faithfully return the generated response str to the user
 91 |         print(f"error: {e}")
 92 |         pass
 93 | 
 94 |     return response, function_call_dicts
 95 | 
 96 | 
 97 | class GorillaMultiTurnMessages(SimulateMultiTurnMessages):
 98 |     def __init__(self, model_url, is_english=False):
 99 |         super().__init__(model_url, is_english)
100 |         self.model_messages = []
101 |     
102 |     def get_prompt(self, user_query: str, history, functions: list = [], env_info=None) -> str:
103 |         """
104 |         Generates a conversation prompt based on the user's query and a list of functions.
105 | 
106 |         Parameters:
107 |         - user_query (str): The user's query.
108 |         - functions (list): A list of functions to include in the prompt.
109 | 
110 |         Returns:
111 |         - str: The formatted conversation prompt.
112 |         """
113 |         system = "You are an AI programming assistant, utilizing the Gorilla LLM model, developed by Gorilla LLM, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer."
114 |         if env_info is not None:
115 |             env_info = self.add_date_to_message([{"role":"", "content":""}], env_info)[0]["content"]
116 |             assert env_info.startswith("当前日期") or env_info.startswith("Current Date")
117 |             system = "\n\n" + env_info
118 |         history = "\n".join([f"<<{m['role']}>>" + m["content"] + f"<<{m['role']}>>" for m in history])
119 |         if len(functions) == 0:
120 |             return f"{system}\n### Instruction: <<history>> {history}\n\n<<question>> {user_query}\n### Response: "
121 |         functions_string = json.dumps(functions)
122 | 
123 |         return f"{system}\n### Instruction:  <<history>> {history}\n\n<<function>>{functions_string}\n<<question>>{user_query}\n### Response: "
124 | 
125 |     def request_funcall(self, messages, tools, env_info=None):
126 |         """
127 |         Request the function call(s) from the model.
128 | 
129 |         Parameters:
130 |         - messages (list): The messages in the conversation.
131 |         - tools (list): The list of tools to include in the prompt.
132 | 
133 |         Returns:
134 |         - str: The formatted response.
135 |         - list: The function call(s) extracted from the response.
136 |         """
137 |         url = self.model_url
138 |         headers = {'Content-Type': 'application/json'}
139 |         history = [_ for _ in messages if _["role"] != "system"]
140 |         query = [_ for _ in messages if _["role"] == "user"][-1]["content"]
141 |         data = {
142 |             'messages': [{
143 |                 "content": self.get_prompt(query, history, tools, env_info), "role": "user"
144 |             }],
145 |         }
146 | 
147 |         text = None
148 |         tool_calls = None
149 |         try:
150 |             response = requests.post(url, headers=headers, json=data, timeout=90)
151 |             if response.status_code == 200:
152 |                 result = response.json()
153 |                 text = result["answer"].strip()
154 |                 _, tool_calls = format_response(result["answer"])
155 |                 if (
156 |                     tool_calls is not None 
157 |                     and len(tool_calls) > 0 
158 |                     and len([_ for _ in tool_calls if _ is not None]) > 0
159 |                     and type(tool_calls[0]["name"]) == str
160 |                 ):
161 |                     tool_calls = [{"id":str(uuid.uuid4()), "function":_} for _ in tool_calls if _ is not None]
162 |                 else:
163 |                     tool_calls = None
164 |         except Exception as e:
165 |             print(f"error: {e}")
166 | 
167 |         return text, tool_calls
168 | 
169 | 
170 | def main():
171 |     handle = GorillaMultiTurnMessages("http://111.111.111.111:12345")
172 |     tools = [
173 |         {
174 |             "type": "function",
175 |             "function": {
176 |                 "name": "get_current_weather",
177 |                 "description": "Get the current weather in a given location",
178 |                 "parameters": {
179 |                     "type": "object",
180 |                     "properties": {
181 |                         "location": {
182 |                             "type": "string",
183 |                             "description": "The city and state, e.g. San Francisco, CA"
184 |                         },
185 |                         "unit": {
186 |                             "type": "string",
187 |                             "enum": [
188 |                                 "celsius",
189 |                                 "fahrenheit"
190 |                             ]
191 |                         }
192 |                     },
193 |                     "required": [
194 |                         "location"
195 |                     ]
196 |                 }
197 |             }
198 |         }
199 |     ]
200 |     messages = [
201 |         {
202 |             "role": "user",
203 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
204 |         }
205 |     ]
206 |     content, tool_calls = handle.request_funcall(messages, tools)
207 |     print(content)
208 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
209 |     print("==="*10)
210 | 
211 |     tools = [
212 |         {
213 |             "name": "thermodynamics.calculate_boiling_point",
214 |             "description": "Calculate the boiling point of a given substance at a specific pressure.",
215 |             "parameters": {
216 |                 "type": "object",
217 |                 "properties": {
218 |                     "substance": {
219 |                         "type": "string",
220 |                         "description": "The substance for which to calculate the boiling point."
221 |                     },
222 |                     "pressure": {
223 |                         "type": "number",
224 |                         "description": "The pressure at which to calculate the boiling point."
225 |                     },
226 |                     "unit": {
227 |                         "type": "string",
228 |                         "description": "The unit of the pressure. Default is 'kPa'."
229 |                     }
230 |                 },
231 |                 "required": [
232 |                     "substance",
233 |                     "pressure"
234 |                 ]
235 |             }
236 |         }
237 |     ]
238 |     messages = [
239 |         {
240 |             "role": "user",
241 |             "content": "What is the freezing point of water at a pressure of 10 kPa?"
242 |         }
243 |     ]
244 |     content, tool_calls = handle.request_funcall(messages, tools)
245 |     print(content)
246 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
247 |     print("==="*10)
248 | 
249 |     res = "<<function>>getActivityReport(user_id=456, include_details=true, date_range={'start_date': '2023-04-01', 'end_date': '2023-04-30'})"
250 |     _, tool_calls = format_response(res)
251 |     print(_)
252 |     print(tool_calls)
253 |     print(tool_calls is not None)
254 |     print(len(tool_calls) > 0)
255 |     print(len([_ for _ in tool_calls if _ is not None]) > 0)
256 |     print(type(tool_calls[0]["name"]) == str)
257 | 
258 | 
259 | if __name__ == "__main__":
260 |     main()
261 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/hammer_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import pdb
  3 | import uuid
  4 | 
  5 | from .basic_handle import SimulateMultiTurnMessages
  6 | from .tools import remove_messages
  7 | 
  8 | 
  9 | class HammerMultiTurnMessages(SimulateMultiTurnMessages):
 10 |     def __init__(self, model_url, is_english=False):
 11 |         super().__init__(model_url, is_english)
 12 |         self.model_messages = []
 13 |         self.timeout = 300
 14 | 
 15 |     def process_planner_tool(self, messages):
 16 |         new_messages = []
 17 |         for i, message in enumerate(messages):
 18 |             role = message["role"]
 19 |             tool_calls = message.get("tool_calls", None)
 20 |             function_calls = []
 21 |             if tool_calls:
 22 |                 for tool_call in tool_calls:
 23 |                     function = tool_call["function"]
 24 |                     name = function["name"]
 25 |                     arguments = function["arguments"]
 26 |                     function_calls.append({"name": name, "arguments": arguments})
 27 |                 # pdb.set_trace()
 28 |                 function_calls = f"```\n{json.dumps(function_calls, ensure_ascii=False)}\n```"
 29 |                 new_messages.append({"role": "assistant", "content": function_calls})
 30 |             elif role == "tool":
 31 |                 functions = messages[i - 1]["tool_calls"]
 32 |                 observations = json.loads(message["content"])
 33 |                 assert len(observations) == len(functions)
 34 |                 ret_observation = []
 35 |                 for function, observation in zip(functions, observations):
 36 |                     ret_observation.append({
 37 |                         "name": function["function"]["name"],
 38 |                         "results": observation
 39 |                     })
 40 |                 new_messages.append({"role": "tool", "content": json.dumps(ret_observation, ensure_ascii=False)})
 41 |             else:
 42 |                 new_messages.append(message)
 43 |         return new_messages
 44 | 
 45 |     def preprocess_to_simple(self, messages):
 46 |         # pdb.set_trace()
 47 |         if len(self.model_messages) == 0:
 48 |             messages = remove_messages(messages, is_english=self.is_english)
 49 |             self.model_messages = self.process_planner_tool(messages)
 50 |         else:
 51 |             if messages[-1]["role"] == "user":
 52 |                 self.model_messages += remove_messages(
 53 |                     [{"role": "user", "content": messages[-1]["content"]}],
 54 |                     is_english=self.is_english
 55 |                 )
 56 |             elif messages[-1]["role"] == "tool":
 57 |                 observations = json.loads(messages[-1]["content"])
 58 |                 functions = messages[-2]["tool_calls"]
 59 |                 assert len(observations) == len(functions)
 60 |                 ret_observation = []
 61 |                 for function, observation in zip(functions, observations):
 62 |                     ret_observation.append({
 63 |                         "name": function["function"]["name"],
 64 |                         "results": observation
 65 |                     })
 66 |                 self.model_messages.append({"role": "user", "content": json.dumps(ret_observation)})
 67 | 
 68 |         return self.model_messages
 69 | 
 70 |     def parameters2arguments(self, function_dict):
 71 |         return {
 72 |             "name": function_dict["name"],
 73 |             "arguments": function_dict["parameters"] if "parameters" in function_dict else function_dict["arguments"]
 74 |         }
 75 | 
 76 |     def post_process_tool_call(self, answer):
 77 |         text = None
 78 |         tool_calls = None
 79 |         try:
 80 |             if "```\n[{\"name\"" in answer:
 81 |                 try:
 82 |                     # ```\n[{"name": "get_current_weather", "arguments": {"location": "Boston"}}, {"name": "get_current_weather", "arguments": {"location": "San Francisco"}}]\n```
 83 |                     text = answer
 84 |                     tool_calls = json.loads(answer[len("```"): -len("\n```")])
 85 |                     if type(tool_calls) == dict:
 86 |                         tool_calls = [{
 87 |                             "id": str(uuid.uuid4()), "function": self.parameters2arguments(tool_calls)
 88 |                         }]
 89 |                     elif type(tool_calls) == list:
 90 |                         tool_calls = [
 91 |                             {"id": str(uuid.uuid4()), "function": self.parameters2arguments(_)}
 92 |                             for _ in tool_calls
 93 |                         ]
 94 |                     self.model_messages.append({"role": "assistant", "content": answer})
 95 |                 except Exception as e:
 96 |                     print(f"process error: {e}")
 97 |                     pass
 98 |             else:
 99 |                 self.model_messages.append({"role": "assistant", "content": answer})
100 |                 text = "[model doesnt choose function(Manual placeholder)]"
101 |                 tool_calls = None
102 | 
103 |             return text, tool_calls
104 | 
105 |         except Exception as e:
106 |             print(f"error: {e}")
107 |             return None, None
108 | 
109 | 
110 | def main():
111 |     handle = HammerMultiTurnMessages("http://111.111.111.111:12345")
112 |     tools = [
113 |         {
114 |             "type": "function",
115 |             "function": {
116 |                 "name": "get_current_weather",
117 |                 "description": "Get the current weather in a given location",
118 |                 "parameters": {
119 |                     "type": "object",
120 |                     "properties": {
121 |                         "location": {
122 |                             "type": "string",
123 |                             "description": "The city and state, e.g. San Francisco, CA"
124 |                         },
125 |                         "unit": {
126 |                             "type": "string",
127 |                             "enum": [
128 |                                 "celsius",
129 |                                 "fahrenheit"
130 |                             ]
131 |                         }
132 |                     },
133 |                     "required": [
134 |                         "location"
135 |                     ]
136 |                 }
137 |             }
138 |         }
139 |     ]
140 |     messages = [
141 |         {
142 |             "role": "user",
143 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
144 |         }
145 |     ]
146 |     content, tool_calls = handle.request_funcall(messages, tools)
147 |     print(content)
148 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
149 | 
150 | 
151 | if __name__ == "__main__":
152 |     main()
153 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/handles.py:
--------------------------------------------------------------------------------
 1 | from .toolace_handle import ToolACEMultiTurnMessages
 2 | from .xlam_handle import XLAMMultiTurnMessages
 3 | from .xlam2_handle import XLAM2MultiTurnMessages
 4 | from .gorilla_handle import GorillaMultiTurnMessages
 5 | from .api_handle import APIMultiTurnMessages
 6 | from .llama_handle import LlamaMultiTurnMessages
 7 | from .chatglm_handle import ChatGLMMultiTurnMessages
 8 | from .hammer_handle import HammerMultiTurnMessages
 9 | from .watt_handle import WattMultiTurnMessages
10 | from .fcm_handle import FCMMultiTurnMessages
11 | from .hunyuan_handle import HunyuanMultiTurnMessages
12 | 
13 | 
14 | tool_handle_map = {
15 |     # hunyuan
16 |     "hunyuan-turbos-latest": (APIMultiTurnMessages, False),
17 |     "hunyuan-a13b": (HunyuanMultiTurnMessages, False),
18 |     # toolace
19 |     "toolace": (ToolACEMultiTurnMessages, False),
20 |     "toolace2": (ToolACEMultiTurnMessages, False),
21 |     # xlam
22 |     "xlam": (XLAMMultiTurnMessages, False),
23 |     "xlam2-70b": (XLAM2MultiTurnMessages, False),
24 |     "xlam2-32b": (XLAM2MultiTurnMessages, False),
25 |     "xlam2-8b": (XLAM2MultiTurnMessages, False),
26 |     "xlam2-3b": (XLAM2MultiTurnMessages, False),
27 |     "xlam2-1b": (XLAM2MultiTurnMessages, False),
28 |     # other
29 |     "gorilla": (GorillaMultiTurnMessages, False),
30 |     "chatglm": (ChatGLMMultiTurnMessages, False),
31 |     "fcm3.1": (FCMMultiTurnMessages, True),
32 |     # Watt
33 |     "watt70b": (WattMultiTurnMessages, True),
34 |     "watt8b": (WattMultiTurnMessages, True),
35 |     # Hammer
36 |     "hammer7b": (HammerMultiTurnMessages, False),
37 |     "hammer3b": (HammerMultiTurnMessages, False),
38 |     "hammer1.5b": (HammerMultiTurnMessages, False),
39 |     "hammer0.5b": (HammerMultiTurnMessages, False),
40 |     # LLAMA
41 |     "llama70b": (LlamaMultiTurnMessages, True),
42 |     "llama8b": (LlamaMultiTurnMessages, True),
43 |     "llama3b": (LlamaMultiTurnMessages, True),
44 |     "llama1b": (LlamaMultiTurnMessages, True)
45 | }
46 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/hunyuan_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | from openai import OpenAI
  5 | 
  6 | 
  7 | class HunyuanMultiTurnMessages:
  8 |     def __init__(self, model_url, is_english=False):
  9 |         self.model = os.getenv("MODEL")
 10 |         self.model_url = model_url
 11 |         self.model_messages = []
 12 |         self.remove_flag = False
 13 |         self.client = OpenAI(
 14 |             api_key="EMPTY",
 15 |             base_url=f"http://{self.model_url}/v1",
 16 |         )
 17 | 
 18 |     def request_model(self, model, messages, tools, env_info):
 19 |         text, tool_calls = None, None
 20 |         messages = [{"role": "system", "content": f"Current time: {env_info}"}] + messages
 21 |         resp = None
 22 |         try:
 23 |             while True:
 24 |                 response = self.client.chat.completions.create(
 25 |                     model=model,
 26 |                     messages=messages,
 27 |                     stream=False,
 28 |                     temperature=0.5,
 29 |                     top_p=0.7,
 30 |                     tools=tools,
 31 |                     max_tokens=8192,
 32 |                     extra_body={
 33 |                         "repetition_penalty": 1.05,
 34 |                         "top_k": 20
 35 |                     },
 36 |                 )
 37 |                 response = response.model_dump()
 38 |                 text = response["choices"][0]["message"]["content"]
 39 |                 if "</think>" in text:
 40 |                     text = text[text.find("</think>") + len("</think>"):]
 41 |                 if "<answer>" in text and "</answer>" in text:
 42 |                     text = text[text.find("<answer>") + len("<answer>"):text.rfind("</answer>")]
 43 |                 if text.startswith("助手："):
 44 |                     text = text[len("助手："):].strip()
 45 |                 text = text.strip()
 46 |                 tool_calls = response["choices"][0]["message"]["tool_calls"]
 47 |                 if tool_calls is not None or text is not None:
 48 |                     break
 49 | 
 50 |         except Exception as e:
 51 |             print(f"resp: {resp.text if resp is not None else resp}")
 52 |             print(f"error: {e}")
 53 | 
 54 |         if text is None:
 55 |             print("request model error")
 56 | 
 57 |         return text, tool_calls
 58 | 
 59 |     def request_funcall(self, messages, tools, env_info=None):
 60 |         try:
 61 |             text, tool_calls = self.request_model(self.model, messages, tools, env_info)
 62 |         except Exception as e:
 63 |             raise ValueError(f"Error raised by inference endpoint: {e}")
 64 |         return text, tool_calls
 65 | 
 66 | 
 67 | def main():
 68 |     handle = HunyuanMultiTurnMessages("http://111.111.111.111:12345")
 69 |     tools = [
 70 |         {
 71 |             "type": "function",
 72 |             "function": {
 73 |                 "name": "get_current_weather",
 74 |                 "description": "Get the current weather in a given location",
 75 |                 "parameters": {
 76 |                     "type": "object",
 77 |                     "properties": {
 78 |                         "location": {
 79 |                             "type": "string",
 80 |                             "description": "The city and state, e.g. San Francisco, CA"
 81 |                         },
 82 |                         "unit": {
 83 |                             "type": "string",
 84 |                             "enum": [
 85 |                                 "celsius",
 86 |                                 "fahrenheit"
 87 |                             ]
 88 |                         }
 89 |                     },
 90 |                     "required": [
 91 |                         "location"
 92 |                     ]
 93 |                 }
 94 |             }
 95 |         }
 96 |     ]
 97 |     messages = [
 98 |         {
 99 |             "role": "user",
100 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
101 |         }
102 |     ]
103 |     content, tool_calls = handle.request_funcall(messages, tools)
104 |     print(content)
105 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
106 | 
107 | 
108 | if __name__ == "__main__":
109 |     main()
110 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/llama_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import uuid
  3 | 
  4 | from .basic_handle import SimulateMultiTurnMessages
  5 | from .tools import remove_messages
  6 | 
  7 | 
  8 | class LlamaMultiTurnMessages(SimulateMultiTurnMessages):
  9 |     def __init__(self, model_url, is_english=False):
 10 |         super().__init__(model_url, is_english)
 11 |         self.model_messages = []
 12 |         self.timeout = 300
 13 |         self.add_date = False
 14 | 
 15 |     def preprocess_to_simple(self, messages):
 16 |         if len(self.model_messages) == 0:
 17 |             self.model_messages = remove_messages(messages, is_english=self.is_english)
 18 |         else:
 19 |             if messages[-1]["role"] == "user":
 20 |                 self.model_messages += remove_messages(
 21 |                     [{"role": "user", "content": messages[-1]["content"]}],
 22 |                     is_english=self.is_english
 23 |                 )
 24 |             elif messages[-1]["role"] == "tool":
 25 |                 # messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"})
 26 |                 assistant = None
 27 |                 observation = []
 28 |                 idx = -1
 29 |                 while True or idx > -len(messages):
 30 |                     if messages[idx]["role"] == "assistant":
 31 |                         assistant = messages[idx]
 32 |                         break
 33 |                     if messages[idx]["role"] == "tool":
 34 |                         observation.append(messages[idx])
 35 |                     idx -= 1
 36 |                 idmap_observation = {}
 37 | 
 38 |                 assert len(observation) == len(assistant["tool_calls"])
 39 |                 for tool_call in assistant["tool_calls"]:
 40 |                     idmap_observation[tool_call["id"]] = tool_call["function"]["name"]
 41 | 
 42 |                 for obser in observation:
 43 |                     assert obser["tool_call_id"] in idmap_observation
 44 |                     self.model_messages.append({
 45 |                         "role": "tool", "name": idmap_observation[obser["tool_call_id"]],
 46 |                         "content": obser["content"]
 47 |                     })
 48 | 
 49 |         return self.model_messages
 50 | 
 51 |     def parameters2arguments(self, function_dict):
 52 |         return {
 53 |             "name": function_dict["name"],
 54 |             "arguments": function_dict["parameters"] if "parameters" in function_dict else function_dict["arguments"]
 55 |         }
 56 | 
 57 |     def post_process_tool_call(self, answer):
 58 |         text = None
 59 |         tool_calls = None
 60 |         try:
 61 |             if "function" in answer and "name" in answer and "parameters" in answer:
 62 |                 try:
 63 |                     # messages.append({"role": "assistant", "tool_calls": [{"type": "function", "function": tool_call}]})
 64 |                     text = answer
 65 |                     tool_calls = json.loads(answer)
 66 |                     if type(tool_calls) == dict:
 67 |                         tool_calls = [{
 68 |                             "id": str(uuid.uuid4()), "function": self.parameters2arguments(tool_calls)
 69 |                         }]
 70 |                     elif type(tool_calls) == list:
 71 |                         tool_calls = [
 72 |                             {"id": str(uuid.uuid4()), "function": self.parameters2arguments(json.loads(_))}
 73 |                             for _ in tool_calls
 74 |                         ]
 75 |                     self.model_messages.append({
 76 |                         "role": "assistant", "tool_calls": [
 77 |                             {"type": "function", "function": {
 78 |                                 key: tool_call["function"][key] for key in ["name", "arguments"]
 79 |                             }}
 80 |                             for tool_call in tool_calls
 81 |                         ]
 82 |                     })
 83 |                 except Exception as e:
 84 |                     print(f"process error: {e}")
 85 |                     pass
 86 |             else:
 87 |                 self.model_messages.append({"role": "assistant", "content": answer})
 88 |                 text = answer
 89 |                 tool_calls = None
 90 | 
 91 |             return text, tool_calls
 92 | 
 93 |         except Exception as e:
 94 |             print(f"error: {e}")
 95 |             return None, None
 96 | 
 97 | 
 98 | def main():
 99 |     handle = LlamaMultiTurnMessages("http://111.111.111.111:12345")
100 |     tools = [
101 |         {
102 |             "type": "function",
103 |             "function": {
104 |                 "name": "get_current_weather",
105 |                 "description": "Get the current weather in a given location",
106 |                 "parameters": {
107 |                     "type": "object",
108 |                     "properties": {
109 |                         "location": {
110 |                             "type": "string",
111 |                             "description": "The city and state, e.g. San Francisco, CA"
112 |                         },
113 |                         "unit": {
114 |                             "type": "string",
115 |                             "enum": [
116 |                                 "celsius",
117 |                                 "fahrenheit"
118 |                             ]
119 |                         }
120 |                     },
121 |                     "required": [
122 |                         "location"
123 |                     ]
124 |                 }
125 |             }
126 |         }
127 |     ]
128 |     messages = [
129 |         {
130 |             "role": "user",
131 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
132 |         }
133 |     ]
134 |     content, tool_calls = handle.request_funcall(messages, tools)
135 |     print(content)
136 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
137 | 
138 | 
139 | if __name__ == "__main__":
140 |     main()
141 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/toolace_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import pdb
  3 | import uuid
  4 | import ast
  5 | import requests
  6 | 
  7 | from .basic_handle import SimulateMultiTurnMessages
  8 | from .tools import remove_messages, AstVisitor, create_ast_value, generate_code
  9 | 
 10 | 
 11 | class DoubleQuoteStrTransformer(ast.NodeTransformer):
 12 |     def visit_Str(self, node):
 13 |         # 自定义一个类属性来指示是否使用双引号
 14 |         node.use_double_quotes = True
 15 |         return node
 16 | 
 17 | 
 18 | class ToolACEMultiTurnMessages(SimulateMultiTurnMessages):
 19 |     def __init__(self, model_url, is_english=False):
 20 |         super().__init__(model_url, is_english)
 21 |         self.model_messages = []
 22 | 
 23 |     def process_planner_tool(self, messages):
 24 |         new_messages = []
 25 |         for i, message in enumerate(messages):
 26 |             role = message["role"]
 27 |             tool_calls = message.get("tool_calls", None)
 28 |             function_calls = []
 29 |             if tool_calls:
 30 |                 for tool_call in tool_calls:
 31 |                     function = tool_call["function"]
 32 |                     name = function["name"]
 33 |                     arguments = function["arguments"]
 34 | 
 35 |                     func_call = ast.Call(
 36 |                         func=ast.Name(id=name, ctx=ast.Load()),
 37 |                         args=[],
 38 |                         keywords=[
 39 |                             ast.keyword(arg=k, value=create_ast_value(v)) for k, v in arguments.items()
 40 |                         ]
 41 |                     )
 42 |                     function_calls.append(func_call)
 43 | 
 44 |                 list_node = ast.List(elts=function_calls, ctx=ast.Load())
 45 |                 module = ast.Module(body=[ast.Expr(value=list_node)], type_ignores=[])
 46 |                 ast_tool_calls = generate_code(module)
 47 |                 # pdb.set_trace()
 48 |                 new_messages.append({"role": "assistant", "content": ast_tool_calls})
 49 |             elif role == "tool":
 50 |                 functions = messages[i - 1]["tool_calls"]
 51 |                 observations = json.loads(message["content"])
 52 |                 assert len(observations) == len(functions)
 53 |                 ret_observation = []
 54 |                 for function, observation in zip(functions, observations):
 55 |                     ret_observation.append({
 56 |                         "name": function["function"]["name"],
 57 |                         "results": observation
 58 |                     })
 59 |                 new_messages.append({"role": "tool", "content": json.dumps(ret_observation, ensure_ascii=False)})
 60 |             else:
 61 |                 new_messages.append(message)
 62 |         return new_messages
 63 | 
 64 |     def preprocess_to_simple(self, messages):
 65 |         if len(self.model_messages) == 0:
 66 |             messages = remove_messages(messages, is_english=self.is_english)
 67 |             self.model_messages = self.process_planner_tool(messages)
 68 |         else:
 69 |             if messages[-1]["role"] == "user":
 70 |                 self.model_messages.append({"role": "user", "content": messages[-1]["content"].replace("用户：", "").replace("User:", "").strip()})
 71 |             elif messages[-1]["role"] == "tool":
 72 |                 observations = json.loads(messages[-1]["content"])
 73 |                 functions = messages[-2]["tool_calls"]
 74 |                 assert len(observations) == len(functions)
 75 |                 ret_observation = []
 76 |                 for function, observation in zip(functions, observations):
 77 |                     ret_observation.append({
 78 |                         "name": function["function"]["name"],
 79 |                         "results": observation
 80 |                     })
 81 |                 self.model_messages.append({"role": "tool", "content": json.dumps(ret_observation, ensure_ascii=False)})
 82 | 
 83 |         return self.model_messages
 84 | 
 85 |     def post_process_tool_call(self, answer):
 86 |         try:
 87 |             self.model_messages.append({"role": "assistant", "content": answer})
 88 |             if answer.startswith("[") and answer.endswith("]"):
 89 |                 astor = AstVisitor()
 90 |                 astor.visit(ast.parse(answer))
 91 |                 answer = astor.function
 92 |                 text = "use {} to solve user problem".format(", ".join([_["name"] for _ in answer]))
 93 |                 tool_calls = [{"id": str(uuid.uuid4()), "function": _} for _ in answer]
 94 |             else:
 95 |                 text = answer
 96 |                 tool_calls = None
 97 | 
 98 |             return text, tool_calls
 99 | 
100 |         except Exception as e:
101 |             print(f"error: {e}")
102 |             return None, None
103 | 
104 |     def request_funcall(self, messages, tools, env_info=None):
105 |         url = self.model_url
106 |         headers = {"Content-Type": "application/json"}
107 |         data = {
108 |             "messages": self.add_date_to_messsage_user(self.preprocess_to_simple(messages), env_info),
109 |             "tools": tools,
110 |             "date": self.add_weekday_date(env_info)
111 |         }
112 | 
113 |         text = None
114 |         tool_calls = None
115 |         try:
116 |             response = requests.post(url, headers=headers, json=data, timeout=self.timeout)
117 |             if response.status_code == 200:
118 |                 result = response.json()
119 |                 answer = result["answer"]
120 |                 text, tool_calls = self.post_process_tool_call(answer)
121 |         except Exception as e:
122 |             print(f"error: {e}")
123 |             text = None
124 |             tool_calls = None
125 | 
126 |         return text, tool_calls
127 | 
128 | 
129 | def main():
130 |     handle = ToolACEMultiTurnMessages("http://111.111.111.111:12345")
131 |     tools = [
132 |         {
133 |             "type": "function",
134 |             "function": {
135 |                 "name": "get_current_weather",
136 |                 "description": "Get the current weather in a given location",
137 |                 "parameters": {
138 |                     "type": "object",
139 |                     "properties": {
140 |                         "location": {
141 |                             "type": "string",
142 |                             "description": "The city and state, e.g. San Francisco, CA"
143 |                         },
144 |                         "unit": {
145 |                             "type": "string",
146 |                             "enum": [
147 |                                 "celsius",
148 |                                 "fahrenheit"
149 |                             ]
150 |                         }
151 |                     },
152 |                     "required": [
153 |                         "location"
154 |                     ]
155 |                 }
156 |             }
157 |         }
158 |     ]
159 |     messages = [
160 |         {
161 |             "role": "user",
162 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
163 |         }
164 |     ]
165 |     content, tool_calls = handle.request_funcall(messages, tools)
166 |     print(content)
167 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
168 | 
169 | 
170 | if __name__ == "__main__":
171 |     main()
172 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/tools.py:
--------------------------------------------------------------------------------
  1 | import ast
  2 | import json
  3 | import traceback
  4 | 
  5 | from c3_bench.bench_test.utils import get_keywords
  6 | 
  7 | 
  8 | class AstVisitor(ast.NodeVisitor):
  9 |     def __init__(self):
 10 |         self.function = []
 11 | 
 12 |     def visit_Call(self, node):
 13 |         # self.function_name, self.args = parse_string_to_function(node)
 14 |         function = {}
 15 |         if isinstance(node.func, ast.Name):
 16 |             function["name"] = node.func.id
 17 |         elif isinstance(node.func, ast.Attribute):
 18 |             function["name"] = node.func.attr
 19 | 
 20 |         function["arguments"] = {}
 21 |         for keyword in node.keywords:
 22 |             function["arguments"][keyword.arg] = get_keywords(keyword.value)
 23 |         self.function.append(function)
 24 | 
 25 |     def clear(self):
 26 |         self.function = []
 27 | 
 28 | 
 29 | english_prompt = '''
 30 | You are an expert in function composition. You will be given a question and a set of possible functions. Based on the question, you need to make one or more function/tool calls to achieve the purpose.
 31 | If none of the functions can be used, please directly reply to the user in natural language, starting with "Assistant:".
 32 | If the given question lacks the parameters required by the function, please ask the user for the necessary information in natural language, starting with "Assistant:".
 33 | If the result of the call is already sufficient to answer the user's question, please summarize the historical results and reply to the user in natural language, starting with "Assistant:".
 34 | You should only return function calls in the tool call section. If you decide to make any function calls, you must format them as <tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>. You should not include any other text in your reply. The following is a list of functions you can call, in JSON format.
 35 | 
 36 | {{{tools}}}
 37 | 
 38 | If you decide to return function calls, please format them as <tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>, without including any other text.
 39 | Otherwise, please refer to the three cases mentioned at the beginning and reply starting with "Assistant:".
 40 | 
 41 | Current time: {{{env_info}}}'''.strip("\n")
 42 | 
 43 | 
 44 | def tool_call_prompt(messages, tools, date_time):
 45 |     system_prompt = english_prompt.replace(
 46 |         "{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=2)
 47 |     ).replace(
 48 |         "{{{env_info}}}", date_time
 49 |     )
 50 |     new_messages = [{"role": "system", "content": system_prompt}]
 51 |     for message in messages:
 52 |         role = message["role"]
 53 |         content = message["content"]
 54 |         if role == "user":
 55 |             new_messages.append({"role": "user", "content": content})
 56 |         elif role == "assistant":
 57 |             tool_calls = message.get("tool_calls", None)
 58 |             if tool_calls and len(message["tool_calls"]) != 0:
 59 |                 new_tool_calls = []
 60 |                 for tool_call in tool_calls:
 61 |                     function = tool_call["function"]
 62 |                     new_tool_calls.append(function)
 63 |                 new_messages.append({"role": "assistant", "content":
 64 |                     f"<tool_calls>{json.dumps(new_tool_calls, ensure_ascii=False)}</tool_calls>"})
 65 |             else:
 66 |                 new_messages.append({"role": "assistant", "content": f"Assistant:{content}"})
 67 |         elif role == "tool":
 68 |             new_messages.append({"role": "user", "content": f"<tool_response>{content}</tool_response>"})
 69 |         elif role == "system":
 70 |             continue
 71 |         else:
 72 |             raise NotImplementedError
 73 |     return new_messages
 74 | 
 75 | 
 76 | def remove_messages(messages, is_english=False):
 77 |     new_messages = []
 78 |     try:
 79 |         role = "user"
 80 |         for m in messages:
 81 |             assert (
 82 |                 m["role"] == "assistant"
 83 |                 and role == "assistant"
 84 |             ) or (
 85 |                 m["role"] in ["user", "tool"]
 86 |                 and role in ["user", "tool"]
 87 |             )
 88 |             role = "assistant" if role in ["user", "tool"] else "user"
 89 |             if is_english:
 90 |                 colon_idx = m["content"].find(":")
 91 |                 if (
 92 |                     colon_idx != -1 and
 93 |                     m["content"][:colon_idx].lower() in [
 94 |                         "ai", "ai agent", "user", "ai agent assistant", "planner", "observation", "tool"
 95 |                     ]
 96 |                 ):
 97 |                     m['content'] = m["content"][colon_idx+1:]
 98 |             else:
 99 |                 colon_idx = m["content"].find("：")
100 |                 if (
101 |                     colon_idx != -1 and
102 |                     m["content"][:colon_idx] in [
103 |                         "用户", "AI Agent助手", "AI Agent", "Planner", "Observation", "Tool"
104 |                     ]
105 |                 ):
106 |                     m['content'] = m["content"][colon_idx+1:]
107 |             new_messages.append(m)
108 |     except Exception as e:
109 |         print(f"error: {e}")
110 |         traceback.print_exc()
111 |     return new_messages
112 | 
113 | 
114 | def create_ast_value(value):
115 |     if isinstance(value, str):
116 |         return ast.Str(s=value)
117 |     elif isinstance(value, int):
118 |         return ast.Num(n=value)
119 |     elif isinstance(value, float):
120 |         return ast.Num(n=value)
121 |     elif isinstance(value, bool):
122 |         return ast.NameConstant(value=value)
123 |     elif isinstance(value, list):
124 |         return ast.List(elts=[create_ast_value(item) for item in value], ctx=ast.Load())
125 |     elif isinstance(value, dict):
126 |         keys = [ast.Str(s=k) for k in value.keys()]
127 |         values = [create_ast_value(v) for v in value.values()]
128 |         return ast.Dict(keys=keys, values=values)
129 |     else:
130 |         raise ValueError(f"Unsupported value type: {type(value).__name__}")
131 | 
132 | 
133 | def generate_code(node):
134 |     if isinstance(node, ast.Str):
135 |         return f'"{node.s}"'
136 |     elif isinstance(node, ast.Num):
137 |         return str(node.n)
138 |     elif isinstance(node, ast.NameConstant):
139 |         return str(node.value).lower()
140 |     elif isinstance(node, ast.List):
141 |         elements = [generate_code(elt) for elt in node.elts]
142 |         return f"[{', '.join(elements)}]"
143 |     elif isinstance(node, ast.Dict):
144 |         pairs = []
145 |         for key, value in zip(node.keys, node.values):
146 |             key_str = generate_code(key)
147 |             value_str = generate_code(value)
148 |             pairs.append(f"{key_str}: {value_str}")
149 |         return f"{{{', '.join(pairs)}}}"
150 |     elif isinstance(node, ast.Call):
151 |         func_name = node.func.id
152 |         args_str = ", ".join([generate_code(arg) for arg in node.args])
153 |         kwargs_str = ", ".join([f"{kw.arg}={generate_code(kw.value)}" for kw in node.keywords])
154 |         all_args_str = ", ".join(filter(None, [args_str, kwargs_str]))
155 |         return f"{func_name}({all_args_str})"
156 |     elif isinstance(node, ast.Module):
157 |         body_str = ", ".join([generate_code(item) for item in node.body])
158 |         return body_str
159 |     elif isinstance(node, ast.Expr):
160 |         return generate_code(node.value)
161 |     else:
162 |         raise ValueError(f"Unsupported AST node type: {type(node).__name__}")
163 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/watt_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import pdb
  3 | import traceback
  4 | import uuid
  5 | import ast
  6 | import requests
  7 | 
  8 | from .basic_handle import SimulateMultiTurnMessages
  9 | from .tools import remove_messages, AstVisitor, create_ast_value, generate_code
 10 | 
 11 | 
 12 | class WattMultiTurnMessages(SimulateMultiTurnMessages):
 13 |     def __init__(self, model_url, is_english=False):
 14 |         super().__init__(model_url, is_english)
 15 |         self.model_messages = []
 16 |         self.timeout = 300
 17 | 
 18 |     def process_planner_tool(self, messages):
 19 |         new_messages = []
 20 |         for i, message in enumerate(messages):
 21 |             role = message["role"]
 22 |             tool_calls = message.get("tool_calls", None)
 23 |             function_calls = []
 24 |             if tool_calls:
 25 |                 for tool_call in tool_calls:
 26 |                     function = tool_call["function"]
 27 |                     name = function["name"]
 28 |                     arguments = function["arguments"]
 29 | 
 30 |                     func_call = ast.Call(
 31 |                         func=ast.Name(id=name, ctx=ast.Load()),
 32 |                         args=[],
 33 |                         keywords=[
 34 |                             ast.keyword(arg=k, value=create_ast_value(v)) for k, v in arguments.items()
 35 |                         ]
 36 |                     )
 37 |                     function_calls.append(func_call)
 38 | 
 39 |                 list_node = ast.List(elts=function_calls, ctx=ast.Load())
 40 |                 module = ast.Module(body=[ast.Expr(value=list_node)], type_ignores=[])
 41 |                 ast_tool_calls = generate_code(module)
 42 |                 # pdb.set_trace()
 43 |                 new_messages.append({"role": "assistant", "content": ast_tool_calls})
 44 |             elif role == "tool":
 45 |                 functions = messages[i - 1]["tool_calls"]
 46 |                 observations = json.loads(message["content"])
 47 |                 assert len(observations) == len(functions)
 48 |                 for function, observation in zip(functions, observations):
 49 |                     # pdb.set_trace()
 50 |                     new_messages.append({"role": "tool", "name": function["function"]["name"], "content": json.dumps(observation, ensure_ascii=False)})
 51 |             else:
 52 |                 new_messages.append(message)
 53 |         return new_messages
 54 | 
 55 |     def preprocess_to_simple(self, messages):
 56 |         # pdb.set_trace()
 57 |         if len(self.model_messages) == 0:
 58 |             messages = remove_messages(messages, is_english=self.is_english)
 59 |             self.model_messages = self.process_planner_tool(messages)
 60 |         else:
 61 |             if messages[-1]["role"] == "user":
 62 |                 self.model_messages += remove_messages(
 63 |                     [{"role": "user", "content": messages[-1]["content"]}],
 64 |                     is_english=self.is_english
 65 |                 )
 66 |             elif messages[-1]["role"] == "tool":
 67 |                 assistant = None
 68 |                 observation = []
 69 |                 idx = -1
 70 |                 while True or idx > -len(messages):
 71 |                     if messages[idx]["role"] == "assistant":
 72 |                         assistant = messages[idx]
 73 |                         break
 74 |                     if messages[idx]["role"] == "tool":
 75 |                         observation.append(messages[idx])
 76 |                     idx -= 1
 77 |                 idmap_observation = {}
 78 |                 assert len(observation) == len(assistant["tool_calls"])
 79 |                 for tool_call in assistant["tool_calls"]:
 80 |                     idmap_observation[tool_call["id"]] = tool_call["function"]["name"]
 81 |                 for obser in observation:
 82 |                     assert obser["tool_call_id"] in idmap_observation
 83 |                     self.model_messages.append({
 84 |                         "role": "tool", "name": idmap_observation[obser["tool_call_id"]],
 85 |                         "content": obser["content"]
 86 |                     })
 87 |         return self.model_messages
 88 | 
 89 |     def parameters2arguments(self, function_dict):
 90 |         return {
 91 |             "name": function_dict["name"],
 92 |             "arguments": function_dict["parameters"] if "parameters" in function_dict else function_dict["arguments"]
 93 |         }
 94 | 
 95 |     def post_process_tool_call(self, answer):
 96 |         text = None
 97 |         tool_calls = None
 98 |         try:
 99 |             if answer.startswith("[") and answer.endswith("]"):
100 |                 try:
101 |                     self.model_messages.append({"role": "assistant", "content": answer})
102 |                     astor = AstVisitor()
103 |                     astor.visit(ast.parse(answer))
104 |                     answer_ = astor.function
105 |                     text = "use {} to solve user problem".format(", ".join([_["name"] for _ in answer_]))
106 |                     tool_calls = [{"id": str(uuid.uuid4()), "function": _} for _ in answer_]
107 |                 except Exception as e:
108 |                     traceback.print_exc()
109 |                     print(f"process error: {e}", flush=True)
110 |             else:
111 |                 self.model_messages.append({"role": "assistant", "content": answer})
112 |                 text = answer
113 |                 tool_calls = None
114 | 
115 |             return text, tool_calls
116 | 
117 |         except Exception as e:
118 |             traceback.print_exc()
119 |             print(f"error: {e}", flush=True)
120 |             return None, None
121 | 
122 |     def request_funcall(self, messages, tools, env_info=None):
123 |         url = self.model_url
124 |         headers = {"Content-Type": "application/json"}
125 |         data = {
126 |             "messages": self.add_date_to_message(self.preprocess_to_simple(messages), env_info),
127 |             "tools": tools,
128 |             "date": self.add_weekday_date(env_info)
129 |         }
130 | 
131 |         text = None
132 |         tool_calls = None
133 |         try_nums = 0
134 |         while True:
135 |             try:
136 |                 response = requests.post(url, headers=headers, json=data, timeout=self.timeout)
137 |                 if response.status_code == 200:
138 |                     result = response.json()
139 |                     answer = result["answer"]
140 |                     text, tool_calls = self.post_process_tool_call(answer)
141 |                     break
142 |             except Exception as e:
143 |                 print(f"error: {e}", flush=True)
144 |                 traceback.print_exc()
145 |                 try_nums += 1
146 |                 print(f"try_nums: {try_nums}", flush=True)
147 |                 if try_nums >= 5:
148 |                     break
149 | 
150 |         return text, tool_calls
151 | 
152 | 
153 | def main():
154 |     handle = WattMultiTurnMessages("http://111.111.111.111:12345")
155 |     tools = [
156 |         {
157 |             "type": "function",
158 |             "function": {
159 |                 "name": "get_current_weather",
160 |                 "description": "Get the current weather in a given location",
161 |                 "parameters": {
162 |                     "type": "object",
163 |                     "properties": {
164 |                         "location": {
165 |                             "type": "string",
166 |                             "description": "The city and state, e.g. San Francisco, CA"
167 |                         },
168 |                         "unit": {
169 |                             "type": "string",
170 |                             "enum": [
171 |                                 "celsius",
172 |                                 "fahrenheit"
173 |                             ]
174 |                         }
175 |                     },
176 |                     "required": [
177 |                         "location"
178 |                     ]
179 |                 }
180 |             }
181 |         }
182 |     ]
183 |     messages = [
184 |         {
185 |             "role": "user",
186 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
187 |         }
188 |     ]
189 |     content, tool_calls = handle.request_funcall(messages, tools)
190 |     print(content)
191 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
192 | 
193 | 
194 | if __name__ == "__main__":
195 |     main()
196 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/xlam2_handle.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import pdb
  3 | import uuid
  4 | import requests
  5 | 
  6 | import sys
  7 | import os
  8 | 
  9 | current_path_list = os.getcwd().split("/")[:-2]
 10 | current_path = "/".join(current_path_list)
 11 | print(f"current_path: {current_path}\n")
 12 | sys.path.append(current_path)
 13 | 
 14 | from .basic_handle import SimulateMultiTurnMessages
 15 | from .tools import remove_messages
 16 | 
 17 | 
 18 | class XLAM2MultiTurnMessages(SimulateMultiTurnMessages):
 19 |     def __init__(self, model_url, is_english=False):
 20 |         super().__init__(model_url, is_english)
 21 |         self.model_messages = []
 22 | 
 23 |     def preprocess_to_simple(self, messages):
 24 |         # pdb.set_trace()
 25 |         if len(self.model_messages) == 0:
 26 |             self.model_messages = remove_messages(messages, is_english=True)
 27 |         else:
 28 |             if messages[-1]["role"] == "user":
 29 |                 self.model_messages.append({"role": "user",
 30 |                                             "content": messages[-1]["content"].replace("用户：", "").replace("User:",
 31 |                                                                                                           "").strip()})
 32 |             elif messages[-1]["role"] == "tool":
 33 |                 self.model_messages.append({"role": "tool", "content": messages[-1]["content"]})
 34 |         # print("##########################")
 35 |         # print(f"self.model_messages:\n{self.model_messages}")
 36 |         return self.model_messages
 37 | 
 38 |     def request_funcall(self, messages, tools, env_info=None):
 39 |         url = self.model_url
 40 |         headers = {"Content-Type": "application/json"}
 41 |         data = {
 42 |             "messages": self.add_date_to_messsage_user(self.preprocess_to_simple(messages), env_info),
 43 |             "tools": tools,
 44 |             "date": self.add_weekday_date(env_info)
 45 |         }
 46 | 
 47 |         text = None
 48 |         tool_calls = None
 49 |         try:
 50 |             response = requests.post(url, headers=headers, json=data, timeout=self.timeout)
 51 |             if response.status_code == 200:
 52 |                 result = response.json()
 53 |                 answer = result["answer"]
 54 |                 text, tool_calls = self.post_process_tool_call(answer)
 55 |         except Exception as e:
 56 |             print(f"error: {e}")
 57 |             text = None
 58 |             tool_calls = None
 59 | 
 60 |         return text, tool_calls
 61 | 
 62 |     def post_process_tool_call(self, answer):
 63 |         try:
 64 |             if answer.startswith("[") and answer.endswith("]"):
 65 |                 try:
 66 |                     answer = json.loads(answer)
 67 |                 except Exception as e:
 68 |                     print(f"json loads error: {e}")
 69 |                     pass
 70 | 
 71 |             if type(answer) == list:
 72 |                 text = "use {} to solve user problem".format(
 73 |                     ", ".join([
 74 |                         _["name"] for _ in answer
 75 |                     ])
 76 |                 )
 77 |                 tool_calls = [{"id": str(uuid.uuid4()), "type": "function", "function": _} for _ in answer]
 78 |                 self.model_messages.append({"role": "assistant", "content": text, "tool_calls": tool_calls})
 79 |             else:
 80 |                 self.model_messages.append({"role": "assistant", "content": answer})
 81 |                 text = answer
 82 |                 tool_calls = None
 83 | 
 84 |             return text, tool_calls
 85 | 
 86 |         except Exception as e:
 87 |             print(f"error: {e}")
 88 |             return None, None
 89 | 
 90 | 
 91 | def main():
 92 |     handle = XLAM2MultiTurnMessages("http://11.220.87.179:12345")
 93 |     tools = [
 94 |         {
 95 |             "type": "function",
 96 |             "function": {
 97 |                 "name": "get_current_weather",
 98 |                 "description": "Get the current weather in a given location",
 99 |                 "parameters": {
100 |                     "type": "object",
101 |                     "properties": {
102 |                         "location": {
103 |                             "type": "string",
104 |                             "description": "The city and state, e.g. San Francisco, CA"
105 |                         },
106 |                         "unit": {
107 |                             "type": "string",
108 |                             "enum": [
109 |                                 "celsius",
110 |                                 "fahrenheit"
111 |                             ]
112 |                         }
113 |                     },
114 |                     "required": [
115 |                         "location"
116 |                     ]
117 |                 }
118 |             }
119 |         }
120 |     ]
121 |     messages = [
122 |         {
123 |             "role": "user",
124 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
125 |         },
126 |         # {"role": "assistant", "content": "", "tool_calls": [{'id': '137c9f34-a7d1-4cd3-a0ae-a4763bf884ac', 'function': {'name': 'get_current_weather', 'arguments': {'location': 'Boston', 'unit': 'celsius'}}}, {'id': '94430843-c85c-4946-8333-26d470b73a93', 'function': {'name': 'get_current_weather', 'arguments': {'location': 'San Francisco', 'unit': 'celsius'}}}]},
127 |         # {"role": "tool", "content": "Boston and San Francisco is rainy."}
128 |     ]
129 |     content, tool_calls = handle.request_funcall(messages, tools, "2023-03-17 19:20:00")
130 |     print(content)
131 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
132 | 
133 | 
134 | if __name__ == "__main__":
135 |     main()
136 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/handle/xlam_handle.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import uuid
 3 | 
 4 | from .basic_handle import SimulateMultiTurnMessages
 5 | from .tools import remove_messages
 6 | 
 7 | 
 8 | class XLAMMultiTurnMessages(SimulateMultiTurnMessages):
 9 |     def __init__(self, model_url, is_english=False):
10 |         super().__init__(model_url, is_english)
11 |         self.model_messages = []
12 | 
13 |     def preprocess_to_simple(self, messages):
14 |         if len(self.model_messages) == 0:
15 |             self.model_messages = remove_messages(messages, is_english=True)
16 |         else:
17 |             if messages[-1]["role"] == "user":
18 |                 self.model_messages.append({"role": "user", "content": messages[-1]["content"].replace("用户：", "").replace("User:", "").strip()})
19 |             elif messages[-1]["role"] == "tool":
20 |                 self.model_messages.append({"role": "tool", "content": messages[-1]["content"]})
21 |         return self.model_messages
22 | 
23 |     def post_process_tool_call(self, answer):
24 |         try:
25 |             if "tool_calls" in answer:
26 |                 try:
27 |                     answer = json.loads(answer)
28 |                 except Exception as e:
29 |                     print(f"json loads error: {e}")
30 |                     pass
31 | 
32 |             if "tool_calls" in answer and type(answer) == dict:
33 |                 text = "use {} to solve user problem".format(
34 |                     ", ".join([
35 |                         _["name"] for _ in answer["tool_calls"]
36 |                     ])
37 |                 )
38 |                 tool_calls = [{"id": str(uuid.uuid4()), "function": _} for _ in answer["tool_calls"]]
39 |                 self.model_messages.append({"role": "assistant", "content": text, "tool_calls": answer["tool_calls"]})
40 |             else:
41 |                 self.model_messages.append({"role": "assistant", "content": answer})
42 |                 text = answer
43 |                 tool_calls = None
44 | 
45 |             return text, tool_calls
46 | 
47 |         except Exception as e:
48 |             print(f"error: {e}")
49 |             return None, None
50 | 
51 | 
52 | def main():
53 |     handle = XLAMMultiTurnMessages("http://111.111.111.111:12345")
54 |     tools = [
55 |         {
56 |             "type": "function",
57 |             "function": {
58 |                 "name": "get_current_weather",
59 |                 "description": "Get the current weather in a given location",
60 |                 "parameters": {
61 |                     "type": "object",
62 |                     "properties": {
63 |                         "location": {
64 |                             "type": "string",
65 |                             "description": "The city and state, e.g. San Francisco, CA"
66 |                         },
67 |                         "unit": {
68 |                             "type": "string",
69 |                             "enum": [
70 |                                 "celsius",
71 |                                 "fahrenheit"
72 |                             ]
73 |                         }
74 |                     },
75 |                     "required": [
76 |                         "location"
77 |                     ]
78 |                 }
79 |             }
80 |         }
81 |     ]
82 |     messages = [
83 |         {
84 |             "role": "user",
85 |             "content": "What's the weather like in the two cities of Boston and San Francisco?"
86 |         }
87 |     ]
88 |     content, tool_calls = handle.request_funcall(messages, tools)
89 |     print(content)
90 |     print(json.dumps(tool_calls, ensure_ascii=False, indent=4))
91 | 
92 | 
93 | if __name__ == "__main__":
94 |     main()


--------------------------------------------------------------------------------
/c3_bench/bench_test/request_pipeline.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import copy
  4 | import argparse
  5 | import sys
  6 | 
  7 | current_path_list = os.getcwd().split("/")[:-2]
  8 | current_path = "/".join(current_path_list)
  9 | print(f"current_path: {current_path}\n")
 10 | sys.path.append(current_path)
 11 | 
 12 | from utils import read_file_to_json, get_random_pathname
 13 | from tool_call_graph import eval_by_tool_call_graph
 14 | from handle.handles import tool_handle_map
 15 | from tqdm import tqdm
 16 | 
 17 | 
 18 | def str2bool(v):
 19 |     '''
 20 |     Transform string to bool.
 21 | 
 22 |     Arguments:
 23 |         v (str): The value to be converted.
 24 | 
 25 |     Returns:
 26 |         bool: The converted value.
 27 |     '''
 28 |     if isinstance(v, bool):
 29 |         return v
 30 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
 31 |         return True
 32 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
 33 |         return False
 34 |     else:
 35 |         raise argparse.ArgumentTypeError('Boolean value expected.')
 36 | 
 37 | 
 38 | def get_messages_until_task(messages, task_id, task, history, is_english, remove_role=True):
 39 |     '''
 40 |     整合历史消息并根据任务ID和任务内容过滤消息，同时根据语言移除角色标识。
 41 | 
 42 |     Arguments:
 43 |         messages (list): 包含消息记录的列表，每条记录是一个字典，包含内容和角色等信息。
 44 |         task_id (int): 任务ID，用于定位特定任务的消息。
 45 |         task (str): 任务内容，用于验证消息中是否包含该任务。
 46 |         history (list): 历史消息列表，每个元素是一个消息列表。
 47 |         is_english (bool): 是否为英文消息，用于确定如何移除角色标识。
 48 |         remove_role (bool): 是否移除消息中的角色标识，默认为True。
 49 | 
 50 |     Returns:
 51 |         list: 整合后的消息列表，根据任务ID和任务内容过滤，并移除了角色标识。
 52 |     '''
 53 |     new_messages = []
 54 |     try:
 55 |         for history_messages in history:
 56 |             new_messages += history_messages
 57 |         assert len(new_messages) % 2 == 0
 58 |         assert task in messages[task_id]["content"]
 59 |         new_messages += messages[:task_id + 1]
 60 |         assert len(new_messages) % 2 == 1
 61 |         role = "user"
 62 |         for m in new_messages:
 63 |             assert m["role"] == role
 64 |             role = "assistant" if role == "user" else "user"
 65 |             if not remove_role:
 66 |                 continue
 67 |             if is_english:
 68 |                 colon_idx = m["content"].find(":")
 69 |                 if (
 70 |                         colon_idx != -1 and
 71 |                         m["content"][:colon_idx].lower() in [
 72 |                     "ai", "ai agent", "user", "ai agent assistant"
 73 |                 ]
 74 |                 ):
 75 |                     m['content'] = m["content"][colon_idx + 1:]
 76 |             else:
 77 |                 colon_idx = m["content"].find("：")
 78 |                 if (
 79 |                         colon_idx != -1 and
 80 |                         m["content"][:colon_idx] in [
 81 |                     "用户", "AI Agent助手", "AI Agent"
 82 |                 ]
 83 |                 ):
 84 |                     m['content'] = m["content"][colon_idx + 1:]
 85 |     except Exception as e:
 86 |         # ipdb.set_trace()
 87 |         print(f"error: {e}")
 88 | 
 89 |     return new_messages
 90 | 
 91 | 
 92 | def parse_argument():
 93 |     parser = argparse.ArgumentParser()
 94 |     parser.add_argument("--language", type=str, default="en")
 95 |     parser.add_argument("--new_key", type=str, default=None)
 96 |     parser.add_argument("--model", type=str, default="hunyuan-turbos-latest")
 97 |     parser.add_argument("--data_path", type=str, default="./data/C3-Bench.jsonl")
 98 |     parser.add_argument("--output_path", type=str, default="./result")
 99 |     parser.add_argument("--model_url", type=str, default="http://111.111.111.111:12345")
100 |     parser.add_argument("--continue_file", type=str, default=None)
101 |     parser.add_argument("--remove_role", type=str2bool, default=True)
102 |     parser.add_argument("--contain_context", type=str2bool, default=True)
103 |     parser.add_argument("--debug_id", type=str, default=None)
104 |     parser.add_argument("--debug_idx", type=str, default=None)
105 |     parser.add_argument("--skip_num", type=int, default=0)
106 |     parser.add_argument("--retry_num", type=int, default=1)
107 |     args = parser.parse_args()
108 |     return args
109 | 
110 | 
111 | def add_args_info_into_filename(args):
112 |     args_dict = vars(args)
113 |     key = []
114 |     if args.new_key is not None:
115 |         key.append(args.new_key)
116 |     key.append(args_dict["model"])
117 |     key.append(args_dict["language"])
118 |     for k in args_dict:
119 |         if type(args_dict[k]) == bool:
120 |             key.append(k)
121 |     return "_".join(key)
122 | 
123 | 
124 | def split_messages_by_equal(messages):
125 |     messages_list = []
126 |     now_message = []
127 |     for m in messages:
128 |         if type(m) == str and "=====" in m:
129 |             messages_list.append(copy.copy(now_message))
130 |             now_message = []
131 |         else:
132 |             now_message.append(m)
133 |     if len(now_message) != 0:
134 |         messages_list.append(now_message)
135 |     return messages_list
136 | 
137 | 
138 | def main(args):
139 |     data = read_file_to_json(args.data_path)
140 | 
141 |     res_data = []
142 |     path_ = get_random_pathname(args.output_path, "jsonl", keys=add_args_info_into_filename(args), need_time=True)
143 |     is_english = False if args.language == "zh" else True
144 |     error_list = []
145 |     too_long_continue = 0
146 |     task_length = 0
147 |     process_cnt = 0
148 |     debug_mode = args.debug_id and args.debug_idx
149 |     if args.continue_file and "None" not in args.continue_file and os.path.exists(
150 |             args.continue_file) and args.model in args.continue_file:
151 |         continue_file = read_file_to_json(args.continue_file)
152 |         res_data = continue_file
153 |         path_ = args.continue_file.replace(".unfinish", "")
154 |         task_length += len(res_data)
155 |         print(f"continue file: {args.continue_file}")
156 |         print(f"task_length: {task_length}")
157 |     elif args.skip_num != 0:
158 |         data = data[int(args.skip_num / 4) + 1:]
159 | 
160 |     for item in tqdm(data):
161 |         try:
162 |             if debug_mode and args.debug_id not in item["id"]:
163 |                 continue
164 |             task_list = item["english_task"] if is_english else item["task"]
165 |             answer_lists = item["english_answer_list"] if is_english else item["answer_list"]
166 |             messages_list = item["english_messages"] if is_english else item["messages"]
167 |             tools_list = item["english_tools"] if is_english else item["tools"]
168 |             messages_list = split_messages_by_equal(messages_list)
169 |             assert type(task_list) == list and type(answer_lists[0]) == list
170 |             assert len(task_list) == len(answer_lists) and len(task_list) == len(messages_list)
171 |             if not args.contain_context and len(task_list) == 1:
172 |                 continue
173 |             if type(item["env_info"]) == str:
174 |                 item["env_info"] = [item["env_info"] for _ in range(len(task_list))]
175 |             item["env_info"] = [
176 |                 env_info[:env_info.find("星期")].strip()
177 |                 for env_info in item["env_info"] if "星期" in env_info
178 |             ]
179 |             for id_, task_id, task, answer_list, messages, env_info in zip(
180 |                     range(len(task_list)), item["task_ids"],
181 |                     task_list, answer_lists, messages_list, item["env_info"]
182 |             ):
183 |                 if debug_mode and int(args.debug_idx) != id_:
184 |                     continue
185 |                 if not args.contain_context and id_ == 0:
186 |                     continue
187 |                 process_cnt += 1
188 |                 if process_cnt <= len(res_data):
189 |                     continue
190 |                 simulator, response_continue = tool_handle_map[args.model]
191 |                 simulator = simulator(args.model_url, is_english)
192 |                 if args.contain_context:
193 |                     messages = get_messages_until_task(
194 |                         messages, task_id, task, messages_list[:id_], is_english, args.remove_role
195 |                     )
196 |                 else:
197 |                     messages = get_messages_until_task(
198 |                         messages, task_id, task, [], is_english, args.remove_role
199 |                     )
200 |                 messages_length = len(messages)
201 |                 predict_label, predict_is_optimal, predict_result, answer_result = eval_by_tool_call_graph(
202 |                     simulator.request_funcall,
203 |                     messages,
204 |                     tools_list,
205 |                     answer_list,
206 |                     response_continue,
207 |                     env_info=env_info,
208 |                     retry_num=args.retry_num
209 |                 )
210 |                 res_data.append({
211 |                     "id": item["id"],
212 |                     "idx": id_,
213 |                     "messages": messages,
214 |                     "messages_length": messages_length,
215 |                     "task_id": task_id,
216 |                     "type": item["type"],
217 |                     "tools": tools_list,
218 |                     "task": task,
219 |                     "answer_list": answer_list,
220 |                     "predict_result": predict_result,
221 |                     "predict_label": predict_label,
222 |                     "predict_is_optimal": str(predict_is_optimal),
223 |                     "answer_result": answer_result,
224 |                     "turn_type": [(_ if type(_) == bool else _ == "真") for _ in item.get("turn_type", [])],
225 |                     "turn_subtypes": item.get("turn_subtypes", []),
226 |                 })
227 |                 if len(res_data) % 10 == 1:
228 |                     print(task)
229 |                     print(predict_result)
230 |                 with open(path_ + ".unfinish", "w", encoding="utf-8") as f:
231 |                     for res in res_data:
232 |                         f.write(json.dumps(res, ensure_ascii=False) + "\n")
233 |         except Exception as e:
234 |             print(f"error: {e}")
235 |             error_list.append([item["id"], e])
236 |             # with open("error.jsonl", "w", encoding="utf-8") as f:
237 |             #     for error in error_list:
238 |             #         f.write(json.dumps(error, ensure_ascii=False) + "\n")
239 | 
240 |         task_length += len(item["task"])
241 |         # print(f"{task_length}:-:{len(res_data)}:-:{process_cnt}")
242 | 
243 |     print(f"error cnt: {len(error_list)}")
244 |     print(f"too long: {too_long_continue}")
245 |     if not debug_mode:
246 |         os.system(f'mv {path_}.unfinish {path_}')
247 | 
248 | 
249 | if __name__ == "__main__":
250 |     args = parse_argument()
251 |     main(args)
252 |     # add_args_info_into_filename(args)
253 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/tool_class/__init__.py


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/chatglm.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import torch
 3 | 
 4 | from .tool_class_base import ToolClass
 5 | from transformers import AutoModelForCausalLM, AutoTokenizer
 6 | 
 7 | 
 8 | '''
 9 | import torch
10 | from transformers import AutoModelForCausalLM, AutoTokenizer
11 | 
12 | device = "cuda"
13 | 
14 | tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-chat", trust_remote_code=True)
15 | 
16 | query = "你好"
17 | 
18 | inputs = tokenizer.apply_chat_template([{"role": "user", "content": query}],
19 |                                        add_generation_prompt=True,
20 |                                        tokenize=True,
21 |                                        return_tensors="pt",
22 |                                        return_dict=True
23 |                                        )
24 | 
25 | inputs = inputs.to(device)
26 | model = AutoModelForCausalLM.from_pretrained(
27 |     "THUDM/glm-4-9b-chat",
28 |     torch_dtype=torch.bfloat16,
29 |     low_cpu_mem_usage=True,
30 |     trust_remote_code=True
31 | ).to(device).eval()
32 | 
33 | gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
34 | with torch.no_grad():
35 |     outputs = model.generate(**inputs, **gen_kwargs)
36 |     outputs = outputs[:, inputs['input_ids'].shape[1]:]
37 |     print(tokenizer.decode(outputs[0], skip_special_tokens=True))
38 | '''
39 | 
40 | 
41 | class ChatGLM(ToolClass):
42 |     def init(self):
43 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
44 |         self.model = AutoModelForCausalLM.from_pretrained(
45 |             self.model_path,
46 |             torch_dtype=torch.bfloat16,
47 |             device_map="auto",
48 |             low_cpu_mem_usage=True,
49 |             trust_remote_code=True
50 |         ).eval()
51 | 
52 |     def _format_prompt(self, messages, function):
53 |         formatted_prompt = ""
54 |         tools = function
55 |         if tools:
56 |             formatted_prompt = "[gMASK]<sop><|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具"
57 |             for tool in tools:
58 |                 formatted_prompt += f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], indent=4)}"
59 |                 formatted_prompt += "\n在调用上述函数时，请使用 Json 格式表示调用的参数。"
60 | 
61 |         for message in messages:
62 |             formatted_prompt += f"<|{message['role']}|>\n{message['content']}"
63 | 
64 |         formatted_prompt += "<|assistant|>"
65 | 
66 |         return formatted_prompt
67 | 
68 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
69 |         text = self._format_prompt(messages, functions)
70 |         inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
71 |         gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
72 |         outputs = self.model.generate(**inputs, **gen_kwargs)
73 |         return self.decode_res(inputs, outputs)
74 |     
75 |     def _get_res(self, messages):
76 |         # outputs = self.pipeline(messages, max_new_tokens=512)
77 |         print("just messages")
78 |         text = self.tokenizer.apply_chat_template(
79 |             messages,
80 |             tokenize=False,
81 |             add_generation_prompt=True,
82 |         )
83 |         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
84 |         gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
85 |         outputs = self.model.generate(
86 |             **model_inputs,
87 |             **gen_kwargs
88 |         )
89 |         return model_inputs, outputs
90 | 
91 |     def decode_res(self, prompt, outputs):
92 |         # print(len(prompt))
93 |         # print(type(outputs), outputs)
94 |         generated_ids = outputs[:, prompt['input_ids'].shape[1]:]
95 |         return self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
96 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/deepseek.py:
--------------------------------------------------------------------------------
 1 | from .tool_class_base import ToolClass
 2 | from transformers import AutoModelForCausalLM, AutoTokenizer
 3 | 
 4 | 
 5 | '''
 6 | import torch
 7 | from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 8 | 
 9 | model_name = "deepseek-ai/DeepSeek-V2"
10 | tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
11 | # `max_memory` should be set based on your devices
12 | max_memory = {i: "75GB" for i in range(8)}
13 | # `device_map` cannot be set to `auto`
14 | model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="sequential", torch_dtype=torch.bfloat16, max_memory=max_memory, attn_implementation="eager")
15 | model.generation_config = GenerationConfig.from_pretrained(model_name)
16 | model.generation_config.pad_token_id = model.generation_config.eos_token_id
17 | 
18 | text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"
19 | inputs = tokenizer(text, return_tensors="pt")
20 | outputs = model.generate(**inputs.to(model.device), max_new_tokens=100)
21 | 
22 | result = tokenizer.decode(outputs[0], skip_special_tokens=True)
23 | print(result)
24 | '''
25 | 
26 | 
27 | class DeepSeek(ToolClass):
28 |     def init(self):
29 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
30 |         self.model = AutoModelForCausalLM.from_pretrained(
31 |             self.model_path,
32 |             torch_dtype="auto",
33 |             device_map="auto",
34 |         )
35 | 
36 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
37 |         text = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True, tokenize=False)
38 |         inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
39 |         outputs = self.model.generate(**inputs, max_new_tokens=512)
40 |         return self.decode_res(inputs, outputs)
41 |     
42 |     def _get_res(self, messages):
43 |         # outputs = self.pipeline(messages, max_new_tokens=512)
44 |         print("just messages")
45 |         text = self.tokenizer.apply_chat_template(
46 |             messages,
47 |             tokenize=False,
48 |             add_generation_prompt=True,
49 |         )
50 |         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
51 |         outputs = self.model.generate(
52 |             **model_inputs,
53 |             max_new_tokens=4096
54 |         )
55 |         return model_inputs, outputs
56 | 
57 |     def decode_res(self, prompt, outputs):
58 |         # print(len(prompt))
59 |         # print(type(outputs), outputs)
60 |         generated_ids = [
61 |             output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt.input_ids, outputs)
62 |         ]
63 |         return self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
64 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/fc_medium.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .tool_class_base import ToolClass
 4 | from transformers import AutoModelForCausalLM, AutoTokenizer
 5 | 
 6 | 
 7 | logger = logging.getLogger()
 8 | 
 9 | 
10 | '''
11 | from transformers import AutoModelForCausalLM, AutoTokenizer
12 | 
13 | tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-medium-v3.1")
14 | model = AutoModelForCausalLM.from_pretrained("meetkai/functionary-medium-v3.1", device_map="auto", trust_remote_code=True)
15 | 
16 | tools = [
17 |     {
18 |         "type": "function",
19 |         "function": {
20 |             "name": "get_current_weather",
21 |             "description": "Get the current weather",
22 |             "parameters": {
23 |                 "type": "object",
24 |                 "properties": {
25 |                     "location": {
26 |                         "type": "string",
27 |                         "description": "The city and state, e.g. San Francisco, CA"
28 |                     }
29 |                 },
30 |                 "required": ["location"]
31 |             }
32 |         }
33 |     }
34 | ]
35 | messages = [{"role": "user", "content": "What is the weather in Istanbul and Singapore respectively?"}]
36 | 
37 | final_prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
38 | inputs = tokenizer(final_prompt, return_tensors="pt").to("cuda")
39 | pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer)
40 | print(tokenizer.decode(pred.cpu()[0]))
41 | '''
42 | 
43 | 
44 | class FC_Medium(ToolClass):
45 |     def init(self):
46 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
47 |         self.model = AutoModelForCausalLM.from_pretrained(
48 |             self.model_path,
49 |             torch_dtype="auto",
50 |             device_map="auto",
51 |             trust_remote_code=True,
52 |             attn_implementation="flash_attention_2"
53 |         )
54 | 
55 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
56 |         text = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True, tokenize=False)
57 |         inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
58 |         self.tokenizer.pad_token = "<|eot_id|>"
59 |         outputs = self.model.generate(**inputs, max_new_tokens=512)
60 |         return self.decode_res(inputs, outputs)
61 |     
62 |     def _get_res(self, messages):
63 |         # outputs = self.pipeline(messages, max_new_tokens=512)
64 |         text = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
65 |         inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
66 |         outputs = self.model.generate(
67 |             **inputs,
68 |             max_new_tokens=512
69 |         )
70 |         return inputs, outputs
71 | 
72 |     def decode_res(self, prompt, outputs):
73 |         # print(len(prompt))
74 |         # print(type(outputs), outputs)
75 |         # generated_ids = [
76 |         #     output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt.input_ids, outputs)
77 |         # ]
78 |         generated_ids = [
79 |             output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt.input_ids, outputs)
80 |         ]
81 |         return self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/gorilla.py:
--------------------------------------------------------------------------------
 1 | from .tool_class_base import ToolClass
 2 | from transformers import AutoModelForCausalLM, AutoTokenizer
 3 | 
 4 | 
 5 | class Gorilla(ToolClass):
 6 |     def init(self):
 7 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
 8 |         self.model = AutoModelForCausalLM.from_pretrained(
 9 |             self.model_path,
10 |             torch_dtype='auto',
11 |             device_map='auto',
12 |             trust_remote_code=True
13 |         )
14 | 
15 |     def _get_res(self, messages):
16 |         inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device)
17 |         outputs = self.model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id)
18 |         return outputs, inputs
19 | 
20 |     def decode_res(self, outputs, inputs):
21 |         return self.tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
22 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/hammer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .tool_class_base import ToolClass
 4 | from transformers import AutoModelForCausalLM, AutoTokenizer
 5 | 
 6 | 
 7 | '''
 8 | tokenizer = AutoTokenizer.from_pretrained("MadeAgents/Hammer2.1-7b")
 9 | model = AutoModelForCausalLM.from_pretrained("MadeAgents/Hammer2.1-7b", torch_dtype=torch.bfloat16, device_map="auto")
10 | 
11 | # Example conversation
12 | messages = [
13 |     {"role": "user", "content": "What's the weather like in New York?"},
14 |     {"role": "assistant","content": '```\n{"name": "get_weather", "arguments": {"location": "New York, NY ", "unit": "celsius"}\n```'},
15 |     {"role": "tool", "name": "get_weather", "content": '{"temperature": 72, "description": "Partly cloudy"}'},
16 |     {"role": "user", "content": "Now, search for the weather in San Francisco."}
17 | ]
18 | 
19 | # Example function definition (optional)
20 | tools = [
21 |     {
22 |         "name": "get_weather",
23 |         "description": "Get the current weather for a location",
24 |         "parameters": {
25 |             "type": "object",
26 |             "properties": {
27 |                 "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
28 |                 "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit of temperature to return"}
29 |             },
30 |             "required": ["location"]
31 |         }
32 |     },
33 |     {
34 |         "name": "respond",
35 |         "description": "When you are ready to respond, use this function. This function allows the assistant to formulate and deliver appropriate replies based on the input message and the context of the conversation. Generate a concise response for simple questions, and a more detailed response for complex questions.",
36 |         "parameters": {
37 |             "type": "object",
38 |             "properties": {
39 |                 "message": {"type": "string", "description": "The content of the message to respond to."}
40 |             },
41 |             "required": ["message"]
42 |         }
43 |     }
44 | ]
45 | 
46 | inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
47 | inputs = {k: v.to(model.device) for k, v in inputs.items()}
48 | out = model.generate(**inputs, max_new_tokens=128)
49 | print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):], skip_special_tokens=True))
50 | '''
51 | 
52 | 
53 | class Hammer(ToolClass):
54 |     def init(self):
55 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
56 |         self.model = AutoModelForCausalLM.from_pretrained(
57 |             self.model_path,
58 |             torch_dtype=torch.bfloat16,
59 |             device_map="auto",
60 |             # attn_implementation="flash_attention_2"
61 |         )
62 | 
63 |     def remove_function_object(self, functions):
64 |         functions_ = []
65 |         for func in functions:
66 |             if "function" in func and "name" in func["function"]:
67 |                 func = func["function"]
68 |             functions_.append(func)
69 |         return functions_
70 | 
71 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
72 |         functions = self.remove_function_object(functions)
73 |         inputs = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True, return_dict=True, return_tensors="pt")
74 |         inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
75 |         outputs = self.model.generate(**inputs, max_new_tokens=128)
76 |         return self.decode_res(inputs, outputs)
77 |     
78 |     def _get_res(self, messages):
79 |         # outputs = self.pipeline(messages, max_new_tokens=512)
80 |         inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_dict=True, return_tensors="pt")
81 |         inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
82 |         outputs = self.model.generate(
83 |             **inputs,
84 |             max_new_tokens=128
85 |         )
86 |         return inputs, outputs
87 | 
88 |     def decode_res(self, prompt, outputs):
89 |         # print(len(prompt))
90 |         # print(type(outputs), outputs)
91 |         return self.tokenizer.decode(outputs[0][len(prompt["input_ids"][0]):], skip_special_tokens=True)
92 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/llama.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | import transformers
  4 | 
  5 | from .tool_class_base import ToolClass
  6 | from transformers import AutoModelForCausalLM, AutoTokenizer
  7 | 
  8 | 
  9 | logger = logging.getLogger()
 10 | 
 11 | 
 12 | class Llama(ToolClass):
 13 |     def init(self):
 14 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
 15 |         self.model = AutoModelForCausalLM.from_pretrained(
 16 |             self.model_path,
 17 |             torch_dtype='auto',
 18 |             device_map='auto',
 19 |             trust_remote_code=True,
 20 |             attn_implementation="flash_attention_2"
 21 |         )
 22 |         self.pipeline = transformers.pipeline(
 23 |             "text-generation",
 24 |             model=self.model,
 25 |             tokenizer=self.tokenizer,
 26 |         )
 27 | 
 28 |     def self_formated_template(self, messages ,functions):
 29 |         formatted_prompt = "<|begin_of_text|>"
 30 | 
 31 |         system_message = ""
 32 |         remaining_messages = messages
 33 |         if messages[0]["role"] == "system":
 34 |             system_message = messages[0]["content"].strip()
 35 |             remaining_messages = messages[1:]
 36 | 
 37 |         formatted_prompt += "<|start_header_id|>system<|end_header_id|>\n\n"
 38 |         formatted_prompt += "Environment: ipython\n"
 39 |         formatted_prompt += "Cutting Knowledge Date: December 2023\n"
 40 |         formatted_prompt += "Today Date: 26 Jul 2024\n\n"
 41 |         formatted_prompt += system_message + "<|eot_id|>"
 42 | 
 43 |         # Llama pass in custom tools in first user message
 44 |         is_first_user_message = True
 45 |         for message in remaining_messages:
 46 |             if message["role"] == "user" and is_first_user_message:
 47 |                 is_first_user_message = False
 48 |                 formatted_prompt += "<|start_header_id|>user<|end_header_id|>\n\n"
 49 |                 formatted_prompt += "Given the following functions, please respond with a JSON for a function call "
 50 |                 formatted_prompt += (
 51 |                     "with its proper arguments that best answers the given prompt.\n\n"
 52 |                 )
 53 |                 formatted_prompt += 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.'
 54 |                 formatted_prompt += "Do not use variables.\n\n"
 55 |                 for func in functions:
 56 |                     formatted_prompt += json.dumps(func, indent=4) + "\n\n"
 57 |                 formatted_prompt += f"{message['content'].strip()}<|eot_id|>"
 58 | 
 59 |             elif message["role"] == "tool":
 60 |                 formatted_prompt += "<|start_header_id|>ipython<|end_header_id|>\n\n"
 61 |                 if isinstance(message["content"], (dict, list)):
 62 |                     formatted_prompt += json.dumps(message["content"])
 63 |                 else:
 64 |                     formatted_prompt += message["content"]
 65 |                 formatted_prompt += "<|eot_id|>"
 66 | 
 67 |             else:
 68 |                 formatted_prompt += f"<|start_header_id|>{message['role']}<|end_header_id|>\n\n{message['content'].strip()}<|eot_id|>"
 69 | 
 70 |         formatted_prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
 71 |         return formatted_prompt
 72 | 
 73 |     def remove_function_object(self, functions):
 74 |         functions_ = []
 75 |         for func in functions:
 76 |             if "function" in func and "name" in func["function"]:
 77 |                 func = func["function"]
 78 |             functions_.append(func)
 79 |         return functions_
 80 | 
 81 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
 82 |         last_role = None
 83 |         for m in messages:
 84 |             if last_role is None:
 85 |                 last_role = m["role"]
 86 |                 continue
 87 |             assert last_role != m["role"], "Message role cannot be the same."
 88 |             last_role = m["role"]
 89 |         assert messages[-1]["role"] in ["tool", "user"]
 90 |         functions = self.remove_function_object(functions)
 91 |         if "date" in extra_args:
 92 |             date_string = extra_args["date"]
 93 |             logger.info(f"using date: {date_string}")
 94 |             prompt = self.pipeline.tokenizer.apply_chat_template(messages, tools=functions, date_string=date_string, tokenize=False, add_generation_prompt=True)
 95 |         else:
 96 |             prompt = self.pipeline.tokenizer.apply_chat_template(messages, tools=functions, tokenize=False, add_generation_prompt=True)
 97 |         # prompt = self.self_formated_template(messages, functions)
 98 |         outputs = self.pipeline(
 99 |             prompt,
100 |             max_new_tokens=1024,
101 |             do_sample=True,
102 |         )
103 | 
104 |         return outputs[0]["generated_text"][len(prompt):]
105 |     
106 |     def _get_res(self, messages):
107 |         # outputs = self.pipeline(messages, max_new_tokens=512)
108 |         print("just messages")
109 |         prompt = self.pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
110 |         outputs = self.pipeline(
111 |             prompt,
112 |             max_new_tokens=1024,
113 |             do_sample=True,
114 |         )
115 |         return prompt, outputs
116 | 
117 |     def decode_res(self, prompt, outputs):
118 |         # print(len(prompt))
119 |         # print(type(outputs), outputs)
120 |         return outputs[0]["generated_text"][len(prompt):]
121 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/tool_ace.py:
--------------------------------------------------------------------------------
 1 | from .tool_class_base import ToolClass
 2 | from transformers import AutoModelForCausalLM, AutoTokenizer
 3 | 
 4 | 
 5 | class ToolACE(ToolClass):
 6 |     def init(self):
 7 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
 8 |         self.model = AutoModelForCausalLM.from_pretrained(
 9 |             self.model_path,
10 |             torch_dtype='auto',
11 |             device_map='auto'
12 |         )
13 | 
14 |     def format_message(self, messages, functions, more_info=None):
15 |         # You can modify the prompt for your task
16 |         system_prompt = """You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
17 |         If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out.
18 |         You should only return the function call in tools call sections.
19 | 
20 |         If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
21 |         You SHOULD NOT include any other text in the response.
22 |         Here is a list of functions in JSON format that you can invoke.\n{functions}\n
23 |         """
24 |         tools = []
25 |         for func in functions:
26 |             if "function" in func and "name" in func["function"]:
27 |                 tools.append(func["function"])
28 |             else:
29 |                 tools.append(func)
30 | 
31 |         if messages[0]["role"] == "system":
32 |             messages = messages[1:]
33 |         messages = [
34 |             {'role': 'system', 'content': system_prompt.format(functions=tools)},
35 |         ] + messages
36 |         return messages
37 | 
38 |     def _get_res(self, messages):
39 |         inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device)
40 |         outputs = self.model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id)
41 |         return outputs, inputs
42 | 
43 |     def decode_res(self, outputs, inputs):
44 |         return self.tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
45 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/tool_class_base.py:
--------------------------------------------------------------------------------
 1 | class ToolClass:
 2 |     def __init__(self, name, model_path):
 3 |         self.name = name
 4 |         self.model_path = model_path
 5 |         self.init()
 6 | 
 7 |     def init(self):
 8 |         pass
 9 | 
10 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
11 |         last_role = None
12 |         for m in messages:
13 |             if last_role is None:
14 |                 last_role = m["role"]
15 |                 continue
16 |             assert last_role != m["role"], "Message role cannot be the same."
17 |             last_role = m["role"]
18 |         assert messages[-1]["role"] in ["tool", "user"]
19 |         print("tool base get_res")
20 |         messages_ = self.format_message(messages, functions, more_info)
21 |         outputs, inputs = self._get_res(messages_)
22 |         return self.decode_res(outputs, inputs)
23 | 
24 |     def get_messages_res(self, messages, extra_args={}, more_info=None):
25 |         last_role = None
26 |         for m in messages:
27 |             if last_role is None:
28 |                 last_role = m["role"]
29 |                 continue
30 |             assert last_role != m["role"], "Message role cannot be the same."
31 |             assert "content" in m, "Message content cannot be empty."
32 |             last_role = m["role"]
33 |         assert messages[-1]["role"] in ["tool", "user"]
34 |         outputs, inputs = self._get_res(messages)
35 |         return self.decode_res(outputs, inputs)
36 | 
37 |     def format_message(self, messages, functions, more_info=None):
38 |         return messages
39 |     
40 |     def _get_res(self, messages):
41 |         raise NotImplementedError
42 |     
43 |     def decode_res(self, res):
44 |         return res
45 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/tool_model_map.py:
--------------------------------------------------------------------------------
 1 | from tool_class.tool_ace import ToolACE
 2 | from tool_class.xlam import Xlam
 3 | from tool_class.xlam2 import Xlam2
 4 | from tool_class.gorilla import Gorilla
 5 | from tool_class.llama import Llama
 6 | from tool_class.qwen import Qwen
 7 | from tool_class.deepseek import DeepSeek
 8 | from tool_class.chatglm import ChatGLM
 9 | from tool_class.watt import Watt
10 | from tool_class.fc_medium import FC_Medium
11 | from tool_class.hammer import Hammer
12 | 
13 | 
14 | tool_model_map = {
15 |     "toolace": ToolACE,
16 |     "toolace2": ToolACE,
17 |     "xlam": Xlam,
18 |     "xlam2-70b": Xlam2,
19 |     "xlam2-32b": Xlam2,
20 |     "xlam2-8b": Xlam2,
21 |     "xlam2-3b": Xlam2,
22 |     "xlam2-1b": Xlam2,
23 |     "gorilla": Gorilla,
24 |     "deepseek-r1": DeepSeek,
25 |     "deepseek-v3": DeepSeek,
26 |     "chatglm": ChatGLM,
27 |     "fcm3.1": FC_Medium,
28 |     ## Watt
29 |     "watt70b": Watt,
30 |     "watt8b": Watt,
31 |     ## Hammer
32 |     "hammer7b": Hammer,
33 |     "hammer3b": Hammer,
34 |     "hammer1.5b": Hammer,
35 |     "hammer0.5b": Hammer,
36 |     ## LLAMA
37 |     "llama70b": Llama,
38 |     "llama8b": Llama,
39 |     "llama3b": Llama,
40 |     "llama1b": Llama
41 | }
42 | 
43 | tool_model_path_map = {
44 |     # toolace
45 |     "toolace": "/xxx/model/ToolACE-8B",
46 |     "toolace2": "/xxx/ToolACE-2-Llama-3.1-8B",
47 |     # xlam
48 |     "xlam": "/xxx/model/xLAM-7b-fc-r",
49 |     "xlam2-70b": "/xxx/Llama-xLAM-2-70b-fc-r",
50 |     "xlam2-32b": "/xxx/xLAM-2-32b-fc-r",
51 |     "xlam2-8b": "/xxx/Llama-xLAM-2-8b-fc-r",
52 |     "xlam2-3b": "/xxx/xLAM-2-3b-fc-r",
53 |     "xlam2-1b": "/xxx/xLAM-2-1b-fc-r",
54 |     # Watt
55 |     "watt70b": "/xxx/model/watt-tool-70B",
56 |     "watt8b": "/xxx/model/watt-tool-8B",
57 |     # Hammer2.1
58 |     "hammer7b": "/xxx/model/Hammer2.1-7b",
59 |     "hammer3b": "/xxx/model/Hammer2.1-3b",
60 |     "hammer1.5b": "/xxx/model/Hammer2.1-1.5b",
61 |     "hammer0.5b": "/xxx/model/Hammer2.1-0.5b",
62 |     # other
63 |     "gorilla": "/xxx/model/gorilla-openfunctions-v2",
64 |     "deepseek-r1": "/xxx/model/DeepSeek-R1",
65 |     "deepseek-v3": "/xxx/model/DeepSeek-V3",
66 |     "chatglm": "/xxx/model/glm-4-9b-chat-hf",
67 |     "fcm3.1": "/xxx/model/functionary-medium-v3.1",
68 |     # LLAMA3.3
69 |     "llama70b": "/xxx/model/Llama-3.3-70B-Instruct",
70 |     "llama8b": "/xxx/model/Meta-Llama-3.1-8B-Instruct",
71 |     "llama3b": "/xxx/model/Llama-3.2-3B-Instruct",
72 |     "llama1b": "/xxx/model/Llama-3.2-1B-Instruct"
73 | }
74 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/watt.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .tool_class_base import ToolClass
 4 | from transformers import AutoModelForCausalLM, AutoTokenizer
 5 | 
 6 | 
 7 | logger = logging.getLogger()
 8 | 
 9 | 
10 | class Watt(ToolClass):
11 |     def init(self):
12 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
13 |         self.model = AutoModelForCausalLM.from_pretrained(
14 |             self.model_path,
15 |             torch_dtype="auto",
16 |             device_map="auto",
17 |             # attn_implementation="flash_attention_2"
18 |         )
19 |         # Example usage (adapt as needed for your specific tool usage scenario)
20 |         self.system_prompt = """You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
21 | If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out.
22 | You should only return the function call in tools call sections.
23 | 
24 | If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
25 | You SHOULD NOT include any other text in the response.
26 | Here is a list of functions in JSON format that you can invoke.\n{functions}\n
27 | """
28 | 
29 |     def remove_function_object(self, functions):
30 |         functions_ = []
31 |         for func in functions:
32 |             if "function" in func and "name" in func["function"]:
33 |                 func = func["function"]
34 |             functions_.append(func)
35 |         return functions_
36 | 
37 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
38 |         functions = self.remove_function_object(functions)
39 |         if messages[0]["role"] == "system":
40 |             system_content = self.system_prompt.format(functions=functions) + messages[0]["content"].replace("\n", "")
41 |             messages[0]["content"] = system_content
42 |         else:
43 |             system_content = self.system_prompt.format(functions=functions)
44 |             messages = [{"role": "system", "content": system_content}] + messages
45 |         if "date" in extra_args:
46 |             date_string = extra_args["date"]
47 |             logger.info("Using date string {}".format(date_string))
48 |             inputs = self.tokenizer.apply_chat_template(messages, date_string=date_string, add_generation_prompt=True, return_tensors="pt").to(self.model.device)
49 |         else:
50 |             inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device)
51 |         outputs = self.model.generate(inputs, max_new_tokens=2048, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id)
52 |         return self.decode_res(inputs, outputs)
53 | 
54 |     def _get_res(self, messages):
55 |         # outputs = self.pipeline(messages, max_new_tokens=512)
56 |         inputs = self.tokenizer.apply_chat_template(
57 |             messages, add_generation_prompt=True, return_tensors="pt"
58 |         ).to(self.model.device)
59 |         outputs = self.model.generate(
60 |             inputs, max_new_tokens=2048, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id
61 |         )
62 |         return inputs, outputs
63 | 
64 |     def decode_res(self, prompt, outputs):
65 |         return self.tokenizer.decode(outputs[0][len(prompt[0]):], skip_special_tokens=True)
66 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/xlam.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import torch
  3 | 
  4 | from .tool_class_base import ToolClass
  5 | from transformers import AutoModelForCausalLM, AutoTokenizer
  6 | 
  7 | 
  8 | torch.random.manual_seed(0)
  9 | 
 10 | 
 11 | class Xlam(ToolClass):
 12 |     def init(self):
 13 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
 14 |         self.model = AutoModelForCausalLM.from_pretrained(
 15 |             self.model_path,
 16 |             torch_dtype='auto',
 17 |             device_map='auto',
 18 |             trust_remote_code=True
 19 |         )
 20 |     
 21 |     # Helper function to convert openai format tools to our more concise xLAM format
 22 |     def convert_to_xlam_tool(self, tools):
 23 |         ''''''
 24 |         if isinstance(tools, dict):
 25 |             return {
 26 |                 "name": tools["name"],
 27 |                 "description": tools["description"],
 28 |                 "parameters": {k: v for k, v in tools["parameters"].get("properties", {}).items()}
 29 |             }
 30 |         elif isinstance(tools, list):
 31 |             return [self.convert_to_xlam_tool(tool) for tool in tools]
 32 |         else:
 33 |             return tools
 34 |     
 35 |     # Helper function to build the input prompt for our model
 36 |     
 37 |     def build_prompt(self, task_instruction: str, format_instruction: str, tools: list, query: str, conversation_history: list, system_message: str):
 38 |         if system_message:
 39 |             prompt = f"{system_message}\n\n"
 40 |         else:
 41 |             prompt = ""
 42 |         prompt += f"[BEGIN OF TASK INSTRUCTION]\n{task_instruction}\n[END OF TASK INSTRUCTION]\n\n"
 43 |         prompt += f"[BEGIN OF AVAILABLE TOOLS]\n{json.dumps(tools)}\n[END OF AVAILABLE TOOLS]\n\n"
 44 |         prompt += f"[BEGIN OF FORMAT INSTRUCTION]\n{format_instruction}\n[END OF FORMAT INSTRUCTION]\n\n"
 45 |         prompt += f"[BEGIN OF QUERY]\n{query}\n[END OF QUERY]\n\n"
 46 |         
 47 |         if len(conversation_history) > 0: prompt += self.build_conversation_history_prompt(conversation_history)
 48 |         return prompt
 49 | 
 50 |     def build_conversation_history_prompt(self, conversation_history: str):
 51 |         parsed_history = []
 52 |         for step_data in conversation_history:
 53 |             parsed_history.append({
 54 |                 "step_id": step_data["step_id"],
 55 |                 "thought": step_data["thought"],
 56 |                 "tool_calls": step_data["tool_calls"],
 57 |                 "next_observation": step_data["next_observation"],
 58 |                 "user_input": step_data['user_input']
 59 |             })
 60 |             
 61 |         history_string = json.dumps(parsed_history)
 62 |         return f"\n[BEGIN OF HISTORY STEPS]\n{history_string}\n[END OF HISTORY STEPS]\n"
 63 |         
 64 | 
 65 |     def format_message(self, messages, functions, more_info=None):
 66 |         # You can modify the prompt for your task
 67 |         task_instruction = """
 68 | You are an expert in composing functions. You are given a question and a set of possible functions. 
 69 | Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 
 70 | If none of the functions can be used, point it out and refuse to answer. 
 71 | If the given question lacks the parameters required by the function, also point it out.
 72 | """.strip()
 73 | 
 74 |         format_instruction = """
 75 | The output MUST strictly adhere to the following JSON format, and NO other text MUST be included.
 76 | The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.
 77 | ```
 78 | {
 79 |     "tool_calls": [
 80 |     {"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
 81 |     ... (more tool calls as required)
 82 |     ]
 83 | }
 84 | ```
 85 | """.strip()
 86 | 
 87 |         tools = []
 88 |         for func in functions:
 89 |             if "function" in func and "name" in func["function"]:
 90 |                 tools.append(func["function"])
 91 |             else:
 92 |                 tools.append(func)
 93 | 
 94 |         system = None
 95 |         if messages[0]["role"] == "system":
 96 |             system = messages[0]["content"]
 97 |             messages = messages[1:]
 98 | 
 99 |         xlam_format_tools = self.convert_to_xlam_tool(tools) if len(tools) != 0 else []
100 |         conversation_history = self.build_conversation_history(messages)
101 |         query = next((msg['content'] for msg in reversed(messages) if msg['role'] == 'user'), "")
102 |         messages = self.build_prompt(task_instruction, format_instruction, xlam_format_tools, query, conversation_history, system)
103 |         messages = [{'role': 'user', 'content': messages}]
104 |         # print(messages)
105 |         return messages
106 | 
107 |     def build_conversation_history(self, messages):
108 |         history = []
109 |         for msg in messages:
110 |             if msg['role'] == 'tool':
111 |                 history[-1]['next_observation'] = msg['content']
112 |             else:
113 |                 history.append({
114 |                     'step_id': len(history) + 1,
115 |                     'thought': msg.get('content', ''),
116 |                     'tool_calls': [msg['tool_calls']] if 'tool_calls' in msg else [],
117 |                     'next_observation': '',
118 |                     'user_input': msg['content'] if msg['role'] == 'user' else ''
119 |                 })
120 |         return history
121 | 
122 |     def _get_res(self, messages):
123 |         inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device)
124 |         outputs = self.model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id)
125 |         return outputs, inputs
126 | 
127 |     def decode_res(self, outputs, inputs):
128 |         return self.tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
129 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_class/xlam2.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .tool_class_base import ToolClass
 4 | from transformers import AutoModelForCausalLM, AutoTokenizer
 5 | 
 6 | 
 7 | class Xlam2(ToolClass):
 8 |     def init(self):
 9 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
10 |         self.model = AutoModelForCausalLM.from_pretrained(
11 |             self.model_path,
12 |             torch_dtype=torch.bfloat16,
13 |             device_map="auto",
14 |             # attn_implementation="flash_attention_2"
15 |         )
16 | 
17 |     def get_res(self, messages, functions, extra_args={}, more_info=None):
18 |         inputs = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True,
19 |                                                     return_dict=True, return_tensors="pt")
20 |         input_ids_len = inputs["input_ids"].shape[-1]
21 |         inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
22 |         outputs = self.model.generate(**inputs, max_new_tokens=256)
23 |         return self.decode_res(input_ids_len, outputs)
24 | 
25 |     def _get_res(self, messages):
26 |         # outputs = self.pipeline(messages, max_new_tokens=512)
27 |         print("just messages")
28 |         text = self.tokenizer.apply_chat_template(
29 |             messages,
30 |             tokenize=False,
31 |             add_generation_prompt=True,
32 |         )
33 |         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
34 |         outputs = self.model.generate(
35 |             **model_inputs,
36 |             max_new_tokens=512
37 |         )
38 |         return model_inputs, outputs
39 | 
40 |     def decode_res(self, input_ids_len, outputs):
41 |         # print(len(prompt))
42 |         # print(type(outputs), outputs)
43 |         generated_tokens = outputs[:, input_ids_len:]  # Slice the output to get only the newly generated tokens
44 |         return self.tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
45 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_parser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/tool_parser/__init__.py


--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .readnwrite import *
2 | from .tools import *
3 | from .date import *
4 | from .parse_res import *


--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/date.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import random
 3 | 
 4 | from datetime import datetime
 5 | 
 6 | 
 7 | def get_random_date(a1=(2024, 1, 1, 0, 0, 0, 0, 0, 0), a2=(2024, 12, 31, 23, 59, 59, 0, 0, 0)):
 8 |     start = time.mktime(a1)  # 生成开始时间戳
 9 |     end = time.mktime(a2)  # 生成结束时间戳
10 | 
11 |     t = random.randint(start, end)  # 在开始和结束时间戳中随机取出一个
12 |     date_touple = time.localtime(t)  # 将时间戳生成时间元组
13 |     date = time.strftime("%Y-%m-%d %H:%M:%S", date_touple)  # 将时间元组转成格式化字符串（1976-05-21）
14 |     return date
15 | 
16 | 
17 | def get_current_date():
18 |     current_time = time.time()  # 获取当前时间戳
19 |     current_date_tuple = time.localtime(current_time)  # 将时间戳生成时间元组
20 |     current_date = time.strftime("%Y-%m-%d %H:%M:%S", current_date_tuple)  # 将时间元组转成格式化字符串
21 |     return current_date
22 | 
23 | 
24 | def add_weekday_date(date):
25 |     if "星期" in date:
26 |         return date
27 |     date = date.replace("当前时间：", "").replace("环境：", "")
28 |     date_obj = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
29 |     weekday_num = date_obj.weekday()
30 |     weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
31 |     weekday = weekdays[weekday_num]
32 |     date = "当前时间：" + date + " " + weekday
33 |     return date
34 | 
35 | 
36 | def get_current_date_with_weekday():
37 |     return add_weekday_date(get_current_date())


--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/parse_res.py:
--------------------------------------------------------------------------------
 1 | import ast
 2 | 
 3 | 
 4 | def get_keywords(value):
 5 |     if isinstance(value, ast.Str):
 6 |         value = value.s
 7 |     elif isinstance(value, ast.Num):
 8 |         value = value.n
 9 |     elif isinstance(value, ast.UnaryOp):
10 |         if isinstance(value.op, ast.USub):
11 |             operand = get_keywords(value.operand)
12 |             value = -operand
13 |     elif isinstance(value, ast.BinOp):
14 |         left = get_keywords(value.left)
15 |         right = get_keywords(value.right)
16 |         if isinstance(value.op, ast.Add):
17 |             value = left + right
18 |         elif isinstance(value.op, ast.Sub):
19 |             value = left - right
20 |         elif isinstance(value.op, ast.Mult):
21 |             value = left * right
22 |         elif isinstance(value.op, ast.Div):
23 |             value = left / right
24 |     elif isinstance(value, ast.Subscript):
25 |         value = value.slice.value
26 |         if isinstance(value.slice, ast.Index):
27 |             value = value.slice.value
28 |         elif isinstance(value.slice, ast.Slice):
29 |             value = value.slice.value
30 |         elif isinstance(value.slice, ast.Ellipsis):
31 |             value = "..."
32 |     elif isinstance(value, ast.NameConstant):
33 |         value = value.value
34 |     elif isinstance(value, ast.Name):
35 |         if value.id.lower() == "true":
36 |             value = True
37 |         elif value.id.lower() == "false":
38 |             value = False
39 |         else:
40 |             value = value.id
41 |     elif isinstance(value, ast.List):
42 |         value = [get_keywords(elt) for elt in value.elts]
43 |     elif isinstance(value, ast.Tuple):
44 |         value = tuple([get_keywords(elt) for elt in value.elts])
45 |     elif isinstance(value, ast.Dict):
46 |         value = {
47 |             get_keywords(key): get_keywords(val)
48 |             for key, val in zip(value.keys, value.values)
49 |         }
50 |     else:
51 |         raise Exception("Unsupported type: {}".format(type(value)))
52 |     return value
53 | 
54 | 
55 | def parse_string_to_function(input_str):
56 |     parsed_input = ast.parse(input_str)
57 | 
58 |     function_name = parsed_input.body[0].value.func.id
59 |     arguments = parsed_input.body[0].value.args
60 |     keywords = parsed_input.body[0].value.keywords
61 | 
62 |     args_list = []
63 |     for keyword in keywords:
64 |         key = keyword.arg
65 |         value = keyword.value
66 |         value = get_keywords(value)
67 |         args_list.append((key, value))
68 | 
69 |     return function_name, args_list
70 | 
71 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/readnwrite.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import pandas as pd
  3 | 
  4 | from glob import glob
  5 | 
  6 | 
  7 | def read_json_file_to_list(input_file):
  8 |     result = []
  9 |     with open(input_file) as fin:
 10 |         for line in fin:
 11 |             obj = json.loads(line)
 12 |             result.append(obj)
 13 |     return result
 14 | 
 15 | 
 16 | def read_file_to_json(path, skip_path=None):
 17 |     data = []
 18 |     files = glob(path)
 19 |     print("Read files:")
 20 |     for file_ in files:
 21 |         if skip_path is not None and (
 22 |                 file_ == skip_path
 23 |                 or file_ in skip_path
 24 |         ):
 25 |             continue
 26 |         with open(file_, "r") as f:
 27 |             tmps = [json.loads(_) for _ in f.readlines()]
 28 |             print(f"{file_}: {len(tmps)}")
 29 |             data += tmps
 30 |     return data
 31 | 
 32 | 
 33 | def write_json_to_file(data, path, func=None, print_f=True):
 34 |     with open(path, "w") as f:
 35 |         for item in data:
 36 |             if func != None:
 37 |                 item = func(item)
 38 |             f.write(json.dumps(item, ensure_ascii=False, sort_keys=True))
 39 |             f.write("\n")
 40 |     if print_f:
 41 |         print(f"Write {len(data)} items to {path}\nSamples: {json.dumps(item, ensure_ascii=False)}")
 42 |     else:
 43 |         print(f"Write {len(data)} items to {path}")
 44 | 
 45 | 
 46 | def read_csv_to_dict_list(file_path):
 47 |     # 使用pandas读取csv文件
 48 |     df = pd.read_csv(file_path, keep_default_na=False, na_values=[''])
 49 | 
 50 |     # 重命名"Unnamed: {num}"列
 51 |     all_none_from = None
 52 |     for col in df.columns:
 53 |         if col.startswith("Unnamed") and df[col].isnull().all():
 54 |             all_none_from = df.columns.get_loc(col)
 55 |             break
 56 | 
 57 |     # 如果存在这样的列，则删除这些列
 58 |     if all_none_from is not None:
 59 |         df = df.iloc[:, :all_none_from]
 60 | 
 61 |     df.rename(columns=lambda x: f"key{x.split(': ')[1]}" if x.startswith("Unnamed") else x, inplace=True)
 62 | 
 63 |     # 将DataFrame转换为字典列表
 64 |     dict_list = df.to_dict('records')
 65 | 
 66 |     # 过滤掉所有值都是None的字典
 67 |     filtered_dict_list = []
 68 |     for row in dict_list:
 69 |         # 将空字符串替换为None，并检查是否所有值都是None
 70 |         all_none = True
 71 |         for key in list(row.keys()):
 72 |             if pd.isna(row[key]):
 73 |                 row[key] = None
 74 |             else:
 75 |                 all_none = False
 76 |         # 如果不是所有值都是None，添加到结果列表中
 77 |         if not all_none:
 78 |             filtered_dict_list.append(row)
 79 | 
 80 |     print(f"Read file: {file_path}\ndata length:{len(filtered_dict_list)}\nkeys:{filtered_dict_list[0].keys()}")
 81 |     return filtered_dict_list
 82 | 
 83 | 
 84 | def write_list_of_list_to_csv(list_of_list, csv_file_name):
 85 |     # 检查list_of_list是否至少有两个元素（列名和至少一行数据）
 86 |     if len(list_of_list) < 2:
 87 |         raise ValueError("List of list must contain at least one row of data along with column names.")
 88 |     assert all([len(_) == len(list_of_list[0]) for _ in list_of_list])
 89 | 
 90 |     # 第一个元素是列名
 91 |     column_names = list_of_list[0]
 92 | 
 93 |     # 剩余的元素是数据行
 94 |     data_rows = list_of_list[1:]
 95 | 
 96 |     # 创建DataFrame
 97 |     df = pd.DataFrame(data_rows, columns=column_names)
 98 | 
 99 |     # 导出到CSV文件
100 |     df.to_csv(csv_file_name, index=False)  # index=False表示不导出行索引
101 |     print(f"Write data to {csv_file_name}\nSamples: {len(list_of_list)}")
102 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/tools.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import uuid
  3 | import datetime
  4 | import argparse
  5 | import ast
  6 | import json
  7 | import traceback
  8 | 
  9 | 
 10 | def get_random_file_name(file_type, keys=None, need_time=False, need_uuid=True):
 11 |     file_name = []
 12 |     if need_time:
 13 |         today = str(datetime.datetime.now()).replace(" ", "-").split(".")[0]
 14 |         file_name.append(today)
 15 |     if need_uuid:
 16 |         file_name.append(str(uuid.uuid4())[:6])
 17 |     if keys is not None:
 18 |         file_name.append(str(keys))
 19 |     return "_".join(file_name) + f".{file_type}"
 20 | 
 21 | 
 22 | def get_random_pathname(path_, file_type, keys=None, need_time=False, need_uuid=True):
 23 |     file_name = get_random_file_name(file_type, keys, need_time, need_uuid)
 24 |     pathname = os.path.join(path_, file_name)
 25 |     return pathname
 26 | 
 27 | 
 28 | def str2bool(v):
 29 |     '''
 30 |     Transform string to bool.
 31 | 
 32 |     Arguments:
 33 |         v (str): The value to be converted.
 34 | 
 35 |     Returns:
 36 |         bool: The converted value.
 37 |     '''
 38 |     if isinstance(v, bool):
 39 |         return v
 40 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
 41 |         return True
 42 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
 43 |         return False
 44 |     else:
 45 |         raise argparse.ArgumentTypeError('Boolean value expected.')
 46 | 
 47 | 
 48 | def get_keywords(value):
 49 |     if isinstance(value, ast.Str):
 50 |         value = value.s
 51 |     elif isinstance(value, ast.Num):
 52 |         value = value.n
 53 |     elif isinstance(value, ast.UnaryOp):
 54 |         if isinstance(value.op, ast.USub):
 55 |             operand = get_keywords(value.operand)
 56 |             value = -operand
 57 |     elif isinstance(value, ast.BinOp):
 58 |         left = get_keywords(value.left)
 59 |         right = get_keywords(value.right)
 60 |         if isinstance(value.op, ast.Add):
 61 |             value = left + right
 62 |         elif isinstance(value.op, ast.Sub):
 63 |             value = left - right
 64 |         elif isinstance(value.op, ast.Mult):
 65 |             value = left * right
 66 |         elif isinstance(value.op, ast.Div):
 67 |             value = left / right
 68 |     elif isinstance(value, ast.Subscript):
 69 |         value = value.slice.value
 70 |         if isinstance(value.slice, ast.Index):
 71 |             value = value.slice.value
 72 |         elif isinstance(value.slice, ast.Slice):
 73 |             value = value.slice.value
 74 |         elif isinstance(value.slice, ast.Ellipsis):
 75 |             value = "..."
 76 |     elif isinstance(value, ast.NameConstant):
 77 |         value = value.value
 78 |     elif isinstance(value, ast.Name):
 79 |         if value.id.lower() == "true":
 80 |             value = True
 81 |         elif value.id.lower() == "false":
 82 |             value = False
 83 |         else:
 84 |             value = value.id
 85 |     elif isinstance(value, ast.List):
 86 |         value = [get_keywords(elt) for elt in value.elts]
 87 |     elif isinstance(value, ast.Tuple):
 88 |         value = tuple([get_keywords(elt) for elt in value.elts])
 89 |     elif isinstance(value, ast.Dict):
 90 |         value = {
 91 |             get_keywords(key): get_keywords(val)
 92 |             for key, val in zip(value.keys, value.values)
 93 |         }
 94 |     else:
 95 |         raise Exception("Unsupported type: {}".format(type(value)))
 96 |     return value
 97 | 
 98 | 
 99 | def properties_filter(dic_):
100 |     if type(dic_) == dict:
101 |         dic_r = {}
102 |         for k in dic_:
103 |             if k not in ["parameters", "properties", "description", "type", "example_value", "enum", "items"]:
104 |                 continue
105 |             if k == "properties":
106 |                 dic_r["properties"] = {_: properties_filter(dic_[k][_]) for _ in dic_[k]}
107 |             elif k == "items":
108 |                 dic_r[k] = properties_filter(dic_[k])
109 |             elif k == "type":
110 |                 if "|" in dic_[k]:
111 |                     dic_[k] = dic_[k].split("|")[0]
112 |                 if dic_[k] == "float":
113 |                     r_ = "number"
114 |                 elif dic_[k] in ["list of dictionaries"]:
115 |                     r_ = "object"
116 |                 elif dic_[k] in ["int"]:
117 |                     r_ = "integer"
118 |                 elif dic_[k] in ["complex_string", "String", "UUID"]:
119 |                     r_ = "string"
120 |                 elif "enum" in dic_[k]:
121 |                     try:
122 |                         dic_r["enum"] = json.loads(dic_[k].replace("enum", ""))
123 |                         assert type(dic_r["enum"][0]) == str
124 |                         r_ = "string"
125 |                     except:
126 |                         r_ = "string"
127 |                 elif type(dic_[k]) == dict:
128 |                     r_ = "object"
129 |                 else:
130 |                     r_ = dic_[k]
131 |                 assert r_ in ["string", "integer", "boolean", "array", "object", "number", "enum"], f"Wrong: {r_}"
132 |                 dic_r[k] = r_
133 |             elif k == "enum":
134 |                 if type(dic_[k]) == dict:
135 |                     enum_ = []
136 |                     for k_ in dic_[k]:
137 |                         assert type(dic_[k][k_]) == list
138 |                         enum_.extend(dic_[k][k_])
139 |                 else:
140 |                     enum_ = dic_[k]
141 |                 assert type(enum_) == list and all([type(_) in [str, int, float, dict, bool, list] for _ in enum_])
142 |                 dic_r[k] = dic_[k]
143 |             else:
144 |                 dic_r[k] = dic_[k]
145 |         return dic_r
146 |     else:
147 |         return dic_
148 | 
149 | 
150 | def functions_uniform(function):
151 |     if type(function) == list and (
152 |             "function" in function[0]
153 |             or "name" in function[0]
154 |     ):
155 |         functions = []
156 |         for function_ in function:
157 |             functions.append(functions_uniform(function_))
158 |         return functions
159 |     function_ = {}
160 |     for key in function:
161 |         if key == "parameters":
162 |             if "properties" not in function[key]:
163 |                 function_[key] = {"type": "object", "properties": {}}
164 |             else:
165 |                 function_[key] = functions_uniform(function[key])
166 |         elif key == "properties":
167 |             function_[key] = {_: properties_filter(function[key][_]) for _ in function[key]}
168 |         elif key == "function":
169 |             function_[key] = functions_uniform(function[key])
170 |         else:
171 |             function_[key] = function[key]
172 |     return function_
173 | 
174 | 
175 | def remove_messages(messages, is_english=False):
176 |     new_messages = []
177 |     try:
178 |         role = "user"
179 |         for m in messages:
180 |             assert (
181 |                            m["role"] == "assistant"
182 |                            and role == "assistant"
183 |                    ) or (
184 |                            m["role"] in ["user", "tool"]
185 |                            and role in ["user", "tool"]
186 |                    )
187 |             role = "assistant" if role in ["user", "tool"] else "user"
188 |             if is_english:
189 |                 colon_idx = m["content"].find(":")
190 |                 if (
191 |                         colon_idx != -1 and
192 |                         m["content"][:colon_idx].lower() in [
193 |                     "ai", "ai agent", "user", "ai agent assistant", "planner", "observation", "tool"
194 |                 ]
195 |                 ):
196 |                     m['content'] = m["content"][colon_idx + 1:]
197 |             else:
198 |                 colon_idx = m["content"].find("：")
199 |                 if (
200 |                         colon_idx != -1 and
201 |                         m["content"][:colon_idx] in [
202 |                     "用户", "AI Agent助手", "AI Agent", "Planner", "Observation", "Tool"
203 |                 ]
204 |                 ):
205 |                     m['content'] = m["content"][colon_idx + 1:]
206 |             new_messages.append(m)
207 |     except Exception as e:
208 |         traceback.print_exc()
209 |     return new_messages
210 | 


--------------------------------------------------------------------------------
/c3_bench/bench_test/web_server.py:
--------------------------------------------------------------------------------
  1 | import http.server
  2 | import json
  3 | import logging
  4 | import os
  5 | import sys
  6 | import traceback
  7 | import time
  8 | 
  9 | from tool_class.tool_model_map import tool_model_map, tool_model_path_map
 10 | 
 11 | 
 12 | def get_current_date():
 13 |     current_time = time.time()  # 获取当前时间戳
 14 |     current_date_tuple = time.localtime(current_time)  # 将时间戳生成时间元组
 15 |     current_date = time.strftime("%Y-%m-%d", current_date_tuple)  # 将时间元组转成格式化字符串
 16 |     return current_date
 17 | 
 18 | 
 19 | # 设置日志
 20 | logger = logging.getLogger()
 21 | logger.setLevel(logging.INFO)
 22 | 
 23 | file_handler = logging.FileHandler(f'./log/server_{get_current_date()}.log')
 24 | file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s:%(message)s'))
 25 | 
 26 | # 创建流日志处理器
 27 | stream_handler = logging.StreamHandler()
 28 | stream_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s:%(message)s'))
 29 | 
 30 | # 添加日志处理器到日志记录器
 31 | logger.addHandler(file_handler)
 32 | logger.addHandler(stream_handler)
 33 | 
 34 | 
 35 | model_name = sys.argv[1]
 36 | model = tool_model_map[model_name](model_name, tool_model_path_map[model_name])
 37 | 
 38 | 
 39 | class RequestHandler(http.server.BaseHTTPRequestHandler):
 40 |     def do_POST(self):
 41 |         # 解析请求体中的 JSON 数据
 42 |         content_length = int(self.headers['Content-Length'])
 43 |         post_data = self.rfile.read(content_length)
 44 |         try:
 45 |             data = json.loads(post_data)
 46 |             messages = data.get('messages', [])
 47 |             tools = data.get('tools', [])
 48 | 
 49 |             answer = ""
 50 |             error = None
 51 |             st_time = time.time()
 52 |             try:
 53 |                 if len(tools) != 0:
 54 |                     answer = model.get_res(messages, tools, extra_args=data)
 55 |                 else:
 56 |                     answer = model.get_messages_res(messages)
 57 |             except Exception as e:
 58 |                 error = traceback.format_exc()
 59 |                 logging.error(f'Error handling request: {e}')
 60 | 
 61 |             # 记录日志
 62 |             logging.info(f'Received messages: {messages}')
 63 |             logging.info(f'Received tools: {tools}')
 64 | 
 65 |             # 准备响应数据
 66 |             response = {
 67 |                 'answer': answer,
 68 |                 "model_name": model_name,
 69 |                 "error": error if error else None,
 70 |                 "time": time.time() - st_time
 71 |             }
 72 |             response_data = json.dumps(response).encode('utf-8')
 73 | 
 74 |             # 发送 HTTP 响应
 75 |             self.send_response(200)
 76 |             self.send_header('Content-Type', 'application/json')
 77 |             self.send_header('Content-Length', len(response_data))
 78 |             self.end_headers()
 79 |             self.wfile.write(response_data)
 80 |         except json.JSONDecodeError as e:
 81 |             # 记录错误日志
 82 |             logging.error(f'Error parsing JSON: {e}')
 83 |             self.send_error(400, 'Invalid JSON')
 84 |         except Exception as e:
 85 |             # 记录错误日志
 86 |             logging.error(f'Error handling request: {e}')
 87 |             self.send_error(500, 'Internal Server Error')
 88 | 
 89 | 
 90 | # 确保日志目录存在
 91 | if not os.path.exists('./log'):
 92 |     os.makedirs('./log')
 93 | 
 94 | # 启动服务器
 95 | port = 12345
 96 | if sys.argv[2]:
 97 |     port = int(sys.argv[2])
 98 | server_address = ('0.0.0.0', port)
 99 | httpd = http.server.HTTPServer(server_address, RequestHandler)
100 | logging.info(f'Starting server on port {port}...')
101 | httpd.serve_forever()
102 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/__init__.py:
--------------------------------------------------------------------------------
 1 | from .planner import planner
 2 | from .agent_ask import agent_ask
 3 | from .agent_answer import agent_answer
 4 | from .agent_answer_chat import agent_answer_chat
 5 | from .tool import tool
 6 | from .user_answer_ask import user_answer_ask
 7 | from .user_vague_answer_ask import user_vague_answer_ask
 8 | from .user_single_tool import user_single_tool
 9 | from .user_multi_tool import user_multi_tool
10 | from .user_multi_tool_parallel import user_multi_tool_parallel
11 | from .user_multi_tool_serial_parallel import user_multi_tool_serial_parallel
12 | from .user_ask import user_ask
13 | from .user_chat import user_chat
14 | from .user_continue_question import user_continue_question
15 | from .checker_planner import checker_planner
16 | from .checker_tool import checker_tool


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/agent_answer.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from utils import get_all_tool_info, logger
 5 | 
 6 | 
 7 | agent_system_prompt_template_zh = '''请你扮演一个超级智能体中的Agent助手，超级智能体拥有一系列外部工具，超级智能体中的Planner可以通过调用外部工具来解决用户任务，具体见[工具列表]。
 8 | 你负责与用户进行交互，你根据Planner和Tool返回的结果，结合用户任务以及上下文对话信息进行回答，只有你的回答会展示给用户。
 9 | 输出格式参考[Agent助手输出格式]。
10 | 
11 | {{{all_tool_required_info}}}
12 | 
13 | [环境信息]="""
14 | {{{env_info}}}
15 | """
16 | 
17 | [Agent助手输出格式]="""
18 | Agent助手：根据[要求]，回复上下文对话信息中最近一轮以 "用户：" 开头的内容（不要重复这句话）
19 | """
20 | 
21 | [要求]="""
22 | 1、回复必须以 "Agent助手：" 开头。
23 | 2、根据上下文对话信息，总结回复最近一轮以 "用户：" 开头的用户任务。
24 | 3、使用markdown格式，务必注意排版要美观，段落之间使用两个换行。
25 | 4、务必注意！！！！如果Tool给出的Observation是一个列表，列表的每一项都有自己的ID，如xxx_id、xxxId，则请你在总结回复时，每一项都保留这些ID，告诉用户！！！！！
26 | 5、使用中文回复。
27 | """
28 | 
29 | [工具列表]="""
30 | {{{tools}}}
31 | """'''
32 | 
33 | agent_system_prompt_template_en = '''Please act as an Agent within a super intelligent agent, which has a series of external tools. The Planner within the super intelligent agent can solve user tasks by calling external tools, as detailed in the [Tool List].
34 | You are responsible for interacting with the user. Based on the results returned by the Planner and Tool, combined with the user task and the context of the conversation, you provide answers, and only your answers will be displayed to the user.
35 | Refer to the [Agent Output Format] for the output format.
36 | 
37 | {{{all_tool_required_info}}}
38 | 
39 | [Environmental Information]="""
40 | {{{env_info}}}
41 | """
42 | 
43 | [Agent Output Format] = """
44 | Agent: According to the [Requirements], reply to the most recent round of content starting with "User:" in the context conversation information (do not repeat this sentence).
45 | """
46 | 
47 | [Requirements]="""
48 | 1、The reply must start with "Agent:".
49 | 2、Summarize the user task from the most recent round starting with "User:" based on the context conversation information.
50 | 3、Use markdown format, and be sure to pay attention to the layout to make it look neat, with two line breaks between paragraphs.
51 | 4、Pay special attention!!!! If the Observation given by the Tool is a list, and each item in the list has its own ID, such as xxx_id or xxxId, then when summarizing the reply, please retain these IDs for each item and inform the user!!!!!!!
52 | 5、Reply in English.
53 | """
54 | 
55 | [Tool List]="""
56 | {{{tools}}}
57 | """'''
58 | 
59 | 
60 | def agent_answer(messages, tools, env_info, request_func):
61 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
62 |     language = os.getenv("LANGUAGE")
63 |     if language == "zh":
64 |         agent_system_prompt_template = agent_system_prompt_template_zh
65 |     else:
66 |         agent_system_prompt_template = agent_system_prompt_template_en
67 | 
68 |     agent_system_prompt = agent_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
69 |                                                       .replace("{{{env_info}}}", env_info) \
70 |                                                       .replace("{{{all_tool_name}}}", all_tool_name) \
71 |                                                       .replace("{{{all_tool_required_info}}}", all_tool_required_info)
72 |     messages_new = [
73 |         {
74 |             "role": "system",
75 |             "content": agent_system_prompt
76 |         }
77 |     ]
78 |     messages_new.extend(messages)
79 |     res = request_func(messages_new)
80 |     if "```markdown\n" in res:
81 |         res = res.replace("```markdown\n", "").replace("\n```", "").replace("Agent助手：\n\n", "Agent助手：") \
82 |                  .replace("Agent:\n\n", "Agent:").replace("Agent: \n\n", "Agent:")
83 |     logger.info(f"agent_answer:\n{res}\n")
84 |     fetch_data = {"task": "agent_answer", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
85 |     return res, fetch_data
86 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/agent_answer_chat.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from utils import get_all_tool_info, logger
 5 | 
 6 | 
 7 | agent_system_prompt_template_zh = '''请你扮演一个超级智能体中的Agent助手，超级智能体拥有一系列外部工具，超级智能体中的Planner可以通过调用外部工具来解决用户任务，具体见[工具列表]。
 8 | 你负责与用户进行交互，你根据Planner和Tool返回的结果，结合用户任务以及上下文对话信息进行回答，只有你的回答会展示给用户。
 9 | 目前Planner判断你可以直接回答用户任务，该任务不需要调用任何工具，请使用你的内部知识直接回答。
10 | 输出格式参考[Agent助手输出格式]。
11 | 
12 | [环境信息]="""
13 | {{{env_info}}}
14 | """
15 | 
16 | [Agent助手输出格式]="""
17 | Agent助手：根据[要求]，回复上下文对话信息中最近一轮以 "用户：" 开头的内容（不要重复这句话）
18 | """
19 | 
20 | [要求]="""
21 | 1、回复必须以 "Agent助手：" 开头。
22 | 2、根据上下文对话信息，直接回答最近一轮以 "用户：" 开头的用户任务。
23 | 3、使用markdown格式，务必注意排版要美观，段落之间使用两个换行。
24 | 4、使用中文回复。
25 | """
26 | 
27 | [工具列表]="""
28 | {{{tools}}}
29 | """'''
30 | 
31 | agent_system_prompt_template_en = '''Please play the role of an Agent assistant within a super intelligent agent. The super intelligent agent possesses a series of external tools. The Planner within the super intelligent agent can solve user tasks by invoking external tools, as detailed in the [Tool List].
32 | You are responsible for interacting with users. Based on the results returned by the Planner and Tools, combined with the user task and the context of the conversation, you provide answers, but only your responses are displayed to the users.
33 | Currently, the Planner has determined that you can directly answer the user's task, which does not require the invocation of any tools. Please use your internal knowledge to respond directly.
34 | Refer to the [Agent Assistant Output Format] for the output format.
35 | 
36 | [Environmental Information]=""
37 | {{{env_info}}}
38 | ""
39 | 
40 | [Agent Assistant Output Format]=""
41 | Agent Assistant: According to the [Requirements], reply to the most recent round of content that starts with "User:" in the context of the conversation (do not repeat this sentence).
42 | ""
43 | 
44 | [Requirements]=""
45 | 1. The reply must start with "Agent Assistant:".
46 | 2. Directly answer the most recent round of user tasks that start with "User:" based on the context of the conversation.
47 | 3. Use markdown format, and be sure to pay attention to the layout to make it visually appealing, with two line breaks between paragraphs.
48 | 4. Reply in English.
49 | ""
50 | 
51 | [Tool List]=""
52 | {{{tools}}}
53 | ""'''
54 | 
55 | 
56 | def agent_answer_chat(messages, tools, env_info, request_func):
57 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
58 |     language = os.getenv("LANGUAGE")
59 |     if language == "zh":
60 |         agent_system_prompt_template = agent_system_prompt_template_zh
61 |     else:
62 |         agent_system_prompt_template = agent_system_prompt_template_en
63 |     agent_system_prompt = agent_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
64 |                                                       .replace("{{{env_info}}}", env_info) \
65 |                                                       .replace("{{{all_tool_name}}}", all_tool_name) \
66 |                                                       .replace("{{{all_tool_required_info}}}", all_tool_required_info)
67 |     messages_new = [
68 |         {
69 |             "role": "system",
70 |             "content": agent_system_prompt
71 |         }
72 |     ]
73 |     messages_new.extend(messages)
74 |     res = request_func(messages_new)
75 |     if "```markdown\n" in res:
76 |         res = res.replace("```markdown\n", "").replace("\n```", "")
77 |     logger.info(f"agent_answer_chat:\n{res}\n")
78 |     fetch_data = {"task": "agent_answer_chat", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
79 |     return res, fetch_data
80 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/agent_ask.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from utils import get_all_tool_info, logger
 5 | 
 6 | 
 7 | agent_system_prompt_template_zh = '''请你扮演一个超级智能体中的Agent助手，超级智能体拥有一系列外部工具，超级智能体中的Planner可以通过调用外部工具来解决用户任务，具体见[工具列表]。
 8 | 你负责与用户进行交互，你根据Planner和Tool返回的结果，结合用户任务以及上下文对话信息进行回答，只有你的回答会展示给用户。
 9 | 输出格式参考[Agent助手输出格式]。
10 | 
11 | {{{all_tool_required_info}}}
12 | 
13 | [环境信息]="""
14 | {{{env_info}}}
15 | """
16 | 
17 | [Agent助手输出格式]="""
18 | Agent助手：根据[要求]，向用户提出认为需要用户输入或补充的信息（不要重复这句话）
19 | """
20 | 
21 | [要求]="""
22 | 1、回复必须以 "Agent助手：" 开头。
23 | 2、根据上下文对话信息，尤其是Planner中的信息，结合工具中的required参数，向用户提出认为需要用户输入或补充的信息，注意提问时不要包括参数的名字。
24 | 3、使用markdown格式，务必注意排版要美观，段落之间使用两个换行。
25 | 4、使用中文回复。
26 | """
27 | 
28 | [工具列表]="""
29 | {{{tools}}}
30 | """'''
31 | 
32 | 
33 | agent_system_prompt_template_en = '''You are to act as an Agent assistant with in a super intelligent agent. The super intelligent agent has a series of external tools, and the Planner within the system can solve user tasks by invoking these external tools, as detailed in the [Tool List].
34 | You are responsible for interacting with users, and you provide answers based on the results returned by the Planner and Tools, combined with the user's task sand contextual dialogue information. Only your responses will be displayed to the user.
35 | The output format should refer to the [Agent Assistant OutputFormat].
36 | 
37 | {{{all_tool_required_info}}}
38 | 
39 | [Environment Information]="""
40 | {{{env_info}}}
41 | """
42 | 
43 | [Agent Assistant Output Format]="""
44 | Agent Assistant: Based on the [Requirements], ask the user for any information or input you think is necessary (do not repeat this sentence).
45 | """
46 | 
47 | [Requirements]="""
48 | 1. The response must start with "Agent Assistant:".
49 | 2. Based on the contextual dialogue information, especially the information from the Planner, and combined with the required parameters from the tools, ask the user for any information or input you think is necessary, ensuring not to include the parameter names in your questions.
50 | 3. Use markdown format, ensuring the layout is aesthetically pleasing, with two line breaks between paragraphs.
51 | 4. Respond in English.
52 | """
53 | 
54 | [Tool List]="""
55 | {{{tools}}}
56 | """'''
57 | 
58 | def agent_ask(messages, tools, env_info, request_func):
59 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
60 |     language = os.getenv("LANGUAGE")
61 |     if language == "zh":
62 |         agent_system_prompt_template = agent_system_prompt_template_zh
63 |     else:
64 |         agent_system_prompt_template = agent_system_prompt_template_en
65 |     agent_system_prompt = agent_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
66 |                                                       .replace("{{{env_info}}}", env_info) \
67 |                                                       .replace("{{{all_tool_name}}}", all_tool_name) \
68 |                                                       .replace("{{{all_tool_required_info}}}", all_tool_required_info)
69 |     messages_new = [
70 |         {
71 |             "role": "system",
72 |             "content": agent_system_prompt
73 |         }
74 |     ]
75 |     messages_new.extend(messages)
76 |     res = request_func(messages_new)
77 |     if "```markdown\n" in res:
78 |         res = res.replace("```markdown\n", "").replace("\n```", "")
79 |     logger.info(f"agent_ask:\n{res}\n")
80 |     fetch_data = {"task": "agent_ask", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
81 |     return res, fetch_data
82 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/checker_tool.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from utils import parse_answer, logger
 5 | 
 6 | 
 7 | def rule_checker_zh(messages, action_list, tools):
 8 |     analysis = {
 9 |         "format_analysis": "",
10 |         "correct": "yes"
11 |     }
12 |     try:
13 |         tool_message = messages[-2]
14 |         tool_content = tool_message["content"]
15 |         tool_content_obj = parse_answer(tool_content)
16 |         observation_list = tool_content_obj["Observation_List"]
17 | 
18 |         if len(action_list) != len(observation_list):
19 |             analysis["format_analysis"] += f"Tool生成的Observation_List的数量为{len(observation_list)}，Planner给出的要执行Action_List的数量为{len(action_list)}，两者不相等，生成错误，需要重新生成"
20 |             analysis["correct"] = "no"
21 | 
22 |     except Exception as e:
23 |         analysis["format_analysis"] += f"Tool生成的格式错误，JSON无法解析，请不要增加//等注释信息，具体错误为：{e}"
24 |         analysis["correct"] = "no"
25 | 
26 |     rule_checker_result = f"Checker_Tool：\n```json\n{json.dumps(analysis, ensure_ascii=False, indent=4)}\n```"
27 |     logger.info(f"rule_checker\n{rule_checker_result}\n")
28 |     return analysis["correct"], rule_checker_result
29 | 
30 | 
31 | def rule_checker_en(messages, action_list, tools):
32 |     analysis = {
33 |         "format_analysis": "",
34 |         "correct": "yes"
35 |     }
36 |     try:
37 |         tool_message = messages[-2]
38 |         tool_content = tool_message["content"]
39 |         tool_content_obj = parse_answer(tool_content)
40 |         observation_list = tool_content_obj["Observation_List"]
41 | 
42 |         if len(action_list) != len(observation_list):
43 |             analysis["format_analysis"] += f"The number of Observation_List generated by Tool is {len(observation_list)}, and the number of Action_List to be executed given by Planner is {len(action_list)}. The two are not equal, resulting in an error, and need to be regenerated."
44 |             analysis["correct"] = "no"
45 | 
46 |     except Exception as e:
47 |         analysis["format_analysis"] += f"The format generated by Tool is incorrect, and JSON cannot be parsed. Please do not add comments such as //, etc. The specific error is: {e}"
48 |         analysis["correct"] = "no"
49 | 
50 |     rule_checker_result = f"Checker_Tool：\n```json\n{json.dumps(analysis, ensure_ascii=False, indent=4)}\n```"
51 |     logger.info(f"rule_checker\n{rule_checker_result}\n")
52 |     return analysis["correct"], rule_checker_result
53 | 
54 | 
55 | def checker_tool(messages, action_list, tools, env_info, request_func):
56 |     language = os.getenv("LANGUAGE")
57 |     if language == "zh":
58 |         rule_correct, rule_checker_result = rule_checker_zh(messages, action_list, tools)
59 |     else:
60 |         rule_correct, rule_checker_result = rule_checker_en(messages, action_list, tools)
61 |     return rule_correct, rule_checker_result
62 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/tool.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | from utils import logger
  5 | 
  6 | 
  7 | tool_system_prompt_template_zh = '''请你扮演一个超级智能体中的外部工具Tool，你的这些外部工具可以用来解决用户任务，具体见[工具列表]。
  8 | 请你根据超级智能体的Planner输出的工具名和输入参数，模拟工具的执行结果。如果Planner给出的Action_List里有多个工具，则请你分别进行模拟，数量要与Action_List的数量一致，结果存在Observation_List中。
  9 | 输出格式参考[Tool输出格式]。
 10 | 
 11 | [环境信息]="""
 12 | {{{env_info}}}
 13 | """
 14 | 
 15 | [Tool输出格式]="""
 16 | Tool：
 17 | ```json
 18 | {
 19 |     "Observation_List": [
 20 |         {
 21 |             "status_code": "参考[工具调用结果要求]，给出包含 HTTP 响应状态代码",
 22 |             "response": "参考[工具调用结果要求]，模拟执行动作的结果。确保您的响应采用 JSON 格式、包含真实数据并符合 OpenAPI 规范格式。"
 23 |         }
 24 |     ]
 25 | }
 26 | ```
 27 | """
 28 | 
 29 | [工具调用结果要求]="""
 30 | 1. 根据 OpenAPI 规范验证请求中的 HTTP 方法和参数。
 31 | 2. 生成严格遵循 OpenAPI 规范中指定格式的响应，并确保其为 JSON 格式。
 32 | 3. 响应应包含真实数据，避免使用占位符。
 33 | 4. 通过提供适当的错误响应来处理边缘情况。
 34 | 5. 对于没有长度限制的请求，如get方法，请确保在响应中返回 3～5 个样本，务必注意不能使用省略符号！！！！！如// xxx、...等来省略样本信息，需要符合 JSON 格式，否则会导致 JSON 无法解析！！！！！
 35 | 6. 尽量使用中文模拟响应。
 36 | """
 37 | 
 38 | [工具列表]="""
 39 | {{{tools}}}
 40 | """
 41 | '''
 42 | 
 43 | tool_system_prompt_template_en = '''Please act as an external tool, Tool, within a super intelligent agent. These external tools can be used to solve user tasks, as detailed in the [Tool List].
 44 | Based on the tool name and input parameters output by the super intelligent agent's Planner, simulate the execution results of the tool.
 45 | If there are multiple tools in the Action_List provided by the Planner, please simulate each one separately, ensuring the number matches the Action_List, and store the results in the Observation_List.
 46 | Refer to the [Tool Output Format] for the outputformat.
 47 | 
 48 | [Environment Information]="""
 49 | {{{env_info}}}
 50 | """
 51 | 
 52 | [Tool Output Format]="""
 53 | Tool:
 54 | ```json
 55 | {
 56 |     "Observation_List": [
 57 |         {
 58 |             "status_code": "Refer to [Tool Invocation Result Requirements] for the HTTP response status code",
 59 |             "response": "Refer to [Tool Invocation Result Requirements] to simulate the result of the action execution. Ensure your response is in JSON format, contains real data, and complies with the OpenAPI specification format."
 60 |         }
 61 |     ]
 62 | }
 63 | ```
 64 | """
 65 | 
 66 | [Tool Invocation Result Requirements]="""
 67 | 1. Validate the HTTP method and parameters in the request according to the OpenAPI specification.
 68 | 2. Generate a response that strictly follows the format specified in the OpenAPI specification and ensure it isin JSON format.
 69 | 3. The response should contain real data, avoiding the use of placeholders.
 70 | 4. Handle edge cases by providing appropriate error responses.
 71 | 5. For requests without length limitations, such as the GET method, ensure the response returns 3 to 5 samples, and be careful not to use ellipses like// xxx, ... to omit sample information, as it must conform to JSON format, otherwise it will cause JSON parsing errors!!!!!!!
 72 | 6. Try to simulate responses in English.
 73 | """
 74 | 
 75 | [Tool List]="""
 76 | {{{tools}}}
 77 | """'''
 78 | 
 79 | 
 80 | def tool(messages, tools, env_info, request_func):
 81 |     language = os.getenv("LANGUAGE")
 82 |     if language == "zh":
 83 |         tool_system_prompt_template = tool_system_prompt_template_zh
 84 |     else:
 85 |         tool_system_prompt_template = tool_system_prompt_template_en
 86 |     tool_system_prompt = tool_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
 87 |                                                     .replace("{{{env_info}}}", env_info)
 88 |     # print(tool_system_prompt)
 89 |     messages_new = [
 90 |         {
 91 |             "role": "system",
 92 |             "content": tool_system_prompt
 93 |         }
 94 |     ]
 95 |     messages_new.extend(messages)
 96 |     res = request_func(messages_new)
 97 |     logger.info(f"tool:\n{res}\n")
 98 |     fetch_data = {"task": "tool", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
 99 |     return res, fetch_data
100 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_answer_ask.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from utils import get_all_tool_info, logger
 5 | 
 6 | 
 7 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
 8 | 这个超级智能体拥有一个Planner、Agent助手，并具备一系列外部工具，可以使用外部工具解决你提出的任务，具体见[工具列表]。
 9 | 根据上下文对话信息，你已经提出了你的任务，但是根据Planner的反馈，你提供的任务信息不足。
10 | 因此，接下来，请你根据最新一轮超级智能体的Agent助手询问的信息进行回复，给出Agent助手要求的必填参数，从而帮助超级智能体解决你的任务。
11 | 输出格式参考[用户输出格式]。
12 | 
13 | {{{all_tool_required_info}}}
14 | 
15 | [环境信息]="""
16 | {{{env_info}}}
17 | """
18 | 
19 | [用户输出格式]="""
20 | 用户：根据[要求]，回复上下文对话信息中最近一轮以 "Agent助手：" 开头的内容（不要重复这句话）
21 | """
22 | 
23 | [要求]="""
24 | 1、回复必须以 "用户：" 开头。
25 | 2、根据上下文对话信息，回复最近一轮以 "Agent助手：" 开头的用户任务。
26 | 3、你的回复里必须包含Agent助手所询问的所有必填参数的信息，使用自然语言描述，可以看情况伪造出一个，例如Base64编码字符串。（不要重复这句话）。
27 | 4、你的回复需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
28 | 5、你的回复应该使用不同的语气：口语化、正式、礼貌、直接等。
29 | 6、你的回复应该使用不同的长度：有短到长，长度逐渐递增。
30 | """
31 | 
32 | [工具列表]="""
33 | {{{tools}}}
34 | """'''
35 | 
36 | 
37 | user_system_prompt_template_en = '''Please play the role of a user who is interacting with a super intelligent agent.
38 | This super intelligent agent has a Planner, an Agent Assistant, and a series of external tools that can be used to solve the tasks you propose, as detailed in the [Tool List].
39 | Based on the context of the conversation, you have already proposed your task, but according to the Planner's feedback, the information you provided is insufficient.
40 | Therefore, next, please respond according to the latest round of inquiries from the super intelligent agent's Agent Assistant, providing the required parameters requested by the Agent Assistant to help the super intelligent agent solve your task.
41 | Refer to the [User Output Format] for the output format.
42 | 
43 | {{{all_tool_required_info}}}
44 | 
45 | [Environment Information]="""
46 | {{{env_info}}}
47 | """
48 | 
49 | [User Output Format]="""
50 | User: According to the [Requirements], respond to the most recent round of context conversation information that starts with "Agent Assistant:" (do not repeat this sentence).
51 | """
52 | 
53 | [Requirements]="""
54 | 1. The response must start with "User:".
55 | 2. Based on the context of the conversation, respond to the most recent user task that starts with "Agent Assistant:".
56 | 3. Your response must include all the required parameter information requested by the Agent Assistant, described in natural language. You may fabricate one if necessary, such as a Base64 encoded string. (Do not repeat this sentence).
57 | 4. Your response should use different types of sentence structures: imperative, declarative, interrogative, etc.
58 | 5. Your response should use different tones: colloquial, formal, polite, direct,etc.
59 | 6. Your response should vary in length: from short to long, gradually increasing in length.
60 | """
61 | 
62 | [Tool List]="""
63 | {{{tools}}}
64 | """'''
65 | 
66 | def user_answer_ask(messages, tools, env_info, request_func):
67 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
68 |     language = os.getenv("LANGUAGE")
69 |     if language == "zh":
70 |         user_system_prompt_template = user_system_prompt_template_zh
71 |     else:
72 |         user_system_prompt_template = user_system_prompt_template_en
73 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
74 |                                                     .replace("{{{env_info}}}", env_info) \
75 |                                                     .replace("{{{all_tool_required_info}}}", all_tool_required_info)
76 |     # print(user_system_prompt)
77 |     messages_new = [
78 |         {
79 |             "role": "system",
80 |             "content": user_system_prompt
81 |         }
82 |     ]
83 |     messages_new.extend(messages)
84 |     res = request_func(messages_new)
85 |     logger.info(f"user_answer_ask:\n{res}\n")
86 |     fetch_data = {"task": "user_answer_ask", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
87 |     return res, fetch_data
88 | 
89 | 
90 | def main():
91 |     pass
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     main()
96 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_ask.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import random
  3 | import os
  4 | 
  5 | from utils import logger, remove_prepare_ask_tools
  6 | 
  7 | 
  8 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
  9 | 这个超级智能体具备一系列外部工具，可以使用外部工具解决你提出的任务。
 10 | 接下来，请你根据[要求]提出5个你需要超级智能体解决的模糊不清的任务。
 11 | 这5个任务都需要使用[工具列表]里的{{{tool}}}才能够完成，但是会让超级智能体不清楚如何填写{{{tool}}}里的某些必填(required)参数，需要多样。
 12 | 最后请你按照[格式]输出最终结果，不要生成多余的文字。
 13 | 
 14 | 工具{{{tool}}}的必填参数有：{{{tool_required}}}，非必填参数有：{{{tool_no_required}}}
 15 | 
 16 | [要求]="""
 17 | 1、用户任务的描述里必须缺乏调用{{{tool}}}时所需的所有必填参数的信息，剩下的非必填参数的信息，请你看情况添加，使用自然语言描述。
 18 | 注意工具参数允许一定的参数推导，即根据用户任务描述可以推导出工具参数的话，就不算缺乏了必要信息，缺乏指的是即使通过推导也无法获得参数值。
 19 | 2、用户任务需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
 20 | 3、用户任务应该包含不同的语气：口语化、正式、礼貌、直接等。
 21 | 4、确保用户任务的长度各不相同，有短到长，长度逐渐递增。
 22 | 5、确保用户任务涉及不同的主题/实例，不同的场景，不同的角色身份。
 23 | 6、根据[工具列表]中所有工具的description，请你提取在所有description中出现的共同实体，并确保用户任务中出现该实体。
 24 | 7、务必不要在用户任务中明确指定要使用的工具{{{tool}}}。
 25 | """
 26 | 
 27 | [工具列表]="""
 28 | {{{tools}}}
 29 | """
 30 | 
 31 | [格式]="""
 32 | ```json
 33 | {
 34 |     "任务1": "xxx",
 35 |     "任务2": "xxx",
 36 |     "任务3": "xxx",
 37 |     "任务4": "xxx",
 38 |     "任务5": "xxx"
 39 | }
 40 | ```
 41 | """'''
 42 | 
 43 | user_system_prompt_template_en = '''Please act as a user who is interacting with a super intelligent agent.
 44 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose.
 45 | Next, based on the [Requirements], please propose 5 ambiguous tasks that you need the super intelligent agent to solve.
 46 | All 5 tasks must require the use of {{{tool}}} from the [Tool List] to be completed, but will leave the super intelligent agent unclear on how to fill in some of the required parameters of {{{tool}}}, and should be diverse.
 47 | Finally, please output the final result according to the [Format], without generating any extra text.
 48 | 
 49 | The required parameters for tool {{{tool}}} are: {{{tool_required}}}, and the optional parameters are: {{{tool_no_required}}}
 50 | 
 51 | [Requirements]="""
 52 | 1. The description of the user's task must lack all the necessary information for calling {{{tool}}}, leaving only the optional parameter information, which you can add as you see fit, using natural language descriptions.
 53 | Note that tool parameters allow for some parameter inference, meaning that if the tool parameters can be inferred from the user's task description, it does not count as lacking necessary information. Lacking means that even through inference, the parameter values cannot be obtained.
 54 | 2. The user's tasks need touse different types of sentence structures: imperative sentences, declarative sentences, interrogative sentences, etc.
 55 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
 56 | 4. Ensure that the length of the user's tasks varies, from short to long, gradually increasing in length.
 57 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.
 58 | 6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks.
 59 | 7. Do not explicitly specify the tool {{{tool}}} in the user's tasks.
 60 | """
 61 | 
 62 | [Tool List]="""
 63 | {{{tools}}}
 64 | """
 65 | 
 66 | [Format]="""
 67 | ```json
 68 | {
 69 |     "Task 1": "xxx",
 70 |     "Task 2": "xxx",
 71 |     "Task 3": "xxx",
 72 |     "Task 4": "xxx",
 73 |     "Task 5": "xxx"
 74 | }
 75 | ```
 76 | """'''
 77 | 
 78 | def parse_answer(user_tasks):
 79 |     user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "")
 80 |     user_tasks = json.loads(user_tasks)
 81 |     task_keys = list(user_tasks.keys())
 82 |     task_key = random.choice(task_keys)
 83 |     user_task = user_tasks[task_key]
 84 |     user_task = "用户：" + user_task
 85 |     return user_task
 86 | 
 87 | 
 88 | def user_ask(messages, tools,  request_func):
 89 |     tools_ = remove_prepare_ask_tools(tools)
 90 |     tool = random.choice(tools_)
 91 |     tool_name = tool["function"]["name"]
 92 |     tool_required = tool["function"]["parameters"]["required"]
 93 |     tool_required = ", ".join(tool_required)
 94 |     tool_all_properties = list(tool["function"]["parameters"]["properties"].keys())
 95 |     tool_no_required = []
 96 |     for property in tool_all_properties:
 97 |         if property not in tool_required:
 98 |             tool_no_required.append(property)
 99 |     tool_no_required = ", ".join(tool_no_required)
100 |     language = os.getenv("LANGUAGE")
101 |     if language == "zh":
102 |         user_system_prompt_template = user_system_prompt_template_zh
103 |     else:
104 |         user_system_prompt_template = user_system_prompt_template_en
105 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
106 |                                                     .replace("{{{tool}}}", tool_name) \
107 |                                                     .replace("{{{tool_required}}}", tool_required) \
108 |                                                     .replace("{{{tool_no_required}}}", tool_no_required)
109 |     messages_new = [
110 |         {
111 |             "role": "user",
112 |             "content": user_system_prompt
113 |         }
114 |     ]
115 |     res = request_func(messages_new)
116 |     logger.info(f"user_ask: {res}\n")
117 |     user_task = parse_answer(res)
118 |     logger.info(f"user_multi_tool:\n{user_task}\n")
119 |     user_message = [{"role": "user", "content": user_task}]
120 |     fetch_data = {"task": "user_ask", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
121 |     return user_message, fetch_data
122 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_chat.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import random
 4 | 
 5 | from utils import logger, remove_prepare_ask_tools
 6 | 
 7 | 
 8 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
 9 | 这个超级智能体具备一系列外部工具，可以使用外部工具解决你提出的任务。
10 | 接下来，请你根据[要求]提出5个你需要超级智能体解决的闲聊任务。
11 | 这5个闲聊任务都不需要使用[工具列表]里的任何工具，但是主题上需要有一些相关性。
12 | 最后请你按照[格式]输出最终结果，不要生成多余的文字。
13 | 
14 | [要求]="""
15 | 1、用户任务是一个闲聊任务，必须与[工具列表]的功能无关，但是主题有一定的相关性。
16 | 2、用户任务需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
17 | 3、用户任务应该包含不同的语气：口语化、正式、礼貌、直接等。
18 | 4、确保用户任务的长度各不相同，有短到长，长度逐渐递增。
19 | 5、确保用户任务涉及不同的主题/实例，不同的场景，不同的角色身份。
20 | """
21 | 
22 | [工具列表]="""
23 | {{{tools}}}
24 | """
25 | 
26 | [格式]="""
27 | ```json
28 | {
29 |     "任务1": "xxx",
30 |     "任务2": "xxx",
31 |     "任务3": "xxx",
32 |     "任务4": "xxx",
33 |     "任务5": "xxx"
34 | }
35 | ```
36 | """'''
37 | 
38 | user_system_prompt_template_en = '''Please pretend to be a user interacting with a super intelligent agent.
39 | This super intelligent agent has a series of external tools that can be used to solve tasks you propose.
40 | Next, based on the [Requirements], propose 5 casual conversation tasks that you need the super-intelligent agent to solve.
41 | These 5 casual conversation tasks should not use any tools from the [Tool List], but should have some thematic relevance.
42 | Finally, please output the final result according to the [Format], without generating any superfluous text.
43 | 
44 | [Requirements]=""
45 | 1. The user task is a casual conversation task, which must be unrelated to the functions of the [Tool List], but should have some thematic relevance.
46 | 2. User tasks need to use different types of sentence structures: imperative, declarative, interrogative, etc.
47 | 3. User tasks should include different tones: colloquial, formal, polite, direct, etc.
48 | 4. Ensure that the lengths of the user tasks are different, ranging from short to long, with gradually increasing length.
49 | 5. Ensure that the user tasks involve different themes/examples, different scenarios, and different role identities.
50 | """
51 | 
52 | [Tool list]=""
53 | {{{tools}}}
54 | """
55 | 
56 | [Format]=""
57 | ```json
58 | {
59 |     "Task 1": "xxx",
60 |     "Task 2": "xxx",
61 |     "Task 3": "xxx",
62 |     "Task 4": "xxx",
63 |     "Task 5": "xxx"
64 | }
65 | ```
66 | """'''
67 | 
68 | def parse_answer(user_tasks):
69 |     user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "")
70 |     user_tasks = json.loads(user_tasks)
71 |     task_keys = list(user_tasks.keys())
72 |     task_key = random.choice(task_keys)
73 |     user_task = user_tasks[task_key]
74 |     user_task = "用户：" + user_task
75 |     return user_task
76 | 
77 | 
78 | def user_chat(messages, tools,  request_func):
79 |     tools = remove_prepare_ask_tools(tools)
80 |     language = os.getenv("LANGUAGE")
81 |     if language == "zh":
82 |         user_system_prompt_template = user_system_prompt_template_zh
83 |     else:
84 |         user_system_prompt_template = user_system_prompt_template_en
85 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4))
86 |     messages_new = [
87 |         {
88 |             "role": "user",
89 |             "content": user_system_prompt
90 |         }
91 |     ]
92 |     res = request_func(messages_new)
93 |     logger.info(f"user_chat: {res}\n")
94 |     user_task = parse_answer(res)
95 |     logger.info(f"user_chat:\n{user_task}\n")
96 |     user_message = [{"role": "user", "content": user_task}]
97 |     fetch_data = {"task": "user_ask", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
98 |     return user_message, fetch_data
99 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_multi_tool.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | from utils import remove_prepare_ask_tools, random_select_answer_cot, get_all_tool_info, logger
  5 | 
  6 | 
  7 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
  8 | 这个超级智能体具备一系列外部工具，可以使用外部工具解决你提出的任务。
  9 | 接下来，请你根据[要求]提出3个你需要超级智能体解决的任务。
 10 | 这3个任务都需要组合使用[工具列表]里的工具(包括：{{{all_tool_name}}})才能够完成，需要具体、多样、需要串行调用多个工具来解决任务。
 11 | 最后请你按照[格式]输出最终结果，不要生成多余的文字。
 12 | 
 13 | {{{all_tool_required_info}}}
 14 | 
 15 | [要求]="""
 16 | 1、用户任务的描述里必须包含调用工具所需的所有必填参数的信息，其他的非必填参数的信息，请你看情况添加，使用自然语言描述。
 17 | 2、用户任务需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
 18 | 3、用户任务应该包含不同的语气：口语化、正式、礼貌、直接等。
 19 | 4、确保用户任务的长度各不相同，有短到长，长度逐渐递增。
 20 | 5、确保用户任务涉及不同的主题/实例，不同的场景，不同的角色身份。
 21 | 6、根据[工具列表]中所有工具的description，请你提取在所有description中出现的共同实体，并确保用户任务中出现该实体。
 22 | 7、务必不要在用户任务中明确指定要使用的工具名。
 23 | 8、调用的多个工具之间必须有依赖关系，调用之间的依赖关系是指，必须在调用工具A完成之后才能运行调用工具B，即调用工具B之前必须先调用工具A。
 24 | 9、任务难度分为easy、medium、hard三个等级，easy代表简单，medium代表中等，hard代表困难，更难的任务需要更多的步骤执行，确保你生成的3个任务中，都是中等难度以上的任务。
 25 | """
 26 | 
 27 | [工具列表]="""
 28 | {{{tools}}}
 29 | """
 30 | 
 31 | [格式]="""
 32 | ```json
 33 | {
 34 |     "任务1": {
 35 |         "任务描述": "xxx",
 36 |         "任务难度": "medium|hard",
 37 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具，请你首先串行给出要调用的工具"
 38 |     },
 39 |     "任务2": {
 40 |         "任务描述": "xxx",
 41 |         "任务难度": "medium|hard",
 42 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具，请你首先串行给出要调用的工具"
 43 |     },
 44 |     "任务3": {
 45 |         "任务描述": "xxx",
 46 |         "任务难度": "medium|hard",
 47 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具，请你首先串行给出要调用的工具"
 48 |     }
 49 | }
 50 | ```
 51 | """'''
 52 | 
 53 | user_system_prompt_template_en = '''Please act as a user who is interacting with a super intelligent agent.
 54 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose.
 55 | Next, based on the [Requirements], please propose 3 tasks that you need the super intelligent agent to solve.
 56 | These 3 tasks must require the combined use of tools from the [Tool List] (including: {{{all_tool_name}}}) to be completed.
 57 | The tasks should be specific, diverse, and require the sequential invocation of multiple tools to solve.
 58 | Finally, please output the final result according to the [Format] without generating any extra text.
 59 | 
 60 | {{{all_tool_required_info}}}
 61 | 
 62 | [Requirements]="""
 63 | 1. The description of the user's task must include all the required parameters needed to invoke the tools, while other optional parameters can be added as you see fit, using natural language.
 64 | 2. The user's tasks should use different types of sentence structures: imperative, declarative, interrogative, etc.
 65 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
 66 | 4. Ensure that the length of the user's tasks varies, from short to long, gradually increasing in length.
 67 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.
 68 | 6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks.
 69 | 7. Do not explicitly specify the names of the tools to be used in the user's tasks.
 70 | 8. There must be dependencies between the multiple tools invoked, meaning that tool A must be called and completed before tool B can be run, i.e., tool B must be invoked after tool A.
 71 | 9. The difficulty of the tasks is divided into easy, medium, and hard levels. Easy represents simple, medium represents moderate, and hard represents difficult. Ensure that the 3 tasks you generate are all of medium difficulty or above.
 72 | """
 73 | 
 74 | [Tool List]="""
 75 | {{{tools}}}
 76 | """
 77 | 
 78 | [Format]="""
 79 | ```json
 80 | {
 81 |     "Task 1": {
 82 |         "Task Description": "xxx",
 83 |         "Task Difficulty": "medium|hard",
 84 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step, and first sequentially list the tools to be invoked."
 85 |     },
 86 |     "Task 2": {
 87 |         "Task Description":"xxx",
 88 |         "Task Difficulty": "medium|hard",
 89 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step, and first sequentially list the tools to be invoked."
 90 |     },
 91 |     "Task 3": {
 92 |         "Task Description": "xxx",
 93 |         "Task Difficulty": "medium|hard",
 94 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step, and first sequentially list the tools to be invoked."
 95 |     }
 96 | }
 97 | ```
 98 | """'''
 99 | 
100 | 
101 | def user_multi_tool(messages, tools,  request_func):
102 |     tools_ = remove_prepare_ask_tools(tools)
103 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools_)
104 |     language = os.getenv("LANGUAGE")
105 |     if language == "zh":
106 |         user_system_prompt_template = user_system_prompt_template_zh
107 |     else:
108 |         user_system_prompt_template = user_system_prompt_template_en
109 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
110 |                                                     .replace("{{{all_tool_name}}}", all_tool_name) \
111 |                                                     .replace("{{{all_tool_required_info}}}", all_tool_required_info)
112 |     messages_new = [
113 |         {
114 |             "role": "user",
115 |             "content": user_system_prompt
116 |         }
117 |     ]
118 |     res = request_func(messages_new)
119 |     logger.info(f"user_multi_tool:\n{res}\n")
120 |     user_task = random_select_answer_cot(res)
121 |     logger.info(f"user_multi_tool:\n{user_task}\n")
122 |     user_message = [{"role": "user", "content": user_task}]
123 |     fetch_data = {"task": "user_multi_tool", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
124 |     return user_message, fetch_data
125 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_multi_tool_parallel.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | from utils import remove_prepare_ask_tools, random_select_answer_cot, get_all_tool_info, logger
  5 | 
  6 | 
  7 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
  8 | 这个超级智能体具备一系列外部工具，可以使用外部工具解决你提出的任务。
  9 | 接下来，请你根据[要求]提出3个你需要超级智能体解决的任务。
 10 | 这3个任务都需要组合使用[工具列表]里的工具(包括：{{{all_tool_name}}})才能够完成，需要具体、多样、需要并行调用多个工具来解决任务。
 11 | 最后请你按照[格式]输出最终结果，不要生成多余的文字。
 12 | 
 13 | {{{all_tool_required_info}}}
 14 | 
 15 | [要求]="""
 16 | 1、用户任务的描述里必须包含调用工具所需的所有必填参数的信息，其他的非必填参数的信息，请你看情况添加，使用自然语言描述。
 17 | 2、用户任务需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
 18 | 3、用户任务应该包含不同的语气：口语化、正式、礼貌、直接等。
 19 | 4、确保用户任务的长度各不相同，有短到长，长度逐渐递增。
 20 | 5、确保用户任务涉及不同的主题/实例，不同的场景，不同的角色身份。
 21 | 6、根据[工具列表]中所有工具的description，请你提取在所有description中出现的共同实体，并确保用户任务中出现该实体。
 22 | 7、务必不要在用户任务中明确指定要使用的工具名。
 23 | 8、调用的多个工具之间必须没有依赖关系。
 24 | 调用之间有依赖关系是指：必须在调用工具A完成之后才能运行调用工具B。
 25 | 调用之间没有依赖关系是指：工具A和工具B可以并行调用。
 26 | 9、任务难度分为easy、medium、hard三个等级，easy代表简单，medium代表中等，hard代表困难，更难的任务需要更多的步骤执行，确保你生成的3个任务中，都是中等难度以上的任务。
 27 | """
 28 | 
 29 | [工具列表]="""
 30 | {{{tools}}}
 31 | """
 32 | 
 33 | [格式]="""
 34 | ```json
 35 | {
 36 |     "任务1": {
 37 |         "任务描述": "xxx",
 38 |         "任务难度": "medium|hard",
 39 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具"
 40 |     },
 41 |     "任务2": {
 42 |         "任务描述": "xxx",
 43 |         "任务难度": "medium|hard",
 44 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具"
 45 |     },
 46 |     "任务3": {
 47 |         "任务描述": "xxx",
 48 |         "任务难度": "medium|hard",
 49 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具"
 50 |     }
 51 | }
 52 | ```
 53 | """'''
 54 | 
 55 | user_system_prompt_template_en = '''Please act as a user interacting with a super intelligent agent.
 56 | This super intelligent agent is equipped with a series of external tools and can use these tools to solve the tasks you propose.
 57 | Next, please propose 3 tasks that you need the super intelligent agent to solve based on the [Requirements].
 58 | These 3 tasks all require the combined use of tools from the [Tool List] (including: {{{all_tool_name}}}) to be completed.
 59 | The tasks need to be specific, diverse, and require parallel invocation of multiple tools to solve.
 60 | Finally, please output the final result according to the [Format] without generating any extra text.
 61 | 
 62 | {{{all_tool_required_info}}}
 63 | 
 64 | [Requirements]="""
 65 | 1. The description of the user's task must include all the required parameters needed to invoke the tools, while other optional parameters can be added as needed, using natural language.
 66 | 2. The user's tasks need to use different types of sentence structures: imperative, declarative, interrogative, etc.
 67 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
 68 | 4. Ensure that the length of the user's tasks varies, from short to long, gradually increasing in length.
 69 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.
 70 | 6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks.
 71 | 7. Do not explicitly specify the tool names to be used in the user's tasks.
 72 | 8. There must be no dependency between the multiple tools invoked. A dependency between invocations means that tool B can only be run after tool A is completed. No dependency means that tool A and tool B can be invoked in parallel.
 73 | 9. The difficulty of the tasks is divided into easy, medium, and hard levels. Easy represents simple, medium represents moderate, and hard represents difficult. More difficult tasks require more steps to execute. Ensure that the 3 tasks you generate are all of medium difficulty or above.
 74 | """
 75 | 
 76 | [ToolList]="""
 77 | {{{tools}}}
 78 | """
 79 | 
 80 | [Format]="""
 81 | ```json
 82 | {
 83 |     "Task 1": {
 84 |         "Task Description": "xxx",
 85 |         "Task Difficulty": "medium|hard",
 86 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
 87 |     },
 88 |     "Task 2": {
 89 |         "Task Description": "xxx",
 90 |         "Task Difficulty": "medium|hard",
 91 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
 92 |     },
 93 |     "Task 3": {
 94 |         "Task Description": "xxx",
 95 |         "Task Difficulty": "medium|hard",
 96 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
 97 |     }
 98 | }
 99 | ```
100 | """'''
101 | 
102 | 
103 | def user_multi_tool_parallel(messages, tools,  request_func):
104 |     tools_ = remove_prepare_ask_tools(tools)
105 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools_)
106 |     language = os.getenv("LANGUAGE")
107 |     if language == "zh":
108 |         user_system_prompt_template = user_system_prompt_template_zh
109 |     else:
110 |         user_system_prompt_template = user_system_prompt_template_en
111 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
112 |                                                     .replace("{{{all_tool_name}}}", all_tool_name) \
113 |                                                     .replace("{{{all_tool_required_info}}}", all_tool_required_info)
114 |     messages_new = [
115 |         {
116 |             "role": "user",
117 |             "content": user_system_prompt
118 |         }
119 |     ]
120 |     res = request_func(messages_new)
121 |     logger.info(f"user_multi_tool_parallel:\n{res}\n")
122 |     user_task = random_select_answer_cot(res)
123 |     logger.info(f"user_multi_tool:\n{user_task}\n")
124 |     user_message = [{"role": "user", "content": user_task}]
125 |     fetch_data = {"task": "user_multi_tool_parallel", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
126 |     return user_message, fetch_data
127 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_multi_tool_serial_parallel.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | from utils import remove_prepare_ask_tools, random_select_answer_cot, get_all_tool_info, logger
  5 | 
  6 | 
  7 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
  8 | 这个超级智能体具备一系列外部工具，可以使用外部工具解决你提出的任务。
  9 | 接下来，请你根据[要求]提出3个你需要超级智能体解决的任务。
 10 | 这3个任务都需要组合使用[工具列表]里的工具(包括：{{{all_tool_name}}})才能够完成，需要具体、多样、需要同时串行和并行调用多个工具来解决任务。
 11 | 最后请你按照[格式]输出最终结果，不要生成多余的文字。
 12 | 
 13 | {{{all_tool_required_info}}}
 14 | 
 15 | [要求]="""
 16 | 1、用户任务的描述里必须包含调用工具所需的所有必填参数的信息，其他的非必填参数的信息，请你看情况添加，使用自然语言描述。
 17 | 2、用户任务需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
 18 | 3、用户任务应该包含不同的语气：口语化、正式、礼貌、直接等。
 19 | 4、确保用户任务的长度各不相同，有短到长，长度逐渐递增。
 20 | 5、确保用户任务涉及不同的主题/实例，不同的场景，不同的角色身份。
 21 | 6、根据[工具列表]中所有工具的description，请你提取在所有description中出现的共同实体，并确保用户任务中出现该实体。
 22 | 7、务必不要在用户任务中明确指定要使用的工具名。
 23 | 8、调用的多个工具之间需要有的有依赖关系，有的没有依赖关系。
 24 | 调用之间有依赖关系是指：必须在调用工具A完成之后才能运行调用工具B。
 25 | 调用之间没有依赖关系是指：工具A和工具B可以并行调用。
 26 | 9、任务难度分为easy、medium、hard三个等级，easy代表简单，medium代表中等，hard代表困难，更难的任务需要更多的步骤执行，确保你生成的3个任务中，都是中等难度以上的任务。
 27 | """
 28 | 
 29 | [工具列表]="""
 30 | {{{tools}}}
 31 | """
 32 | 
 33 | [格式]="""
 34 | ```json
 35 | {
 36 |     "任务1": {
 37 |         "任务描述": "xxx",
 38 |         "任务难度": "medium|hard",
 39 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具"
 40 |     },
 41 |     "任务2": {
 42 |         "任务描述": "xxx",
 43 |         "任务难度": "medium|hard",
 44 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具"
 45 |     },
 46 |     "任务3": {
 47 |         "任务描述": "xxx",
 48 |         "任务难度": "medium|hard",
 49 |         "解决任务的整体规划": "请你给出解决用户任务的整体规划，包括每一步骤需要调用哪个工具"
 50 |     }
 51 | }
 52 | ```
 53 | """'''
 54 | 
 55 | user_system_prompt_template_en = '''Please act as a user who is interacting with a super intelligent agent.
 56 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose.
 57 | Next, please propose 3 tasks that you need the super intelligent agent to solve based on the [Requirements].
 58 | These 3 tasks must require the combined use of tools from the [Tool List] (including: {{{all_tool_name}}}) to be completed.
 59 | The tasks should be specific, diverse, and require both serial and parallel invocation of multiple tools to solve.
 60 | Finally, please output the final result according to the [Format] without generating any extra text.
 61 | 
 62 | {{{all_tool_required_info}}}
 63 | 
 64 | [Requirements]="""
 65 | 1. The description of the user's task must include all the required parameters needed to invoke the tools, while other optional parameters can be added as you see fit, using natural language.
 66 | 2. The user's tasks need to use different types of sentence structures: imperative, declarative, interrogative, etc.3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
 67 | 4. Ensure that the length of the user's tasks varies, from short to long, with increasing length.
 68 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks.
 69 | 7. Do not explicitly specify the tool names to be used in the user's tasks.
 70 | 8. There should be dependencies between some of the tools invoked, while others should not have dependencies. A dependency between invocations means that tool B can only be run after tool A is completed. No dependency means that tool A and tool B can be invoked in parallel.
 71 | 9. Task difficulty is divided into easy, medium, and hard levels. Easy represents simple, medium represents moderate, and hard represents difficult. More difficult tasks require more steps to execute. Ensure that the 3 tasks you generate are all of medium difficulty or above.
 72 | """
 73 | 
 74 | [Tool List]="""
 75 | {{{tools}}}
 76 | """
 77 | 
 78 | [Format]="""
 79 | ```json
 80 | {
 81 |     "Task 1": {
 82 |         "Task Description": "xxx",
 83 |         "Task Difficulty": "medium|hard",
 84 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
 85 |     },
 86 |     "Task 2": {
 87 |         "Task Description": "xxx",
 88 |         "Task Difficulty": "medium|hard",
 89 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
 90 |     },
 91 |     "Task 3": {
 92 |         "Task Description": "xxx",
 93 |         "Task Difficulty": "medium|hard",
 94 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
 95 |     }
 96 | }
 97 | ```
 98 | """'''
 99 | 
100 | 
101 | def user_multi_tool_serial_parallel(messages, tools,  request_func):
102 |     tools_ = remove_prepare_ask_tools(tools)
103 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools_)
104 |     language = os.getenv("LANGUAGE")
105 |     if language == "zh":
106 |         user_system_prompt_template = user_system_prompt_template_zh
107 |     else:
108 |         user_system_prompt_template = user_system_prompt_template_en
109 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
110 |                                                     .replace("{{{all_tool_name}}}", all_tool_name) \
111 |                                                     .replace("{{{all_tool_required_info}}}", all_tool_required_info)
112 |     messages_new = [
113 |         {
114 |             "role": "user",
115 |             "content": user_system_prompt
116 |         }
117 |     ]
118 |     res = request_func(messages_new)
119 |     logger.info(f"user_multi_tool_serial_parallel:\n{res}\n")
120 |     user_task = random_select_answer_cot(res)
121 |     logger.info(f"user_multi_tool_serial_parallel:\n{user_task}\n")
122 |     user_message = [{"role": "user", "content": user_task}]
123 |     fetch_data = {"task": "user_multi_tool_serial_parallel", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
124 |     return user_message, fetch_data
125 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_single_tool.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import random
  3 | import os
  4 | 
  5 | from utils import remove_prepare_ask_tools, random_select_answer, logger
  6 | 
  7 | 
  8 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
  9 | 这个超级智能体具备一系列外部工具，可以使用外部工具解决你提出的任务。
 10 | 接下来，请你根据[要求]提出5个你需要超级智能体解决的任务。
 11 | 这5个任务都需要使用[工具列表]里的{{{tool}}}才能够完成，且都只需要调用{{{tool}}}一次，需要具体、多样。
 12 | 最后请你按照[格式]输出最终结果，不要生成多余的文字。
 13 | 
 14 | 工具{{{tool}}}的必填参数有：{{{tool_required}}}，非必填参数有：{{{tool_no_required}}}
 15 | 
 16 | [要求]="""
 17 | 1、用户任务的描述里必须包含调用{{{tool}}}所需的所有必填参数的信息，其他的非必填参数的信息，请你看情况添加，使用自然语言描述。
 18 | 2、用户任务需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
 19 | 3、用户任务应该包含不同的语气：口语化、正式、礼貌、直接等。
 20 | 4、确保用户任务的长度各不相同，有短到长，长度逐渐递增。
 21 | 5、确保用户任务涉及不同的主题/实例，不同的场景，不同的角色身份。
 22 | 6、根据[工具列表]中所有工具的description，请你提取在所有description中出现的共同实体，并确保用户任务中出现该实体。
 23 | 7、务必不要在用户任务中明确指定要使用的工具{{{tool}}}。
 24 | """
 25 | 
 26 | [工具列表]="""
 27 | {{{tools}}}
 28 | """
 29 | 
 30 | [格式]="""
 31 | ```json
 32 | {
 33 |     "任务1": "xxx",
 34 |     "任务2": "xxx",
 35 |     "任务3": "xxx",
 36 |     "任务4": "xxx",
 37 |     "任务5": "xxx"
 38 | }
 39 | ```
 40 | """'''
 41 | 
 42 | user_system_prompt_template_en = '''Please act as a user interacting with a super intelligent agent.
 43 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose.
 44 | Next, please propose 5 tasks that you need the super intelligent agent to solve based on the [Requirements].
 45 | All 5 tasks must require the use of {{{tool}}} from the [Tool List] to be completed, and each task should only require a single call to {{{tool}}}.
 46 | The tasks should be specific and diverse.
 47 | Finally, please output the final result according to the [Format] without generating any extra text.
 48 | 
 49 | The required parameters for tool {{{tool}}} are: {{{tool_required}}}, and the optional parameters are: {{{tool_no_required}}}.
 50 | 
 51 | [Requirements]="""
 52 | 1. The description of the user's task must include information on all the required parameters needed to call {{{tool}}}. For other optional parameters, please add them as you see fit, using natural language.
 53 | 2. The user's tasks should use different types of sentence structures: imperative, declarative, interrogative, etc.
 54 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
 55 | 4. Ensure that the length of the user's tasks varies, gradually increasing from short to long.
 56 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.
 57 | 6. Extract common entities that appear in all descriptions from the [Tool List] and ensure that these entities appear in the user's tasks.
 58 | 7. Do not explicitly specify the tool {{{tool}}} in the user's tasks.
 59 | """
 60 | 
 61 | [Tool List]="""
 62 | {{{tools}}}
 63 | """
 64 | 
 65 | [Format]="""
 66 | ```json
 67 | {
 68 |     "Task 1": "xxx",
 69 |     "Task 2": "xxx",
 70 |     "Task 3": "xxx",
 71 |     "Task 4": "xxx",
 72 |     "Task 5": "xxx"
 73 | }
 74 | ```
 75 | """'''
 76 | 
 77 | def user_single_tool(messages, tools,  request_func):
 78 |     tools_ = remove_prepare_ask_tools(tools)
 79 |     tool = random.choice(tools_)
 80 |     tool_name = tool["function"]["name"]
 81 |     tool_required = tool["function"]["parameters"]["required"]
 82 |     tool_required = ", ".join(tool_required)
 83 |     tool_all_properties = list(tool["function"]["parameters"]["properties"].keys())
 84 |     tool_no_required = []
 85 |     for property in tool_all_properties:
 86 |         if property not in tool_required:
 87 |             tool_no_required.append(property)
 88 |     tool_no_required = ", ".join(tool_no_required)
 89 |     language = os.getenv("LANGUAGE")
 90 |     if language == "zh":
 91 |         user_system_prompt_template = user_system_prompt_template_zh
 92 |     else:
 93 |         user_system_prompt_template = user_system_prompt_template_en
 94 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
 95 |                                                     .replace("{{{tool}}}", tool_name) \
 96 |                                                     .replace("{{{tool_required}}}", tool_required) \
 97 |                                                     .replace("{{{tool_no_required}}}", tool_no_required)
 98 |     messages_new = [
 99 |         {
100 |             "role": "user",
101 |             "content": user_system_prompt
102 |         }
103 |     ]
104 |     res = request_func(messages_new)
105 |     logger.info(f"user_single_tool:\n{res}\n")
106 |     user_task = random_select_answer(res)
107 |     logger.info(f"user_single_tool:\n{user_task}\n")
108 |     user_message = [{"role": "user", "content": user_task}]
109 |     fetch_data = {"task": "user_single_tool", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
110 |     return user_message, fetch_data
111 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_vague_answer_ask.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from utils import get_all_tool_info, logger
 5 | 
 6 | 
 7 | user_system_prompt_template_zh = '''请你扮演一个用户，你正在和一个超级智能体进行交互。
 8 | 这个超级智能体拥有一个Planner、Agent助手，并具备一系列外部工具，可以使用外部工具解决你提出的任务，具体见[工具列表]。
 9 | 根据上下文对话信息，你已经提出了你的任务，但是根据Planner的反馈，你提供的任务信息不足。
10 | 接下来，请你根据最新一轮超级智能体的Agent助手询问的信息进行回复，但是请你不要给出Agent助手要求的必填参数，让超级智能体继续询问你。
11 | 输出格式参考[用户输出格式]。
12 | 
13 | {{{all_tool_required_info}}}
14 | 
15 | [环境信息]="""
16 | {{{env_info}}}
17 | """
18 | 
19 | [用户输出格式]="""
20 | 用户：根据[要求]，回复上下文对话信息中最近一轮以 "Agent助手：" 开头的内容（不要重复这句话）
21 | """
22 | 
23 | [要求]="""
24 | 1、回复必须以 "用户：" 开头。
25 | 2、根据上下文对话信息，回复最近一轮以 "Agent助手：" 开头的用户任务。
26 | 3、你的回复不要包含Agent助手所询问的所有必填参数的信息，让超级智能体继续询问你。
27 | 4、你的回复需要使用不同种类的句子结构：祈使句、陈述句、疑问句等。
28 | 5、你的回复应该使用不同的语气：口语化、正式、礼貌、直接等。
29 | 6、你的回复应该使用不同的长度：有短到长，长度逐渐递增。
30 | """
31 | 
32 | [工具列表]="""
33 | {{{tools}}}
34 | """'''
35 | 
36 | user_system_prompt_template_en = '''Please play the role of a user who is interacting with a super intelligent agent.
37 | This super intelligent agent has a Planner, an Agent assistant, and a series of external tools that can be used to solve the tasks you propose, as detailed in the [Tool List].
38 | Based on the context of the conversation, you have already proposed your task, but according to the feedback from the Planner, the information you provided is insufficient.
39 | Next, please respond according to the latest round of questions asked by the super intelligent agent's Agent assistant, but do not provide the required parameters requested by the Agent assistant, so that the super intelligent agent continues to inquire.
40 | Refer to the [User Output Format] for the output format.
41 | 
42 | {{{all_tool_required_info}}}
43 | 
44 | [Environment Information]="""
45 | {{{env_info}}}
46 | """
47 | 
48 | [User Output Format]="""
49 | User: According to the [Requirements], respond to the most recent round of conversation information that starts with "Agent Assistant:" (do not repeat this sentence).
50 | """
51 | 
52 | [Requirements]="""
53 | 1. The response must start with "User:".
54 | 2. Based on the context of the conversation, respond to the most recent user task that starts with "Agent Assistant:".
55 | 3. Your response should not include all the required parameters requested by the Agent assistant, so that the super intelligent agent continues to inquire.
56 | 4. Your response should use different types of sentence structures: imperative sentences, declarative sentences, interrogative sentences, etc.
57 | 5. Your response should use different tones: colloquial, formal, polite, direct, etc.
58 | 6. Your response should vary in length: from short to long, gradually increasing in length.
59 | """
60 | 
61 | [Tool List]="""
62 | {{{tools}}}
63 | """'''
64 | 
65 | def user_vague_answer_ask(messages, tools, env_info, request_func):
66 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
67 |     language = os.getenv("LANGUAGE")
68 |     if language == "zh":
69 |         user_system_prompt_template = user_system_prompt_template_zh
70 |     else:
71 |         user_system_prompt_template = user_system_prompt_template_en
72 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
73 |                                                     .replace("{{{env_info}}}", env_info) \
74 |                                                     .replace("{{{all_tool_required_info}}}", all_tool_required_info)
75 |     # print(user_system_prompt)
76 |     messages_new = [
77 |         {
78 |             "role": "system",
79 |             "content": user_system_prompt
80 |         }
81 |     ]
82 |     messages_new.extend(messages)
83 |     res = request_func(messages_new)
84 |     logger.info(f"user_vague_answer_ask:\n{res}\n")
85 |     fetch_data = {"task": "user_vague_answer_ask", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
86 |     return res, fetch_data
87 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/handle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/multi_agent/handle/__init__.py


--------------------------------------------------------------------------------
/c3_bench/multi_agent/handle/api_handle.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | from openai import OpenAI
 5 | 
 6 | 
 7 | class APIMultiTurnMessages:
 8 |     def __init__(self):
 9 |         self.client = OpenAI(
10 |             api_key=os.getenv("API_KEY"),
11 |             base_url=os.getenv("BASE_URL"),
12 |         )
13 | 
14 |     def request_model(self, messages):
15 |         kwargs = {
16 |             "messages": messages,
17 |             "timeout": 300,
18 |             "model": os.getenv("MODEL")
19 |         }
20 |         api_response = self.client.chat.completions.create(**kwargs)
21 |         api_response = json.loads(api_response.json())
22 |         choice = api_response["choices"][0]
23 |         message = choice["message"]
24 |         text = message["content"]
25 |         return text
26 | 
27 | 
28 | def main():
29 |     handle = APIMultiTurnMessages()
30 |     messages = [
31 |         {
32 |             "role": "user",
33 |             "content": "Hello, who are you?"
34 |         }
35 |     ]
36 |     print(json.dumps(messages, ensure_ascii=False, indent=4))
37 |     print("---")
38 |     result = handle.request_model(messages)
39 |     print(result)
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     main()
44 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/handle/handles.py:
--------------------------------------------------------------------------------
1 | from .api_handle import APIMultiTurnMessages
2 | 
3 | 
4 | agent_handle_map = {
5 |     "hunyuan-turbos-latest": APIMultiTurnMessages
6 | }
7 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .file_utils import read_json_file_to_list, write_json_data_to_file
2 | from .agent_utils import parse_answer, random_select_answer, random_select_answer_cot, get_all_tool_info, get_all_tool_info_for_checker
3 | from .log_utils import logger
4 | from .tool_utils import ask_user_for_help_tool, prepare_to_answer_tool
5 | from .data_process_utils import transform_train_data, remove_prepare_ask_tools
6 | from .time_utils import get_random_date


--------------------------------------------------------------------------------
/c3_bench/multi_agent/utils/agent_utils.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import random
  4 | import re
  5 | 
  6 | 
  7 | def parse_answer(planner_res):
  8 |     pattern = "```json(.+?)```"
  9 |     planner_res = re.findall(pattern, planner_res, re.S)[0]
 10 |     planner_res_obj = json.loads(planner_res)
 11 |     return planner_res_obj
 12 | 
 13 | 
 14 | def random_select_answer(user_tasks):
 15 |     user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "")
 16 |     user_tasks = json.loads(user_tasks)
 17 |     task_keys = list(user_tasks.keys())
 18 |     task_key = random.choice(task_keys)
 19 |     user_task = user_tasks[task_key]
 20 |     language = os.getenv("LANGUAGE")
 21 |     if language == "zh":
 22 |         user_task = "用户：" + user_task
 23 |     else:
 24 |         user_task = "User: " + user_task
 25 | 
 26 |     return user_task
 27 | 
 28 | 
 29 | def random_select_answer_cot(user_tasks):
 30 |     user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "")
 31 |     user_tasks = json.loads(user_tasks)
 32 |     task_keys = list(user_tasks.keys())
 33 |     task_key = random.choice(task_keys)
 34 |     language = os.getenv("LANGUAGE")
 35 |     if language == "zh":
 36 |         user_task = user_tasks[task_key]["任务描述"]
 37 |         user_task = "用户：" + user_task
 38 |     else:
 39 |         user_task = user_tasks[task_key]["Task Description"]
 40 |         user_task = "User: " + user_task
 41 |     return user_task
 42 | 
 43 | 
 44 | def get_all_tool_info(tools):
 45 |     all_tool_name = []
 46 |     all_tool_required_info = []
 47 |     for tool in tools:
 48 |         tool_name = tool["function"]["name"]
 49 |         all_tool_name.append(tool_name)
 50 |         tool_required = tool["function"]["parameters"]["required"]
 51 |         tool_required = "[" + ", ".join(tool_required) + "]"
 52 |         tool_all_properties = list(tool["function"]["parameters"]["properties"].keys())
 53 |         tool_no_required = []
 54 |         for property in tool_all_properties:
 55 |             if property not in tool_required:
 56 |                 tool_no_required.append(property)
 57 |         tool_no_required = "[" + ", ".join(tool_no_required) + "]"
 58 |         language = os.getenv("LANGUAGE")
 59 |         if language == "zh":
 60 |             tool_required_info = f"工具{tool_name}的必填参数为：{tool_required}，非必填参数为：{tool_no_required}"
 61 |         else:
 62 |             tool_required_info = f"The required parameters for the tool {tool_name} are: {tool_required}, and the optional parameters are: {tool_no_required}."
 63 |         all_tool_required_info.append(tool_required_info)
 64 |     all_tool_name = ", ".join(all_tool_name)
 65 |     all_tool_required_info = "\n".join(all_tool_required_info)
 66 |     return all_tool_name, all_tool_required_info
 67 | 
 68 | 
 69 | def get_all_tool_info_for_checker(tools):
 70 |     all_tool_name = []
 71 |     all_tool_name_properties_name = {}
 72 |     all_tool_name_required = {}
 73 |     for tool in tools:
 74 |         tool_name = tool["function"]["name"]
 75 |         all_tool_name.append(tool_name)
 76 |         tool_properties = list(tool["function"]["parameters"]["properties"].keys())
 77 |         all_tool_name_properties_name[tool_name] = tool_properties
 78 | 
 79 |         tool_required = tool["function"]["parameters"]["required"]
 80 |         all_tool_name_required[tool_name] = tool_required
 81 | 
 82 |     return all_tool_name, all_tool_name_properties_name, all_tool_name_required
 83 | 
 84 | 
 85 | if __name__ == "__main__":
 86 |     tools = [
 87 |             {
 88 |                 "type": "function",
 89 |                 "function": {
 90 |                     "name": "getGeocode",
 91 |                     "description": "根据新加坡的地址获取地理编码信息。",
 92 |                     "parameters": {
 93 |                         "type": "object",
 94 |                         "properties": {
 95 |                             "address": {
 96 |                                 "type": "string",
 97 |                                 "description": "需要查询的新加坡地址。"
 98 |                             },
 99 |                             "returnGeom": {
100 |                                 "type": "boolean",
101 |                                 "description": "是否返回地理坐标信息，默认为false。"
102 |                             }
103 |                         },
104 |                         "required": [
105 |                             "address"
106 |                         ]
107 |                     }
108 |                 }
109 |             },
110 |             {
111 |                 "type": "function",
112 |                 "function": {
113 |                     "name": "getReverseGeocode",
114 |                     "description": "根据地理坐标获取新加坡的地址信息。",
115 |                     "parameters": {
116 |                         "type": "object",
117 |                         "properties": {
118 |                             "latitude": {
119 |                                 "type": "float",
120 |                                 "description": "纬度值。"
121 |                             },
122 |                             "longitude": {
123 |                                 "type": "float",
124 |                                 "description": "经度值。"
125 |                             },
126 |                             "buffer": {
127 |                                 "type": "integer",
128 |                                 "description": "搜索半径范围，默认为50米。"
129 |                             }
130 |                         },
131 |                         "required": [
132 |                             "latitude",
133 |                             "longitude"
134 |                         ]
135 |                     }
136 |                 }
137 |             },
138 |         {
139 |             "type": "function",
140 |             "function": {
141 |                 "name": "getLocationBasedServices",
142 |                 "description": "获取新加坡基于位置的服务信息。",
143 |                 "parameters": {
144 |                     "type": "object",
145 |                     "properties": {
146 |                         "category": {
147 |                             "type": "string",
148 |                             "description": "服务类别。"
149 |                         },
150 |                         "location": {
151 |                             "type": "object",
152 |                             "description": "位置坐标对象。",
153 |                             "properties": {
154 |                                 "latitude": {
155 |                                     "type": "float",
156 |                                     "description": "纬度值。"
157 |                                 },
158 |                                 "longitude": {
159 |                                     "type": "float",
160 |                                     "description": "经度值。"
161 |                                 }
162 |                             }
163 |                         },
164 |                         "radius": {
165 |                             "type": "integer",
166 |                             "description": "搜索半径，默认为500米。"
167 |                         }
168 |                     },
169 |                     "required": [
170 |                         "category",
171 |                         "location"
172 |                     ]
173 |                 }
174 |             }
175 |         },
176 |         {
177 |             "type": "function",
178 |             "function": {
179 |                 "name": "getRoutePlanning",
180 |                 "description": "提供新加坡的路线规划服务。",
181 |                 "parameters": {
182 |                     "type": "object",
183 |                     "properties": {
184 |                         "startPoint": {
185 |                             "type": "object",
186 |                             "description": "起点坐标。",
187 |                             "properties": {
188 |                                 "latitude": {
189 |                                     "type": "float",
190 |                                     "description": "起点纬度。"
191 |                                 },
192 |                                 "longitude": {
193 |                                     "type": "float",
194 |                                     "description": "起点经度。"
195 |                                 }
196 |                             }
197 |                         },
198 |                         "endPoint": {
199 |                             "type": "object",
200 |                             "description": "终点坐标。",
201 |                             "properties": {
202 |                                 "latitude": {
203 |                                     "type": "float",
204 |                                     "description": "终点纬度。"
205 |                                 },
206 |                                 "longitude": {
207 |                                     "type": "float",
208 |                                     "description": "终点经度。"
209 |                                 }
210 |                             }
211 |                         },
212 |                         "mode": {
213 |                             "type": "string",
214 |                             "description": "出行模式，默认为'driving'。",
215 |                             "enum": [
216 |                                 "driving",
217 |                                 "walking",
218 |                                 "cycling"
219 |                             ]
220 |                         }
221 |                     },
222 |                     "required": [
223 |                         "startPoint",
224 |                         "endPoint"
225 |                     ]
226 |                 }
227 |             }
228 |         },
229 |         {
230 |             "type": "function",
231 |             "function": {
232 |                 "name": "getVisualization",
233 |                 "description": "支持新加坡数据的可视化展示。",
234 |                 "parameters": {
235 |                     "type": "object",
236 |                     "properties": {
237 |                         "layer": {
238 |                             "type": "string",
239 |                             "description": "需要展示的数据层。"
240 |                         },
241 |                         "theme": {
242 |                             "type": "string",
243 |                             "description": "可视化主题，默认为'standard'。"
244 |                         },
245 |                         "zoomLevel": {
246 |                             "type": "integer",
247 |                             "description": "地图缩放级别，默认为10。"
248 |                         }
249 |                     },
250 |                     "required": [
251 |                         "layer"
252 |                     ]
253 |                 }
254 |             }
255 |         }
256 |     ]
257 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
258 |     print(f"all_tool_name: {all_tool_name}")
259 |     print(f"all_tool_required_info: \n{all_tool_required_info}")
260 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/utils/data_process_utils.py:
--------------------------------------------------------------------------------
 1 | from utils.agent_utils import parse_answer
 2 | 
 3 | 
 4 | def remove_prepare_ask_tools(tools):
 5 |     tools_new = []
 6 |     for tool in tools:
 7 |         tool_name = tool["function"]["name"]
 8 |         if tool_name in ["prepare_to_answer", "ask_user_for_required_parameters"]:
 9 |             continue
10 | 
11 |         tools_new.append(tool)
12 |     return tools_new
13 | 
14 | 
15 | def transform_train_data(messages, tools, env_info):
16 |     train_data_example_origin = {"tools": tools, "env_info": env_info, "messages": messages}
17 |     FAILED = False
18 |     messages_new = []
19 |     for message in messages:
20 |         content = message["content"]
21 |         if content.startswith("切换角色为") or content.startswith("Switch"):
22 |             continue
23 |         elif not content.startswith("Checker"):
24 |             messages_new.append(message)
25 |         else:
26 |             content_obj = parse_answer(content)
27 |             correct = content_obj["correct"]
28 |             if correct == "no":
29 |                 messages_new = messages_new[:-1]
30 | 
31 |     train_data_example = {"tools": tools, "env_info": env_info, "messages": messages_new}
32 |     return FAILED, train_data_example, train_data_example_origin
33 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/utils/file_utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | 
 4 | def read_json_file_to_list(input_file):
 5 |     result = []
 6 |     with open(input_file) as fin:
 7 |         for line in fin:
 8 |             obj = json.loads(line)
 9 |             result.append(obj)
10 |     return result
11 | 
12 | 
13 | def write_json_data_to_file(data_list, output_file):
14 |     fout = open(output_file, "w")
15 |     for data in data_list:
16 |         fout.write(json.dumps(data, ensure_ascii=False) + "\n")
17 |     fout.close()
18 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/utils/log_utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | 
 4 | handler = logging.StreamHandler()  # 输出到命令行
 5 | handler.flush()
 6 | logging.basicConfig(
 7 |     level=logging.INFO,
 8 |     format="%(asctime)s [%(levelname)s] - %(message)s",
 9 |     handlers=[handler]
10 | )
11 | logger = logging.getLogger()
12 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/utils/time_utils.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import time
 3 | import os
 4 | 
 5 | from datetime import datetime
 6 | 
 7 | 
 8 | def get_random_date():
 9 |     a1 = (2024, 1, 1, 0, 0, 0, 0, 0, 0)  # 设置开始日期时间元组（1976-01-01 00：00：00）
10 |     a2 = (2024, 12, 31, 23, 59, 59, 0, 0, 0)  # 设置结束日期时间元组（1990-12-31 23：59：59）
11 | 
12 |     start = time.mktime(a1)  # 生成开始时间戳
13 |     end = time.mktime(a2)  # 生成结束时间戳
14 | 
15 |     t = random.randint(start, end)  # 在开始和结束时间戳中随机取出一个
16 |     date_touple = time.localtime(t)  # 将时间戳生成时间元组
17 |     date = time.strftime("%Y-%m-%d %H:%M:%S", date_touple)  # 将时间元组转成格式化字符串（1976-05-21）
18 |     date_obj = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
19 |     weekday_num = date_obj.weekday()
20 |     language = os.getenv("LANGUAGE")
21 |     if language == "zh":
22 |         weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
23 |     else:
24 |         weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
25 |     weekday = weekdays[weekday_num]
26 |     date = date + " " + weekday
27 |     return date
28 | 
29 | 
30 | if __name__ == "__main__":
31 |     date = get_random_date()
32 |     print(date)
33 | 


--------------------------------------------------------------------------------
/c3_bench/multi_agent/utils/tool_utils.py:
--------------------------------------------------------------------------------
 1 | ask_user_for_help_tool = {
 2 |     "type": "function",
 3 |     "function": {
 4 |         "name": "ask_user_for_required_parameters",
 5 |         "description": "如果你认为用户任务缺失了要调用的工具中的部分必填(required)参数，需要寻求用户帮助，则调用此函数",
 6 |         "parameters": {
 7 |             "type": "object",
 8 |             "properties": {
 9 |                 "tool_name": {
10 |                     "type": "string",
11 |                     "description": "解决用户任务需要调用的工具名"
12 |                 },
13 |                 "missing_required_parameters": {
14 |                     "type": "array",
15 |                     "description": "用户任务中缺失的工具必填参数",
16 |                     "items": {
17 |                         "type": "string",
18 |                         "description": "用户任务中缺失的必填参数"
19 |                     }
20 |                 }
21 |             },
22 |             "required": ["tool_name", "missing_required_parameters"]
23 |         }
24 |     }
25 | }
26 | 
27 | prepare_to_answer_tool = {
28 |     "type": "function",
29 |     "function": {
30 |         "name": "prepare_to_answer",
31 |         "description": "根据上下文信息，如果你认为已经可以完成用户任务了，则调用此函数",
32 |         "parameters": {
33 |             "type": "object",
34 |             "properties": {
35 |                 "answer_type": {
36 |                     "type": "string",
37 |                     "description": "回答的类型，如果是根据工具调用结果对用户任务进行总结回答，则填写为tool；如果是用户任务不需要调用任何工具，可以直接回答，则填写chat",
38 |                     "enum": ["tool", "chat"]
39 |                 }
40 |             },
41 |             "required": ["answer_type"]
42 |         }
43 |     }
44 | }


--------------------------------------------------------------------------------
/c3_bench/requirements.txt:
--------------------------------------------------------------------------------
1 | ipdb
2 | requests
3 | pandas
4 | openai
5 | jieba==0.42.1
6 | rouge==1.0.1
7 | rouge-chinese==1.0.3
8 | rouge-score==0.1.2


--------------------------------------------------------------------------------
/picture/agent_family.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/agent_family.png


--------------------------------------------------------------------------------
/picture/compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/compare.png


--------------------------------------------------------------------------------
/picture/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/example.png


--------------------------------------------------------------------------------
/picture/example_zh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/example_zh.png


--------------------------------------------------------------------------------
/picture/first.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/first.png


--------------------------------------------------------------------------------
/picture/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/framework.png


--------------------------------------------------------------------------------
/picture/multi_agent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/multi_agent.png


--------------------------------------------------------------------------------
/picture/multi_agent2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/multi_agent2.png


--------------------------------------------------------------------------------
/picture/overall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/overall.png


--------------------------------------------------------------------------------