├── .gitignore ├── LICENSE ├── README.md ├── README_ZH.md ├── c3_bench ├── __init__.py ├── bench_test │ ├── __init__.py │ ├── analysis_result.py │ ├── data │ │ ├── C3-Bench.csv │ │ └── C3-Bench.jsonl │ ├── handle │ │ ├── __init__.py │ │ ├── api_handle.py │ │ ├── basic_handle.py │ │ ├── chatglm_handle.py │ │ ├── fcm_handle.py │ │ ├── gorilla_handle.py │ │ ├── hammer_handle.py │ │ ├── handles.py │ │ ├── hunyuan_handle.py │ │ ├── llama_handle.py │ │ ├── toolace_handle.py │ │ ├── tools.py │ │ ├── watt_handle.py │ │ ├── xlam2_handle.py │ │ └── xlam_handle.py │ ├── request_pipeline.py │ ├── request_pipeline_upta.py │ ├── result │ │ └── upta │ │ │ └── 2025-06-25-15:50:37_b3b8be_hunyuan-a13b_en_remove_role_contain_context_history_with_planner_tool_.jsonl │ ├── tool_call_graph.py │ ├── tool_class │ │ ├── __init__.py │ │ ├── chatglm.py │ │ ├── deepseek.py │ │ ├── fc_medium.py │ │ ├── gorilla.py │ │ ├── hammer.py │ │ ├── llama.py │ │ ├── tool_ace.py │ │ ├── tool_class_base.py │ │ ├── tool_model_map.py │ │ ├── watt.py │ │ ├── xlam.py │ │ └── xlam2.py │ ├── tool_parser │ │ ├── __init__.py │ │ └── hunyuan_tool_parser.py │ ├── utils │ │ ├── __init__.py │ │ ├── date.py │ │ ├── parse_res.py │ │ ├── readnwrite.py │ │ └── tools.py │ └── web_server.py ├── multi_agent │ ├── agent │ │ ├── __init__.py │ │ ├── agent_answer.py │ │ ├── agent_answer_chat.py │ │ ├── agent_ask.py │ │ ├── checker_planner.py │ │ ├── checker_tool.py │ │ ├── planner.py │ │ ├── tool.py │ │ ├── user_answer_ask.py │ │ ├── user_ask.py │ │ ├── user_chat.py │ │ ├── user_continue_question.py │ │ ├── user_multi_tool.py │ │ ├── user_multi_tool_parallel.py │ │ ├── user_multi_tool_serial_parallel.py │ │ ├── user_single_tool.py │ │ └── user_vague_answer_ask.py │ ├── generate.py │ ├── handle │ │ ├── __init__.py │ │ ├── api_handle.py │ │ └── handles.py │ ├── tools │ │ ├── tools_en.jsonl │ │ └── tools_zh.jsonl │ └── utils │ │ ├── __init__.py │ │ ├── agent_utils.py │ │ ├── data_process_utils.py │ │ ├── file_utils.py │ │ ├── log_utils.py │ │ ├── time_utils.py │ │ └── tool_utils.py └── requirements.txt └── picture ├── agent_family.png ├── compare.png ├── example.png ├── example_zh.png ├── first.png ├── framework.png ├── multi_agent.png ├── multi_agent2.png └── overall.png /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | -------------------------------------------------------------------------------- /c3_bench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/__init__.py -------------------------------------------------------------------------------- /c3_bench/bench_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/__init__.py -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/handle/__init__.py -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/api_handle.py: -------------------------------------------------------------------------------- 1 | 
import json 2 | import os 3 | 4 | from .basic_handle import SimulateMultiTurnMessages 5 | from c3_bench.bench_test.utils import functions_uniform 6 | from openai import OpenAI 7 | 8 | 9 | class APIMultiTurnMessages(SimulateMultiTurnMessages): 10 | def __init__(self, model_url, is_english=False): 11 | super().__init__(model_url, is_english) 12 | self.model_messages = [] 13 | self.client = OpenAI( 14 | api_key=os.getenv("API_KEY"), 15 | base_url=os.getenv("BASE_URL"), 16 | ) 17 | 18 | def request_funcall(self, messages, tools, env_info=None): 19 | messages = self.add_date_to_message(messages, env_info) 20 | tools = [functions_uniform(tool) for tool in tools] 21 | kwargs = { 22 | "messages": messages, 23 | "tools": tools, 24 | "temperature": 0.1, 25 | "timeout": 300, 26 | "model": os.getenv("MODEL") 27 | } 28 | api_response = self.client.chat.completions.create(**kwargs) 29 | api_response = json.loads(api_response.json()) 30 | choice = api_response["choices"][0] 31 | message = choice["message"] 32 | text = message["content"] 33 | tool_calls = message.get("tool_calls", None) 34 | return text, tool_calls 35 | 36 | 37 | def main(): 38 | handle = APIMultiTurnMessages("") 39 | tools = [ 40 | { 41 | "type": "function", 42 | "function": { 43 | "name": "get_current_weather", 44 | "description": "Get the current weather in a given location", 45 | "parameters": { 46 | "type": "object", 47 | "properties": { 48 | "location": { 49 | "type": "string", 50 | "description": "The city and state, e.g. San Francisco, CA" 51 | }, 52 | "unit": { 53 | "type": "string", 54 | "enum": [ 55 | "celsius", 56 | "fahrenheit" 57 | ] 58 | } 59 | }, 60 | "required": [ 61 | "location" 62 | ] 63 | } 64 | } 65 | } 66 | ] 67 | messages = [ 68 | { 69 | "role": "user", 70 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 
71 | } 72 | ] 73 | content, tool_calls = handle.request_funcall(messages, tools) 74 | print(content) 75 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/basic_handle.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | from datetime import datetime 4 | 5 | 6 | class SimulateMultiTurnMessages: 7 | def __init__(self, model_url, is_english): 8 | self.model_url = model_url 9 | self.is_english = is_english 10 | self.model_messages = [] 11 | self.timeout = 90 12 | self.add_date = True 13 | 14 | def preprocess_to_simple(self, messages): 15 | pass 16 | 17 | def post_process_tool_call(self, answer): 18 | pass 19 | 20 | def add_weekday_date(self, date): 21 | date = date.replace("当前时间:", "").replace("环境:", "") 22 | date_obj = datetime.strptime(date, '%Y-%m-%d %H:%M:%S') 23 | weekday_num = date_obj.weekday() 24 | if self.is_english: 25 | weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] 26 | else: 27 | weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"] 28 | weekday = weekdays[weekday_num] 29 | date = date + " " + weekday 30 | return date 31 | 32 | def add_date_to_message(self, message, env_info=None): 33 | if env_info is not None and self.add_date: 34 | system_content = message[0]["content"] if message[0]["role"] == "system" else "" 35 | if self.is_english: 36 | system_content = system_content[:system_content.rfind("Current Date:")] + "\n\nCurrent Date:" + self.add_weekday_date(env_info) 37 | else: 38 | system_content = system_content[:system_content.rfind("当前日期:")] + "当前日期:" + self.add_weekday_date(env_info) 39 | if message[0]["role"] == "system": 40 | message[0]["content"] = system_content.strip() 41 | else: 42 | message.insert(0, {"role": "system", "content": system_content.strip()}) 43 | return message 44 | else: 45 | return message 46 | 47 | def add_date_to_message_user(self, message, env_info=None): 48 | if env_info is not None and self.add_date: 49 | if self.is_english: 50 | system_content = "Current Date:" + self.add_weekday_date(env_info) 51 | else: 52 | system_content = "当前日期:" + self.add_weekday_date(env_info) 53 | idx = 0 54 | date_flag = False 55 | for idx_, item in enumerate(message): 56 | if item["role"] == "user": 57 | if "Current Date:" in item["content"] or "当前日期:" in item["content"]: 58 | date_flag = True 59 | idx = idx_ 60 | if not date_flag: 61 | message[idx]["content"] += "\n\n" + system_content 62 | return message 63 | else: 64 | return message 65 | 66 | def request_funcall(self, messages, tools, env_info=None): 67 | url = self.model_url 68 | headers = {"Content-Type": "application/json"} 69 | data = { 70 | "messages": self.add_date_to_message(self.preprocess_to_simple(messages), env_info), 71 | "tools": tools, 72 | "date": self.add_weekday_date(env_info) 73 | } 74 | 75 | text = None 76 | tool_calls = None 77 | try: 78 | response = requests.post(url, headers=headers, json=data, timeout=self.timeout) 79 | if response.status_code == 200: 80 | result = response.json() 81 | answer = result["answer"] 82 | text, tool_calls = self.post_process_tool_call(answer) 83 | except Exception as e: 84 | print(f"error: {e}") 85 | text = None 86 | tool_calls = None 87 | 88 | return text, tool_calls 89 | --------------------------------------------------------------------------------
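The concrete handles that follow all implement the contract defined in basic_handle.py above: preprocess_to_simple rewrites the benchmark's shared message history into the target model's native chat format, and post_process_tool_call parses the raw model answer back into a (text, tool_calls) pair consumed by request_funcall. A minimal sketch of what a new adapter looks like under that contract; the EchoHandle name and its passthrough behavior are hypothetical, assuming a model server that already accepts the benchmark's message format and returns tool calls as a bare JSON list:

import json
import uuid

from .basic_handle import SimulateMultiTurnMessages


class EchoHandle(SimulateMultiTurnMessages):
    def preprocess_to_simple(self, messages):
        # Hypothetical model: accepts the benchmark history unchanged.
        return messages

    def post_process_tool_call(self, answer):
        # Interpret a bare JSON list of {"name", "arguments"} dicts as tool
        # calls; anything else is treated as a plain-text reply.
        try:
            calls = json.loads(answer)
        except (ValueError, TypeError):
            return answer, None
        if isinstance(calls, list) and all(isinstance(c, dict) for c in calls):
            tool_calls = [{"id": str(uuid.uuid4()), "function": call} for call in calls]
            text = "use {} to solve user problem".format(", ".join(c.get("name", "") for c in calls))
            return text, tool_calls
        return answer, None

A real adapter would then be registered in tool_handle_map in handles.py so the request pipeline can instantiate it by model name.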
/c3_bench/bench_test/handle/chatglm_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | 4 | from .basic_handle import SimulateMultiTurnMessages 5 | from .tools import remove_messages 6 | 7 | 8 | class ChatGLMMultiTurnMessages(SimulateMultiTurnMessages): 9 | def __init__(self, model_url, is_english=False): 10 | super().__init__(model_url, is_english) 11 | self.model_messages = [] 12 | 13 | def preprocess_to_simple(self, messages): 14 | if len(self.model_messages) == 0: 15 | self.model_messages = remove_messages(messages, is_english=True) 16 | else: 17 | if messages[-1]["role"] == "user": 18 | self.model_messages.append({"role": "user", 19 | "content": messages[-1]["content"].replace("用户:", "").replace("User:", 20 | "").strip()}) 21 | elif messages[-1]["role"] == "tool": 22 | observations = json.loads(messages[-1]["content"]) 23 | functions = messages[-2]["tool_calls"] 24 | assert len(observations) == len(functions) 25 | ret_observation = [] 26 | for function, observation in zip(functions, observations): 27 | ret_observation.append({ 28 | "name": function["function"]["name"], 29 | "results": observation 30 | }) 31 | self.model_messages.append({"role": "observation", "content": json.dumps(ret_observation)}) 32 | return self.model_messages 33 | 34 | def post_process_tool_call(self, answer): 35 | try: 36 | self.model_messages.append({"role": "assistant", "content": answer}) 37 | answer_split = answer.split("\n") 38 | if len(answer_split) >= 2: 39 | text = f"use {answer_split[0]} to solve user problem" 40 | tool_calls = [{"id": str(uuid.uuid4()), "function": { 41 | "name": answer_split[0], 42 | "arguments": json.loads(answer_split[1]) 43 | }}] 44 | else: 45 | text = answer 46 | tool_calls = None 47 | return text, tool_calls 48 | except Exception as e: 49 | print(f"error: {e}") 50 | return answer, None 51 | 52 | 53 | def main(): 54 | handle = ChatGLMMultiTurnMessages("http://111.111.111.111:12345") 55 | tools = [ 56 | { 57 | "type": "function", 58 | "function": { 59 | "name": "get_current_weather", 60 | "description": "Get the current weather in a given location", 61 | "parameters": { 62 | "type": "object", 63 | "properties": { 64 | "location": { 65 | "type": "string", 66 | "description": "The city and state, e.g. San Francisco, CA" 67 | }, 68 | "unit": { 69 | "type": "string", 70 | "enum": [ 71 | "celsius", 72 | "fahrenheit" 73 | ] 74 | } 75 | }, 76 | "required": [ 77 | "location" 78 | ] 79 | } 80 | } 81 | } 82 | ] 83 | messages = [ 84 | { 85 | "role": "user", 86 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 
87 | } 88 | ] 89 | content, tool_calls = handle.request_funcall(messages, tools) 90 | print(content) 91 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 92 | 93 | 94 | if __name__ == "__main__": 95 | main() 96 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/fcm_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | 4 | from .basic_handle import SimulateMultiTurnMessages 5 | from .tools import remove_messages 6 | 7 | 8 | class FCMMultiTurnMessages(SimulateMultiTurnMessages): 9 | def __init__(self, model_url, is_english=False): 10 | super().__init__(model_url, is_english) 11 | self.model_messages = [] 12 | self.timeout = 300 13 | 14 | def preprocess_to_simple(self, messages): 15 | if len(self.model_messages) == 0: 16 | self.model_messages = remove_messages(messages, is_english=self.is_english) 17 | else: 18 | if messages[-1]["role"] == "user": 19 | self.model_messages += remove_messages( 20 | [{"role":"user", "content": messages[-1]["content"]}], 21 | is_english=self.is_english 22 | ) 23 | elif messages[-1]["role"] == "tool": 24 | # messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"}) 25 | assistant = None 26 | observation = [] 27 | idx = -1 28 | while idx >= -len(messages): 29 | if messages[idx]["role"] == "assistant": 30 | assistant = messages[idx] 31 | break 32 | if messages[idx]["role"] == "tool": 33 | observation.append(messages[idx]) 34 | idx -= 1 35 | idmap_observation = {} 36 | assert len(observation) == len(assistant["tool_calls"]) 37 | for tool_call in assistant["tool_calls"]: 38 | idmap_observation[tool_call["id"]] = tool_call["function"]["name"] 39 | for obser in observation: 40 | assert obser["tool_call_id"] in idmap_observation 41 | self.model_messages.append({ 42 | "role": "tool", "name": idmap_observation[obser["tool_call_id"]], 43 | "content": obser["content"] 44 | }) 45 | return self.model_messages 46 | 47 | def post_process_tool_call(self, answer): 48 | text = None 49 | tool_calls = None 50 | try: 51 | if "</function>" in answer: 52 | try: 53 | # e.g. <function=get_current_weather>{"location": "Boston, MA"}</function><function=get_current_weather>{"location": "San Francisco, CA"}</function> 54 | self.model_messages.append({"role": "assistant", "content": answer}) 55 | text = answer 56 | tool_calls = None 57 | assert answer.count("<function=") == answer.count("</function>") 58 | answer = answer.split("</function>") 59 | for tc in answer: 60 | if not tc.startswith("<function="): 61 | continue 62 | name = tc[:tc.find(">{")].replace("<function=", "") 63 | argument = json.loads(tc[tc.find(">{") + 1:]) 64 | if tool_calls is None: 65 | tool_calls = [] 66 | tool_calls.append({"id": str(uuid.uuid4()), "function": { 67 | "name": name, "arguments": argument 68 | }}) 69 | except Exception: 70 | pass 71 | else: 72 | self.model_messages.append({"role": "assistant", "content": answer}) 73 | text = answer 74 | tool_calls = None 75 | return text, tool_calls 76 | except Exception as e: 77 | print(f"error: {e}") 78 | return None, None 79 | 80 | 81 | def main(): 82 | handle = FCMMultiTurnMessages("http://111.111.111.111:12345") 83 | tools = [ 84 | { 85 | "type": "function", 86 | "function": { 87 | "name": "get_current_weather", 88 | "description": "Get the current weather in a given location", 89 | "parameters": { 90 | "type": "object", 91 | "properties": { 92 | "location": { 93 | "type": "string", 94 | "description": "The city and state, e.g.
San Francisco, CA" 95 | }, 96 | "unit": { 97 | "type": "string", 98 | "enum": [ 99 | "celsius", 100 | "fahrenheit" 101 | ] 102 | } 103 | }, 104 | "required": [ 105 | "location" 106 | ] 107 | } 108 | } 109 | } 110 | ] 111 | messages = [ 112 | { 113 | "role": "user", 114 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 115 | } 116 | ] 117 | content, tool_calls = handle.request_funcall(messages, tools) 118 | print(content) 119 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 120 | 121 | 122 | 123 | if __name__ == "__main__": 124 | main() -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/gorilla_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | import ast 4 | import requests 5 | 6 | from .basic_handle import SimulateMultiTurnMessages 7 | from c3_bench.bench_test.utils import get_keywords 8 | 9 | 10 | def parse_python_function_call(call_str): 11 | tree = ast.parse(call_str) 12 | expr = tree.body[0] 13 | 14 | call_node = expr.value 15 | function_name = ( 16 | call_node.func.id 17 | if isinstance(call_node.func, ast.Name) 18 | else str(call_node.func) 19 | ) 20 | 21 | parameters = {} 22 | noNameParam = [] 23 | 24 | # Process positional arguments 25 | for arg in call_node.args: 26 | noNameParam.append(get_keywords(arg)) 27 | 28 | # Process keyword arguments 29 | for kw in call_node.keywords: 30 | parameters[kw.arg] = get_keywords(kw.value) 31 | 32 | if noNameParam: 33 | parameters["None"] = noNameParam 34 | 35 | function_dict = {"name": function_name, "arguments": parameters} 36 | return function_dict 37 | 38 | 39 | FN_CALL_DELIMITER = "<>" 40 | 41 | 42 | def strip_function_calls(content): 43 | """ 44 | Split the content by the function call delimiter and remove empty strings 45 | """ 46 | return [element.strip() for element in content.split(FN_CALL_DELIMITER)[1:] if element.strip()] 47 | 48 | 49 | def parse_function_call(call): 50 | """ 51 | This is temporary. The long term solution is to union all the 52 | types of the parameters from the user's input function definition, 53 | and check which language is a proper super set of the union type. 54 | """ 55 | try: 56 | return parse_python_function_call(call) 57 | except Exception as e: 58 | print(f"error: {e}") 59 | return None 60 | 61 | 62 | def format_response(response): 63 | """ 64 | Formats the response from the OpenFunctions model. 65 | 66 | Parameters: 67 | - response (str): The response generated by the LLM. 68 | 69 | Returns: 70 | - str: The formatted response. 71 | - dict: The function call(s) extracted from the response. 
72 | 73 | """ 74 | function_call_dicts = None 75 | try: 76 | response = strip_function_calls(response) 77 | # Parallel function calls returned as a str, list[dict] 78 | if len(response) > 1: 79 | function_call_dicts = [] 80 | for function_call in response: 81 | parse_function_call_dict = parse_function_call(function_call) 82 | if parse_function_call_dict is not None: 83 | function_call_dicts.append(parse_function_call_dict) 84 | response = ", ".join(response) 85 | # Single function call returned as a str, dict 86 | else: 87 | function_call_dicts = [parse_function_call(response[0])] 88 | response = response[0] 89 | except Exception as e: 90 | # Just faithfully return the generated response str to the user 91 | print(f"error: {e}") 92 | pass 93 | 94 | return response, function_call_dicts 95 | 96 | 97 | class GorillaMultiTurnMessages(SimulateMultiTurnMessages): 98 | def __init__(self, model_url, is_english=False): 99 | super().__init__(model_url, is_english) 100 | self.model_messages = [] 101 | 102 | def get_prompt(self, user_query: str, history, functions: list = [], env_info=None) -> str: 103 | """ 104 | Generates a conversation prompt based on the user's query and a list of functions. 105 | 106 | Parameters: 107 | - user_query (str): The user's query. 108 | - functions (list): A list of functions to include in the prompt. 109 | 110 | Returns: 111 | - str: The formatted conversation prompt. 112 | """ 113 | system = "You are an AI programming assistant, utilizing the Gorilla LLM model, developed by Gorilla LLM, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer." 114 | if env_info is not None: 115 | env_info = self.add_date_to_message([{"role":"", "content":""}], env_info)[0]["content"] 116 | assert env_info.startswith("当前日期") or env_info.startswith("Current Date") 117 | system = "\n\n" + env_info 118 | history = "\n".join([f"<<{m['role']}>>" + m["content"] + f"<<{m['role']}>>" for m in history]) 119 | if len(functions) == 0: 120 | return f"{system}\n### Instruction: <> {history}\n\n<> {user_query}\n### Response: " 121 | functions_string = json.dumps(functions) 122 | 123 | return f"{system}\n### Instruction: <> {history}\n\n<>{functions_string}\n<>{user_query}\n### Response: " 124 | 125 | def request_funcall(self, messages, tools, env_info=None): 126 | """ 127 | Request the function call(s) from the model. 128 | 129 | Parameters: 130 | - messages (list): The messages in the conversation. 131 | - tools (list): The list of tools to include in the prompt. 132 | 133 | Returns: 134 | - str: The formatted response. 135 | - list: The function call(s) extracted from the response. 
136 | """ 137 | url = self.model_url 138 | headers = {'Content-Type': 'application/json'} 139 | history = [_ for _ in messages if _["role"] != "system"] 140 | query = [_ for _ in messages if _["role"] == "user"][-1]["content"] 141 | data = { 142 | 'messages': [{ 143 | "content": self.get_prompt(query, history, tools, env_info), "role": "user" 144 | }], 145 | } 146 | 147 | text = None 148 | tool_calls = None 149 | try: 150 | response = requests.post(url, headers=headers, json=data, timeout=90) 151 | if response.status_code == 200: 152 | result = response.json() 153 | text = result["answer"].strip() 154 | _, tool_calls = format_response(result["answer"]) 155 | if ( 156 | tool_calls is not None 157 | and len(tool_calls) > 0 158 | and len([_ for _ in tool_calls if _ is not None]) > 0 159 | and type(tool_calls[0]["name"]) == str 160 | ): 161 | tool_calls = [{"id":str(uuid.uuid4()), "function":_} for _ in tool_calls if _ is not None] 162 | else: 163 | tool_calls = None 164 | except Exception as e: 165 | print(f"error: {e}") 166 | 167 | return text, tool_calls 168 | 169 | 170 | def main(): 171 | handle = GorillaMultiTurnMessages("http://111.111.111.111:12345") 172 | tools = [ 173 | { 174 | "type": "function", 175 | "function": { 176 | "name": "get_current_weather", 177 | "description": "Get the current weather in a given location", 178 | "parameters": { 179 | "type": "object", 180 | "properties": { 181 | "location": { 182 | "type": "string", 183 | "description": "The city and state, e.g. San Francisco, CA" 184 | }, 185 | "unit": { 186 | "type": "string", 187 | "enum": [ 188 | "celsius", 189 | "fahrenheit" 190 | ] 191 | } 192 | }, 193 | "required": [ 194 | "location" 195 | ] 196 | } 197 | } 198 | } 199 | ] 200 | messages = [ 201 | { 202 | "role": "user", 203 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 204 | } 205 | ] 206 | content, tool_calls = handle.request_funcall(messages, tools) 207 | print(content) 208 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 209 | print("==="*10) 210 | 211 | tools = [ 212 | { 213 | "name": "thermodynamics.calculate_boiling_point", 214 | "description": "Calculate the boiling point of a given substance at a specific pressure.", 215 | "parameters": { 216 | "type": "object", 217 | "properties": { 218 | "substance": { 219 | "type": "string", 220 | "description": "The substance for which to calculate the boiling point." 221 | }, 222 | "pressure": { 223 | "type": "number", 224 | "description": "The pressure at which to calculate the boiling point." 225 | }, 226 | "unit": { 227 | "type": "string", 228 | "description": "The unit of the pressure. Default is 'kPa'." 229 | } 230 | }, 231 | "required": [ 232 | "substance", 233 | "pressure" 234 | ] 235 | } 236 | } 237 | ] 238 | messages = [ 239 | { 240 | "role": "user", 241 | "content": "What is the freezing point of water at a pressure of 10 kPa?" 
242 | } 243 | ] 244 | content, tool_calls = handle.request_funcall(messages, tools) 245 | print(content) 246 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 247 | print("==="*10) 248 | 249 | res = "<<function>>getActivityReport(user_id=456, include_details=true, date_range={'start_date': '2023-04-01', 'end_date': '2023-04-30'})" 250 | _, tool_calls = format_response(res) 251 | print(_) 252 | print(tool_calls) 253 | print(tool_calls is not None) 254 | print(len(tool_calls) > 0) 255 | print(len([_ for _ in tool_calls if _ is not None]) > 0) 256 | print(type(tool_calls[0]["name"]) == str) 257 | 258 | 259 | if __name__ == "__main__": 260 | main() 261 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/hammer_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pdb 3 | import uuid 4 | 5 | from .basic_handle import SimulateMultiTurnMessages 6 | from .tools import remove_messages 7 | 8 | 9 | class HammerMultiTurnMessages(SimulateMultiTurnMessages): 10 | def __init__(self, model_url, is_english=False): 11 | super().__init__(model_url, is_english) 12 | self.model_messages = [] 13 | self.timeout = 300 14 | 15 | def process_planner_tool(self, messages): 16 | new_messages = [] 17 | for i, message in enumerate(messages): 18 | role = message["role"] 19 | tool_calls = message.get("tool_calls", None) 20 | function_calls = [] 21 | if tool_calls: 22 | for tool_call in tool_calls: 23 | function = tool_call["function"] 24 | name = function["name"] 25 | arguments = function["arguments"] 26 | function_calls.append({"name": name, "arguments": arguments}) 27 | # pdb.set_trace() 28 | function_calls = f"```\n{json.dumps(function_calls, ensure_ascii=False)}\n```" 29 | new_messages.append({"role": "assistant", "content": function_calls}) 30 | elif role == "tool": 31 | functions = messages[i - 1]["tool_calls"] 32 | observations = json.loads(message["content"]) 33 | assert len(observations) == len(functions) 34 | ret_observation = [] 35 | for function, observation in zip(functions, observations): 36 | ret_observation.append({ 37 | "name": function["function"]["name"], 38 | "results": observation 39 | }) 40 | new_messages.append({"role": "tool", "content": json.dumps(ret_observation, ensure_ascii=False)}) 41 | else: 42 | new_messages.append(message) 43 | return new_messages 44 | 45 | def preprocess_to_simple(self, messages): 46 | # pdb.set_trace() 47 | if len(self.model_messages) == 0: 48 | messages = remove_messages(messages, is_english=self.is_english) 49 | self.model_messages = self.process_planner_tool(messages) 50 | else: 51 | if messages[-1]["role"] == "user": 52 | self.model_messages += remove_messages( 53 | [{"role": "user", "content": messages[-1]["content"]}], 54 | is_english=self.is_english 55 | ) 56 | elif messages[-1]["role"] == "tool": 57 | observations = json.loads(messages[-1]["content"]) 58 | functions = messages[-2]["tool_calls"] 59 | assert len(observations) == len(functions) 60 | ret_observation = [] 61 | for function, observation in zip(functions, observations): 62 | ret_observation.append({ 63 | "name": function["function"]["name"], 64 | "results": observation 65 | }) 66 | self.model_messages.append({"role": "user", "content": json.dumps(ret_observation)}) 67 | 68 | return self.model_messages 69 | 70 | def parameters2arguments(self, function_dict): 71 | return { 72 | "name": function_dict["name"], 73 | "arguments": function_dict["parameters"] if "parameters" in
function_dict else function_dict["arguments"] 74 | } 75 | 76 | def post_process_tool_call(self, answer): 77 | text = None 78 | tool_calls = None 79 | try: 80 | if "```\n[{\"name\"" in answer: 81 | try: 82 | # ```\n[{"name": "get_current_weather", "arguments": {"location": "Boston"}}, {"name": "get_current_weather", "arguments": {"location": "San Francisco"}}]\n``` 83 | text = answer 84 | tool_calls = json.loads(answer[len("```"): -len("\n```")]) 85 | if type(tool_calls) == dict: 86 | tool_calls = [{ 87 | "id": str(uuid.uuid4()), "function": self.parameters2arguments(tool_calls) 88 | }] 89 | elif type(tool_calls) == list: 90 | tool_calls = [ 91 | {"id": str(uuid.uuid4()), "function": self.parameters2arguments(_)} 92 | for _ in tool_calls 93 | ] 94 | self.model_messages.append({"role": "assistant", "content": answer}) 95 | except Exception as e: 96 | print(f"process error: {e}") 97 | pass 98 | else: 99 | self.model_messages.append({"role": "assistant", "content": answer}) 100 | text = "[model doesnt choose function(Manual placeholder)]" 101 | tool_calls = None 102 | 103 | return text, tool_calls 104 | 105 | except Exception as e: 106 | print(f"error: {e}") 107 | return None, None 108 | 109 | 110 | def main(): 111 | handle = HammerMultiTurnMessages("http://111.111.111.111:12345") 112 | tools = [ 113 | { 114 | "type": "function", 115 | "function": { 116 | "name": "get_current_weather", 117 | "description": "Get the current weather in a given location", 118 | "parameters": { 119 | "type": "object", 120 | "properties": { 121 | "location": { 122 | "type": "string", 123 | "description": "The city and state, e.g. San Francisco, CA" 124 | }, 125 | "unit": { 126 | "type": "string", 127 | "enum": [ 128 | "celsius", 129 | "fahrenheit" 130 | ] 131 | } 132 | }, 133 | "required": [ 134 | "location" 135 | ] 136 | } 137 | } 138 | } 139 | ] 140 | messages = [ 141 | { 142 | "role": "user", 143 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 
144 | } 145 | ] 146 | content, tool_calls = handle.request_funcall(messages, tools) 147 | print(content) 148 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 149 | 150 | 151 | if __name__ == "__main__": 152 | main() 153 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/handles.py: -------------------------------------------------------------------------------- 1 | from .toolace_handle import ToolACEMultiTurnMessages 2 | from .xlam_handle import XLAMMultiTurnMessages 3 | from .xlam2_handle import XLAM2MultiTurnMessages 4 | from .gorilla_handle import GorillaMultiTurnMessages 5 | from .api_handle import APIMultiTurnMessages 6 | from .llama_handle import LlamaMultiTurnMessages 7 | from .chatglm_handle import ChatGLMMultiTurnMessages 8 | from .hammer_handle import HammerMultiTurnMessages 9 | from .watt_handle import WattMultiTurnMessages 10 | from .fcm_handle import FCMMultiTurnMessages 11 | from .hunyuan_handle import HunyuanMultiTurnMessages 12 | 13 | 14 | tool_handle_map = { 15 | # hunyuan 16 | "hunyuan-turbos-latest": (APIMultiTurnMessages, False), 17 | "hunyuan-a13b": (HunyuanMultiTurnMessages, False), 18 | # toolace 19 | "toolace": (ToolACEMultiTurnMessages, False), 20 | "toolace2": (ToolACEMultiTurnMessages, False), 21 | # xlam 22 | "xlam": (XLAMMultiTurnMessages, False), 23 | "xlam2-70b": (XLAM2MultiTurnMessages, False), 24 | "xlam2-32b": (XLAM2MultiTurnMessages, False), 25 | "xlam2-8b": (XLAM2MultiTurnMessages, False), 26 | "xlam2-3b": (XLAM2MultiTurnMessages, False), 27 | "xlam2-1b": (XLAM2MultiTurnMessages, False), 28 | # other 29 | "gorilla": (GorillaMultiTurnMessages, False), 30 | "chatglm": (ChatGLMMultiTurnMessages, False), 31 | "fcm3.1": (FCMMultiTurnMessages, True), 32 | # Watt 33 | "watt70b": (WattMultiTurnMessages, True), 34 | "watt8b": (WattMultiTurnMessages, True), 35 | # Hammer 36 | "hammer7b": (HammerMultiTurnMessages, False), 37 | "hammer3b": (HammerMultiTurnMessages, False), 38 | "hammer1.5b": (HammerMultiTurnMessages, False), 39 | "hammer0.5b": (HammerMultiTurnMessages, False), 40 | # LLAMA 41 | "llama70b": (LlamaMultiTurnMessages, True), 42 | "llama8b": (LlamaMultiTurnMessages, True), 43 | "llama3b": (LlamaMultiTurnMessages, True), 44 | "llama1b": (LlamaMultiTurnMessages, True) 45 | } 46 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/hunyuan_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from openai import OpenAI 5 | 6 | 7 | class HunyuanMultiTurnMessages: 8 | def __init__(self, model_url, is_english=False): 9 | self.model = os.getenv("MODEL") 10 | self.model_url = model_url 11 | self.model_messages = [] 12 | self.remove_flag = False 13 | self.client = OpenAI( 14 | api_key="EMPTY", 15 | base_url=f"http://{self.model_url}/v1", 16 | ) 17 | 18 | def request_model(self, model, messages, tools, env_info): 19 | text, tool_calls = None, None 20 | messages = [{"role": "system", "content": f"Current time: {env_info}"}] + messages 21 | resp = None 22 | try: 23 | while True: 24 | response = self.client.chat.completions.create( 25 | model=model, 26 | messages=messages, 27 | stream=False, 28 | temperature=0.5, 29 | top_p=0.7, 30 | tools=tools, 31 | max_tokens=8192, 32 | extra_body={ 33 | "repetition_penalty": 1.05, 34 | "top_k": 20 35 | }, 36 | ) 37 | response = response.model_dump() 38 | text = response["choices"][0]["message"]["content"] 39 | if "</think>" in text: 40 
| text = text[text.find("</think>") + len("</think>"):] 41 | if "<answer>" in text and "</answer>" in text: 42 | text = text[text.find("<answer>") + len("<answer>"):text.rfind("</answer>")] 43 | if text.startswith("助手:"): 44 | text = text[len("助手:"):].strip() 45 | text = text.strip() 46 | tool_calls = response["choices"][0]["message"]["tool_calls"] 47 | if tool_calls is not None or text is not None: 48 | break 49 | 50 | except Exception as e: 51 | print(f"resp: {resp.text if resp is not None else resp}") 52 | print(f"error: {e}") 53 | 54 | if text is None: 55 | print("request model error") 56 | 57 | return text, tool_calls 58 | 59 | def request_funcall(self, messages, tools, env_info=None): 60 | try: 61 | text, tool_calls = self.request_model(self.model, messages, tools, env_info) 62 | except Exception as e: 63 | raise ValueError(f"Error raised by inference endpoint: {e}") 64 | return text, tool_calls 65 | 66 | 67 | def main(): 68 | handle = HunyuanMultiTurnMessages("111.111.111.111:12345") 69 | tools = [ 70 | { 71 | "type": "function", 72 | "function": { 73 | "name": "get_current_weather", 74 | "description": "Get the current weather in a given location", 75 | "parameters": { 76 | "type": "object", 77 | "properties": { 78 | "location": { 79 | "type": "string", 80 | "description": "The city and state, e.g. San Francisco, CA" 81 | }, 82 | "unit": { 83 | "type": "string", 84 | "enum": [ 85 | "celsius", 86 | "fahrenheit" 87 | ] 88 | } 89 | }, 90 | "required": [ 91 | "location" 92 | ] 93 | } 94 | } 95 | } 96 | ] 97 | messages = [ 98 | { 99 | "role": "user", 100 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 101 | } 102 | ] 103 | content, tool_calls = handle.request_funcall(messages, tools) 104 | print(content) 105 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 106 | 107 | 108 | if __name__ == "__main__": 109 | main() 110 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/llama_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | 4 | from .basic_handle import SimulateMultiTurnMessages 5 | from .tools import remove_messages 6 | 7 | 8 | class LlamaMultiTurnMessages(SimulateMultiTurnMessages): 9 | def __init__(self, model_url, is_english=False): 10 | super().__init__(model_url, is_english) 11 | self.model_messages = [] 12 | self.timeout = 300 13 | self.add_date = False 14 | 15 | def preprocess_to_simple(self, messages): 16 | if len(self.model_messages) == 0: 17 | self.model_messages = remove_messages(messages, is_english=self.is_english) 18 | else: 19 | if messages[-1]["role"] == "user": 20 | self.model_messages += remove_messages( 21 | [{"role": "user", "content": messages[-1]["content"]}], 22 | is_english=self.is_english 23 | ) 24 | elif messages[-1]["role"] == "tool": 25 | # messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"}) 26 | assistant = None 27 | observation = [] 28 | idx = -1 29 | while idx >= -len(messages): 30 | if messages[idx]["role"] == "assistant": 31 | assistant = messages[idx] 32 | break 33 | if messages[idx]["role"] == "tool": 34 | observation.append(messages[idx]) 35 | idx -= 1 36 | idmap_observation = {} 37 | 38 | assert len(observation) == len(assistant["tool_calls"]) 39 | for tool_call in assistant["tool_calls"]: 40 | idmap_observation[tool_call["id"]] = tool_call["function"]["name"] 41 | 42 | for obser in observation: 43 | assert obser["tool_call_id"] in idmap_observation 44 | 
self.model_messages.append({ 45 | "role": "tool", "name": idmap_observation[obser["tool_call_id"]], 46 | "content": obser["content"] 47 | }) 48 | 49 | return self.model_messages 50 | 51 | def parameters2arguments(self, function_dict): 52 | return { 53 | "name": function_dict["name"], 54 | "arguments": function_dict["parameters"] if "parameters" in function_dict else function_dict["arguments"] 55 | } 56 | 57 | def post_process_tool_call(self, answer): 58 | text = None 59 | tool_calls = None 60 | try: 61 | if "function" in answer and "name" in answer and "parameters" in answer: 62 | try: 63 | # messages.append({"role": "assistant", "tool_calls": [{"type": "function", "function": tool_call}]}) 64 | text = answer 65 | tool_calls = json.loads(answer) 66 | if type(tool_calls) == dict: 67 | tool_calls = [{ 68 | "id": str(uuid.uuid4()), "function": self.parameters2arguments(tool_calls) 69 | }] 70 | elif type(tool_calls) == list: 71 | tool_calls = [ 72 | {"id": str(uuid.uuid4()), "function": self.parameters2arguments(json.loads(_))} 73 | for _ in tool_calls 74 | ] 75 | self.model_messages.append({ 76 | "role": "assistant", "tool_calls": [ 77 | {"type": "function", "function": { 78 | key: tool_call["function"][key] for key in ["name", "arguments"] 79 | }} 80 | for tool_call in tool_calls 81 | ] 82 | }) 83 | except Exception as e: 84 | print(f"process error: {e}") 85 | pass 86 | else: 87 | self.model_messages.append({"role": "assistant", "content": answer}) 88 | text = answer 89 | tool_calls = None 90 | 91 | return text, tool_calls 92 | 93 | except Exception as e: 94 | print(f"error: {e}") 95 | return None, None 96 | 97 | 98 | def main(): 99 | handle = LlamaMultiTurnMessages("http://111.111.111.111:12345") 100 | tools = [ 101 | { 102 | "type": "function", 103 | "function": { 104 | "name": "get_current_weather", 105 | "description": "Get the current weather in a given location", 106 | "parameters": { 107 | "type": "object", 108 | "properties": { 109 | "location": { 110 | "type": "string", 111 | "description": "The city and state, e.g. San Francisco, CA" 112 | }, 113 | "unit": { 114 | "type": "string", 115 | "enum": [ 116 | "celsius", 117 | "fahrenheit" 118 | ] 119 | } 120 | }, 121 | "required": [ 122 | "location" 123 | ] 124 | } 125 | } 126 | } 127 | ] 128 | messages = [ 129 | { 130 | "role": "user", 131 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 
132 | } 133 | ] 134 | content, tool_calls = handle.request_funcall(messages, tools) 135 | print(content) 136 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 137 | 138 | 139 | if __name__ == "__main__": 140 | main() 141 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/toolace_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pdb 3 | import uuid 4 | import ast 5 | import requests 6 | 7 | from .basic_handle import SimulateMultiTurnMessages 8 | from .tools import remove_messages, AstVisitor, create_ast_value, generate_code 9 | 10 | 11 | class DoubleQuoteStrTransformer(ast.NodeTransformer): 12 | def visit_Str(self, node): 13 | # Set a custom attribute on the node to indicate that double quotes should be used 14 | node.use_double_quotes = True 15 | return node 16 | 17 | 18 | class ToolACEMultiTurnMessages(SimulateMultiTurnMessages): 19 | def __init__(self, model_url, is_english=False): 20 | super().__init__(model_url, is_english) 21 | self.model_messages = [] 22 | 23 | def process_planner_tool(self, messages): 24 | new_messages = [] 25 | for i, message in enumerate(messages): 26 | role = message["role"] 27 | tool_calls = message.get("tool_calls", None) 28 | function_calls = [] 29 | if tool_calls: 30 | for tool_call in tool_calls: 31 | function = tool_call["function"] 32 | name = function["name"] 33 | arguments = function["arguments"] 34 | 35 | func_call = ast.Call( 36 | func=ast.Name(id=name, ctx=ast.Load()), 37 | args=[], 38 | keywords=[ 39 | ast.keyword(arg=k, value=create_ast_value(v)) for k, v in arguments.items() 40 | ] 41 | ) 42 | function_calls.append(func_call) 43 | 44 | list_node = ast.List(elts=function_calls, ctx=ast.Load()) 45 | module = ast.Module(body=[ast.Expr(value=list_node)], type_ignores=[]) 46 | ast_tool_calls = generate_code(module) 47 | # pdb.set_trace() 48 | new_messages.append({"role": "assistant", "content": ast_tool_calls}) 49 | elif role == "tool": 50 | functions = messages[i - 1]["tool_calls"] 51 | observations = json.loads(message["content"]) 52 | assert len(observations) == len(functions) 53 | ret_observation = [] 54 | for function, observation in zip(functions, observations): 55 | ret_observation.append({ 56 | "name": function["function"]["name"], 57 | "results": observation 58 | }) 59 | new_messages.append({"role": "tool", "content": json.dumps(ret_observation, ensure_ascii=False)}) 60 | else: 61 | new_messages.append(message) 62 | return new_messages 63 | 64 | def preprocess_to_simple(self, messages): 65 | if len(self.model_messages) == 0: 66 | messages = remove_messages(messages, is_english=self.is_english) 67 | self.model_messages = self.process_planner_tool(messages) 68 | else: 69 | if messages[-1]["role"] == "user": 70 | self.model_messages.append({"role": "user", "content": messages[-1]["content"].replace("用户:", "").replace("User:", "").strip()}) 71 | elif messages[-1]["role"] == "tool": 72 | observations = json.loads(messages[-1]["content"]) 73 | functions = messages[-2]["tool_calls"] 74 | assert len(observations) == len(functions) 75 | ret_observation = [] 76 | for function, observation in zip(functions, observations): 77 | ret_observation.append({ 78 | "name": function["function"]["name"], 79 | "results": observation 80 | }) 81 | self.model_messages.append({"role": "tool", "content": json.dumps(ret_observation, ensure_ascii=False)}) 82 | 83 | return self.model_messages 84 | 85 | def post_process_tool_call(self, answer): 86 | try: 87 | self.model_messages.append({"role":
"assistant", "content": answer}) 88 | if answer.startswith("[") and answer.endswith("]"): 89 | astor = AstVisitor() 90 | astor.visit(ast.parse(answer)) 91 | answer = astor.function 92 | text = "use {} to solve user problem".format(", ".join([_["name"] for _ in answer])) 93 | tool_calls = [{"id": str(uuid.uuid4()), "function": _} for _ in answer] 94 | else: 95 | text = answer 96 | tool_calls = None 97 | 98 | return text, tool_calls 99 | 100 | except Exception as e: 101 | print(f"error: {e}") 102 | return None, None 103 | 104 | def request_funcall(self, messages, tools, env_info=None): 105 | url = self.model_url 106 | headers = {"Content-Type": "application/json"} 107 | data = { 108 | "messages": self.add_date_to_messsage_user(self.preprocess_to_simple(messages), env_info), 109 | "tools": tools, 110 | "date": self.add_weekday_date(env_info) 111 | } 112 | 113 | text = None 114 | tool_calls = None 115 | try: 116 | response = requests.post(url, headers=headers, json=data, timeout=self.timeout) 117 | if response.status_code == 200: 118 | result = response.json() 119 | answer = result["answer"] 120 | text, tool_calls = self.post_process_tool_call(answer) 121 | except Exception as e: 122 | print(f"error: {e}") 123 | text = None 124 | tool_calls = None 125 | 126 | return text, tool_calls 127 | 128 | 129 | def main(): 130 | handle = ToolACEMultiTurnMessages("http://111.111.111.111:12345") 131 | tools = [ 132 | { 133 | "type": "function", 134 | "function": { 135 | "name": "get_current_weather", 136 | "description": "Get the current weather in a given location", 137 | "parameters": { 138 | "type": "object", 139 | "properties": { 140 | "location": { 141 | "type": "string", 142 | "description": "The city and state, e.g. San Francisco, CA" 143 | }, 144 | "unit": { 145 | "type": "string", 146 | "enum": [ 147 | "celsius", 148 | "fahrenheit" 149 | ] 150 | } 151 | }, 152 | "required": [ 153 | "location" 154 | ] 155 | } 156 | } 157 | } 158 | ] 159 | messages = [ 160 | { 161 | "role": "user", 162 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 163 | } 164 | ] 165 | content, tool_calls = handle.request_funcall(messages, tools) 166 | print(content) 167 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 168 | 169 | 170 | if __name__ == "__main__": 171 | main() 172 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/tools.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import json 3 | import traceback 4 | 5 | from c3_bench.bench_test.utils import get_keywords 6 | 7 | 8 | class AstVisitor(ast.NodeVisitor): 9 | def __init__(self): 10 | self.function = [] 11 | 12 | def visit_Call(self, node): 13 | # self.function_name, self.args = parse_string_to_function(node) 14 | function = {} 15 | if isinstance(node.func, ast.Name): 16 | function["name"] = node.func.id 17 | elif isinstance(node.func, ast.Attribute): 18 | function["name"] = node.func.attr 19 | 20 | function["arguments"] = {} 21 | for keyword in node.keywords: 22 | function["arguments"][keyword.arg] = get_keywords(keyword.value) 23 | self.function.append(function) 24 | 25 | def clear(self): 26 | self.function = [] 27 | 28 | 29 | english_prompt = ''' 30 | You are an expert in function composition. You will be given a question and a set of possible functions. Based on the question, you need to make one or more function/tool calls to achieve the purpose. 
31 | If none of the functions can be used, please directly reply to the user in natural language, starting with "Assistant:". 32 | If the given question lacks the parameters required by the function, please ask the user for the necessary information in natural language, starting with "Assistant:". 33 | If the result of the call is already sufficient to answer the user's question, please summarize the historical results and reply to the user in natural language, starting with "Assistant:". 34 | You should only return function calls in the tool call section. If you decide to make any function calls, you must format them as [{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]. You should not include any other text in your reply. The following is a list of functions you can call, in JSON format. 35 | 36 | {{{tools}}} 37 | 38 | If you decide to return function calls, please format them as [{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...], without including any other text. 39 | Otherwise, please refer to the three cases mentioned at the beginning and reply starting with "Assistant:". 40 | 41 | Current time: {{{env_info}}}'''.strip("\n") 42 | 43 | 44 | def tool_call_prompt(messages, tools, date_time): 45 | system_prompt = english_prompt.replace( 46 | "{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=2) 47 | ).replace( 48 | "{{{env_info}}}", date_time 49 | ) 50 | new_messages = [{"role": "system", "content": system_prompt}] 51 | for message in messages: 52 | role = message["role"] 53 | content = message["content"] 54 | if role == "user": 55 | new_messages.append({"role": "user", "content": content}) 56 | elif role == "assistant": 57 | tool_calls = message.get("tool_calls", None) 58 | if tool_calls and len(message["tool_calls"]) != 0: 59 | new_tool_calls = [] 60 | for tool_call in tool_calls: 61 | function = tool_call["function"] 62 | new_tool_calls.append(function) 63 | new_messages.append({"role": "assistant", "content": 64 | f"{json.dumps(new_tool_calls, ensure_ascii=False)}"}) 65 | else: 66 | new_messages.append({"role": "assistant", "content": f"Assistant:{content}"}) 67 | elif role == "tool": 68 | new_messages.append({"role": "user", "content": f"{content}"}) 69 | elif role == "system": 70 | continue 71 | else: 72 | raise NotImplementedError 73 | return new_messages 74 | 75 | 76 | def remove_messages(messages, is_english=False): 77 | new_messages = [] 78 | try: 79 | role = "user" 80 | for m in messages: 81 | assert ( 82 | m["role"] == "assistant" 83 | and role == "assistant" 84 | ) or ( 85 | m["role"] in ["user", "tool"] 86 | and role in ["user", "tool"] 87 | ) 88 | role = "assistant" if role in ["user", "tool"] else "user" 89 | if is_english: 90 | colon_idx = m["content"].find(":") 91 | if ( 92 | colon_idx != -1 and 93 | m["content"][:colon_idx].lower() in [ 94 | "ai", "ai agent", "user", "ai agent assistant", "planner", "observation", "tool" 95 | ] 96 | ): 97 | m['content'] = m["content"][colon_idx+1:] 98 | else: 99 | colon_idx = m["content"].find(":") 100 | if ( 101 | colon_idx != -1 and 102 | m["content"][:colon_idx] in [ 103 | "用户", "AI Agent助手", "AI Agent", "Planner", "Observation", "Tool" 104 | ] 105 | ): 106 | m['content'] = m["content"][colon_idx+1:] 107 | new_messages.append(m) 108 | except Exception as e: 109 | print(f"error: {e}") 110 | traceback.print_exc() 111 | return new_messages 112 | 113 | 114 | def create_ast_value(value): 115 | if isinstance(value, str): 116 | return ast.Str(s=value) 117 
| elif isinstance(value, bool):  # checked before int, since bool is a subclass of int 118 | return ast.NameConstant(value=value) 119 | elif isinstance(value, int): 120 | return ast.Num(n=value) 121 | elif isinstance(value, float): 122 | return ast.Num(n=value) 123 | elif isinstance(value, list): 124 | return ast.List(elts=[create_ast_value(item) for item in value], ctx=ast.Load()) 125 | elif isinstance(value, dict): 126 | keys = [ast.Str(s=k) for k in value.keys()] 127 | values = [create_ast_value(v) for v in value.values()] 128 | return ast.Dict(keys=keys, values=values) 129 | else: 130 | raise ValueError(f"Unsupported value type: {type(value).__name__}") 131 | 132 | 133 | def generate_code(node): 134 | if isinstance(node, ast.Str): 135 | return f'"{node.s}"' 136 | elif isinstance(node, ast.Num): 137 | return str(node.n) 138 | elif isinstance(node, ast.NameConstant): 139 | return str(node.value).lower() 140 | elif isinstance(node, ast.List): 141 | elements = [generate_code(elt) for elt in node.elts] 142 | return f"[{', '.join(elements)}]" 143 | elif isinstance(node, ast.Dict): 144 | pairs = [] 145 | for key, value in zip(node.keys, node.values): 146 | key_str = generate_code(key) 147 | value_str = generate_code(value) 148 | pairs.append(f"{key_str}: {value_str}") 149 | return f"{{{', '.join(pairs)}}}" 150 | elif isinstance(node, ast.Call): 151 | func_name = node.func.id 152 | args_str = ", ".join([generate_code(arg) for arg in node.args]) 153 | kwargs_str = ", ".join([f"{kw.arg}={generate_code(kw.value)}" for kw in node.keywords]) 154 | all_args_str = ", ".join(filter(None, [args_str, kwargs_str])) 155 | return f"{func_name}({all_args_str})" 156 | elif isinstance(node, ast.Module): 157 | body_str = ", ".join([generate_code(item) for item in node.body]) 158 | return body_str 159 | elif isinstance(node, ast.Expr): 160 | return generate_code(node.value) 161 | else: 162 | raise ValueError(f"Unsupported AST node type: {type(node).__name__}") 163 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/watt_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pdb 3 | import traceback 4 | import uuid 5 | import ast 6 | import requests 7 | 8 | from .basic_handle import SimulateMultiTurnMessages 9 | from .tools import remove_messages, AstVisitor, create_ast_value, generate_code 10 | 11 | 12 | class WattMultiTurnMessages(SimulateMultiTurnMessages): 13 | def __init__(self, model_url, is_english=False): 14 | super().__init__(model_url, is_english) 15 | self.model_messages = [] 16 | self.timeout = 300 17 | 18 | def process_planner_tool(self, messages): 19 | new_messages = [] 20 | for i, message in enumerate(messages): 21 | role = message["role"] 22 | tool_calls = message.get("tool_calls", None) 23 | function_calls = [] 24 | if tool_calls: 25 | for tool_call in tool_calls: 26 | function = tool_call["function"] 27 | name = function["name"] 28 | arguments = function["arguments"] 29 | 30 | func_call = ast.Call( 31 | func=ast.Name(id=name, ctx=ast.Load()), 32 | args=[], 33 | keywords=[ 34 | ast.keyword(arg=k, value=create_ast_value(v)) for k, v in arguments.items() 35 | ] 36 | ) 37 | function_calls.append(func_call) 38 | 39 | list_node = ast.List(elts=function_calls, ctx=ast.Load()) 40 | module = ast.Module(body=[ast.Expr(value=list_node)], type_ignores=[]) 41 | ast_tool_calls = generate_code(module) 42 | # pdb.set_trace() 43 | new_messages.append({"role": "assistant", "content": ast_tool_calls}) 44 | elif role == "tool": 45 | functions =
messages[i - 1]["tool_calls"] 46 | observations = json.loads(message["content"]) 47 | assert len(observations) == len(functions) 48 | for function, observation in zip(functions, observations): 49 | # pdb.set_trace() 50 | new_messages.append({"role": "tool", "name": function["function"]["name"], "content": json.dumps(observation, ensure_ascii=False)}) 51 | else: 52 | new_messages.append(message) 53 | return new_messages 54 | 55 | def preprocess_to_simple(self, messages): 56 | # pdb.set_trace() 57 | if len(self.model_messages) == 0: 58 | messages = remove_messages(messages, is_english=self.is_english) 59 | self.model_messages = self.process_planner_tool(messages) 60 | else: 61 | if messages[-1]["role"] == "user": 62 | self.model_messages += remove_messages( 63 | [{"role": "user", "content": messages[-1]["content"]}], 64 | is_english=self.is_english 65 | ) 66 | elif messages[-1]["role"] == "tool": 67 | assistant = None 68 | observation = [] 69 | idx = -1 70 | while True or idx > -len(messages): 71 | if messages[idx]["role"] == "assistant": 72 | assistant = messages[idx] 73 | break 74 | if messages[idx]["role"] == "tool": 75 | observation.append(messages[idx]) 76 | idx -= 1 77 | idmap_observation = {} 78 | assert len(observation) == len(assistant["tool_calls"]) 79 | for tool_call in assistant["tool_calls"]: 80 | idmap_observation[tool_call["id"]] = tool_call["function"]["name"] 81 | for obser in observation: 82 | assert obser["tool_call_id"] in idmap_observation 83 | self.model_messages.append({ 84 | "role": "tool", "name": idmap_observation[obser["tool_call_id"]], 85 | "content": obser["content"] 86 | }) 87 | return self.model_messages 88 | 89 | def parameters2arguments(self, function_dict): 90 | return { 91 | "name": function_dict["name"], 92 | "arguments": function_dict["parameters"] if "parameters" in function_dict else function_dict["arguments"] 93 | } 94 | 95 | def post_process_tool_call(self, answer): 96 | text = None 97 | tool_calls = None 98 | try: 99 | if answer.startswith("[") and answer.endswith("]"): 100 | try: 101 | self.model_messages.append({"role": "assistant", "content": answer}) 102 | astor = AstVisitor() 103 | astor.visit(ast.parse(answer)) 104 | answer_ = astor.function 105 | text = "use {} to solve user problem".format(", ".join([_["name"] for _ in answer_])) 106 | tool_calls = [{"id": str(uuid.uuid4()), "function": _} for _ in answer_] 107 | except Exception as e: 108 | traceback.print_exc() 109 | print(f"process error: {e}", flush=True) 110 | else: 111 | self.model_messages.append({"role": "assistant", "content": answer}) 112 | text = answer 113 | tool_calls = None 114 | 115 | return text, tool_calls 116 | 117 | except Exception as e: 118 | traceback.print_exc() 119 | print(f"error: {e}", flush=True) 120 | return None, None 121 | 122 | def request_funcall(self, messages, tools, env_info=None): 123 | url = self.model_url 124 | headers = {"Content-Type": "application/json"} 125 | data = { 126 | "messages": self.add_date_to_message(self.preprocess_to_simple(messages), env_info), 127 | "tools": tools, 128 | "date": self.add_weekday_date(env_info) 129 | } 130 | 131 | text = None 132 | tool_calls = None 133 | try_nums = 0 134 | while True: 135 | try: 136 | response = requests.post(url, headers=headers, json=data, timeout=self.timeout) 137 | if response.status_code == 200: 138 | result = response.json() 139 | answer = result["answer"] 140 | text, tool_calls = self.post_process_tool_call(answer) 141 | break 142 | except Exception as e: 143 | print(f"error: {e}", flush=True) 144 
| traceback.print_exc() 145 | try_nums += 1 146 | print(f"try_nums: {try_nums}", flush=True) 147 | if try_nums >= 5: 148 | break 149 | 150 | return text, tool_calls 151 | 152 | 153 | def main(): 154 | handle = WattMultiTurnMessages("http://111.111.111.111:12345") 155 | tools = [ 156 | { 157 | "type": "function", 158 | "function": { 159 | "name": "get_current_weather", 160 | "description": "Get the current weather in a given location", 161 | "parameters": { 162 | "type": "object", 163 | "properties": { 164 | "location": { 165 | "type": "string", 166 | "description": "The city and state, e.g. San Francisco, CA" 167 | }, 168 | "unit": { 169 | "type": "string", 170 | "enum": [ 171 | "celsius", 172 | "fahrenheit" 173 | ] 174 | } 175 | }, 176 | "required": [ 177 | "location" 178 | ] 179 | } 180 | } 181 | } 182 | ] 183 | messages = [ 184 | { 185 | "role": "user", 186 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 187 | } 188 | ] 189 | content, tool_calls = handle.request_funcall(messages, tools) 190 | print(content) 191 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 192 | 193 | 194 | if __name__ == "__main__": 195 | main() 196 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/xlam2_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pdb 3 | import uuid 4 | import requests 5 | 6 | import sys 7 | import os 8 | 9 | current_path_list = os.getcwd().split("/")[:-2] 10 | current_path = "/".join(current_path_list) 11 | print(f"current_path: {current_path}\n") 12 | sys.path.append(current_path) 13 | 14 | from .basic_handle import SimulateMultiTurnMessages 15 | from .tools import remove_messages 16 | 17 | 18 | class XLAM2MultiTurnMessages(SimulateMultiTurnMessages): 19 | def __init__(self, model_url, is_english=False): 20 | super().__init__(model_url, is_english) 21 | self.model_messages = [] 22 | 23 | def preprocess_to_simple(self, messages): 24 | # pdb.set_trace() 25 | if len(self.model_messages) == 0: 26 | self.model_messages = remove_messages(messages, is_english=True) 27 | else: 28 | if messages[-1]["role"] == "user": 29 | self.model_messages.append({"role": "user", 30 | "content": messages[-1]["content"].replace("用户:", "").replace("User:", 31 | "").strip()}) 32 | elif messages[-1]["role"] == "tool": 33 | self.model_messages.append({"role": "tool", "content": messages[-1]["content"]}) 34 | # print("##########################") 35 | # print(f"self.model_messages:\n{self.model_messages}") 36 | return self.model_messages 37 | 38 | def request_funcall(self, messages, tools, env_info=None): 39 | url = self.model_url 40 | headers = {"Content-Type": "application/json"} 41 | data = { 42 | "messages": self.add_date_to_message_user(self.preprocess_to_simple(messages), env_info), 43 | "tools": tools, 44 | "date": self.add_weekday_date(env_info) 45 | } 46 | 47 | text = None 48 | tool_calls = None 49 | try: 50 | response = requests.post(url, headers=headers, json=data, timeout=self.timeout) 51 | if response.status_code == 200: 52 | result = response.json() 53 | answer = result["answer"] 54 | text, tool_calls = self.post_process_tool_call(answer) 55 | except Exception as e: 56 | print(f"error: {e}") 57 | text = None 58 | tool_calls = None 59 | 60 | return text, tool_calls 61 | 62 | def post_process_tool_call(self, answer): 63 | try: 64 | if answer.startswith("[") and answer.endswith("]"): 65 | try: 66 | answer = json.loads(answer) 67 | 
except Exception as e: 68 | print(f"json loads error: {e}") 69 | pass 70 | 71 | if type(answer) == list: 72 | text = "use {} to solve user problem".format( 73 | ", ".join([ 74 | _["name"] for _ in answer 75 | ]) 76 | ) 77 | tool_calls = [{"id": str(uuid.uuid4()), "type": "function", "function": _} for _ in answer] 78 | self.model_messages.append({"role": "assistant", "content": text, "tool_calls": tool_calls}) 79 | else: 80 | self.model_messages.append({"role": "assistant", "content": answer}) 81 | text = answer 82 | tool_calls = None 83 | 84 | return text, tool_calls 85 | 86 | except Exception as e: 87 | print(f"error: {e}") 88 | return None, None 89 | 90 | 91 | def main(): 92 | handle = XLAM2MultiTurnMessages("http://11.220.87.179:12345") 93 | tools = [ 94 | { 95 | "type": "function", 96 | "function": { 97 | "name": "get_current_weather", 98 | "description": "Get the current weather in a given location", 99 | "parameters": { 100 | "type": "object", 101 | "properties": { 102 | "location": { 103 | "type": "string", 104 | "description": "The city and state, e.g. San Francisco, CA" 105 | }, 106 | "unit": { 107 | "type": "string", 108 | "enum": [ 109 | "celsius", 110 | "fahrenheit" 111 | ] 112 | } 113 | }, 114 | "required": [ 115 | "location" 116 | ] 117 | } 118 | } 119 | } 120 | ] 121 | messages = [ 122 | { 123 | "role": "user", 124 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 125 | }, 126 | # {"role": "assistant", "content": "", "tool_calls": [{'id': '137c9f34-a7d1-4cd3-a0ae-a4763bf884ac', 'function': {'name': 'get_current_weather', 'arguments': {'location': 'Boston', 'unit': 'celsius'}}}, {'id': '94430843-c85c-4946-8333-26d470b73a93', 'function': {'name': 'get_current_weather', 'arguments': {'location': 'San Francisco', 'unit': 'celsius'}}}]}, 127 | # {"role": "tool", "content": "Boston and San Francisco is rainy."} 128 | ] 129 | content, tool_calls = handle.request_funcall(messages, tools, "2023-03-17 19:20:00") 130 | print(content) 131 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 132 | 133 | 134 | if __name__ == "__main__": 135 | main() 136 | -------------------------------------------------------------------------------- /c3_bench/bench_test/handle/xlam_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | 4 | from .basic_handle import SimulateMultiTurnMessages 5 | from .tools import remove_messages 6 | 7 | 8 | class XLAMMultiTurnMessages(SimulateMultiTurnMessages): 9 | def __init__(self, model_url, is_english=False): 10 | super().__init__(model_url, is_english) 11 | self.model_messages = [] 12 | 13 | def preprocess_to_simple(self, messages): 14 | if len(self.model_messages) == 0: 15 | self.model_messages = remove_messages(messages, is_english=True) 16 | else: 17 | if messages[-1]["role"] == "user": 18 | self.model_messages.append({"role": "user", "content": messages[-1]["content"].replace("用户:", "").replace("User:", "").strip()}) 19 | elif messages[-1]["role"] == "tool": 20 | self.model_messages.append({"role": "tool", "content": messages[-1]["content"]}) 21 | return self.model_messages 22 | 23 | def post_process_tool_call(self, answer): 24 | try: 25 | if "tool_calls" in answer: 26 | try: 27 | answer = json.loads(answer) 28 | except Exception as e: 29 | print(f"json loads error: {e}") 30 | pass 31 | 32 | if "tool_calls" in answer and type(answer) == dict: 33 | text = "use {} to solve user problem".format( 34 | ", ".join([ 35 | _["name"] for _ in 
answer["tool_calls"] 36 | ]) 37 | ) 38 | tool_calls = [{"id": str(uuid.uuid4()), "function": _} for _ in answer["tool_calls"]] 39 | self.model_messages.append({"role": "assistant", "content": text, "tool_calls": answer["tool_calls"]}) 40 | else: 41 | self.model_messages.append({"role": "assistant", "content": answer}) 42 | text = answer 43 | tool_calls = None 44 | 45 | return text, tool_calls 46 | 47 | except Exception as e: 48 | print(f"error: {e}") 49 | return None, None 50 | 51 | 52 | def main(): 53 | handle = XLAMMultiTurnMessages("http://111.111.111.111:12345") 54 | tools = [ 55 | { 56 | "type": "function", 57 | "function": { 58 | "name": "get_current_weather", 59 | "description": "Get the current weather in a given location", 60 | "parameters": { 61 | "type": "object", 62 | "properties": { 63 | "location": { 64 | "type": "string", 65 | "description": "The city and state, e.g. San Francisco, CA" 66 | }, 67 | "unit": { 68 | "type": "string", 69 | "enum": [ 70 | "celsius", 71 | "fahrenheit" 72 | ] 73 | } 74 | }, 75 | "required": [ 76 | "location" 77 | ] 78 | } 79 | } 80 | } 81 | ] 82 | messages = [ 83 | { 84 | "role": "user", 85 | "content": "What's the weather like in the two cities of Boston and San Francisco?" 86 | } 87 | ] 88 | content, tool_calls = handle.request_funcall(messages, tools) 89 | print(content) 90 | print(json.dumps(tool_calls, ensure_ascii=False, indent=4)) 91 | 92 | 93 | if __name__ == "__main__": 94 | main() -------------------------------------------------------------------------------- /c3_bench/bench_test/request_pipeline.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import copy 4 | import argparse 5 | import sys 6 | 7 | current_path_list = os.getcwd().split("/")[:-2] 8 | current_path = "/".join(current_path_list) 9 | print(f"current_path: {current_path}\n") 10 | sys.path.append(current_path) 11 | 12 | from utils import read_file_to_json, get_random_pathname 13 | from tool_call_graph import eval_by_tool_call_graph 14 | from handle.handles import tool_handle_map 15 | from tqdm import tqdm 16 | 17 | 18 | def str2bool(v): 19 | ''' 20 | Transform string to bool. 21 | 22 | Arguments: 23 | v (str): The value to be converted. 24 | 25 | Returns: 26 | bool: The converted value. 
27 |     '''
28 |     if isinstance(v, bool):
29 |         return v
30 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
31 |         return True
32 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
33 |         return False
34 |     else:
35 |         raise argparse.ArgumentTypeError('Boolean value expected.')
36 | 
37 | 
38 | def get_messages_until_task(messages, task_id, task, history, is_english, remove_role=True):
39 |     '''
40 |     Merge the history messages, filter them by task ID and task content, and strip role prefixes according to the language.
41 | 
42 |     Arguments:
43 |         messages (list): List of message records; each record is a dict with the content, role, and related fields.
44 |         task_id (int): Task ID, used to locate the message of a specific task.
45 |         task (str): Task content, used to verify that the located message actually contains this task.
46 |         history (list): List of history messages; each element is itself a list of messages.
47 |         is_english (bool): Whether the messages are in English, which determines how role prefixes are stripped.
48 |         remove_role (bool): Whether to strip role prefixes from the messages. Defaults to True.
49 | 
50 |     Returns:
51 |         list: The merged message list, filtered by task ID and task content, with role prefixes stripped.
52 |     '''
53 |     new_messages = []
54 |     try:
55 |         for history_messages in history:
56 |             new_messages += history_messages
57 |         assert len(new_messages) % 2 == 0
58 |         assert task in messages[task_id]["content"]
59 |         new_messages += messages[:task_id + 1]
60 |         assert len(new_messages) % 2 == 1
61 |         role = "user"
62 |         for m in new_messages:
63 |             assert m["role"] == role
64 |             role = "assistant" if role == "user" else "user"
65 |             if not remove_role:
66 |                 continue
67 |             if is_english:
68 |                 colon_idx = m["content"].find(":")
69 |                 if (
70 |                     colon_idx != -1 and
71 |                     m["content"][:colon_idx].lower() in [
72 |                         "ai", "ai agent", "user", "ai agent assistant"
73 |                     ]
74 |                 ):
75 |                     m['content'] = m["content"][colon_idx + 1:]
76 |             else:
77 |                 colon_idx = m["content"].find(":")
78 |                 if (
79 |                     colon_idx != -1 and
80 |                     m["content"][:colon_idx] in [
81 |                         "用户", "AI Agent助手", "AI Agent"
82 |                     ]
83 |                 ):
84 |                     m['content'] = m["content"][colon_idx + 1:]
85 |     except Exception as e:
86 |         # ipdb.set_trace()
87 |         print(f"error: {e}")
88 | 
89 |     return new_messages
90 | 
91 | 
92 | def parse_argument():
93 |     parser = argparse.ArgumentParser()
94 |     parser.add_argument("--language", type=str, default="en")
95 |     parser.add_argument("--new_key", type=str, default=None)
96 |     parser.add_argument("--model", type=str, default="hunyuan-turbos-latest")
97 |     parser.add_argument("--data_path", type=str, default="./data/C3-Bench.jsonl")
98 |     parser.add_argument("--output_path", type=str, default="./result")
99 |     parser.add_argument("--model_url", type=str, default="http://111.111.111.111:12345")
100 |     parser.add_argument("--continue_file", type=str, default=None)
101 |     parser.add_argument("--remove_role", type=str2bool, default=True)
102 |     parser.add_argument("--contain_context", type=str2bool, default=True)
103 |     parser.add_argument("--debug_id", type=str, default=None)
104 |     parser.add_argument("--debug_idx", type=str, default=None)
105 |     parser.add_argument("--skip_num", type=int, default=0)
106 |     parser.add_argument("--retry_num", type=int, default=1)
107 |     args = parser.parse_args()
108 |     return args
109 | 
110 | 
111 | def add_args_info_into_filename(args):
112 |     args_dict = vars(args)
113 |     key = []
114 |     if args.new_key is not None:
115 |         key.append(args.new_key)
116 |     key.append(args_dict["model"])
117 |     key.append(args_dict["language"])
118 |     for k in args_dict:
119 |         if type(args_dict[k]) == bool:
120 |             key.append(k)
121 |     return "_".join(key)
122 | 
123 | 
124 | def split_messages_by_equal(messages):
125 |     messages_list = []
126 |     now_message = []
127 |     for m in messages:
128 |         if type(m) == str and "=====" in m:
129 |             messages_list.append(copy.copy(now_message))
130 |             now_message = []
131 |         else:
132 |             now_message.append(m)
133 |     if len(now_message) != 0:
134 | 
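        # Flush the trailing turn, which has no "=====" separator after it.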
messages_list.append(now_message) 135 | return messages_list 136 | 137 | 138 | def main(args): 139 | data = read_file_to_json(args.data_path) 140 | 141 | res_data = [] 142 | path_ = get_random_pathname(args.output_path, "jsonl", keys=add_args_info_into_filename(args), need_time=True) 143 | is_english = False if args.language == "zh" else True 144 | error_list = [] 145 | too_long_continue = 0 146 | task_length = 0 147 | process_cnt = 0 148 | debug_mode = args.debug_id and args.debug_idx 149 | if args.continue_file and "None" not in args.continue_file and os.path.exists( 150 | args.continue_file) and args.model in args.continue_file: 151 | continue_file = read_file_to_json(args.continue_file) 152 | res_data = continue_file 153 | path_ = args.continue_file.replace(".unfinish", "") 154 | task_length += len(res_data) 155 | print(f"continue file: {args.continue_file}") 156 | print(f"task_length: {task_length}") 157 | elif args.skip_num != 0: 158 | data = data[int(args.skip_num / 4) + 1:] 159 | 160 | for item in tqdm(data): 161 | try: 162 | if debug_mode and args.debug_id not in item["id"]: 163 | continue 164 | task_list = item["english_task"] if is_english else item["task"] 165 | answer_lists = item["english_answer_list"] if is_english else item["answer_list"] 166 | messages_list = item["english_messages"] if is_english else item["messages"] 167 | tools_list = item["english_tools"] if is_english else item["tools"] 168 | messages_list = split_messages_by_equal(messages_list) 169 | assert type(task_list) == list and type(answer_lists[0]) == list 170 | assert len(task_list) == len(answer_lists) and len(task_list) == len(messages_list) 171 | if not args.contain_context and len(task_list) == 1: 172 | continue 173 | if type(item["env_info"]) == str: 174 | item["env_info"] = [item["env_info"] for _ in range(len(task_list))] 175 | item["env_info"] = [ 176 | env_info[:env_info.find("星期")].strip() 177 | for env_info in item["env_info"] if "星期" in env_info 178 | ] 179 | for id_, task_id, task, answer_list, messages, env_info in zip( 180 | range(len(task_list)), item["task_ids"], 181 | task_list, answer_lists, messages_list, item["env_info"] 182 | ): 183 | if debug_mode and int(args.debug_idx) != id_: 184 | continue 185 | if not args.contain_context and id_ == 0: 186 | continue 187 | process_cnt += 1 188 | if process_cnt <= len(res_data): 189 | continue 190 | simulator, response_continue = tool_handle_map[args.model] 191 | simulator = simulator(args.model_url, is_english) 192 | if args.contain_context: 193 | messages = get_messages_until_task( 194 | messages, task_id, task, messages_list[:id_], is_english, args.remove_role 195 | ) 196 | else: 197 | messages = get_messages_until_task( 198 | messages, task_id, task, [], is_english, args.remove_role 199 | ) 200 | messages_length = len(messages) 201 | predict_label, predict_is_optimal, predict_result, answer_result = eval_by_tool_call_graph( 202 | simulator.request_funcall, 203 | messages, 204 | tools_list, 205 | answer_list, 206 | response_continue, 207 | env_info=env_info, 208 | retry_num=args.retry_num 209 | ) 210 | res_data.append({ 211 | "id": item["id"], 212 | "idx": id_, 213 | "messages": messages, 214 | "messages_length": messages_length, 215 | "task_id": task_id, 216 | "type": item["type"], 217 | "tools": tools_list, 218 | "task": task, 219 | "answer_list": answer_list, 220 | "predict_result": predict_result, 221 | "predict_label": predict_label, 222 | "predict_is_optimal": str(predict_is_optimal), 223 | "answer_result": answer_result, 224 | 
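                # "turn_type" below normalizes the Chinese truth marker "真" ("true") from the raw annotations into booleans.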
"turn_type": [(_ if type(_) == bool else _ == "真") for _ in item.get("turn_type", [])], 225 | "turn_subtypes": item.get("turn_subtypes", []), 226 | }) 227 | if len(res_data) % 10 == 1: 228 | print(task) 229 | print(predict_result) 230 | with open(path_ + ".unfinish", "w", encoding="utf-8") as f: 231 | for res in res_data: 232 | f.write(json.dumps(res, ensure_ascii=False) + "\n") 233 | except Exception as e: 234 | print(f"error: {e}") 235 | error_list.append([item["id"], e]) 236 | # with open("error.jsonl", "w", encoding="utf-8") as f: 237 | # for error in error_list: 238 | # f.write(json.dumps(error, ensure_ascii=False) + "\n") 239 | 240 | task_length += len(item["task"]) 241 | # print(f"{task_length}:-:{len(res_data)}:-:{process_cnt}") 242 | 243 | print(f"error cnt: {len(error_list)}") 244 | print(f"too long: {too_long_continue}") 245 | if not debug_mode: 246 | os.system(f'mv {path_}.unfinish {path_}') 247 | 248 | 249 | if __name__ == "__main__": 250 | args = parse_argument() 251 | main(args) 252 | # add_args_info_into_filename(args) 253 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/tool_class/__init__.py -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/chatglm.py: -------------------------------------------------------------------------------- 1 | import json 2 | import torch 3 | 4 | from .tool_class_base import ToolClass 5 | from transformers import AutoModelForCausalLM, AutoTokenizer 6 | 7 | 8 | ''' 9 | import torch 10 | from transformers import AutoModelForCausalLM, AutoTokenizer 11 | 12 | device = "cuda" 13 | 14 | tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-chat", trust_remote_code=True) 15 | 16 | query = "你好" 17 | 18 | inputs = tokenizer.apply_chat_template([{"role": "user", "content": query}], 19 | add_generation_prompt=True, 20 | tokenize=True, 21 | return_tensors="pt", 22 | return_dict=True 23 | ) 24 | 25 | inputs = inputs.to(device) 26 | model = AutoModelForCausalLM.from_pretrained( 27 | "THUDM/glm-4-9b-chat", 28 | torch_dtype=torch.bfloat16, 29 | low_cpu_mem_usage=True, 30 | trust_remote_code=True 31 | ).to(device).eval() 32 | 33 | gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1} 34 | with torch.no_grad(): 35 | outputs = model.generate(**inputs, **gen_kwargs) 36 | outputs = outputs[:, inputs['input_ids'].shape[1]:] 37 | print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 38 | ''' 39 | 40 | 41 | class ChatGLM(ToolClass): 42 | def init(self): 43 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True) 44 | self.model = AutoModelForCausalLM.from_pretrained( 45 | self.model_path, 46 | torch_dtype=torch.bfloat16, 47 | device_map="auto", 48 | low_cpu_mem_usage=True, 49 | trust_remote_code=True 50 | ).eval() 51 | 52 | def _format_prompt(self, messages, function): 53 | formatted_prompt = "" 54 | tools = function 55 | if tools: 56 | formatted_prompt = "[gMASK]<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具" 57 | for tool in tools: 58 | formatted_prompt += f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], indent=4)}" 59 | formatted_prompt += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。" 60 | 61 | for message in 
messages: 62 | formatted_prompt += f"<|{message['role']}|>\n{message['content']}" 63 | 64 | formatted_prompt += "<|assistant|>" 65 | 66 | return formatted_prompt 67 | 68 | def get_res(self, messages, functions, extra_args={}, more_info=None): 69 | text = self._format_prompt(messages, functions) 70 | inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device) 71 | gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1} 72 | outputs = self.model.generate(**inputs, **gen_kwargs) 73 | return self.decode_res(inputs, outputs) 74 | 75 | def _get_res(self, messages): 76 | # outputs = self.pipeline(messages, max_new_tokens=512) 77 | print("just messages") 78 | text = self.tokenizer.apply_chat_template( 79 | messages, 80 | tokenize=False, 81 | add_generation_prompt=True, 82 | ) 83 | model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device) 84 | gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1} 85 | outputs = self.model.generate( 86 | **model_inputs, 87 | **gen_kwargs 88 | ) 89 | return model_inputs, outputs 90 | 91 | def decode_res(self, prompt, outputs): 92 | # print(len(prompt)) 93 | # print(type(outputs), outputs) 94 | generated_ids = outputs[:, prompt['input_ids'].shape[1]:] 95 | return self.tokenizer.decode(generated_ids[0], skip_special_tokens=True) 96 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/deepseek.py: -------------------------------------------------------------------------------- 1 | from .tool_class_base import ToolClass 2 | from transformers import AutoModelForCausalLM, AutoTokenizer 3 | 4 | 5 | ''' 6 | import torch 7 | from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig 8 | 9 | model_name = "deepseek-ai/DeepSeek-V2" 10 | tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) 11 | # `max_memory` should be set based on your devices 12 | max_memory = {i: "75GB" for i in range(8)} 13 | # `device_map` cannot be set to `auto` 14 | model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="sequential", torch_dtype=torch.bfloat16, max_memory=max_memory, attn_implementation="eager") 15 | model.generation_config = GenerationConfig.from_pretrained(model_name) 16 | model.generation_config.pad_token_id = model.generation_config.eos_token_id 17 | 18 | text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. 
The output is" 19 | inputs = tokenizer(text, return_tensors="pt") 20 | outputs = model.generate(**inputs.to(model.device), max_new_tokens=100) 21 | 22 | result = tokenizer.decode(outputs[0], skip_special_tokens=True) 23 | print(result) 24 | ''' 25 | 26 | 27 | class DeepSeek(ToolClass): 28 | def init(self): 29 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 30 | self.model = AutoModelForCausalLM.from_pretrained( 31 | self.model_path, 32 | torch_dtype="auto", 33 | device_map="auto", 34 | ) 35 | 36 | def get_res(self, messages, functions, extra_args={}, more_info=None): 37 | text = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True, tokenize=False) 38 | inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device) 39 | outputs = self.model.generate(**inputs, max_new_tokens=512) 40 | return self.decode_res(inputs, outputs) 41 | 42 | def _get_res(self, messages): 43 | # outputs = self.pipeline(messages, max_new_tokens=512) 44 | print("just messages") 45 | text = self.tokenizer.apply_chat_template( 46 | messages, 47 | tokenize=False, 48 | add_generation_prompt=True, 49 | ) 50 | model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device) 51 | outputs = self.model.generate( 52 | **model_inputs, 53 | max_new_tokens=4096 54 | ) 55 | return model_inputs, outputs 56 | 57 | def decode_res(self, prompt, outputs): 58 | # print(len(prompt)) 59 | # print(type(outputs), outputs) 60 | generated_ids = [ 61 | output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt.input_ids, outputs) 62 | ] 63 | return self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] 64 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/fc_medium.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .tool_class_base import ToolClass 4 | from transformers import AutoModelForCausalLM, AutoTokenizer 5 | 6 | 7 | logger = logging.getLogger() 8 | 9 | 10 | ''' 11 | from transformers import AutoModelForCausalLM, AutoTokenizer 12 | 13 | tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-medium-v3.1") 14 | model = AutoModelForCausalLM.from_pretrained("meetkai/functionary-medium-v3.1", device_map="auto", trust_remote_code=True) 15 | 16 | tools = [ 17 | { 18 | "type": "function", 19 | "function": { 20 | "name": "get_current_weather", 21 | "description": "Get the current weather", 22 | "parameters": { 23 | "type": "object", 24 | "properties": { 25 | "location": { 26 | "type": "string", 27 | "description": "The city and state, e.g. 
San Francisco, CA" 28 | } 29 | }, 30 | "required": ["location"] 31 | } 32 | } 33 | } 34 | ] 35 | messages = [{"role": "user", "content": "What is the weather in Istanbul and Singapore respectively?"}] 36 | 37 | final_prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False) 38 | inputs = tokenizer(final_prompt, return_tensors="pt").to("cuda") 39 | pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer) 40 | print(tokenizer.decode(pred.cpu()[0])) 41 | ''' 42 | 43 | 44 | class FC_Medium(ToolClass): 45 | def init(self): 46 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 47 | self.model = AutoModelForCausalLM.from_pretrained( 48 | self.model_path, 49 | torch_dtype="auto", 50 | device_map="auto", 51 | trust_remote_code=True, 52 | attn_implementation="flash_attention_2" 53 | ) 54 | 55 | def get_res(self, messages, functions, extra_args={}, more_info=None): 56 | text = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True, tokenize=False) 57 | inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device) 58 | self.tokenizer.pad_token = "<|eot_id|>" 59 | outputs = self.model.generate(**inputs, max_new_tokens=512) 60 | return self.decode_res(inputs, outputs) 61 | 62 | def _get_res(self, messages): 63 | # outputs = self.pipeline(messages, max_new_tokens=512) 64 | text = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False) 65 | inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device) 66 | outputs = self.model.generate( 67 | **inputs, 68 | max_new_tokens=512 69 | ) 70 | return inputs, outputs 71 | 72 | def decode_res(self, prompt, outputs): 73 | # print(len(prompt)) 74 | # print(type(outputs), outputs) 75 | # generated_ids = [ 76 | # output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt.input_ids, outputs) 77 | # ] 78 | generated_ids = [ 79 | output_ids[len(input_ids):] for input_ids, output_ids in zip(prompt.input_ids, outputs) 80 | ] 81 | return self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/gorilla.py: -------------------------------------------------------------------------------- 1 | from .tool_class_base import ToolClass 2 | from transformers import AutoModelForCausalLM, AutoTokenizer 3 | 4 | 5 | class Gorilla(ToolClass): 6 | def init(self): 7 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 8 | self.model = AutoModelForCausalLM.from_pretrained( 9 | self.model_path, 10 | torch_dtype='auto', 11 | device_map='auto', 12 | trust_remote_code=True 13 | ) 14 | 15 | def _get_res(self, messages): 16 | inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device) 17 | outputs = self.model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id) 18 | return outputs, inputs 19 | 20 | def decode_res(self, outputs, inputs): 21 | return self.tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True) 22 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/hammer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .tool_class_base import ToolClass 4 | from transformers import AutoModelForCausalLM, 
AutoTokenizer 5 | 6 | 7 | ''' 8 | tokenizer = AutoTokenizer.from_pretrained("MadeAgents/Hammer2.1-7b") 9 | model = AutoModelForCausalLM.from_pretrained("MadeAgents/Hammer2.1-7b", torch_dtype=torch.bfloat16, device_map="auto") 10 | 11 | # Example conversation 12 | messages = [ 13 | {"role": "user", "content": "What's the weather like in New York?"}, 14 | {"role": "assistant","content": '```\n{"name": "get_weather", "arguments": {"location": "New York, NY ", "unit": "celsius"}\n```'}, 15 | {"role": "tool", "name": "get_weather", "content": '{"temperature": 72, "description": "Partly cloudy"}'}, 16 | {"role": "user", "content": "Now, search for the weather in San Francisco."} 17 | ] 18 | 19 | # Example function definition (optional) 20 | tools = [ 21 | { 22 | "name": "get_weather", 23 | "description": "Get the current weather for a location", 24 | "parameters": { 25 | "type": "object", 26 | "properties": { 27 | "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, 28 | "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit of temperature to return"} 29 | }, 30 | "required": ["location"] 31 | } 32 | }, 33 | { 34 | "name": "respond", 35 | "description": "When you are ready to respond, use this function. This function allows the assistant to formulate and deliver appropriate replies based on the input message and the context of the conversation. Generate a concise response for simple questions, and a more detailed response for complex questions.", 36 | "parameters": { 37 | "type": "object", 38 | "properties": { 39 | "message": {"type": "string", "description": "The content of the message to respond to."} 40 | }, 41 | "required": ["message"] 42 | } 43 | } 44 | ] 45 | 46 | inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt") 47 | inputs = {k: v.to(model.device) for k, v in inputs.items()} 48 | out = model.generate(**inputs, max_new_tokens=128) 49 | print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)) 50 | ''' 51 | 52 | 53 | class Hammer(ToolClass): 54 | def init(self): 55 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 56 | self.model = AutoModelForCausalLM.from_pretrained( 57 | self.model_path, 58 | torch_dtype=torch.bfloat16, 59 | device_map="auto", 60 | # attn_implementation="flash_attention_2" 61 | ) 62 | 63 | def remove_function_object(self, functions): 64 | functions_ = [] 65 | for func in functions: 66 | if "function" in func and "name" in func["function"]: 67 | func = func["function"] 68 | functions_.append(func) 69 | return functions_ 70 | 71 | def get_res(self, messages, functions, extra_args={}, more_info=None): 72 | functions = self.remove_function_object(functions) 73 | inputs = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True, return_dict=True, return_tensors="pt") 74 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 75 | outputs = self.model.generate(**inputs, max_new_tokens=128) 76 | return self.decode_res(inputs, outputs) 77 | 78 | def _get_res(self, messages): 79 | # outputs = self.pipeline(messages, max_new_tokens=512) 80 | inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_dict=True, return_tensors="pt") 81 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 82 | outputs = self.model.generate( 83 | **inputs, 84 | max_new_tokens=128 85 | ) 86 | return inputs, outputs 87 | 88 
| def decode_res(self, prompt, outputs): 89 | # print(len(prompt)) 90 | # print(type(outputs), outputs) 91 | return self.tokenizer.decode(outputs[0][len(prompt["input_ids"][0]):], skip_special_tokens=True) 92 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/llama.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import transformers 4 | 5 | from .tool_class_base import ToolClass 6 | from transformers import AutoModelForCausalLM, AutoTokenizer 7 | 8 | 9 | logger = logging.getLogger() 10 | 11 | 12 | class Llama(ToolClass): 13 | def init(self): 14 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 15 | self.model = AutoModelForCausalLM.from_pretrained( 16 | self.model_path, 17 | torch_dtype='auto', 18 | device_map='auto', 19 | trust_remote_code=True, 20 | attn_implementation="flash_attention_2" 21 | ) 22 | self.pipeline = transformers.pipeline( 23 | "text-generation", 24 | model=self.model, 25 | tokenizer=self.tokenizer, 26 | ) 27 | 28 | def self_formated_template(self, messages ,functions): 29 | formatted_prompt = "<|begin_of_text|>" 30 | 31 | system_message = "" 32 | remaining_messages = messages 33 | if messages[0]["role"] == "system": 34 | system_message = messages[0]["content"].strip() 35 | remaining_messages = messages[1:] 36 | 37 | formatted_prompt += "<|start_header_id|>system<|end_header_id|>\n\n" 38 | formatted_prompt += "Environment: ipython\n" 39 | formatted_prompt += "Cutting Knowledge Date: December 2023\n" 40 | formatted_prompt += "Today Date: 26 Jul 2024\n\n" 41 | formatted_prompt += system_message + "<|eot_id|>" 42 | 43 | # Llama pass in custom tools in first user message 44 | is_first_user_message = True 45 | for message in remaining_messages: 46 | if message["role"] == "user" and is_first_user_message: 47 | is_first_user_message = False 48 | formatted_prompt += "<|start_header_id|>user<|end_header_id|>\n\n" 49 | formatted_prompt += "Given the following functions, please respond with a JSON for a function call " 50 | formatted_prompt += ( 51 | "with its proper arguments that best answers the given prompt.\n\n" 52 | ) 53 | formatted_prompt += 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
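                # The JSON schema of every tool is appended below into this same first user message.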
54 | formatted_prompt += "Do not use variables.\n\n" 55 | for func in functions: 56 | formatted_prompt += json.dumps(func, indent=4) + "\n\n" 57 | formatted_prompt += f"{message['content'].strip()}<|eot_id|>" 58 | 59 | elif message["role"] == "tool": 60 | formatted_prompt += "<|start_header_id|>ipython<|end_header_id|>\n\n" 61 | if isinstance(message["content"], (dict, list)): 62 | formatted_prompt += json.dumps(message["content"]) 63 | else: 64 | formatted_prompt += message["content"] 65 | formatted_prompt += "<|eot_id|>" 66 | 67 | else: 68 | formatted_prompt += f"<|start_header_id|>{message['role']}<|end_header_id|>\n\n{message['content'].strip()}<|eot_id|>" 69 | 70 | formatted_prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n" 71 | return formatted_prompt 72 | 73 | def remove_function_object(self, functions): 74 | functions_ = [] 75 | for func in functions: 76 | if "function" in func and "name" in func["function"]: 77 | func = func["function"] 78 | functions_.append(func) 79 | return functions_ 80 | 81 | def get_res(self, messages, functions, extra_args={}, more_info=None): 82 | last_role = None 83 | for m in messages: 84 | if last_role is None: 85 | last_role = m["role"] 86 | continue 87 | assert last_role != m["role"], "Message role cannot be the same." 88 | last_role = m["role"] 89 | assert messages[-1]["role"] in ["tool", "user"] 90 | functions = self.remove_function_object(functions) 91 | if "date" in extra_args: 92 | date_string = extra_args["date"] 93 | logger.info(f"using date: {date_string}") 94 | prompt = self.pipeline.tokenizer.apply_chat_template(messages, tools=functions, date_string=date_string, tokenize=False, add_generation_prompt=True) 95 | else: 96 | prompt = self.pipeline.tokenizer.apply_chat_template(messages, tools=functions, tokenize=False, add_generation_prompt=True) 97 | # prompt = self.self_formated_template(messages, functions) 98 | outputs = self.pipeline( 99 | prompt, 100 | max_new_tokens=1024, 101 | do_sample=True, 102 | ) 103 | 104 | return outputs[0]["generated_text"][len(prompt):] 105 | 106 | def _get_res(self, messages): 107 | # outputs = self.pipeline(messages, max_new_tokens=512) 108 | print("just messages") 109 | prompt = self.pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) 110 | outputs = self.pipeline( 111 | prompt, 112 | max_new_tokens=1024, 113 | do_sample=True, 114 | ) 115 | return prompt, outputs 116 | 117 | def decode_res(self, prompt, outputs): 118 | # print(len(prompt)) 119 | # print(type(outputs), outputs) 120 | return outputs[0]["generated_text"][len(prompt):] 121 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/tool_ace.py: -------------------------------------------------------------------------------- 1 | from .tool_class_base import ToolClass 2 | from transformers import AutoModelForCausalLM, AutoTokenizer 3 | 4 | 5 | class ToolACE(ToolClass): 6 | def init(self): 7 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 8 | self.model = AutoModelForCausalLM.from_pretrained( 9 | self.model_path, 10 | torch_dtype='auto', 11 | device_map='auto' 12 | ) 13 | 14 | def format_message(self, messages, functions, more_info=None): 15 | # You can modify the prompt for your task 16 | system_prompt = """You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 
17 | If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. 18 | You should only return the function call in tools call sections. 19 | 20 | If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] 21 | You SHOULD NOT include any other text in the response. 22 | Here is a list of functions in JSON format that you can invoke.\n{functions}\n 23 | """ 24 | tools = [] 25 | for func in functions: 26 | if "function" in func and "name" in func["function"]: 27 | tools.append(func["function"]) 28 | else: 29 | tools.append(func) 30 | 31 | if messages[0]["role"] == "system": 32 | messages = messages[1:] 33 | messages = [ 34 | {'role': 'system', 'content': system_prompt.format(functions=tools)}, 35 | ] + messages 36 | return messages 37 | 38 | def _get_res(self, messages): 39 | inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device) 40 | outputs = self.model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id) 41 | return outputs, inputs 42 | 43 | def decode_res(self, outputs, inputs): 44 | return self.tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True) 45 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/tool_class_base.py: -------------------------------------------------------------------------------- 1 | class ToolClass: 2 | def __init__(self, name, model_path): 3 | self.name = name 4 | self.model_path = model_path 5 | self.init() 6 | 7 | def init(self): 8 | pass 9 | 10 | def get_res(self, messages, functions, extra_args={}, more_info=None): 11 | last_role = None 12 | for m in messages: 13 | if last_role is None: 14 | last_role = m["role"] 15 | continue 16 | assert last_role != m["role"], "Message role cannot be the same." 17 | last_role = m["role"] 18 | assert messages[-1]["role"] in ["tool", "user"] 19 | print("tool base get_res") 20 | messages_ = self.format_message(messages, functions, more_info) 21 | outputs, inputs = self._get_res(messages_) 22 | return self.decode_res(outputs, inputs) 23 | 24 | def get_messages_res(self, messages, extra_args={}, more_info=None): 25 | last_role = None 26 | for m in messages: 27 | if last_role is None: 28 | last_role = m["role"] 29 | continue 30 | assert last_role != m["role"], "Message role cannot be the same." 31 | assert "content" in m, "Message content cannot be empty." 
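            # The two asserts above enforce a well-formed history: roles must alternate and every message must carry content.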
32 | last_role = m["role"] 33 | assert messages[-1]["role"] in ["tool", "user"] 34 | outputs, inputs = self._get_res(messages) 35 | return self.decode_res(outputs, inputs) 36 | 37 | def format_message(self, messages, functions, more_info=None): 38 | return messages 39 | 40 | def _get_res(self, messages): 41 | raise NotImplementedError 42 | 43 | def decode_res(self, res): 44 | return res 45 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/tool_model_map.py: -------------------------------------------------------------------------------- 1 | from tool_class.tool_ace import ToolACE 2 | from tool_class.xlam import Xlam 3 | from tool_class.xlam2 import Xlam2 4 | from tool_class.gorilla import Gorilla 5 | from tool_class.llama import Llama 6 | from tool_class.qwen import Qwen 7 | from tool_class.deepseek import DeepSeek 8 | from tool_class.chatglm import ChatGLM 9 | from tool_class.watt import Watt 10 | from tool_class.fc_medium import FC_Medium 11 | from tool_class.hammer import Hammer 12 | 13 | 14 | tool_model_map = { 15 | "toolace": ToolACE, 16 | "toolace2": ToolACE, 17 | "xlam": Xlam, 18 | "xlam2-70b": Xlam2, 19 | "xlam2-32b": Xlam2, 20 | "xlam2-8b": Xlam2, 21 | "xlam2-3b": Xlam2, 22 | "xlam2-1b": Xlam2, 23 | "gorilla": Gorilla, 24 | "deepseek-r1": DeepSeek, 25 | "deepseek-v3": DeepSeek, 26 | "chatglm": ChatGLM, 27 | "fcm3.1": FC_Medium, 28 | ## Watt 29 | "watt70b": Watt, 30 | "watt8b": Watt, 31 | ## Hammer 32 | "hammer7b": Hammer, 33 | "hammer3b": Hammer, 34 | "hammer1.5b": Hammer, 35 | "hammer0.5b": Hammer, 36 | ## LLAMA 37 | "llama70b": Llama, 38 | "llama8b": Llama, 39 | "llama3b": Llama, 40 | "llama1b": Llama 41 | } 42 | 43 | tool_model_path_map = { 44 | # toolace 45 | "toolace": "/xxx/model/ToolACE-8B", 46 | "toolace2": "/xxx/ToolACE-2-Llama-3.1-8B", 47 | # xlam 48 | "xlam": "/xxx/model/xLAM-7b-fc-r", 49 | "xlam2-70b": "/xxx/Llama-xLAM-2-70b-fc-r", 50 | "xlam2-32b": "/xxx/xLAM-2-32b-fc-r", 51 | "xlam2-8b": "/xxx/Llama-xLAM-2-8b-fc-r", 52 | "xlam2-3b": "/xxx/xLAM-2-3b-fc-r", 53 | "xlam2-1b": "/xxx/xLAM-2-1b-fc-r", 54 | # Watt 55 | "watt70b": "/xxx/model/watt-tool-70B", 56 | "watt8b": "/xxx/model/watt-tool-8B", 57 | # Hammer2.1 58 | "hammer7b": "/xxx/model/Hammer2.1-7b", 59 | "hammer3b": "/xxx/model/Hammer2.1-3b", 60 | "hammer1.5b": "/xxx/model/Hammer2.1-1.5b", 61 | "hammer0.5b": "/xxx/model/Hammer2.1-0.5b", 62 | # other 63 | "gorilla": "/xxx/model/gorilla-openfunctions-v2", 64 | "deepseek-r1": "/xxx/model/DeepSeek-R1", 65 | "deepseek-v3": "/xxx/model/DeepSeek-V3", 66 | "chatglm": "/xxx/model/glm-4-9b-chat-hf", 67 | "fcm3.1": "/xxx/model/functionary-medium-v3.1", 68 | # LLAMA3.3 69 | "llama70b": "/xxx/model/Llama-3.3-70B-Instruct", 70 | "llama8b": "/xxx/model/Meta-Llama-3.1-8B-Instruct", 71 | "llama3b": "/xxx/model/Llama-3.2-3B-Instruct", 72 | "llama1b": "/xxx/model/Llama-3.2-1B-Instruct" 73 | } 74 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/watt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .tool_class_base import ToolClass 4 | from transformers import AutoModelForCausalLM, AutoTokenizer 5 | 6 | 7 | logger = logging.getLogger() 8 | 9 | 10 | class Watt(ToolClass): 11 | def init(self): 12 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 13 | self.model = AutoModelForCausalLM.from_pretrained( 14 | self.model_path, 15 | torch_dtype="auto", 16 | device_map="auto", 17 | # 
attn_implementation="flash_attention_2" 18 | ) 19 | # Example usage (adapt as needed for your specific tool usage scenario) 20 | self.system_prompt = """You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 21 | If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. 22 | You should only return the function call in tools call sections. 23 | 24 | If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] 25 | You SHOULD NOT include any other text in the response. 26 | Here is a list of functions in JSON format that you can invoke.\n{functions}\n 27 | """ 28 | 29 | def remove_function_object(self, functions): 30 | functions_ = [] 31 | for func in functions: 32 | if "function" in func and "name" in func["function"]: 33 | func = func["function"] 34 | functions_.append(func) 35 | return functions_ 36 | 37 | def get_res(self, messages, functions, extra_args={}, more_info=None): 38 | functions = self.remove_function_object(functions) 39 | if messages[0]["role"] == "system": 40 | system_content = self.system_prompt.format(functions=functions) + messages[0]["content"].replace("\n", "") 41 | messages[0]["content"] = system_content 42 | else: 43 | system_content = self.system_prompt.format(functions=functions) 44 | messages = [{"role": "system", "content": system_content}] + messages 45 | if "date" in extra_args: 46 | date_string = extra_args["date"] 47 | logger.info("Using date string {}".format(date_string)) 48 | inputs = self.tokenizer.apply_chat_template(messages, date_string=date_string, add_generation_prompt=True, return_tensors="pt").to(self.model.device) 49 | else: 50 | inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device) 51 | outputs = self.model.generate(inputs, max_new_tokens=2048, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id) 52 | return self.decode_res(inputs, outputs) 53 | 54 | def _get_res(self, messages): 55 | # outputs = self.pipeline(messages, max_new_tokens=512) 56 | inputs = self.tokenizer.apply_chat_template( 57 | messages, add_generation_prompt=True, return_tensors="pt" 58 | ).to(self.model.device) 59 | outputs = self.model.generate( 60 | inputs, max_new_tokens=2048, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id 61 | ) 62 | return inputs, outputs 63 | 64 | def decode_res(self, prompt, outputs): 65 | return self.tokenizer.decode(outputs[0][len(prompt[0]):], skip_special_tokens=True) 66 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/xlam.py: -------------------------------------------------------------------------------- 1 | import json 2 | import torch 3 | 4 | from .tool_class_base import ToolClass 5 | from transformers import AutoModelForCausalLM, AutoTokenizer 6 | 7 | 8 | torch.random.manual_seed(0) 9 | 10 | 11 | class Xlam(ToolClass): 12 | def init(self): 13 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 14 | self.model = AutoModelForCausalLM.from_pretrained( 15 | self.model_path, 16 | torch_dtype='auto', 17 | device_map='auto', 18 | trust_remote_code=True 19 | ) 20 | 21 | # Helper function to convert openai 
format tools to our more concise xLAM format 22 | def convert_to_xlam_tool(self, tools): 23 | '''''' 24 | if isinstance(tools, dict): 25 | return { 26 | "name": tools["name"], 27 | "description": tools["description"], 28 | "parameters": {k: v for k, v in tools["parameters"].get("properties", {}).items()} 29 | } 30 | elif isinstance(tools, list): 31 | return [self.convert_to_xlam_tool(tool) for tool in tools] 32 | else: 33 | return tools 34 | 35 | # Helper function to build the input prompt for our model 36 | 37 | def build_prompt(self, task_instruction: str, format_instruction: str, tools: list, query: str, conversation_history: list, system_message: str): 38 | if system_message: 39 | prompt = f"{system_message}\n\n" 40 | else: 41 | prompt = "" 42 | prompt += f"[BEGIN OF TASK INSTRUCTION]\n{task_instruction}\n[END OF TASK INSTRUCTION]\n\n" 43 | prompt += f"[BEGIN OF AVAILABLE TOOLS]\n{json.dumps(tools)}\n[END OF AVAILABLE TOOLS]\n\n" 44 | prompt += f"[BEGIN OF FORMAT INSTRUCTION]\n{format_instruction}\n[END OF FORMAT INSTRUCTION]\n\n" 45 | prompt += f"[BEGIN OF QUERY]\n{query}\n[END OF QUERY]\n\n" 46 | 47 | if len(conversation_history) > 0: prompt += self.build_conversation_history_prompt(conversation_history) 48 | return prompt 49 | 50 | def build_conversation_history_prompt(self, conversation_history: str): 51 | parsed_history = [] 52 | for step_data in conversation_history: 53 | parsed_history.append({ 54 | "step_id": step_data["step_id"], 55 | "thought": step_data["thought"], 56 | "tool_calls": step_data["tool_calls"], 57 | "next_observation": step_data["next_observation"], 58 | "user_input": step_data['user_input'] 59 | }) 60 | 61 | history_string = json.dumps(parsed_history) 62 | return f"\n[BEGIN OF HISTORY STEPS]\n{history_string}\n[END OF HISTORY STEPS]\n" 63 | 64 | 65 | def format_message(self, messages, functions, more_info=None): 66 | # You can modify the prompt for your task 67 | task_instruction = """ 68 | You are an expert in composing functions. You are given a question and a set of possible functions. 69 | Based on the question, you will need to make one or more function/tool calls to achieve the purpose. 70 | If none of the functions can be used, point it out and refuse to answer. 71 | If the given question lacks the parameters required by the function, also point it out. 72 | """.strip() 73 | 74 | format_instruction = """ 75 | The output MUST strictly adhere to the following JSON format, and NO other text MUST be included. 76 | The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'. 77 | ``` 78 | { 79 | "tool_calls": [ 80 | {"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}}, 81 | ... 
(more tool calls as required) 82 | ] 83 | } 84 | ``` 85 | """.strip() 86 | 87 | tools = [] 88 | for func in functions: 89 | if "function" in func and "name" in func["function"]: 90 | tools.append(func["function"]) 91 | else: 92 | tools.append(func) 93 | 94 | system = None 95 | if messages[0]["role"] == "system": 96 | system = messages[0]["content"] 97 | messages = messages[1:] 98 | 99 | xlam_format_tools = self.convert_to_xlam_tool(tools) if len(tools) != 0 else [] 100 | conversation_history = self.build_conversation_history(messages) 101 | query = next((msg['content'] for msg in reversed(messages) if msg['role'] == 'user'), "") 102 | messages = self.build_prompt(task_instruction, format_instruction, xlam_format_tools, query, conversation_history, system) 103 | messages = [{'role': 'user', 'content': messages}] 104 | # print(messages) 105 | return messages 106 | 107 | def build_conversation_history(self, messages): 108 | history = [] 109 | for msg in messages: 110 | if msg['role'] == 'tool': 111 | history[-1]['next_observation'] = msg['content'] 112 | else: 113 | history.append({ 114 | 'step_id': len(history) + 1, 115 | 'thought': msg.get('content', ''), 116 | 'tool_calls': [msg['tool_calls']] if 'tool_calls' in msg else [], 117 | 'next_observation': '', 118 | 'user_input': msg['content'] if msg['role'] == 'user' else '' 119 | }) 120 | return history 121 | 122 | def _get_res(self, messages): 123 | inputs = self.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(self.model.device) 124 | outputs = self.model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id) 125 | return outputs, inputs 126 | 127 | def decode_res(self, outputs, inputs): 128 | return self.tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True) 129 | -------------------------------------------------------------------------------- /c3_bench/bench_test/tool_class/xlam2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .tool_class_base import ToolClass 4 | from transformers import AutoModelForCausalLM, AutoTokenizer 5 | 6 | 7 | class Xlam2(ToolClass): 8 | def init(self): 9 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 10 | self.model = AutoModelForCausalLM.from_pretrained( 11 | self.model_path, 12 | torch_dtype=torch.bfloat16, 13 | device_map="auto", 14 | # attn_implementation="flash_attention_2" 15 | ) 16 | 17 | def get_res(self, messages, functions, extra_args={}, more_info=None): 18 | inputs = self.tokenizer.apply_chat_template(messages, tools=functions, add_generation_prompt=True, 19 | return_dict=True, return_tensors="pt") 20 | input_ids_len = inputs["input_ids"].shape[-1] 21 | inputs = {k: v.to(self.model.device) for k, v in inputs.items()} 22 | outputs = self.model.generate(**inputs, max_new_tokens=256) 23 | return self.decode_res(input_ids_len, outputs) 24 | 25 | def _get_res(self, messages): 26 | # outputs = self.pipeline(messages, max_new_tokens=512) 27 | print("just messages") 28 | text = self.tokenizer.apply_chat_template( 29 | messages, 30 | tokenize=False, 31 | add_generation_prompt=True, 32 | ) 33 | model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device) 34 | outputs = self.model.generate( 35 | **model_inputs, 36 | max_new_tokens=512 37 | ) 38 | return model_inputs, outputs 39 | 40 | def decode_res(self, input_ids_len, outputs): 41 | # print(len(prompt)) 42 | # print(type(outputs), 
outputs)
43 |         generated_tokens = outputs[:, input_ids_len:]  # Slice the output to get only the newly generated tokens
44 |         return self.tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
45 | 
--------------------------------------------------------------------------------
/c3_bench/bench_test/tool_parser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/bench_test/tool_parser/__init__.py
--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .readnwrite import *
2 | from .tools import *
3 | from .date import *
4 | from .parse_res import *
--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/date.py:
--------------------------------------------------------------------------------
1 | import time
2 | import random
3 | 
4 | from datetime import datetime
5 | 
6 | 
7 | def get_random_date(a1=(2024, 1, 1, 0, 0, 0, 0, 0, 0), a2=(2024, 12, 31, 23, 59, 59, 0, 0, 0)):
8 |     start = time.mktime(a1)  # build the start timestamp
9 |     end = time.mktime(a2)  # build the end timestamp
10 | 
11 |     t = random.randint(int(start), int(end))  # draw a random timestamp between start and end (time.mktime returns floats, so cast first)
12 |     date_tuple = time.localtime(t)  # turn the timestamp into a time tuple
13 |     date = time.strftime("%Y-%m-%d %H:%M:%S", date_tuple)  # format the time tuple as a string (e.g. 1976-05-21)
14 |     return date
15 | 
16 | 
17 | def get_current_date():
18 |     current_time = time.time()  # get the current timestamp
19 |     current_date_tuple = time.localtime(current_time)  # turn the timestamp into a time tuple
20 |     current_date = time.strftime("%Y-%m-%d %H:%M:%S", current_date_tuple)  # format the time tuple as a string
21 |     return current_date
22 | 
23 | 
24 | def add_weekday_date(date):
25 |     if "星期" in date:
26 |         return date
27 |     date = date.replace("当前时间:", "").replace("环境:", "")
28 |     date_obj = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
29 |     weekday_num = date_obj.weekday()
30 |     weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
31 |     weekday = weekdays[weekday_num]
32 |     date = "当前时间:" + date + " " + weekday
33 |     return date
34 | 
35 | 
36 | def get_current_date_with_weekday():
37 |     return add_weekday_date(get_current_date())
--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/parse_res.py:
--------------------------------------------------------------------------------
1 | import ast
2 | 
3 | 
4 | def get_keywords(value):
5 |     if isinstance(value, ast.Str):
6 |         value = value.s
7 |     elif isinstance(value, ast.Num):
8 |         value = value.n
9 |     elif isinstance(value, ast.UnaryOp):
10 |         if isinstance(value.op, ast.USub):
11 |             operand = get_keywords(value.operand)
12 |             value = -operand
13 |     elif isinstance(value, ast.BinOp):
14 |         left = get_keywords(value.left)
15 |         right = get_keywords(value.right)
16 |         if isinstance(value.op, ast.Add):
17 |             value = left + right
18 |         elif isinstance(value.op, ast.Sub):
19 |             value = left - right
20 |         elif isinstance(value.op, ast.Mult):
21 |             value = left * right
22 |         elif isinstance(value.op, ast.Div):
23 |             value = left / right
24 |     elif isinstance(value, ast.Subscript):
25 |         value = value.slice.value
26 |         if isinstance(value.slice, ast.Index):
27 |             value = value.slice.value
28 |         elif isinstance(value.slice, ast.Slice):
29 |             value = value.slice.value
30 |         elif isinstance(value.slice, ast.Ellipsis):
31 |             value = "..."
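    # ast.Str, ast.Num and ast.NameConstant are deprecated aliases since Python 3.8; modern parsers emit ast.Constant, which these isinstance checks still match.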
32 |     elif isinstance(value, ast.NameConstant):
33 |         value = value.value
34 |     elif isinstance(value, ast.Name):
35 |         if value.id.lower() == "true":
36 |             value = True
37 |         elif value.id.lower() == "false":
38 |             value = False
39 |         else:
40 |             value = value.id
41 |     elif isinstance(value, ast.List):
42 |         value = [get_keywords(elt) for elt in value.elts]
43 |     elif isinstance(value, ast.Tuple):
44 |         value = tuple([get_keywords(elt) for elt in value.elts])
45 |     elif isinstance(value, ast.Dict):
46 |         value = {
47 |             get_keywords(key): get_keywords(val)
48 |             for key, val in zip(value.keys, value.values)
49 |         }
50 |     else:
51 |         raise Exception("Unsupported type: {}".format(type(value)))
52 |     return value
53 | 
54 | 
55 | def parse_string_to_function(input_str):
56 |     parsed_input = ast.parse(input_str)
57 | 
58 |     function_name = parsed_input.body[0].value.func.id
59 |     arguments = parsed_input.body[0].value.args
60 |     keywords = parsed_input.body[0].value.keywords
61 | 
62 |     args_list = []
63 |     for keyword in keywords:
64 |         key = keyword.arg
65 |         value = keyword.value
66 |         value = get_keywords(value)
67 |         args_list.append((key, value))
68 | 
69 |     return function_name, args_list
70 | 
71 | 
--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/readnwrite.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pandas as pd
3 | 
4 | from glob import glob
5 | 
6 | 
7 | def read_json_file_to_list(input_file):
8 |     result = []
9 |     with open(input_file) as fin:
10 |         for line in fin:
11 |             obj = json.loads(line)
12 |             result.append(obj)
13 |     return result
14 | 
15 | 
16 | def read_file_to_json(path, skip_path=None):
17 |     data = []
18 |     files = glob(path)
19 |     print("Read files:")
20 |     for file_ in files:
21 |         if skip_path is not None and (
22 |             file_ == skip_path
23 |             or file_ in skip_path
24 |         ):
25 |             continue
26 |         with open(file_, "r") as f:
27 |             tmps = [json.loads(_) for _ in f.readlines()]
28 |             print(f"{file_}: {len(tmps)}")
29 |             data += tmps
30 |     return data
31 | 
32 | 
33 | def write_json_to_file(data, path, func=None, print_f=True):
34 |     with open(path, "w") as f:
35 |         for item in data:
36 |             if func != None:
37 |                 item = func(item)
38 |             f.write(json.dumps(item, ensure_ascii=False, sort_keys=True))
39 |             f.write("\n")
40 |     if print_f:
41 |         print(f"Write {len(data)} items to {path}\nSamples: {json.dumps(item, ensure_ascii=False)}")
42 |     else:
43 |         print(f"Write {len(data)} items to {path}")
44 | 
45 | 
46 | def read_csv_to_dict_list(file_path):
47 |     # Read the csv file with pandas
48 |     df = pd.read_csv(file_path, keep_default_na=False, na_values=[''])
49 | 
50 |     # Rename the "Unnamed: {num}" columns
51 |     all_none_from = None
52 |     for col in df.columns:
53 |         if col.startswith("Unnamed") and df[col].isnull().all():
54 |             all_none_from = df.columns.get_loc(col)
55 |             break
56 | 
57 |     # If such a column exists, drop it and every column after it
58 |     if all_none_from is not None:
59 |         df = df.iloc[:, :all_none_from]
60 | 
61 |     df.rename(columns=lambda x: f"key{x.split(': ')[1]}" if x.startswith("Unnamed") else x, inplace=True)
62 | 
63 |     # Convert the DataFrame to a list of dicts
64 |     dict_list = df.to_dict('records')
65 | 
66 |     # Filter out dicts whose values are all None
67 |     filtered_dict_list = []
68 |     for row in dict_list:
69 |         # Replace empty strings with None and check whether all values are None
70 |         all_none = True
71 |         for key in list(row.keys()):
72 |             if pd.isna(row[key]):
73 |                 row[key] = None
74 |             else:
75 |                 all_none = False
76 |         # If not all values are None, add the row to the result list
77 |         if not all_none:
78 |             filtered_dict_list.append(row)
79 | 
80 |     print(f"Read file: {file_path}\ndata
--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/readnwrite.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pandas as pd
3 | 
4 | from glob import glob
5 | 
6 | 
7 | def read_json_file_to_list(input_file):
8 |     result = []
9 |     with open(input_file) as fin:
10 |         for line in fin:
11 |             obj = json.loads(line)
12 |             result.append(obj)
13 |     return result
14 | 
15 | 
16 | def read_file_to_json(path, skip_path=None):
17 |     data = []
18 |     files = glob(path)
19 |     print("Read files:")
20 |     for file_ in files:
21 |         if skip_path is not None and (
22 |             file_ == skip_path
23 |             or file_ in skip_path
24 |         ):
25 |             continue
26 |         with open(file_, "r") as f:
27 |             tmps = [json.loads(_) for _ in f.readlines()]
28 |             print(f"{file_}: {len(tmps)}")
29 |             data += tmps
30 |     return data
31 | 
32 | 
33 | def write_json_to_file(data, path, func=None, print_f=True):
34 |     with open(path, "w") as f:
35 |         for item in data:
36 |             if func is not None:
37 |                 item = func(item)
38 |             f.write(json.dumps(item, ensure_ascii=False, sort_keys=True))
39 |             f.write("\n")
40 |     if print_f:
41 |         print(f"Write {len(data)} items to {path}\nSamples: {json.dumps(item, ensure_ascii=False)}")
42 |     else:
43 |         print(f"Write {len(data)} items to {path}")
44 | 
45 | 
46 | def read_csv_to_dict_list(file_path):
47 |     # Read the CSV file with pandas
48 |     df = pd.read_csv(file_path, keep_default_na=False, na_values=[''])
49 | 
50 |     # Rename the "Unnamed: {num}" columns
51 |     all_none_from = None
52 |     for col in df.columns:
53 |         if col.startswith("Unnamed") and df[col].isnull().all():
54 |             all_none_from = df.columns.get_loc(col)
55 |             break
56 | 
57 |     # If such columns exist, drop them
58 |     if all_none_from is not None:
59 |         df = df.iloc[:, :all_none_from]
60 | 
61 |     df.rename(columns=lambda x: f"key{x.split(': ')[1]}" if x.startswith("Unnamed") else x, inplace=True)
62 | 
63 |     # Convert the DataFrame to a list of dicts
64 |     dict_list = df.to_dict('records')
65 | 
66 |     # Filter out dicts whose values are all None
67 |     filtered_dict_list = []
68 |     for row in dict_list:
69 |         # Replace empty strings with None and check whether every value is None
70 |         all_none = True
71 |         for key in list(row.keys()):
72 |             if pd.isna(row[key]):
73 |                 row[key] = None
74 |             else:
75 |                 all_none = False
76 |         # Keep the row only if not every value is None
77 |         if not all_none:
78 |             filtered_dict_list.append(row)
79 | 
80 |     print(f"Read file: {file_path}\ndata length:{len(filtered_dict_list)}\nkeys:{filtered_dict_list[0].keys()}")
81 |     return filtered_dict_list
82 | 
83 | 
84 | def write_list_of_list_to_csv(list_of_list, csv_file_name):
85 |     # Check that list_of_list has at least two elements (column names plus at least one data row)
86 |     if len(list_of_list) < 2:
87 |         raise ValueError("List of list must contain at least one row of data along with column names.")
88 |     assert all([len(_) == len(list_of_list[0]) for _ in list_of_list])
89 | 
90 |     # The first element holds the column names
91 |     column_names = list_of_list[0]
92 | 
93 |     # The remaining elements are the data rows
94 |     data_rows = list_of_list[1:]
95 | 
96 |     # Build the DataFrame
97 |     df = pd.DataFrame(data_rows, columns=column_names)
98 | 
99 |     # Export to a CSV file
100 |     df.to_csv(csv_file_name, index=False)  # index=False means the row index is not written
101 |     print(f"Write data to {csv_file_name}\nSamples: {len(list_of_list)}")
102 | 
--------------------------------------------------------------------------------
/c3_bench/bench_test/utils/tools.py:
--------------------------------------------------------------------------------
1 | import os
2 | import uuid
3 | import datetime
4 | import argparse
5 | import ast
6 | import json
7 | import traceback
8 | 
9 | 
10 | def get_random_file_name(file_type, keys=None, need_time=False, need_uuid=True):
11 |     file_name = []
12 |     if need_time:
13 |         today = str(datetime.datetime.now()).replace(" ", "-").split(".")[0]
14 |         file_name.append(today)
15 |     if need_uuid:
16 |         file_name.append(str(uuid.uuid4())[:6])
17 |     if keys is not None:
18 |         file_name.append(str(keys))
19 |     return "_".join(file_name) + f".{file_type}"
20 | 
21 | 
22 | def get_random_pathname(path_, file_type, keys=None, need_time=False, need_uuid=True):
23 |     file_name = get_random_file_name(file_type, keys, need_time, need_uuid)
24 |     pathname = os.path.join(path_, file_name)
25 |     return pathname
26 | 
27 | 
28 | def str2bool(v):
29 |     '''
30 |     Transform string to bool.
31 | 
32 |     Arguments:
33 |         v (str): The value to be converted.
34 | 
35 |     Returns:
36 |         bool: The converted value.
37 |     '''
38 |     if isinstance(v, bool):
39 |         return v
40 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
41 |         return True
42 |     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
43 |         return False
44 |     else:
45 |         raise argparse.ArgumentTypeError('Boolean value expected.')
46 | 
47 | 
48 | def get_keywords(value):
49 |     if isinstance(value, ast.Str):
50 |         value = value.s
51 |     elif isinstance(value, ast.Num):
52 |         value = value.n
53 |     elif isinstance(value, ast.UnaryOp):
54 |         if isinstance(value.op, ast.USub):
55 |             operand = get_keywords(value.operand)
56 |             value = -operand
57 |     elif isinstance(value, ast.BinOp):
58 |         left = get_keywords(value.left)
59 |         right = get_keywords(value.right)
60 |         if isinstance(value.op, ast.Add):
61 |             value = left + right
62 |         elif isinstance(value.op, ast.Sub):
63 |             value = left - right
64 |         elif isinstance(value.op, ast.Mult):
65 |             value = left * right
66 |         elif isinstance(value.op, ast.Div):
67 |             value = left / right
68 |     elif isinstance(value, ast.Subscript):
69 |         # Check the slice node type before unwrapping it (pre-3.9 ASTs wrap the value node in ast.Index)
70 |         if isinstance(value.slice, ast.Index):
71 |             value = value.slice.value
72 |         elif isinstance(value.slice, ast.Ellipsis):
73 |             value = "..."
74 |         else:
75 |             value = value.slice
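        # Illustrative note (added): the List/Tuple/Dict branches below recurse through
        # this same decoder, so a keyword value node for y={"k": (1, -2)} comes back
        # as the plain Python object {'k': (1, -2)}.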
76 |     elif isinstance(value, ast.NameConstant):
77 |         value = value.value
78 |     elif isinstance(value, ast.Name):
79 |         if value.id.lower() == "true":
80 |             value = True
81 |         elif value.id.lower() == "false":
82 |             value = False
83 |         else:
84 |             value = value.id
85 |     elif isinstance(value, ast.List):
86 |         value = [get_keywords(elt) for elt in value.elts]
87 |     elif isinstance(value, ast.Tuple):
88 |         value = tuple([get_keywords(elt) for elt in value.elts])
89 |     elif isinstance(value, ast.Dict):
90 |         value = {
91 |             get_keywords(key): get_keywords(val)
92 |             for key, val in zip(value.keys, value.values)
93 |         }
94 |     else:
95 |         raise Exception("Unsupported type: {}".format(type(value)))
96 |     return value
97 | 
98 | 
99 | def properties_filter(dic_):  # Normalize one JSON-schema property: keep only known keys and coerce loose type names
100 |     if type(dic_) == dict:
101 |         dic_r = {}
102 |         for k in dic_:
103 |             if k not in ["parameters", "properties", "description", "type", "example_value", "enum", "items"]:
104 |                 continue
105 |             if k == "properties":
106 |                 dic_r["properties"] = {_: properties_filter(dic_[k][_]) for _ in dic_[k]}
107 |             elif k == "items":
108 |                 dic_r[k] = properties_filter(dic_[k])
109 |             elif k == "type":
110 |                 if "|" in dic_[k]:
111 |                     dic_[k] = dic_[k].split("|")[0]
112 |                 if dic_[k] == "float":
113 |                     r_ = "number"
114 |                 elif dic_[k] in ["list of dictionaries"]:
115 |                     r_ = "object"
116 |                 elif dic_[k] in ["int"]:
117 |                     r_ = "integer"
118 |                 elif dic_[k] in ["complex_string", "String", "UUID"]:
119 |                     r_ = "string"
120 |                 elif "enum" in dic_[k]:
121 |                     try:
122 |                         dic_r["enum"] = json.loads(dic_[k].replace("enum", ""))
123 |                         assert type(dic_r["enum"][0]) == str
124 |                         r_ = "string"
125 |                     except Exception:
126 |                         r_ = "string"
127 |                 elif type(dic_[k]) == dict:
128 |                     r_ = "object"
129 |                 else:
130 |                     r_ = dic_[k]
131 |                 assert r_ in ["string", "integer", "boolean", "array", "object", "number", "enum"], f"Wrong: {r_}"
132 |                 dic_r[k] = r_
133 |             elif k == "enum":
134 |                 if type(dic_[k]) == dict:
135 |                     enum_ = []
136 |                     for k_ in dic_[k]:
137 |                         assert type(dic_[k][k_]) == list
138 |                         enum_.extend(dic_[k][k_])
139 |                 else:
140 |                     enum_ = dic_[k]
141 |                 assert type(enum_) == list and all([type(_) in [str, int, float, dict, bool, list] for _ in enum_])
142 |                 dic_r[k] = dic_[k]
143 |             else:
144 |                 dic_r[k] = dic_[k]
145 |         return dic_r
146 |     else:
147 |         return dic_
148 | 
149 | 
150 | def functions_uniform(function):  # Recursively normalize tool schemas (accepts a single function dict or a list of them)
151 |     if type(function) == list and (
152 |         "function" in function[0]
153 |         or "name" in function[0]
154 |     ):
155 |         functions = []
156 |         for function_ in function:
157 |             functions.append(functions_uniform(function_))
158 |         return functions
159 |     function_ = {}
160 |     for key in function:
161 |         if key == "parameters":
162 |             if "properties" not in function[key]:
163 |                 function_[key] = {"type": "object", "properties": {}}
164 |             else:
165 |                 function_[key] = functions_uniform(function[key])
166 |         elif key == "properties":
167 |             function_[key] = {_: properties_filter(function[key][_]) for _ in function[key]}
168 |         elif key == "function":
169 |             function_[key] = functions_uniform(function[key])
170 |         else:
171 |             function_[key] = function[key]
172 |     return function_
173 | 
174 | 
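# A hedged, self-contained sketch of the normalization above (demo_tool is an
# invented schema, not benchmark data): loose type names such as "int" are
# coerced to JSON-schema names such as "integer".
if __name__ == "__main__":
    demo_tool = {
        "name": "demo_search",  # hypothetical tool name
        "parameters": {
            "type": "object",
            "properties": {"top_k": {"type": "int", "description": "How many results to return"}},
            "required": ["top_k"]
        }
    }
    print(functions_uniform(demo_tool))  # properties.top_k.type becomes "integer"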
175 | def remove_messages(messages, is_english=False):
176 |     new_messages = []
177 |     try:
178 |         role = "user"
179 |         for m in messages:
180 |             assert (
181 |                 m["role"] == "assistant"
182 |                 and role == "assistant"
183 |             ) or (
184 |                 m["role"] in ["user", "tool"]
185 |                 and role in ["user", "tool"]
186 |             )
187 |             role = "assistant" if role in ["user", "tool"] else "user"
188 |             if is_english:
189 |                 colon_idx = m["content"].find(":")
190 |                 if (
191 |                     colon_idx != -1 and
192 |                     m["content"][:colon_idx].lower() in [
193 |                         "ai", "ai agent", "user", "ai agent assistant", "planner", "observation", "tool"
194 |                     ]
195 |                 ):
196 |                     m['content'] = m["content"][colon_idx + 1:]
197 |             else:
198 |                 colon_idx = m["content"].find(":")
199 |                 if (
200 |                     colon_idx != -1 and
201 |                     m["content"][:colon_idx] in [
202 |                         "用户", "AI Agent助手", "AI Agent", "Planner", "Observation", "Tool"
203 |                     ]
204 |                 ):
205 |                     m['content'] = m["content"][colon_idx + 1:]
206 |             new_messages.append(m)
207 |     except Exception as e:
208 |         traceback.print_exc()
209 |     return new_messages
210 | 
--------------------------------------------------------------------------------
/c3_bench/bench_test/web_server.py:
--------------------------------------------------------------------------------
1 | import http.server
2 | import json
3 | import logging
4 | import os
5 | import sys
6 | import traceback
7 | import time
8 | 
9 | from tool_class.tool_model_map import tool_model_map, tool_model_path_map
10 | 
11 | 
12 | def get_current_date():
13 |     current_time = time.time()  # Get the current timestamp
14 |     current_date_tuple = time.localtime(current_time)  # Convert the timestamp to a time tuple
15 |     current_date = time.strftime("%Y-%m-%d", current_date_tuple)  # Format the time tuple as a string
16 |     return current_date
17 | 
18 | 
19 | # Set up logging
20 | logger = logging.getLogger()
21 | logger.setLevel(logging.INFO)
22 | 
23 | file_handler = logging.FileHandler(f'./log/server_{get_current_date()}.log', delay=True)  # delay=True so the file is not opened before the ./log directory is created below
24 | file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s:%(message)s'))
25 | 
26 | # Create a stream log handler
27 | stream_handler = logging.StreamHandler()
28 | stream_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s:%(message)s'))
29 | 
30 | # Attach the handlers to the logger
31 | logger.addHandler(file_handler)
32 | logger.addHandler(stream_handler)
33 | 
34 | 
35 | model_name = sys.argv[1]
36 | model = tool_model_map[model_name](model_name, tool_model_path_map[model_name])
37 | 
38 | 
39 | class RequestHandler(http.server.BaseHTTPRequestHandler):
40 |     def do_POST(self):
41 |         # Parse the JSON payload from the request body
42 |         content_length = int(self.headers['Content-Length'])
43 |         post_data = self.rfile.read(content_length)
44 |         try:
45 |             data = json.loads(post_data)
46 |             messages = data.get('messages', [])
47 |             tools = data.get('tools', [])
48 | 
49 |             answer = ""
50 |             error = None
51 |             st_time = time.time()
52 |             try:
53 |                 if len(tools) != 0:
54 |                     answer = model.get_res(messages, tools, extra_args=data)
55 |                 else:
56 |                     answer = model.get_messages_res(messages)
57 |             except Exception as e:
58 |                 error = traceback.format_exc()
59 |                 logging.error(f'Error handling request: {e}')
60 | 
61 |             # Log the request
62 |             logging.info(f'Received messages: {messages}')
63 |             logging.info(f'Received tools: {tools}')
64 | 
65 |             # Prepare the response payload
66 |             response = {
67 |                 'answer': answer,
68 |                 "model_name": model_name,
69 |                 "error": error if error else None,
70 |                 "time": time.time() - st_time
71 |             }
72 |             response_data = json.dumps(response).encode('utf-8')
73 | 
74 |             # Send the HTTP response
75 |             self.send_response(200)
76 |             self.send_header('Content-Type', 'application/json')
77 |             self.send_header('Content-Length', len(response_data))
78 |             self.end_headers()
79 |             self.wfile.write(response_data)
80 |         except json.JSONDecodeError as e:
81 |             # Log the error
82 |             logging.error(f'Error parsing JSON: {e}')
83 |             self.send_error(400, 'Invalid JSON')
84 |         except Exception as e:
85 |             # Log the error
86 |             logging.error(f'Error handling request: {e}')
87 |             self.send_error(500, 'Internal Server Error')
88 | 
89 | 
90 | # Make sure the log directory exists
91 | if not os.path.exists('./log'):
92 |     os.makedirs('./log')
93 | 
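# A hedged client-side example (assumes the default port below; the payload
# shape matches what do_POST parses):
#
#   import requests
#   payload = {"messages": [{"role": "user", "content": "hi"}], "tools": []}
#   print(requests.post("http://127.0.0.1:12345", json=payload).json()["answer"])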
94 | # Start the server
95 | port = 12345
96 | if len(sys.argv) > 2:
97 |     port = int(sys.argv[2])
98 | server_address = ('0.0.0.0', port)
99 | httpd = http.server.HTTPServer(server_address, RequestHandler)
100 | logging.info(f'Starting server on port {port}...')
101 | httpd.serve_forever()
102 | 
--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/__init__.py:
--------------------------------------------------------------------------------
1 | from .planner import planner
2 | from .agent_ask import agent_ask
3 | from .agent_answer import agent_answer
4 | from .agent_answer_chat import agent_answer_chat
5 | from .tool import tool
6 | from .user_answer_ask import user_answer_ask
7 | from .user_vague_answer_ask import user_vague_answer_ask
8 | from .user_single_tool import user_single_tool
9 | from .user_multi_tool import user_multi_tool
10 | from .user_multi_tool_parallel import user_multi_tool_parallel
11 | from .user_multi_tool_serial_parallel import user_multi_tool_serial_parallel
12 | from .user_ask import user_ask
13 | from .user_chat import user_chat
14 | from .user_continue_question import user_continue_question
15 | from .checker_planner import checker_planner
16 | from .checker_tool import checker_tool
--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/agent_answer.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | 
4 | from utils import get_all_tool_info, logger
5 | 
6 | 
7 | agent_system_prompt_template_zh = '''请你扮演一个超级智能体中的Agent助手,超级智能体拥有一系列外部工具,超级智能体中的Planner可以通过调用外部工具来解决用户任务,具体见[工具列表]。
8 | 你负责与用户进行交互,你根据Planner和Tool返回的结果,结合用户任务以及上下文对话信息进行回答,只有你的回答会展示给用户。
9 | 输出格式参考[Agent助手输出格式]。
10 | 
11 | {{{all_tool_required_info}}}
12 | 
13 | [环境信息]="""
14 | {{{env_info}}}
15 | """
16 | 
17 | [Agent助手输出格式]="""
18 | Agent助手:根据[要求],回复上下文对话信息中最近一轮以 "用户:" 开头的内容(不要重复这句话)
19 | """
20 | 
21 | [要求]="""
22 | 1、回复必须以 "Agent助手:" 开头。
23 | 2、根据上下文对话信息,总结回复最近一轮以 "用户:" 开头的用户任务。
24 | 3、使用markdown格式,务必注意排版要美观,段落之间使用两个换行。
25 | 4、务必注意!!!!如果Tool给出的Observation是一个列表,列表的每一项都有自己的ID,如xxx_id、xxxId,则请你在总结回复时,每一项都保留这些ID,告诉用户!!!!!
26 | 5、使用中文回复。
27 | """
28 | 
29 | [工具列表]="""
30 | {{{tools}}}
31 | """'''
32 | 
33 | agent_system_prompt_template_en = '''Please act as an Agent within a super intelligent agent, which has a series of external tools. The Planner within the super intelligent agent can solve user tasks by calling external tools, as detailed in the [Tool List].
34 | You are responsible for interacting with the user. Based on the results returned by the Planner and Tool, combined with the user task and the context of the conversation, you provide answers, and only your answers will be displayed to the user.
35 | Refer to the [Agent Output Format] for the output format.
36 | 
37 | {{{all_tool_required_info}}}
38 | 
39 | [Environmental Information]="""
40 | {{{env_info}}}
41 | """
42 | 
43 | [Agent Output Format] = """
44 | Agent: According to the [Requirements], reply to the most recent round of content starting with "User:" in the context conversation information (do not repeat this sentence).
45 | """
46 | 
47 | [Requirements]="""
48 | 1、The reply must start with "Agent:".
49 | 2、Summarize the user task from the most recent round starting with "User:" based on the context conversation information.
50 | 3、Use markdown format, and be sure to pay attention to the layout to make it look neat, with two line breaks between paragraphs.
51 | 4、Pay special attention!!!!
If the Observation given by the Tool is a list, and each item in the list has its own ID, such as xxx_id or xxxId, then when summarizing the reply, please retain these IDs for each item and inform the user!!!!!!! 52 | 5、Reply in English. 53 | """ 54 | 55 | [Tool List]=""" 56 | {{{tools}}} 57 | """''' 58 | 59 | 60 | def agent_answer(messages, tools, env_info, request_func): 61 | all_tool_name, all_tool_required_info = get_all_tool_info(tools) 62 | language = os.getenv("LANGUAGE") 63 | if language == "zh": 64 | agent_system_prompt_template = agent_system_prompt_template_zh 65 | else: 66 | agent_system_prompt_template = agent_system_prompt_template_en 67 | 68 | agent_system_prompt = agent_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \ 69 | .replace("{{{env_info}}}", env_info) \ 70 | .replace("{{{all_tool_name}}}", all_tool_name) \ 71 | .replace("{{{all_tool_required_info}}}", all_tool_required_info) 72 | messages_new = [ 73 | { 74 | "role": "system", 75 | "content": agent_system_prompt 76 | } 77 | ] 78 | messages_new.extend(messages) 79 | res = request_func(messages_new) 80 | if "```markdown\n" in res: 81 | res = res.replace("```markdown\n", "").replace("\n```", "").replace("Agent助手:\n\n", "Agent助手:") \ 82 | .replace("Agent:\n\n", "Agent:").replace("Agent: \n\n", "Agent:") 83 | logger.info(f"agent_answer:\n{res}\n") 84 | fetch_data = {"task": "agent_answer", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res} 85 | return res, fetch_data 86 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/agent_answer_chat.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from utils import get_all_tool_info, logger 5 | 6 | 7 | agent_system_prompt_template_zh = '''请你扮演一个超级智能体中的Agent助手,超级智能体拥有一系列外部工具,超级智能体中的Planner可以通过调用外部工具来解决用户任务,具体见[工具列表]。 8 | 你负责与用户进行交互,你根据Planner和Tool返回的结果,结合用户任务以及上下文对话信息进行回答,只有你的回答会展示给用户。 9 | 目前Planner判断你可以直接回答用户任务,该任务不需要调用任何工具,请使用你的内部知识直接回答。 10 | 输出格式参考[Agent助手输出格式]。 11 | 12 | [环境信息]=""" 13 | {{{env_info}}} 14 | """ 15 | 16 | [Agent助手输出格式]=""" 17 | Agent助手:根据[要求],回复上下文对话信息中最近一轮以 "用户:" 开头的内容(不要重复这句话) 18 | """ 19 | 20 | [要求]=""" 21 | 1、回复必须以 "Agent助手:" 开头。 22 | 2、根据上下文对话信息,直接回答最近一轮以 "用户:" 开头的用户任务。 23 | 3、使用markdown格式,务必注意排版要美观,段落之间使用两个换行。 24 | 4、使用中文回复。 25 | """ 26 | 27 | [工具列表]=""" 28 | {{{tools}}} 29 | """''' 30 | 31 | agent_system_prompt_template_en = '''Please play the role of an Agent assistant within a super intelligent agent. The super intelligent agent possesses a series of external tools. The Planner within the super intelligent agent can solve user tasks by invoking external tools, as detailed in the [Tool List]. 32 | You are responsible for interacting with users. Based on the results returned by the Planner and Tools, combined with the user task and the context of the conversation, you provide answers, but only your responses are displayed to the users. 33 | Currently, the Planner has determined that you can directly answer the user's task, which does not require the invocation of any tools. Please use your internal knowledge to respond directly. 34 | Refer to the [Agent Assistant Output Format] for the output format. 
35 | 
36 | [Environmental Information]="""
37 | {{{env_info}}}
38 | """
39 | 
40 | [Agent Assistant Output Format]="""
41 | Agent Assistant: According to the [Requirements], reply to the most recent round of content that starts with "User:" in the context of the conversation (do not repeat this sentence).
42 | """
43 | 
44 | [Requirements]="""
45 | 1. The reply must start with "Agent Assistant:".
46 | 2. Directly answer the most recent round of user tasks that start with "User:" based on the context of the conversation.
47 | 3. Use markdown format, and be sure to pay attention to the layout to make it visually appealing, with two line breaks between paragraphs.
48 | 4. Reply in English.
49 | """
50 | 
51 | [Tool List]="""
52 | {{{tools}}}
53 | """'''
54 | 
55 | 
56 | def agent_answer_chat(messages, tools, env_info, request_func):
57 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
58 |     language = os.getenv("LANGUAGE")
59 |     if language == "zh":
60 |         agent_system_prompt_template = agent_system_prompt_template_zh
61 |     else:
62 |         agent_system_prompt_template = agent_system_prompt_template_en
63 |     agent_system_prompt = agent_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
64 |         .replace("{{{env_info}}}", env_info) \
65 |         .replace("{{{all_tool_name}}}", all_tool_name) \
66 |         .replace("{{{all_tool_required_info}}}", all_tool_required_info)
67 |     messages_new = [
68 |         {
69 |             "role": "system",
70 |             "content": agent_system_prompt
71 |         }
72 |     ]
73 |     messages_new.extend(messages)
74 |     res = request_func(messages_new)
75 |     if "```markdown\n" in res:
76 |         res = res.replace("```markdown\n", "").replace("\n```", "")
77 |     logger.info(f"agent_answer_chat:\n{res}\n")
78 |     fetch_data = {"task": "agent_answer_chat", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
79 |     return res, fetch_data
80 | 
--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/agent_ask.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | 
4 | from utils import get_all_tool_info, logger
5 | 
6 | 
7 | agent_system_prompt_template_zh = '''请你扮演一个超级智能体中的Agent助手,超级智能体拥有一系列外部工具,超级智能体中的Planner可以通过调用外部工具来解决用户任务,具体见[工具列表]。
8 | 你负责与用户进行交互,你根据Planner和Tool返回的结果,结合用户任务以及上下文对话信息进行回答,只有你的回答会展示给用户。
9 | 输出格式参考[Agent助手输出格式]。
10 | 
11 | {{{all_tool_required_info}}}
12 | 
13 | [环境信息]="""
14 | {{{env_info}}}
15 | """
16 | 
17 | [Agent助手输出格式]="""
18 | Agent助手:根据[要求],向用户提出认为需要用户输入或补充的信息(不要重复这句话)
19 | """
20 | 
21 | [要求]="""
22 | 1、回复必须以 "Agent助手:" 开头。
23 | 2、根据上下文对话信息,尤其是Planner中的信息,结合工具中的required参数,向用户提出认为需要用户输入或补充的信息,注意提问时不要包括参数的名字。
24 | 3、使用markdown格式,务必注意排版要美观,段落之间使用两个换行。
25 | 4、使用中文回复。
26 | """
27 | 
28 | [工具列表]="""
29 | {{{tools}}}
30 | """'''
31 | 
32 | 
33 | agent_system_prompt_template_en = '''You are to act as an Agent assistant within a super intelligent agent. The super intelligent agent has a series of external tools, and the Planner within the system can solve user tasks by invoking these external tools, as detailed in the [Tool List].
34 | You are responsible for interacting with users, and you provide answers based on the results returned by the Planner and Tools, combined with the user's tasks and contextual dialogue information. Only your responses will be displayed to the user.
35 | The output format should refer to the [Agent Assistant Output Format].
36 | 37 | {{{all_tool_required_info}}} 38 | 39 | [Environment Information]=""" 40 | {{{env_info}}} 41 | """ 42 | 43 | [Agent Assistant Output Format]=""" 44 | Agent Assistant: Based on the [Requirements], ask the user for any information or input you think is necessary (do not repeat this sentence). 45 | """ 46 | 47 | [Requirements]=""" 48 | 1. The response must start with "Agent Assistant:". 49 | 2. Based on the contextual dialogue information, especially the information from the Planner, and combined with the required parameters from the tools, ask the user for any information or input you think is necessary, ensuring not to include the parameter names in your questions. 50 | 3. Use markdown format, ensuring the layout is aesthetically pleasing, with two line breaks between paragraphs. 51 | 4. Respond in English. 52 | """ 53 | 54 | [Tool List]=""" 55 | {{{tools}}} 56 | """''' 57 | 58 | def agent_ask(messages, tools, env_info, request_func): 59 | all_tool_name, all_tool_required_info = get_all_tool_info(tools) 60 | language = os.getenv("LANGUAGE") 61 | if language == "zh": 62 | agent_system_prompt_template = agent_system_prompt_template_zh 63 | else: 64 | agent_system_prompt_template = agent_system_prompt_template_en 65 | agent_system_prompt = agent_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \ 66 | .replace("{{{env_info}}}", env_info) \ 67 | .replace("{{{all_tool_name}}}", all_tool_name) \ 68 | .replace("{{{all_tool_required_info}}}", all_tool_required_info) 69 | messages_new = [ 70 | { 71 | "role": "system", 72 | "content": agent_system_prompt 73 | } 74 | ] 75 | messages_new.extend(messages) 76 | res = request_func(messages_new) 77 | if "```markdown\n" in res: 78 | res = res.replace("```markdown\n", "").replace("\n```", "") 79 | logger.info(f"agent_ask:\n{res}\n") 80 | fetch_data = {"task": "agent_ask", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res} 81 | return res, fetch_data 82 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/checker_tool.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from utils import parse_answer, logger 5 | 6 | 7 | def rule_checker_zh(messages, action_list, tools): 8 | analysis = { 9 | "format_analysis": "", 10 | "correct": "yes" 11 | } 12 | try: 13 | tool_message = messages[-2] 14 | tool_content = tool_message["content"] 15 | tool_content_obj = parse_answer(tool_content) 16 | observation_list = tool_content_obj["Observation_List"] 17 | 18 | if len(action_list) != len(observation_list): 19 | analysis["format_analysis"] += f"Tool生成的Observation_List的数量为{len(observation_list)},Planner给出的要执行Action_List的数量为{len(action_list)},两者不相等,生成错误,需要重新生成" 20 | analysis["correct"] = "no" 21 | 22 | except Exception as e: 23 | analysis["format_analysis"] += f"Tool生成的格式错误,JSON无法解析,请不要增加//等注释信息,具体错误为:{e}" 24 | analysis["correct"] = "no" 25 | 26 | rule_checker_result = f"Checker_Tool:\n```json\n{json.dumps(analysis, ensure_ascii=False, indent=4)}\n```" 27 | logger.info(f"rule_checker\n{rule_checker_result}\n") 28 | return analysis["correct"], rule_checker_result 29 | 30 | 31 | def rule_checker_en(messages, action_list, tools): 32 | analysis = { 33 | "format_analysis": "", 34 | "correct": "yes" 35 | } 36 | try: 37 | tool_message = messages[-2] 38 | tool_content = tool_message["content"] 39 | tool_content_obj = parse_answer(tool_content) 40 | observation_list = 
tool_content_obj["Observation_List"] 41 | 42 | if len(action_list) != len(observation_list): 43 | analysis["format_analysis"] += f"The number of Observation_List generated by Tool is {len(observation_list)}, and the number of Action_List to be executed given by Planner is {len(action_list)}. The two are not equal, resulting in an error, and need to be regenerated." 44 | analysis["correct"] = "no" 45 | 46 | except Exception as e: 47 | analysis["format_analysis"] += f"The format generated by Tool is incorrect, and JSON cannot be parsed. Please do not add comments such as //, etc. The specific error is: {e}" 48 | analysis["correct"] = "no" 49 | 50 | rule_checker_result = f"Checker_Tool:\n```json\n{json.dumps(analysis, ensure_ascii=False, indent=4)}\n```" 51 | logger.info(f"rule_checker\n{rule_checker_result}\n") 52 | return analysis["correct"], rule_checker_result 53 | 54 | 55 | def checker_tool(messages, action_list, tools, env_info, request_func): 56 | language = os.getenv("LANGUAGE") 57 | if language == "zh": 58 | rule_correct, rule_checker_result = rule_checker_zh(messages, action_list, tools) 59 | else: 60 | rule_correct, rule_checker_result = rule_checker_en(messages, action_list, tools) 61 | return rule_correct, rule_checker_result 62 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/tool.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from utils import logger 5 | 6 | 7 | tool_system_prompt_template_zh = '''请你扮演一个超级智能体中的外部工具Tool,你的这些外部工具可以用来解决用户任务,具体见[工具列表]。 8 | 请你根据超级智能体的Planner输出的工具名和输入参数,模拟工具的执行结果。如果Planner给出的Action_List里有多个工具,则请你分别进行模拟,数量要与Action_List的数量一致,结果存在Observation_List中。 9 | 输出格式参考[Tool输出格式]。 10 | 11 | [环境信息]=""" 12 | {{{env_info}}} 13 | """ 14 | 15 | [Tool输出格式]=""" 16 | Tool: 17 | ```json 18 | { 19 | "Observation_List": [ 20 | { 21 | "status_code": "参考[工具调用结果要求],给出包含 HTTP 响应状态代码", 22 | "response": "参考[工具调用结果要求],模拟执行动作的结果。确保您的响应采用 JSON 格式、包含真实数据并符合 OpenAPI 规范格式。" 23 | } 24 | ] 25 | } 26 | ``` 27 | """ 28 | 29 | [工具调用结果要求]=""" 30 | 1. 根据 OpenAPI 规范验证请求中的 HTTP 方法和参数。 31 | 2. 生成严格遵循 OpenAPI 规范中指定格式的响应,并确保其为 JSON 格式。 32 | 3. 响应应包含真实数据,避免使用占位符。 33 | 4. 通过提供适当的错误响应来处理边缘情况。 34 | 5. 对于没有长度限制的请求,如get方法,请确保在响应中返回 3~5 个样本,务必注意不能使用省略符号!!!!!如// xxx、...等来省略样本信息,需要符合 JSON 格式,否则会导致 JSON 无法解析!!!!! 35 | 6. 尽量使用中文模拟响应。 36 | """ 37 | 38 | [工具列表]=""" 39 | {{{tools}}} 40 | """ 41 | ''' 42 | 43 | tool_system_prompt_template_en = '''Please act as an external tool, Tool, within a super intelligent agent. These external tools can be used to solve user tasks, as detailed in the [Tool List]. 44 | Based on the tool name and input parameters output by the super intelligent agent's Planner, simulate the execution results of the tool. 45 | If there are multiple tools in the Action_List provided by the Planner, please simulate each one separately, ensuring the number matches the Action_List, and store the results in the Observation_List. 46 | Refer to the [Tool Output Format] for the outputformat. 47 | 48 | [Environment Information]=""" 49 | {{{env_info}}} 50 | """ 51 | 52 | [Tool Output Format]=""" 53 | Tool: 54 | ```json 55 | { 56 | "Observation_List": [ 57 | { 58 | "status_code": "Refer to [Tool Invocation Result Requirements] for the HTTP response status code", 59 | "response": "Refer to [Tool Invocation Result Requirements] to simulate the result of the action execution. 
59 |             "response": "Refer to [Tool Invocation Result Requirements] to simulate the result of the action execution. Ensure your response is in JSON format, contains real data, and complies with the OpenAPI specification format."
60 |         }
61 |     ]
62 | }
63 | ```
64 | """
65 | 
66 | [Tool Invocation Result Requirements]="""
67 | 1. Validate the HTTP method and parameters in the request according to the OpenAPI specification.
68 | 2. Generate a response that strictly follows the format specified in the OpenAPI specification and ensure it is in JSON format.
69 | 3. The response should contain real data, avoiding the use of placeholders.
70 | 4. Handle edge cases by providing appropriate error responses.
71 | 5. For requests without length limitations, such as the GET method, ensure the response returns 3 to 5 samples, and be careful not to use ellipses like // xxx, ... to omit sample information, as it must conform to JSON format, otherwise it will cause JSON parsing errors!!!!!!!
72 | 6. Try to simulate responses in English.
73 | """
74 | 
75 | [Tool List]="""
76 | {{{tools}}}
77 | """'''
78 | 
79 | 
80 | def tool(messages, tools, env_info, request_func):
81 |     language = os.getenv("LANGUAGE")
82 |     if language == "zh":
83 |         tool_system_prompt_template = tool_system_prompt_template_zh
84 |     else:
85 |         tool_system_prompt_template = tool_system_prompt_template_en
86 |     tool_system_prompt = tool_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
87 |         .replace("{{{env_info}}}", env_info)
88 |     # print(tool_system_prompt)
89 |     messages_new = [
90 |         {
91 |             "role": "system",
92 |             "content": tool_system_prompt
93 |         }
94 |     ]
95 |     messages_new.extend(messages)
96 |     res = request_func(messages_new)
97 |     logger.info(f"tool:\n{res}\n")
98 |     fetch_data = {"task": "tool", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
99 |     return res, fetch_data
100 | 
--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_answer_ask.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | 
4 | from utils import get_all_tool_info, logger
5 | 
6 | 
7 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。
8 | 这个超级智能体拥有一个Planner、Agent助手,并具备一系列外部工具,可以使用外部工具解决你提出的任务,具体见[工具列表]。
9 | 根据上下文对话信息,你已经提出了你的任务,但是根据Planner的反馈,你提供的任务信息不足。
10 | 因此,接下来,请你根据最新一轮超级智能体的Agent助手询问的信息进行回复,给出Agent助手要求的必填参数,从而帮助超级智能体解决你的任务。
11 | 输出格式参考[用户输出格式]。
12 | 
13 | {{{all_tool_required_info}}}
14 | 
15 | [环境信息]="""
16 | {{{env_info}}}
17 | """
18 | 
19 | [用户输出格式]="""
20 | 用户:根据[要求],回复上下文对话信息中最近一轮以 "Agent助手:" 开头的内容(不要重复这句话)
21 | """
22 | 
23 | [要求]="""
24 | 1、回复必须以 "用户:" 开头。
25 | 2、根据上下文对话信息,回复最近一轮以 "Agent助手:" 开头的用户任务。
26 | 3、你的回复里必须包含Agent助手所询问的所有必填参数的信息,使用自然语言描述,可以看情况伪造出一个,例如Base64编码字符串。(不要重复这句话)。
27 | 4、你的回复需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。
28 | 5、你的回复应该使用不同的语气:口语化、正式、礼貌、直接等。
29 | 6、你的回复应该使用不同的长度:有短到长,长度逐渐递增。
30 | """
31 | 
32 | [工具列表]="""
33 | {{{tools}}}
34 | """'''
35 | 
36 | 
37 | user_system_prompt_template_en = '''Please play the role of a user who is interacting with a super intelligent agent.
38 | This super intelligent agent has a Planner, an Agent Assistant, and a series of external tools that can be used to solve the tasks you propose, as detailed in the [Tool List].
39 | Based on the context of the conversation, you have already proposed your task, but according to the Planner's feedback, the information you provided is insufficient.
40 | Therefore, next, please respond according to the latest round of inquiries from the super intelligent agent's Agent Assistant, providing the required parameters requested by the Agent Assistant to help the super intelligent agent solve your task.
41 | Refer to the [User Output Format] for the output format.
42 | 
43 | {{{all_tool_required_info}}}
44 | 
45 | [Environment Information]="""
46 | {{{env_info}}}
47 | """
48 | 
49 | [User Output Format]="""
50 | User: According to the [Requirements], respond to the most recent round of context conversation information that starts with "Agent Assistant:" (do not repeat this sentence).
51 | """
52 | 
53 | [Requirements]="""
54 | 1. The response must start with "User:".
55 | 2. Based on the context of the conversation, respond to the most recent user task that starts with "Agent Assistant:".
56 | 3. Your response must include all the required parameter information requested by the Agent Assistant, described in natural language. You may fabricate one if necessary, such as a Base64 encoded string. (Do not repeat this sentence).
57 | 4. Your response should use different types of sentence structures: imperative, declarative, interrogative, etc.
58 | 5. Your response should use different tones: colloquial, formal, polite, direct, etc.
59 | 6. Your response should vary in length: from short to long, gradually increasing in length.
60 | """
61 | 
62 | [Tool List]="""
63 | {{{tools}}}
64 | """'''
65 | 
66 | def user_answer_ask(messages, tools, env_info, request_func):
67 |     all_tool_name, all_tool_required_info = get_all_tool_info(tools)
68 |     language = os.getenv("LANGUAGE")
69 |     if language == "zh":
70 |         user_system_prompt_template = user_system_prompt_template_zh
71 |     else:
72 |         user_system_prompt_template = user_system_prompt_template_en
73 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
74 |         .replace("{{{env_info}}}", env_info) \
75 |         .replace("{{{all_tool_required_info}}}", all_tool_required_info)
76 |     # print(user_system_prompt)
77 |     messages_new = [
78 |         {
79 |             "role": "system",
80 |             "content": user_system_prompt
81 |         }
82 |     ]
83 |     messages_new.extend(messages)
84 |     res = request_func(messages_new)
85 |     logger.info(f"user_answer_ask:\n{res}\n")
86 |     fetch_data = {"task": "user_answer_ask", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res}
87 |     return res, fetch_data
88 | 
89 | 
90 | def main():
91 |     pass
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     main()
96 | 
--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_ask.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 | import os
4 | 
5 | from utils import logger, remove_prepare_ask_tools
6 | 
7 | 
8 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。
9 | 这个超级智能体具备一系列外部工具,可以使用外部工具解决你提出的任务。
10 | 接下来,请你根据[要求]提出5个你需要超级智能体解决的模糊不清的任务。
11 | 这5个任务都需要使用[工具列表]里的{{{tool}}}才能够完成,但是会让超级智能体不清楚如何填写{{{tool}}}里的某些必填(required)参数,需要多样。
12 | 最后请你按照[格式]输出最终结果,不要生成多余的文字。
13 | 
14 | 工具{{{tool}}}的必填参数有:{{{tool_required}}},非必填参数有:{{{tool_no_required}}}
15 | 
16 | [要求]="""
17 | 1、用户任务的描述里必须缺乏调用{{{tool}}}时所需的所有必填参数的信息,剩下的非必填参数的信息,请你看情况添加,使用自然语言描述。
18 | 注意工具参数允许一定的参数推导,即根据用户任务描述可以推导出工具参数的话,就不算缺乏了必要信息,缺乏指的是即使通过推导也无法获得参数值。
19 | 2、用户任务需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。
20 | 3、用户任务应该包含不同的语气:口语化、正式、礼貌、直接等。
21 | 4、确保用户任务的长度各不相同,有短到长,长度逐渐递增。
22 | 5、确保用户任务涉及不同的主题/实例,不同的场景,不同的角色身份。
23 | 6、根据[工具列表]中所有工具的description,请你提取在所有description中出现的共同实体,并确保用户任务中出现该实体。
24 | 7、务必不要在用户任务中明确指定要使用的工具{{{tool}}}。
25 | """
26 | 
27 | [工具列表]="""
28 | {{{tools}}}
29 | """
30 | 
31 | [格式]="""
32 | ```json
33 | {
34 |     "任务1": "xxx",
35 |     "任务2": "xxx",
36 |     "任务3": "xxx",
37 |     "任务4": "xxx",
38 |     "任务5": "xxx"
39 | }
40 | ```
41 | """'''
42 | 
43 | user_system_prompt_template_en = '''Please act as a user who is interacting with a super intelligent agent.
44 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose.
45 | Next, based on the [Requirements], please propose 5 ambiguous tasks that you need the super intelligent agent to solve.
46 | All 5 tasks must require the use of {{{tool}}} from the [Tool List] to be completed, but will leave the super intelligent agent unclear on how to fill in some of the required parameters of {{{tool}}}, and should be diverse.
47 | Finally, please output the final result according to the [Format], without generating any extra text.
48 | 
49 | The required parameters for tool {{{tool}}} are: {{{tool_required}}}, and the optional parameters are: {{{tool_no_required}}}
50 | 
51 | [Requirements]="""
52 | 1. The description of the user's task must lack all the necessary information for calling {{{tool}}}, leaving only the optional parameter information, which you can add as you see fit, using natural language descriptions.
53 | Note that tool parameters allow for some parameter inference, meaning that if the tool parameters can be inferred from the user's task description, it does not count as lacking necessary information. Lacking means that even through inference, the parameter values cannot be obtained.
54 | 2. The user's tasks need to use different types of sentence structures: imperative sentences, declarative sentences, interrogative sentences, etc.
55 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
56 | 4. Ensure that the length of the user's tasks varies, from short to long, gradually increasing in length.
57 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.
58 | 6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks.
59 | 7. Do not explicitly specify the tool {{{tool}}} in the user's tasks.
60 | """ 61 | 62 | [Tool List]=""" 63 | {{{tools}}} 64 | """ 65 | 66 | [Format]=""" 67 | ```json 68 | { 69 | "Task 1": "xxx", 70 | "Task 2": "xxx", 71 | "Task 3": "xxx", 72 | "Task 4": "xxx", 73 | "Task 5": "xxx" 74 | } 75 | ``` 76 | """''' 77 | 78 | def parse_answer(user_tasks): 79 | user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "") 80 | user_tasks = json.loads(user_tasks) 81 | task_keys = list(user_tasks.keys()) 82 | task_key = random.choice(task_keys) 83 | user_task = user_tasks[task_key] 84 | user_task = "用户:" + user_task 85 | return user_task 86 | 87 | 88 | def user_ask(messages, tools, request_func): 89 | tools_ = remove_prepare_ask_tools(tools) 90 | tool = random.choice(tools_) 91 | tool_name = tool["function"]["name"] 92 | tool_required = tool["function"]["parameters"]["required"] 93 | tool_required = ", ".join(tool_required) 94 | tool_all_properties = list(tool["function"]["parameters"]["properties"].keys()) 95 | tool_no_required = [] 96 | for property in tool_all_properties: 97 | if property not in tool_required: 98 | tool_no_required.append(property) 99 | tool_no_required = ", ".join(tool_no_required) 100 | language = os.getenv("LANGUAGE") 101 | if language == "zh": 102 | user_system_prompt_template = user_system_prompt_template_zh 103 | else: 104 | user_system_prompt_template = user_system_prompt_template_en 105 | user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \ 106 | .replace("{{{tool}}}", tool_name) \ 107 | .replace("{{{tool_required}}}", tool_required) \ 108 | .replace("{{{tool_no_required}}}", tool_no_required) 109 | messages_new = [ 110 | { 111 | "role": "user", 112 | "content": user_system_prompt 113 | } 114 | ] 115 | res = request_func(messages_new) 116 | logger.info(f"user_ask: {res}\n") 117 | user_task = parse_answer(res) 118 | logger.info(f"user_multi_tool:\n{user_task}\n") 119 | user_message = [{"role": "user", "content": user_task}] 120 | fetch_data = {"task": "user_ask", "tools": tools, "env_info": None, "messages": messages_new, "answer": res} 121 | return user_message, fetch_data 122 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/user_chat.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | 5 | from utils import logger, remove_prepare_ask_tools 6 | 7 | 8 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。 9 | 这个超级智能体具备一系列外部工具,可以使用外部工具解决你提出的任务。 10 | 接下来,请你根据[要求]提出5个你需要超级智能体解决的闲聊任务。 11 | 这5个闲聊任务都不需要使用[工具列表]里的任何工具,但是主题上需要有一些相关性。 12 | 最后请你按照[格式]输出最终结果,不要生成多余的文字。 13 | 14 | [要求]=""" 15 | 1、用户任务是一个闲聊任务,必须与[工具列表]的功能无关,但是主题有一定的相关性。 16 | 2、用户任务需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。 17 | 3、用户任务应该包含不同的语气:口语化、正式、礼貌、直接等。 18 | 4、确保用户任务的长度各不相同,有短到长,长度逐渐递增。 19 | 5、确保用户任务涉及不同的主题/实例,不同的场景,不同的角色身份。 20 | """ 21 | 22 | [工具列表]=""" 23 | {{{tools}}} 24 | """ 25 | 26 | [格式]=""" 27 | ```json 28 | { 29 | "任务1": "xxx", 30 | "任务2": "xxx", 31 | "任务3": "xxx", 32 | "任务4": "xxx", 33 | "任务5": "xxx" 34 | } 35 | ``` 36 | """''' 37 | 38 | user_system_prompt_template_en = '''Please pretend to be a user interacting with a super intelligent agent. 39 | This super intelligent agent has a series of external tools that can be used to solve tasks you propose. 40 | Next, based on the [Requirements], propose 5 casual conversation tasks that you need the super-intelligent agent to solve. 
41 | These 5 casual conversation tasks should not use any tools from the [Tool List], but should have some thematic relevance.
42 | Finally, please output the final result according to the [Format], without generating any superfluous text.
43 | 
44 | [Requirements]="""
45 | 1. The user task is a casual conversation task, which must be unrelated to the functions of the [Tool List], but should have some thematic relevance.
46 | 2. User tasks need to use different types of sentence structures: imperative, declarative, interrogative, etc.
47 | 3. User tasks should include different tones: colloquial, formal, polite, direct, etc.
48 | 4. Ensure that the lengths of the user tasks are different, ranging from short to long, with gradually increasing length.
49 | 5. Ensure that the user tasks involve different themes/examples, different scenarios, and different role identities.
50 | """
51 | 
52 | [Tool List]="""
53 | {{{tools}}}
54 | """
55 | 
56 | [Format]="""
57 | ```json
58 | {
59 |     "Task 1": "xxx",
60 |     "Task 2": "xxx",
61 |     "Task 3": "xxx",
62 |     "Task 4": "xxx",
63 |     "Task 5": "xxx"
64 | }
65 | ```
66 | """'''
67 | 
68 | def parse_answer(user_tasks):
69 |     user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "")
70 |     user_tasks = json.loads(user_tasks)
71 |     task_keys = list(user_tasks.keys())
72 |     task_key = random.choice(task_keys)
73 |     user_task = user_tasks[task_key]
74 |     user_task = "用户:" + user_task
75 |     return user_task
76 | 
77 | 
78 | def user_chat(messages, tools, request_func):
79 |     tools = remove_prepare_ask_tools(tools)
80 |     language = os.getenv("LANGUAGE")
81 |     if language == "zh":
82 |         user_system_prompt_template = user_system_prompt_template_zh
83 |     else:
84 |         user_system_prompt_template = user_system_prompt_template_en
85 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4))
86 |     messages_new = [
87 |         {
88 |             "role": "user",
89 |             "content": user_system_prompt
90 |         }
91 |     ]
92 |     res = request_func(messages_new)
93 |     logger.info(f"user_chat: {res}\n")
94 |     user_task = parse_answer(res)
95 |     logger.info(f"user_chat:\n{user_task}\n")
96 |     user_message = [{"role": "user", "content": user_task}]
97 |     fetch_data = {"task": "user_chat", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
98 |     return user_message, fetch_data
99 | 
--------------------------------------------------------------------------------
/c3_bench/multi_agent/agent/user_multi_tool.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | 
4 | from utils import remove_prepare_ask_tools, random_select_answer_cot, get_all_tool_info, logger
5 | 
6 | 
7 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。
8 | 这个超级智能体具备一系列外部工具,可以使用外部工具解决你提出的任务。
9 | 接下来,请你根据[要求]提出3个你需要超级智能体解决的任务。
10 | 这3个任务都需要组合使用[工具列表]里的工具(包括:{{{all_tool_name}}})才能够完成,需要具体、多样、需要串行调用多个工具来解决任务。
11 | 最后请你按照[格式]输出最终结果,不要生成多余的文字。
12 | 
13 | {{{all_tool_required_info}}}
14 | 
15 | [要求]="""
16 | 1、用户任务的描述里必须包含调用工具所需的所有必填参数的信息,其他的非必填参数的信息,请你看情况添加,使用自然语言描述。
17 | 2、用户任务需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。
18 | 3、用户任务应该包含不同的语气:口语化、正式、礼貌、直接等。
19 | 4、确保用户任务的长度各不相同,有短到长,长度逐渐递增。
20 | 5、确保用户任务涉及不同的主题/实例,不同的场景,不同的角色身份。
21 | 6、根据[工具列表]中所有工具的description,请你提取在所有description中出现的共同实体,并确保用户任务中出现该实体。
22 | 7、务必不要在用户任务中明确指定要使用的工具名。
23 | 8、调用的多个工具之间必须有依赖关系,调用之间的依赖关系是指,必须在调用工具A完成之后才能运行调用工具B,即调用工具B之前必须先调用工具A。
24 | 9、任务难度分为easy、medium、hard三个等级,easy代表简单,medium代表中等,hard代表困难,更难的任务需要更多的步骤执行,确保你生成的3个任务中,都是中等难度以上的任务。
25 | """
26 | 
27 | 
[工具列表]=""" 28 | {{{tools}}} 29 | """ 30 | 31 | [格式]=""" 32 | ```json 33 | { 34 | "任务1": { 35 | "任务描述": "xxx", 36 | "任务难度": "medium|hard", 37 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具,请你首先串行给出要调用的工具" 38 | }, 39 | "任务2": { 40 | "任务描述": "xxx", 41 | "任务难度": "medium|hard", 42 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具,请你首先串行给出要调用的工具" 43 | }, 44 | "任务3": { 45 | "任务描述": "xxx", 46 | "任务难度": "medium|hard", 47 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具,请你首先串行给出要调用的工具" 48 | } 49 | } 50 | ``` 51 | """''' 52 | 53 | user_system_prompt_template_en = '''Please act as a user who is interacting with a super intelligent agent. 54 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose. 55 | Next, based on the [Requirements], please propose 3 tasks that you need the super intelligent agent to solve. 56 | These 3 tasks must require the combined use of tools from the [Tool List] (including: {{{all_tool_name}}}) to be completed. 57 | The tasks should be specific, diverse, and require the sequential invocation of multiple tools to solve. 58 | Finally, please output the final result according to the [Format] without generating any extra text. 59 | 60 | {{{all_tool_required_info}}} 61 | 62 | [Requirements]=""" 63 | 1. The description of the user's task must include all the required parameters needed to invoke the tools, while other optional parameters can be added as you see fit, using natural language. 64 | 2. The user's tasks should use different types of sentence structures: imperative, declarative, interrogative, etc. 65 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc. 66 | 4. Ensure that the length of the user's tasks varies, from short to long, gradually increasing in length. 67 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles. 68 | 6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks. 69 | 7. Do not explicitly specify the names of the tools to be used in the user's tasks. 70 | 8. There must be dependencies between the multiple tools invoked, meaning that tool A must be called and completed before tool B can be run, i.e., tool B must be invoked after tool A. 71 | 9. The difficulty of the tasks is divided into easy, medium, and hard levels. Easy represents simple, medium represents moderate, and hard represents difficult. Ensure that the 3 tasks you generate are all of medium difficulty or above. 72 | """ 73 | 74 | [Tool List]=""" 75 | {{{tools}}} 76 | """ 77 | 78 | [Format]=""" 79 | ```json 80 | { 81 | "Task 1": { 82 | "Task Description": "xxx", 83 | "Task Difficulty": "medium|hard", 84 | "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step, and first sequentially list the tools to be invoked." 85 | }, 86 | "Task 2": { 87 | "Task Description":"xxx", 88 | "Task Difficulty": "medium|hard", 89 | "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step, and first sequentially list the tools to be invoked." 
90 | }, 91 | "Task 3": { 92 | "Task Description": "xxx", 93 | "Task Difficulty": "medium|hard", 94 | "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step, and first sequentially list the tools to be invoked." 95 | } 96 | } 97 | ``` 98 | """''' 99 | 100 | 101 | def user_multi_tool(messages, tools, request_func): 102 | tools_ = remove_prepare_ask_tools(tools) 103 | all_tool_name, all_tool_required_info = get_all_tool_info(tools_) 104 | language = os.getenv("LANGUAGE") 105 | if language == "zh": 106 | user_system_prompt_template = user_system_prompt_template_zh 107 | else: 108 | user_system_prompt_template = user_system_prompt_template_en 109 | user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \ 110 | .replace("{{{all_tool_name}}}", all_tool_name) \ 111 | .replace("{{{all_tool_required_info}}}", all_tool_required_info) 112 | messages_new = [ 113 | { 114 | "role": "user", 115 | "content": user_system_prompt 116 | } 117 | ] 118 | res = request_func(messages_new) 119 | logger.info(f"user_multi_tool:\n{res}\n") 120 | user_task = random_select_answer_cot(res) 121 | logger.info(f"user_multi_tool:\n{user_task}\n") 122 | user_message = [{"role": "user", "content": user_task}] 123 | fetch_data = {"task": "user_multi_tool", "tools": tools, "env_info": None, "messages": messages_new, "answer": res} 124 | return user_message, fetch_data 125 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/user_multi_tool_parallel.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from utils import remove_prepare_ask_tools, random_select_answer_cot, get_all_tool_info, logger 5 | 6 | 7 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。 8 | 这个超级智能体具备一系列外部工具,可以使用外部工具解决你提出的任务。 9 | 接下来,请你根据[要求]提出3个你需要超级智能体解决的任务。 10 | 这3个任务都需要组合使用[工具列表]里的工具(包括:{{{all_tool_name}}})才能够完成,需要具体、多样、需要并行调用多个工具来解决任务。 11 | 最后请你按照[格式]输出最终结果,不要生成多余的文字。 12 | 13 | {{{all_tool_required_info}}} 14 | 15 | [要求]=""" 16 | 1、用户任务的描述里必须包含调用工具所需的所有必填参数的信息,其他的非必填参数的信息,请你看情况添加,使用自然语言描述。 17 | 2、用户任务需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。 18 | 3、用户任务应该包含不同的语气:口语化、正式、礼貌、直接等。 19 | 4、确保用户任务的长度各不相同,有短到长,长度逐渐递增。 20 | 5、确保用户任务涉及不同的主题/实例,不同的场景,不同的角色身份。 21 | 6、根据[工具列表]中所有工具的description,请你提取在所有description中出现的共同实体,并确保用户任务中出现该实体。 22 | 7、务必不要在用户任务中明确指定要使用的工具名。 23 | 8、调用的多个工具之间必须没有依赖关系。 24 | 调用之间有依赖关系是指:必须在调用工具A完成之后才能运行调用工具B。 25 | 调用之间没有依赖关系是指:工具A和工具B可以并行调用。 26 | 9、任务难度分为easy、medium、hard三个等级,easy代表简单,medium代表中等,hard代表困难,更难的任务需要更多的步骤执行,确保你生成的3个任务中,都是中等难度以上的任务。 27 | """ 28 | 29 | [工具列表]=""" 30 | {{{tools}}} 31 | """ 32 | 33 | [格式]=""" 34 | ```json 35 | { 36 | "任务1": { 37 | "任务描述": "xxx", 38 | "任务难度": "medium|hard", 39 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具" 40 | }, 41 | "任务2": { 42 | "任务描述": "xxx", 43 | "任务难度": "medium|hard", 44 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具" 45 | }, 46 | "任务3": { 47 | "任务描述": "xxx", 48 | "任务难度": "medium|hard", 49 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具" 50 | } 51 | } 52 | ``` 53 | """''' 54 | 55 | user_system_prompt_template_en = '''Please act as a user interacting with a super intelligent agent. 56 | This super intelligent agent is equipped with a series of external tools and can use these tools to solve the tasks you propose. 57 | Next, please propose 3 tasks that you need the super intelligent agent to solve based on the [Requirements]. 
58 | These 3 tasks all require the combined use of tools from the [Tool List] (including: {{{all_tool_name}}}) to be completed.
59 | The tasks need to be specific, diverse, and require parallel invocation of multiple tools to solve.
60 | Finally, please output the final result according to the [Format] without generating any extra text.
61 | 
62 | {{{all_tool_required_info}}}
63 | 
64 | [Requirements]="""
65 | 1. The description of the user's task must include all the required parameters needed to invoke the tools, while other optional parameters can be added as needed, using natural language.
66 | 2. The user's tasks need to use different types of sentence structures: imperative, declarative, interrogative, etc.
67 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
68 | 4. Ensure that the length of the user's tasks varies, from short to long, gradually increasing in length.
69 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.
70 | 6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks.
71 | 7. Do not explicitly specify the tool names to be used in the user's tasks.
72 | 8. There must be no dependency between the multiple tools invoked. A dependency between invocations means that tool B can only be run after tool A is completed. No dependency means that tool A and tool B can be invoked in parallel.
73 | 9. The difficulty of the tasks is divided into easy, medium, and hard levels. Easy represents simple, medium represents moderate, and hard represents difficult. More difficult tasks require more steps to execute. Ensure that the 3 tasks you generate are all of medium difficulty or above.
74 | """
75 | 
76 | [Tool List]="""
77 | {{{tools}}}
78 | """
79 | 
80 | [Format]="""
81 | ```json
82 | {
83 |     "Task 1": {
84 |         "Task Description": "xxx",
85 |         "Task Difficulty": "medium|hard",
86 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
87 |     },
88 |     "Task 2": {
89 |         "Task Description": "xxx",
90 |         "Task Difficulty": "medium|hard",
91 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
92 |     },
93 |     "Task 3": {
94 |         "Task Description": "xxx",
95 |         "Task Difficulty": "medium|hard",
96 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
97 | } 98 | } 99 | ``` 100 | """''' 101 | 102 | 103 | def user_multi_tool_parallel(messages, tools, request_func): 104 | tools_ = remove_prepare_ask_tools(tools) 105 | all_tool_name, all_tool_required_info = get_all_tool_info(tools_) 106 | language = os.getenv("LANGUAGE") 107 | if language == "zh": 108 | user_system_prompt_template = user_system_prompt_template_zh 109 | else: 110 | user_system_prompt_template = user_system_prompt_template_en 111 | user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \ 112 | .replace("{{{all_tool_name}}}", all_tool_name) \ 113 | .replace("{{{all_tool_required_info}}}", all_tool_required_info) 114 | messages_new = [ 115 | { 116 | "role": "user", 117 | "content": user_system_prompt 118 | } 119 | ] 120 | res = request_func(messages_new) 121 | logger.info(f"user_multi_tool_parallel:\n{res}\n") 122 | user_task = random_select_answer_cot(res) 123 | logger.info(f"user_multi_tool:\n{user_task}\n") 124 | user_message = [{"role": "user", "content": user_task}] 125 | fetch_data = {"task": "user_multi_tool_parallel", "tools": tools, "env_info": None, "messages": messages_new, "answer": res} 126 | return user_message, fetch_data 127 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/user_multi_tool_serial_parallel.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from utils import remove_prepare_ask_tools, random_select_answer_cot, get_all_tool_info, logger 5 | 6 | 7 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。 8 | 这个超级智能体具备一系列外部工具,可以使用外部工具解决你提出的任务。 9 | 接下来,请你根据[要求]提出3个你需要超级智能体解决的任务。 10 | 这3个任务都需要组合使用[工具列表]里的工具(包括:{{{all_tool_name}}})才能够完成,需要具体、多样、需要同时串行和并行调用多个工具来解决任务。 11 | 最后请你按照[格式]输出最终结果,不要生成多余的文字。 12 | 13 | {{{all_tool_required_info}}} 14 | 15 | [要求]=""" 16 | 1、用户任务的描述里必须包含调用工具所需的所有必填参数的信息,其他的非必填参数的信息,请你看情况添加,使用自然语言描述。 17 | 2、用户任务需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。 18 | 3、用户任务应该包含不同的语气:口语化、正式、礼貌、直接等。 19 | 4、确保用户任务的长度各不相同,有短到长,长度逐渐递增。 20 | 5、确保用户任务涉及不同的主题/实例,不同的场景,不同的角色身份。 21 | 6、根据[工具列表]中所有工具的description,请你提取在所有description中出现的共同实体,并确保用户任务中出现该实体。 22 | 7、务必不要在用户任务中明确指定要使用的工具名。 23 | 8、调用的多个工具之间需要有的有依赖关系,有的没有依赖关系。 24 | 调用之间有依赖关系是指:必须在调用工具A完成之后才能运行调用工具B。 25 | 调用之间没有依赖关系是指:工具A和工具B可以并行调用。 26 | 9、任务难度分为easy、medium、hard三个等级,easy代表简单,medium代表中等,hard代表困难,更难的任务需要更多的步骤执行,确保你生成的3个任务中,都是中等难度以上的任务。 27 | """ 28 | 29 | [工具列表]=""" 30 | {{{tools}}} 31 | """ 32 | 33 | [格式]=""" 34 | ```json 35 | { 36 | "任务1": { 37 | "任务描述": "xxx", 38 | "任务难度": "medium|hard", 39 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具" 40 | }, 41 | "任务2": { 42 | "任务描述": "xxx", 43 | "任务难度": "medium|hard", 44 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具" 45 | }, 46 | "任务3": { 47 | "任务描述": "xxx", 48 | "任务难度": "medium|hard", 49 | "解决任务的整体规划": "请你给出解决用户任务的整体规划,包括每一步骤需要调用哪个工具" 50 | } 51 | } 52 | ``` 53 | """''' 54 | 55 | user_system_prompt_template_en = '''Please act as a user who is interacting with a super intelligent agent. 56 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose. 57 | Next, please propose 3 tasks that you need the super intelligent agent to solve based on the [Requirements]. 58 | These 3 tasks must require the combined use of tools from the [Tool List] (including: {{{all_tool_name}}}) to be completed. 59 | The tasks should be specific, diverse, and require both serial and parallel invocation of multiple tools to solve. 
60 | Finally, please output the final result according to the [Format] without generating any extra text.
61 | 
62 | {{{all_tool_required_info}}}
63 | 
64 | [Requirements]="""
65 | 1. The description of the user's task must include all the required parameters needed to invoke the tools, while other optional parameters can be added as you see fit, using natural language.
66 | 2. The user's tasks need to use different types of sentence structures: imperative, declarative, interrogative, etc. 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
67 | 4. Ensure that the length of the user's tasks varies, from short to long, with increasing length.
68 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles. 6. Based on the descriptions of all tools in the [Tool List], extract the common entities that appear in all descriptions and ensure that these entities appear in the user's tasks.
69 | 7. Do not explicitly specify the tool names to be used in the user's tasks.
70 | 8. There should be dependencies between some of the tools invoked, while others should not have dependencies. A dependency between invocations means that tool B can only be run after tool A is completed. No dependency means that tool A and tool B can be invoked in parallel.
71 | 9. Task difficulty is divided into easy, medium, and hard levels. Easy represents simple, medium represents moderate, and hard represents difficult. More difficult tasks require more steps to execute. Ensure that the 3 tasks you generate are all of medium difficulty or above.
72 | """
73 | 
74 | [Tool List]="""
75 | {{{tools}}}
76 | """
77 | 
78 | [Format]="""
79 | ```json
80 | {
81 |     "Task 1": {
82 |         "Task Description": "xxx",
83 |         "Task Difficulty": "medium|hard",
84 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
85 |     },
86 |     "Task 2": {
87 |         "Task Description": "xxx",
88 |         "Task Difficulty": "medium|hard",
89 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
90 |     },
91 |     "Task 3": {
92 |         "Task Description": "xxx",
93 |         "Task Difficulty": "medium|hard",
94 |         "Overall Plan to Solve the Task": "Please provide an overall plan to solve the user's task, including which tool to invoke at each step."
95 | } 96 | } 97 | ``` 98 | """''' 99 | 100 | 101 | def user_multi_tool_serial_parallel(messages, tools, request_func): 102 | tools_ = remove_prepare_ask_tools(tools) 103 | all_tool_name, all_tool_required_info = get_all_tool_info(tools_) 104 | language = os.getenv("LANGUAGE") 105 | if language == "zh": 106 | user_system_prompt_template = user_system_prompt_template_zh 107 | else: 108 | user_system_prompt_template = user_system_prompt_template_en 109 | user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \ 110 | .replace("{{{all_tool_name}}}", all_tool_name) \ 111 | .replace("{{{all_tool_required_info}}}", all_tool_required_info) 112 | messages_new = [ 113 | { 114 | "role": "user", 115 | "content": user_system_prompt 116 | } 117 | ] 118 | res = request_func(messages_new) 119 | logger.info(f"user_multi_tool_serial_parallel:\n{res}\n") 120 | user_task = random_select_answer_cot(res) 121 | logger.info(f"user_multi_tool_serial_parallel:\n{user_task}\n") 122 | user_message = [{"role": "user", "content": user_task}] 123 | fetch_data = {"task": "user_multi_tool_serial_parallel", "tools": tools, "env_info": None, "messages": messages_new, "answer": res} 124 | return user_message, fetch_data 125 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/user_single_tool.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | import os 4 | 5 | from utils import remove_prepare_ask_tools, random_select_answer, logger 6 | 7 | 8 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。 9 | 这个超级智能体具备一系列外部工具,可以使用外部工具解决你提出的任务。 10 | 接下来,请你根据[要求]提出5个你需要超级智能体解决的任务。 11 | 这5个任务都需要使用[工具列表]里的{{{tool}}}才能够完成,且都只需要调用{{{tool}}}一次,需要具体、多样。 12 | 最后请你按照[格式]输出最终结果,不要生成多余的文字。 13 | 14 | 工具{{{tool}}}的必填参数有:{{{tool_required}}},非必填参数有:{{{tool_no_required}}} 15 | 16 | [要求]=""" 17 | 1、用户任务的描述里必须包含调用{{{tool}}}所需的所有必填参数的信息,其他的非必填参数的信息,请你看情况添加,使用自然语言描述。 18 | 2、用户任务需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。 19 | 3、用户任务应该包含不同的语气:口语化、正式、礼貌、直接等。 20 | 4、确保用户任务的长度各不相同,有短到长,长度逐渐递增。 21 | 5、确保用户任务涉及不同的主题/实例,不同的场景,不同的角色身份。 22 | 6、根据[工具列表]中所有工具的description,请你提取在所有description中出现的共同实体,并确保用户任务中出现该实体。 23 | 7、务必不要在用户任务中明确指定要使用的工具{{{tool}}}。 24 | """ 25 | 26 | [工具列表]=""" 27 | {{{tools}}} 28 | """ 29 | 30 | [格式]=""" 31 | ```json 32 | { 33 | "任务1": "xxx", 34 | "任务2": "xxx", 35 | "任务3": "xxx", 36 | "任务4": "xxx", 37 | "任务5": "xxx" 38 | } 39 | ``` 40 | """''' 41 | 42 | user_system_prompt_template_en = '''Please act as a user interacting with a super intelligent agent. 43 | This super intelligent agent has access to a range of external tools and can use these tools to solve the tasks you propose. 44 | Next, please propose 5 tasks that you need the super intelligent agent to solve based on the [Requirements]. 45 | All 5 tasks must require the use of {{{tool}}} from the [Tool List] to be completed, and each task should only require a single call to {{{tool}}}. 46 | The tasks should be specific and diverse. 47 | Finally, please output the final result according to the [Format] without generating any extra text. 48 | 49 | The required parameters for tool {{{tool}}} are: {{{tool_required}}}, and the optional parameters are: {{{tool_no_required}}}. 50 | 51 | [Requirements]=""" 52 | 1. The description of the user's task must include information on all the required parameters needed to call {{{tool}}}. 
For other optional parameters, please add them as you see fit, using natural language.
53 | 2. The user's tasks should use different types of sentence structures: imperative, declarative, interrogative, etc.
54 | 3. The user's tasks should include different tones: colloquial, formal, polite, direct, etc.
55 | 4. Ensure that the length of the user's tasks varies, gradually increasing from short to long.
56 | 5. Ensure that the user's tasks involve different themes/instances, different scenarios, and different roles.
57 | 6. Extract common entities that appear in all descriptions from the [Tool List] and ensure that these entities appear in the user's tasks.
58 | 7. Do not explicitly specify the tool {{{tool}}} in the user's tasks.
59 | """
60 | 
61 | [Tool List]="""
62 | {{{tools}}}
63 | """
64 | 
65 | [Format]="""
66 | ```json
67 | {
68 |     "Task 1": "xxx",
69 |     "Task 2": "xxx",
70 |     "Task 3": "xxx",
71 |     "Task 4": "xxx",
72 |     "Task 5": "xxx"
73 | }
74 | ```
75 | """'''
76 | 
77 | def user_single_tool(messages, tools, request_func):
78 |     tools_ = remove_prepare_ask_tools(tools)
79 |     tool = random.choice(tools_)
80 |     tool_name = tool["function"]["name"]
81 |     tool_required_list = tool["function"]["parameters"]["required"]
82 |     tool_all_properties = list(tool["function"]["parameters"]["properties"].keys())
83 |     tool_no_required = []
84 |     for property in tool_all_properties:
85 |         if property not in tool_required_list:  # membership against the list, not a substring test on the joined string
86 |             tool_no_required.append(property)
87 |     tool_required = ", ".join(tool_required_list)
88 |     tool_no_required = ", ".join(tool_no_required)
89 |     language = os.getenv("LANGUAGE")
90 |     if language == "zh":
91 |         user_system_prompt_template = user_system_prompt_template_zh
92 |     else:
93 |         user_system_prompt_template = user_system_prompt_template_en
94 |     user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \
95 |         .replace("{{{tool}}}", tool_name) \
96 |         .replace("{{{tool_required}}}", tool_required) \
97 |         .replace("{{{tool_no_required}}}", tool_no_required)
98 |     messages_new = [
99 |         {
100 |             "role": "user",
101 |             "content": user_system_prompt
102 |         }
103 |     ]
104 |     res = request_func(messages_new)
105 |     logger.info(f"user_single_tool:\n{res}\n")
106 |     user_task = random_select_answer(res)
107 |     logger.info(f"user_single_tool:\n{user_task}\n")
108 |     user_message = [{"role": "user", "content": user_task}]
109 |     fetch_data = {"task": "user_single_tool", "tools": tools, "env_info": None, "messages": messages_new, "answer": res}
110 |     return user_message, fetch_data
111 | 
-------------------------------------------------------------------------------- /c3_bench/multi_agent/agent/user_vague_answer_ask.py: --------------------------------------------------------------------------------
1 | import json
2 | import os
3 | 
4 | from utils import get_all_tool_info, logger
5 | 
6 | 
7 | user_system_prompt_template_zh = '''请你扮演一个用户,你正在和一个超级智能体进行交互。
8 | 这个超级智能体拥有一个Planner、Agent助手,并具备一系列外部工具,可以使用外部工具解决你提出的任务,具体见[工具列表]。
9 | 根据上下文对话信息,你已经提出了你的任务,但是根据Planner的反馈,你提供的任务信息不足。
10 | 接下来,请你根据最新一轮超级智能体的Agent助手询问的信息进行回复,但是请你不要给出Agent助手要求的必填参数,让超级智能体继续询问你。
11 | 输出格式参考[用户输出格式]。
12 | 
13 | {{{all_tool_required_info}}}
14 | 
15 | [环境信息]="""
16 | {{{env_info}}}
17 | """
18 | 
19 | [用户输出格式]="""
20 | 用户:根据[要求],回复上下文对话信息中最近一轮以 "Agent助手:" 开头的内容(不要重复这句话)
21 | """
22 | 
23 | [要求]="""
24 | 1、回复必须以 "用户:" 开头。
25 | 2、根据上下文对话信息,回复最近一轮以 "Agent助手:" 开头的用户任务。
26 | 3、你的回复不要包含Agent助手所询问的所有必填参数的信息,让超级智能体继续询问你。
27 | 4、你的回复需要使用不同种类的句子结构:祈使句、陈述句、疑问句等。
28 | 5、你的回复应该使用不同的语气:口语化、正式、礼貌、直接等。
29 | 
6、你的回复应该使用不同的长度:有短到长,长度逐渐递增。 30 | """ 31 | 32 | [工具列表]=""" 33 | {{{tools}}} 34 | """''' 35 | 36 | user_system_prompt_template_en = '''Please play the role of a user who is interacting with a super intelligent agent. 37 | This super intelligent agent has a Planner, an Agent assistant, and a series of external tools that can be used to solve the tasks you propose, as detailed in the [Tool List]. 38 | Based on the context of the conversation, you have already proposed your task, but according to the feedback from the Planner, the information you provided is insufficient. 39 | Next, please respond according to the latest round of questions asked by the super intelligent agent's Agent assistant, but do not provide the required parameters requested by the Agent assistant, so that the super intelligent agent continues to inquire. 40 | Refer to the [User Output Format] for the output format. 41 | 42 | {{{all_tool_required_info}}} 43 | 44 | [Environment Information]=""" 45 | {{{env_info}}} 46 | """ 47 | 48 | [User Output Format]=""" 49 | User: According to the [Requirements], respond to the most recent round of conversation information that starts with "Agent Assistant:" (do not repeat this sentence). 50 | """ 51 | 52 | [Requirements]=""" 53 | 1. The response must start with "User:". 54 | 2. Based on the context of the conversation, respond to the most recent user task that starts with "Agent Assistant:". 55 | 3. Your response should not include all the required parameters requested by the Agent assistant, so that the super intelligent agent continues to inquire. 56 | 4. Your response should use different types of sentence structures: imperative sentences, declarative sentences, interrogative sentences, etc. 57 | 5. Your response should use different tones: colloquial, formal, polite, direct, etc. 58 | 6. Your response should vary in length: from short to long, gradually increasing in length. 
59 | """ 60 | 61 | [Tool List]=""" 62 | {{{tools}}} 63 | """''' 64 | 65 | def user_vague_answer_ask(messages, tools, env_info, request_func): 66 | all_tool_name, all_tool_required_info = get_all_tool_info(tools) 67 | language = os.getenv("LANGUAGE") 68 | if language == "zh": 69 | user_system_prompt_template = user_system_prompt_template_zh 70 | else: 71 | user_system_prompt_template = user_system_prompt_template_en 72 | user_system_prompt = user_system_prompt_template.replace("{{{tools}}}", json.dumps(tools, ensure_ascii=False, indent=4)) \ 73 | .replace("{{{env_info}}}", env_info) \ 74 | .replace("{{{all_tool_required_info}}}", all_tool_required_info) 75 | # print(user_system_prompt) 76 | messages_new = [ 77 | { 78 | "role": "system", 79 | "content": user_system_prompt 80 | } 81 | ] 82 | messages_new.extend(messages) 83 | res = request_func(messages_new) 84 | logger.info(f"user_vague_answer_ask:\n{res}\n") 85 | fetch_data = {"task": "user_vague_answer_ask", "tools": tools, "env_info": env_info, "messages": messages_new, "answer": res} 86 | return res, fetch_data 87 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/handle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/c3_bench/multi_agent/handle/__init__.py -------------------------------------------------------------------------------- /c3_bench/multi_agent/handle/api_handle.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from openai import OpenAI 5 | 6 | 7 | class APIMultiTurnMessages: 8 | def __init__(self): 9 | self.client = OpenAI( 10 | api_key=os.getenv("API_KEY"), 11 | base_url=os.getenv("BASE_URL"), 12 | ) 13 | 14 | def request_model(self, messages): 15 | kwargs = { 16 | "messages": messages, 17 | "timeout": 300, 18 | "model": os.getenv("MODEL") 19 | } 20 | api_response = self.client.chat.completions.create(**kwargs) 21 | api_response = json.loads(api_response.json()) 22 | choice = api_response["choices"][0] 23 | message = choice["message"] 24 | text = message["content"] 25 | return text 26 | 27 | 28 | def main(): 29 | handle = APIMultiTurnMessages() 30 | messages = [ 31 | { 32 | "role": "user", 33 | "content": "Hello, who are you?" 
34 |         }
35 |     ]
36 |     print(json.dumps(messages, ensure_ascii=False, indent=4))
37 |     print("---")
38 |     result = handle.request_model(messages)
39 |     print(result)
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     main()
44 | 
-------------------------------------------------------------------------------- /c3_bench/multi_agent/handle/handles.py: --------------------------------------------------------------------------------
1 | from .api_handle import APIMultiTurnMessages
2 | 
3 | 
4 | agent_handle_map = {
5 |     "hunyuan-turbos-latest": APIMultiTurnMessages
6 | }
7 | 
-------------------------------------------------------------------------------- /c3_bench/multi_agent/utils/__init__.py: --------------------------------------------------------------------------------
1 | from .file_utils import read_json_file_to_list, write_json_data_to_file
2 | from .agent_utils import parse_answer, random_select_answer, random_select_answer_cot, get_all_tool_info, get_all_tool_info_for_checker
3 | from .log_utils import logger
4 | from .tool_utils import ask_user_for_help_tool, prepare_to_answer_tool
5 | from .data_process_utils import transform_train_data, remove_prepare_ask_tools
6 | from .time_utils import get_random_date
-------------------------------------------------------------------------------- /c3_bench/multi_agent/utils/agent_utils.py: --------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import random
4 | import re
5 | 
6 | 
7 | def parse_answer(planner_res):
8 |     pattern = "```json(.+?)```"
9 |     planner_res = re.findall(pattern, planner_res, re.S)[0]
10 |     planner_res_obj = json.loads(planner_res)
11 |     return planner_res_obj
12 | 
13 | 
14 | def random_select_answer(user_tasks):
15 |     user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "")
16 |     user_tasks = json.loads(user_tasks)
17 |     task_keys = list(user_tasks.keys())
18 |     task_key = random.choice(task_keys)
19 |     user_task = user_tasks[task_key]
20 |     language = os.getenv("LANGUAGE")
21 |     if language == "zh":
22 |         user_task = "用户:" + user_task
23 |     else:
24 |         user_task = "User: " + user_task
25 | 
26 |     return user_task
27 | 
28 | 
29 | def random_select_answer_cot(user_tasks):
30 |     user_tasks = user_tasks.replace("```json\n", "").replace("\n```", "")
31 |     user_tasks = json.loads(user_tasks)
32 |     task_keys = list(user_tasks.keys())
33 |     task_key = random.choice(task_keys)
34 |     language = os.getenv("LANGUAGE")
35 |     if language == "zh":
36 |         user_task = user_tasks[task_key]["任务描述"]
37 |         user_task = "用户:" + user_task
38 |     else:
39 |         user_task = user_tasks[task_key]["Task Description"]
40 |         user_task = "User: " + user_task
41 |     return user_task
42 | 
43 | 
44 | def get_all_tool_info(tools):
45 |     all_tool_name = []
46 |     all_tool_required_info = []
47 |     for tool in tools:
48 |         tool_name = tool["function"]["name"]
49 |         all_tool_name.append(tool_name)
50 |         tool_required_list = tool["function"]["parameters"]["required"]
51 |         tool_all_properties = list(tool["function"]["parameters"]["properties"].keys())
52 |         tool_no_required = []
53 |         for property in tool_all_properties:
54 |             if property not in tool_required_list:  # membership against the list, not a substring test on the joined string
55 |                 tool_no_required.append(property)
56 |         tool_required = "[" + ", ".join(tool_required_list) + "]"
57 |         tool_no_required = "[" + ", ".join(tool_no_required) + "]"
58 |         language = os.getenv("LANGUAGE")
59 |         if language == "zh":
60 |             tool_required_info = f"工具{tool_name}的必填参数为:{tool_required},非必填参数为:{tool_no_required}"
61 |         else:
62 |             tool_required_info = f"The required parameters for the tool {tool_name} 
are: {tool_required}, and the optional parameters are: {tool_no_required}." 63 | all_tool_required_info.append(tool_required_info) 64 | all_tool_name = ", ".join(all_tool_name) 65 | all_tool_required_info = "\n".join(all_tool_required_info) 66 | return all_tool_name, all_tool_required_info 67 | 68 | 69 | def get_all_tool_info_for_checker(tools): 70 | all_tool_name = [] 71 | all_tool_name_properties_name = {} 72 | all_tool_name_required = {} 73 | for tool in tools: 74 | tool_name = tool["function"]["name"] 75 | all_tool_name.append(tool_name) 76 | tool_properties = list(tool["function"]["parameters"]["properties"].keys()) 77 | all_tool_name_properties_name[tool_name] = tool_properties 78 | 79 | tool_required = tool["function"]["parameters"]["required"] 80 | all_tool_name_required[tool_name] = tool_required 81 | 82 | return all_tool_name, all_tool_name_properties_name, all_tool_name_required 83 | 84 | 85 | if __name__ == "__main__": 86 | tools = [ 87 | { 88 | "type": "function", 89 | "function": { 90 | "name": "getGeocode", 91 | "description": "根据新加坡的地址获取地理编码信息。", 92 | "parameters": { 93 | "type": "object", 94 | "properties": { 95 | "address": { 96 | "type": "string", 97 | "description": "需要查询的新加坡地址。" 98 | }, 99 | "returnGeom": { 100 | "type": "boolean", 101 | "description": "是否返回地理坐标信息,默认为false。" 102 | } 103 | }, 104 | "required": [ 105 | "address" 106 | ] 107 | } 108 | } 109 | }, 110 | { 111 | "type": "function", 112 | "function": { 113 | "name": "getReverseGeocode", 114 | "description": "根据地理坐标获取新加坡的地址信息。", 115 | "parameters": { 116 | "type": "object", 117 | "properties": { 118 | "latitude": { 119 | "type": "float", 120 | "description": "纬度值。" 121 | }, 122 | "longitude": { 123 | "type": "float", 124 | "description": "经度值。" 125 | }, 126 | "buffer": { 127 | "type": "integer", 128 | "description": "搜索半径范围,默认为50米。" 129 | } 130 | }, 131 | "required": [ 132 | "latitude", 133 | "longitude" 134 | ] 135 | } 136 | } 137 | }, 138 | { 139 | "type": "function", 140 | "function": { 141 | "name": "getLocationBasedServices", 142 | "description": "获取新加坡基于位置的服务信息。", 143 | "parameters": { 144 | "type": "object", 145 | "properties": { 146 | "category": { 147 | "type": "string", 148 | "description": "服务类别。" 149 | }, 150 | "location": { 151 | "type": "object", 152 | "description": "位置坐标对象。", 153 | "properties": { 154 | "latitude": { 155 | "type": "float", 156 | "description": "纬度值。" 157 | }, 158 | "longitude": { 159 | "type": "float", 160 | "description": "经度值。" 161 | } 162 | } 163 | }, 164 | "radius": { 165 | "type": "integer", 166 | "description": "搜索半径,默认为500米。" 167 | } 168 | }, 169 | "required": [ 170 | "category", 171 | "location" 172 | ] 173 | } 174 | } 175 | }, 176 | { 177 | "type": "function", 178 | "function": { 179 | "name": "getRoutePlanning", 180 | "description": "提供新加坡的路线规划服务。", 181 | "parameters": { 182 | "type": "object", 183 | "properties": { 184 | "startPoint": { 185 | "type": "object", 186 | "description": "起点坐标。", 187 | "properties": { 188 | "latitude": { 189 | "type": "float", 190 | "description": "起点纬度。" 191 | }, 192 | "longitude": { 193 | "type": "float", 194 | "description": "起点经度。" 195 | } 196 | } 197 | }, 198 | "endPoint": { 199 | "type": "object", 200 | "description": "终点坐标。", 201 | "properties": { 202 | "latitude": { 203 | "type": "float", 204 | "description": "终点纬度。" 205 | }, 206 | "longitude": { 207 | "type": "float", 208 | "description": "终点经度。" 209 | } 210 | } 211 | }, 212 | "mode": { 213 | "type": "string", 214 | "description": "出行模式,默认为'driving'。", 215 | "enum": [ 216 | 
"driving", 217 | "walking", 218 | "cycling" 219 | ] 220 | } 221 | }, 222 | "required": [ 223 | "startPoint", 224 | "endPoint" 225 | ] 226 | } 227 | } 228 | }, 229 | { 230 | "type": "function", 231 | "function": { 232 | "name": "getVisualization", 233 | "description": "支持新加坡数据的可视化展示。", 234 | "parameters": { 235 | "type": "object", 236 | "properties": { 237 | "layer": { 238 | "type": "string", 239 | "description": "需要展示的数据层。" 240 | }, 241 | "theme": { 242 | "type": "string", 243 | "description": "可视化主题,默认为'standard'。" 244 | }, 245 | "zoomLevel": { 246 | "type": "integer", 247 | "description": "地图缩放级别,默认为10。" 248 | } 249 | }, 250 | "required": [ 251 | "layer" 252 | ] 253 | } 254 | } 255 | } 256 | ] 257 | all_tool_name, all_tool_required_info = get_all_tool_info(tools) 258 | print(f"all_tool_name: {all_tool_name}") 259 | print(f"all_tool_required_info: \n{all_tool_required_info}") 260 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/utils/data_process_utils.py: -------------------------------------------------------------------------------- 1 | from utils.agent_utils import parse_answer 2 | 3 | 4 | def remove_prepare_ask_tools(tools): 5 | tools_new = [] 6 | for tool in tools: 7 | tool_name = tool["function"]["name"] 8 | if tool_name in ["prepare_to_answer", "ask_user_for_required_parameters"]: 9 | continue 10 | 11 | tools_new.append(tool) 12 | return tools_new 13 | 14 | 15 | def transform_train_data(messages, tools, env_info): 16 | train_data_example_origin = {"tools": tools, "env_info": env_info, "messages": messages} 17 | FAILED = False 18 | messages_new = [] 19 | for message in messages: 20 | content = message["content"] 21 | if content.startswith("切换角色为") or content.startswith("Switch"): 22 | continue 23 | elif not content.startswith("Checker"): 24 | messages_new.append(message) 25 | else: 26 | content_obj = parse_answer(content) 27 | correct = content_obj["correct"] 28 | if correct == "no": 29 | messages_new = messages_new[:-1] 30 | 31 | train_data_example = {"tools": tools, "env_info": env_info, "messages": messages_new} 32 | return FAILED, train_data_example, train_data_example_origin 33 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def read_json_file_to_list(input_file): 5 | result = [] 6 | with open(input_file) as fin: 7 | for line in fin: 8 | obj = json.loads(line) 9 | result.append(obj) 10 | return result 11 | 12 | 13 | def write_json_data_to_file(data_list, output_file): 14 | fout = open(output_file, "w") 15 | for data in data_list: 16 | fout.write(json.dumps(data, ensure_ascii=False) + "\n") 17 | fout.close() 18 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/utils/log_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | handler = logging.StreamHandler() # 输出到命令行 5 | handler.flush() 6 | logging.basicConfig( 7 | level=logging.INFO, 8 | format="%(asctime)s [%(levelname)s] - %(message)s", 9 | handlers=[handler] 10 | ) 11 | logger = logging.getLogger() 12 | -------------------------------------------------------------------------------- /c3_bench/multi_agent/utils/time_utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | import os 4 | 5 | from 
datetime import datetime
6 | 
7 | 
8 | def get_random_date():
9 |     a1 = (2024, 1, 1, 0, 0, 0, 0, 0, 0)  # start of the sampling window (2024-01-01 00:00:00)
10 |     a2 = (2024, 12, 31, 23, 59, 59, 0, 0, 0)  # end of the sampling window (2024-12-31 23:59:59)
11 | 
12 |     start = time.mktime(a1)  # start timestamp
13 |     end = time.mktime(a2)  # end timestamp
14 | 
15 |     t = random.randint(int(start), int(end))  # draw a random timestamp in [start, end]; mktime returns floats, randint needs ints
16 |     date_tuple = time.localtime(t)  # convert the timestamp to a time tuple
17 |     date = time.strftime("%Y-%m-%d %H:%M:%S", date_tuple)  # format the time tuple as a string, e.g. 2024-05-21 08:30:00
18 |     date_obj = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
19 |     weekday_num = date_obj.weekday()
20 |     language = os.getenv("LANGUAGE")
21 |     if language == "zh":
22 |         weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
23 |     else:
24 |         weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
25 |     weekday = weekdays[weekday_num]
26 |     date = date + " " + weekday
27 |     return date
28 | 
29 | 
30 | if __name__ == "__main__":
31 |     date = get_random_date()
32 |     print(date)
33 | 
-------------------------------------------------------------------------------- /c3_bench/multi_agent/utils/tool_utils.py: --------------------------------------------------------------------------------
1 | ask_user_for_help_tool = {
2 |     "type": "function",
3 |     "function": {
4 |         "name": "ask_user_for_required_parameters",
5 |         "description": "如果你认为用户任务缺失了要调用的工具中的部分必填(required)参数,需要寻求用户帮助,则调用此函数",
6 |         "parameters": {
7 |             "type": "object",
8 |             "properties": {
9 |                 "tool_name": {
10 |                     "type": "string",
11 |                     "description": "解决用户任务需要调用的工具名"
12 |                 },
13 |                 "missing_required_parameters": {
14 |                     "type": "array",
15 |                     "description": "用户任务中缺失的工具必填参数",
16 |                     "items": {
17 |                         "type": "string",
18 |                         "description": "用户任务中缺失的必填参数"
19 |                     }
20 |                 }
21 |             },
22 |             "required": ["tool_name", "missing_required_parameters"]
23 |         }
24 |     }
25 | }
26 | 
27 | prepare_to_answer_tool = {
28 |     "type": "function",
29 |     "function": {
30 |         "name": "prepare_to_answer",
31 |         "description": "根据上下文信息,如果你认为已经可以完成用户任务了,则调用此函数",
32 |         "parameters": {
33 |             "type": "object",
34 |             "properties": {
35 |                 "answer_type": {
36 |                     "type": "string",
37 |                     "description": "回答的类型,如果是根据工具调用结果对用户任务进行总结回答,则填写为tool;如果是用户任务不需要调用任何工具,可以直接回答,则填写chat",
38 |                     "enum": ["tool", "chat"]
39 |                 }
40 |             },
41 |             "required": ["answer_type"]
42 |         }
43 |     }
44 | }
-------------------------------------------------------------------------------- /c3_bench/requirements.txt: --------------------------------------------------------------------------------
1 | ipdb
2 | requests
3 | pandas
4 | openai
5 | jieba==0.42.1
6 | rouge==1.0.1
7 | rouge-chinese==1.0.3
8 | rouge-score==0.1.2
-------------------------------------------------------------------------------- /picture/agent_family.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/agent_family.png -------------------------------------------------------------------------------- /picture/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/compare.png -------------------------------------------------------------------------------- /picture/example.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/example.png -------------------------------------------------------------------------------- /picture/example_zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/example_zh.png -------------------------------------------------------------------------------- /picture/first.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/first.png -------------------------------------------------------------------------------- /picture/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/framework.png -------------------------------------------------------------------------------- /picture/multi_agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/multi_agent.png -------------------------------------------------------------------------------- /picture/multi_agent2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/multi_agent2.png -------------------------------------------------------------------------------- /picture/overall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent-Hunyuan/C3-Benchmark/18d25283333002fbc77e63a39ea234eaa3133411/picture/overall.png --------------------------------------------------------------------------------
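--------------------------------------------------------------------------------
The components above compose into a single generation step: a handle supplies request_model, a user_* agent builds a system prompt from the tool schemas and asks the model for candidate tasks, and the selected task becomes the first user turn. Below is a minimal sketch of that wiring, not a file from the repository: it assumes the script runs from c3_bench/multi_agent (so agent/ and handle/ import as top-level packages, as they do inside the repo) and that API_KEY, BASE_URL and MODEL are exported in the environment. The getGeocode schema is trimmed from the agent_utils.py demo fixture.

import os

from handle.api_handle import APIMultiTurnMessages
from agent.user_single_tool import user_single_tool

os.environ.setdefault("LANGUAGE", "en")  # "zh" selects the Chinese prompt templates

# One OpenAI-style function schema, shortened from the agent_utils.py demo fixture.
tools = [
    {
        "type": "function",
        "function": {
            "name": "getGeocode",
            "description": "Get geocoding information for a Singapore address.",
            "parameters": {
                "type": "object",
                "properties": {
                    "address": {"type": "string", "description": "The Singapore address to query."},
                    "returnGeom": {"type": "boolean", "description": "Whether to return coordinates."}
                },
                "required": ["address"]
            }
        }
    }
]

# APIMultiTurnMessages.request_model matches the request_func shape the user_* agents
# expect: it takes a messages list and returns the model's text completion.
handle = APIMultiTurnMessages()
user_message, fetch_data = user_single_tool([], tools, handle.request_model)
print(user_message)        # e.g. [{"role": "user", "content": "User: ..."}]
print(fetch_data["task"])  # "user_single_tool"

user_multi_tool_parallel and user_multi_tool_serial_parallel slot into the same call shape, since each accepts (messages, tools, request_func) and returns (user_message, fetch_data); user_vague_answer_ask additionally takes an env_info string, such as the output of get_random_date().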