├── .env.example ├── .gitignore ├── GeneralAgent ├── __init__.py ├── agent │ ├── __init__.py │ └── agent.py ├── interpreter │ ├── __init__.py │ ├── applescript_interpreter.py │ ├── interpreter.py │ ├── knowledge_interpreter.py │ ├── python_interpreter.py │ ├── role_interpreter.py │ └── shell_interpreter.py ├── llamaindex.py ├── memory │ ├── __init__.py │ └── normal_memory.py ├── skills │ ├── __init__.py │ ├── file_operation.py │ ├── openai_model.py │ ├── python_envs.py │ ├── token_count.py │ ├── unique_name.py │ └── web_tools.py └── utils.py ├── README.md ├── README_EN.md ├── docs ├── develop.md ├── images │ ├── 2023.11.15.jpg │ ├── 2023_11_27_builder_agent.jpg │ ├── 2023_11_27_image_creator.jpg │ ├── Architecture.png │ ├── Architecture_2023.11.15.png │ ├── general_agent_2024.01.16.png │ ├── self_call.png │ ├── stack_memory.png │ ├── wechat.jpg │ └── wechat_company.jpg └── paper │ └── General_Agent__Self_Call_And_Stack_Memory.pdf ├── examples ├── 0_base_usage.py ├── 10_rag_function.py ├── 11_collection_and_store.py ├── 12_hide_python_code.py ├── 13_image_input.py ├── 14_doubao_llm.py ├── 15_run_check.py ├── 16_test_azure.py ├── 17_qwen.py ├── 18_translate_agent.py ├── 19_temporary_context.py ├── 1_function_call.py ├── 20_load_memory.py ├── 21_market_search.py ├── 2_write_novel.py ├── 3_ai_search.py ├── 3_ai_search_simple.py ├── 4_multi_agents.py ├── 5_serialize.py ├── 6_disable_python_run.py ├── 7_hide_stream.py ├── 8_multi_model.py └── 9_knowledge_files.py ├── pyproject.toml └── test ├── data ├── .gitkeep ├── Nougat.pdf ├── Nougat_piece.pdf ├── a.py ├── hello.py └── test.jpeg ├── pytest.ini ├── test_agent.py ├── test_examples.py ├── test_interpreter_python.py ├── test_link_memory.py ├── test_skills.py ├── test_skills_llm_inference.py ├── test_skills_memory_utils.py └── test_stack_memory.py /.env.example: -------------------------------------------------------------------------------- 1 | # OPENAI API Key or 兼容OpenAI Python SDK 的其他国产大模型API Key 2 | 
OPENAI_API_KEY='your_openai_api_key' 3 | 4 | # OPENAI API 访问地址 or 代理地址 or 兼容OpenAI Python SDK 的其他国产大模型API地址 5 | OPENAI_API_BASE='https://api.openai.com/v1' 6 | 7 | # 默认大模型 8 | DEFAULT_LLM_MODEL='gpt-4o' 9 | 10 | # embedding模型 11 | OPENAI_EMBEDDING_MODEL='text-embedding-3-small' 12 | 13 | # 默认大模型的温度 14 | LLM_TEMPERATURE='0.5' 15 | 16 | AGENT_LOG='info' 17 | 18 | # SERPER API Key(可选) ./examples/3_ai_search.py 需要用到 19 | SERPER_API_KEY='your_serper_api_key' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /knowledge_files 2 | /.vscode 3 | 4 | *.pyc 5 | GeneralAgent/.env 6 | GeneralAgent/*.json 7 | 8 | test/data_0.json 9 | test/data/test_workspace/* 10 | test/tetris.py 11 | test/tetris.py.bak 12 | GeneralAgent/cache_json 13 | test/data/plan_memory.json 14 | test/data/test_interpreter.bin 15 | test/data/b.txt 16 | test/data/a.txt 17 | test/data/hello.pptx 18 | test/.env 19 | 20 | build/* 21 | dist/* 22 | GeneralAgent.egg-info* 23 | test/multi_lines_input/* 24 | test/multi_lines_input/* 25 | .env 26 | examples/memory.json 27 | test/link_memory.json 28 | test/memory.json 29 | test/llm_cache.json 30 | test/summary_memory.json 31 | */llm_cache.json 32 | test/test_skills/data/* 33 | test/test_skills/llm_cache.json 34 | webui/server/server/applications/test_application_id/bot.json 35 | webui/server/server/applications/test_application_id/main.py 36 | data/* 37 | 38 | .idea/* 39 | test/test_skills/code/* 40 | test/data/ui/* 41 | test/code/* 42 | test/data/read_interpreter/* 43 | webui/server/server/ts_builder/src/lib/index.tsx 44 | GeneralAgent/skills/a.jpg 45 | examples/code.bin 46 | examples/normal_memory.json 47 | examples/novel.md 48 | examples/9_knowledge_files/* 49 | ga/* 50 | html/* 51 | .history 52 | -------------------------------------------------------------------------------- /GeneralAgent/__init__.py: 
def default_check(check_content=None):
    """Ask the user on stdin to confirm or override.

    Returns None when the user confirms (empty input / yes / y / 是 / ok),
    otherwise returns the raw text the user typed.
    """
    prompt = "确认 | 继续 (回车, yes, y, 是, ok) 或者 直接输入你的想法\n"
    if check_content is not None:
        prompt = f"{check_content}\n\n{prompt}"
    answer = input(prompt)
    return None if answer.lower() in ("", "yes", "y", "是", "ok") else answer
@hide_python_code: bool, hide python code in output 43 | memory = None 44 | interpreters = [] 45 | output_callback = None 46 | python_run_result = None 47 | run_level = 0 48 | continue_run = True 49 | disable_python_run = False 50 | hide_python_code = False 51 | 52 | def __init__( 53 | self, 54 | role: str = None, 55 | functions: list = [], 56 | knowledge_files=[], 57 | rag_function=None, 58 | workspace: str = None, 59 | model=None, 60 | token_limit=None, 61 | api_key=None, 62 | base_url=None, 63 | self_call=False, 64 | continue_run=False, 65 | output_callback=default_output_callback, 66 | disable_python_run=False, 67 | hide_python_code=False, 68 | messages=[], 69 | **args, 70 | ): 71 | """ 72 | @role: str, Agent角色描述,例如"你是一个小说家",默认为None 73 | 74 | @functions: list, Agent可用的函数(工具)列表,默认为[] 75 | 76 | @knowledge_files: list, 知识库文件列表。当执行delete()函数时,不会删除构建好的知识库(embedding). 77 | 78 | @rag_function: function, RAG function,用于自定义RAG函数,输入参数为chat模式的messages(包含最近一次输入),返回值为字符串. 79 | 80 | @workspace: str, Agent序列化目录地址,如果目录不存在会自动创建,如果workspace不为None,则会从workspace中加载序列化的memory和python代码。默认None表示不序列化,不加载。当knowledge_files不为空时, workspace必须提供 81 | 82 | @model: str, 模型类型,比如"gpt-3.5-turbo", "gpt-4o"等 83 | 84 | @token_limit: int, 模型token限制. None: gpt3.5: 16*1000, gpt4: 128*1000, 其他: 16*1000 85 | 86 | @api_key: str, OpenAI or other LLM API KEY 87 | 88 | @base_url: str, OpenAI or other LLM API BASE URL 89 | 90 | @self_call: bool, 是否开启自我调用(Agent可以写代码来自我调用完成复杂任务), 默认为False. 91 | 92 | @continue_run: bool, 是否自动继续执行。Agent在任务没有完成时,是否自动执行。默认为True. 
93 | 94 | @output_callback: function, 输出回调函数,用于输出Agent的流式输出结果,默认为None,表示使用默认输出函数(skills.output==print) 95 | 96 | @disable_python_run (deprecated) : bool, 是否禁用python运行,默认为False 97 | 98 | @hide_python_code (deprecated) : bool, 是否隐藏python代码,默认为False 99 | 100 | @messages: list, 历史对话列表 101 | 102 | @args: 其他LLM对话参数 103 | 104 | temperature: float, 采样温度 105 | 106 | frequency_penalty: float, 频率惩罚, 在 -2 和 2 之间 107 | 108 | """ 109 | if workspace is None and len(knowledge_files) > 0: 110 | raise Exception( 111 | "workspace must be provided when knowledge_files is not empty" 112 | ) 113 | if workspace is not None and not os.path.exists(workspace): 114 | os.makedirs(workspace) 115 | self.workspace = workspace 116 | self.disable_python_run = disable_python_run 117 | self.hide_python_code = hide_python_code 118 | self.memory = NormalMemory(serialize_path=self._memory_path, messages=messages) 119 | self.role_interpreter = RoleInterpreter(role=role, self_call=self_call) 120 | self.python_interpreter = PythonInterpreter( 121 | self, serialize_path=self._python_path 122 | ) 123 | self.python_interpreter.function_tools = functions 124 | self.model = model or os.environ.get("DEFAULT_LLM_MODEL", "gpt-4o") 125 | self.token_limit = token_limit or 64 * 1000 126 | self.api_key = api_key 127 | self.base_url = base_url 128 | # self.temperature = temperature 129 | # self.frequency_penalty = frequency_penalty 130 | self.llm_args = args 131 | self.continue_run = continue_run 132 | self.knowledge_interpreter = KnowledgeInterpreter( 133 | workspace, knowledge_files=knowledge_files, rag_function=rag_function 134 | ) 135 | self.interpreters = [ 136 | self.role_interpreter, 137 | self.python_interpreter, 138 | self.knowledge_interpreter, 139 | ] 140 | self.enter_index = None # 进入 with 语句时 self.memory.messages 的索引 141 | self.output_callback = output_callback 142 | 143 | def __enter__(self): 144 | self.enter_index = len( 145 | self.memory.get_messages() 146 | ) # Record the index of self.messages 147 | 
    def __exit__(self, exc_type, exc_val, exc_tb):
        # Context-manager exit: roll back messages added since __enter__.
        if exc_type:
            self.clear_temporary_messages()
            # NOTE(review): `handle_exception` is not defined anywhere in this
            # file — unless it is provided elsewhere, this line raises
            # AttributeError. Confirm it exists on Agent.
            self.handle_exception(exc_type, exc_val, exc_tb)
        self.clear_temporary_messages()
        # NOTE(review): on the exception path clear_temporary_messages() runs
        # twice; the second call hits `assert self.enter_index is not None`
        # and raises AssertionError. Presumably the unconditional call belongs
        # in an `else:` branch — confirm the intended rollback semantics.
        return False  # do not suppress the exception
223 | 224 | def enable_python(self): 225 | """ 226 | 启用python运行 227 | """ 228 | self.disable_python_run = False 229 | 230 | def run( 231 | self, 232 | command: Union[str, list], 233 | return_type=str, 234 | display=False, 235 | verbose=True, 236 | user_check=False, 237 | check_render=None, 238 | ): 239 | """ 240 | 执行command命令,并返回return_type类型的结果 241 | 242 | @command: 命令内容, str or list. list: [{'type': 'text', 'text': 'hello world'}, {'type': 'image_url', 'image_url': 'xxxx.jpg'}] 243 | 244 | @return_type: type, 返回类型,默认str. 可以是任意的python类型。 245 | 246 | @display: bool, 是否显示流输出 247 | 248 | @verbose: bool, 是否显示详细输出 249 | 250 | @user_check: bool, 是否需要用户确认命令执行后的结果,默认不需要 251 | 252 | @check_render: function, 检查渲染函数,用于渲染显示给用户的check内容: check_render(result:return_type) -> str 253 | 254 | """ 255 | # 代码调用agent执行,直接run_level+1 256 | self.run_level += 1 257 | if not display: 258 | self.disable_output_callback() 259 | try: 260 | result = self._run(command, return_type=return_type, verbose=verbose) 261 | return result 262 | except Exception as e: 263 | logging.exception(e) 264 | return str(e) 265 | finally: 266 | self.run_level -= 1 267 | if not display: 268 | self.enable_output_callback() 269 | 270 | def user_input(self, input: Union[str, list], verbose=True): 271 | """ 272 | Agent接收用户输入 273 | 274 | :input: 用户输入内容, str类型 or list: [{'type': 'text', 'text': 'hello world'}, {'type': 'image_url', 'image_url': 'xxxx.jpg'}] 275 | """ 276 | from GeneralAgent import skills 277 | 278 | result = self._run(input, verbose=verbose) 279 | if self.continue_run and self.run_level == 0: 280 | # 判断是否继续执行 281 | messages = self.memory.get_messages() 282 | messages = cut_messages(messages, 2 * 1000) 283 | the_prompt = "对于当前状态,无需用户输入或者确认,继续执行任务,请回复yes,其他情况回复no" 284 | messages += [{"role": "system", "content": the_prompt}] 285 | response = skills.llm_inference( 286 | messages, 287 | model="smart", 288 | stream=False, 289 | api_key=self.api_key, 290 | base_url=self.base_url, 291 | **self.llm_args, 292 | 
) 293 | if "yes" in response.lower(): 294 | result = self.run("ok") 295 | return result 296 | 297 | def _run(self, input, return_type=str, verbose=False): 298 | """ 299 | agent run: parse input -> get llm messages -> run LLM and parse output 300 | 301 | @input: str, user's new input, None means continue to run where it stopped 302 | 303 | @return_type: type, return type, default str 304 | 305 | @verbose: bool, verbose mode 306 | """ 307 | 308 | result = "" 309 | 310 | def local_output(token): 311 | nonlocal result 312 | if token is not None: 313 | result += token 314 | else: 315 | result += "\n" 316 | if self.output_callback is not None: 317 | self.output_callback(token) 318 | 319 | if self.run_level != 0: 320 | if return_type == str: 321 | add_content = "Directly answer the question, no need to run python\n" 322 | # add_content 在前面 323 | if isinstance(input, list): 324 | input = [add_content] + input 325 | else: 326 | input = add_content + input 327 | else: 328 | add_content = ( 329 | "\nYou should return python values in type " 330 | + str(return_type) 331 | + " by run python code(```python\n#run code\nxxx\n).\n" 332 | ) 333 | # add_content 在后面 334 | if isinstance(input, list): 335 | input = input + [add_content] 336 | else: 337 | input = input + add_content 338 | self._memory_add_input(input) 339 | 340 | try_count = 0 341 | while True: 342 | messages = self._get_llm_messages() 343 | output_stop = self._llm_and_parse_output(messages, local_output, verbose) 344 | if output_stop: 345 | local_output(None) 346 | if self.python_run_result is not None: 347 | result = self.python_run_result 348 | self.python_run_result = None 349 | if return_type == str: 350 | return result 351 | if type(result) != return_type and try_count < 1: 352 | logging.info("return type should be: return_type") 353 | try_count += 1 354 | self._memory_add_input("return type should be " + str(return_type)) 355 | result = "" 356 | continue 357 | return result 358 | 359 | def _memory_add_input(self, 
input): 360 | # 记忆添加用户输入 361 | self.memory.add_message("user", input) 362 | 363 | def _get_llm_messages(self): 364 | # 获取记忆 + prompt 365 | messages = self.memory.get_messages() 366 | if self.disable_python_run: 367 | prompt = "\n\n".join( 368 | [ 369 | interpreter.prompt(messages) 370 | for interpreter in self.interpreters 371 | if interpreter.__class__ != PythonInterpreter 372 | ] 373 | ) 374 | else: 375 | prompt = "\n\n".join( 376 | [interpreter.prompt(messages) for interpreter in self.interpreters] 377 | ) 378 | # 动态调整记忆长度 379 | prompt_count = string_token_count(prompt) 380 | left_count = int(self.token_limit * 0.9) - prompt_count 381 | messages = cut_messages(messages, left_count) 382 | # 组合messages 383 | messages = [{"role": "system", "content": prompt}] + messages 384 | return messages 385 | 386 | def _llm_and_parse_output(self, messages, output_callback, verbose): 387 | outputer = _PythonCodeFilter(output_callback, verbose) 388 | from GeneralAgent import skills 389 | 390 | try: 391 | result = "" 392 | is_stop = True 393 | is_break = False 394 | response = skills.llm_inference( 395 | messages, 396 | model=self.model, 397 | stream=True, 398 | api_key=self.api_key, 399 | base_url=self.base_url, 400 | **self.llm_args, 401 | ) 402 | message_id = None 403 | for token in response: 404 | if token is None: 405 | break 406 | result += token 407 | outputer.process_text(token) 408 | interpreter: Interpreter = None 409 | for interpreter in self.interpreters: 410 | if ( 411 | self.disable_python_run 412 | and interpreter.__class__ == PythonInterpreter 413 | ): 414 | continue 415 | if interpreter.output_match(result): 416 | logging.debug("interpreter: " + interpreter.__class__.__name__) 417 | message_id = self.memory.add_message("assistant", result) 418 | self.memory.push_stack() 419 | output, is_stop = interpreter.output_parse(result) 420 | if self.python_run_result is not None: 421 | output = output.strip() 422 | if len(output) > 50000: 423 | output = output[:50000] + 
"..." 424 | self.memory.pop_stack() 425 | message_id = self.memory.append_message( 426 | "assistant", "\n" + output + "\n", message_id=message_id 427 | ) 428 | result = "" 429 | # if is_stop: 430 | outputer.process_text(None) 431 | outputer.process_text("```output\n" + output + "\n```\n") 432 | if interpreter.__class__ == PythonInterpreter: 433 | outputer.exit_python_code() 434 | is_break = True 435 | break 436 | if is_break: 437 | break 438 | if len(result) > 0: 439 | message_id = self.memory.add_message("assistant", result) 440 | outputer.flush() 441 | return is_stop 442 | except Exception as e: 443 | logging.exception(e) 444 | outputer.process_text(str(e)) 445 | outputer.flush() 446 | return True 447 | 448 | def clear(self): 449 | """ 450 | 清除: 删除memory和python序列化文件。不会删除workspace和知识库。 451 | """ 452 | if self._memory_path is not None and os.path.exists(self._memory_path): 453 | os.remove(self._memory_path) 454 | if self._python_path is not None and os.path.exists(self._python_path): 455 | os.remove(self._python_path) 456 | self.memory = NormalMemory(serialize_path=self._memory_path) 457 | self.python_interpreter = PythonInterpreter( 458 | self, serialize_path=self._python_path 459 | ) 460 | 461 | def clear_temporary_messages(self): 462 | """ 463 | 清除: 临时产生的数据 464 | """ 465 | assert self.enter_index is not None 466 | self.memory.recover(self.enter_index) 467 | self.enter_index = None 468 | 469 | 470 | class _PythonCodeFilter: 471 | """ 472 | Python代码过滤器,用于隐藏Python代码块 473 | """ 474 | 475 | def __init__(self, output_callback, verbose): 476 | """ 477 | 构造函数 478 | 479 | @output_callback: 输出回调函数 480 | 481 | @verbose: 是否显示详细输出 482 | """ 483 | self.verbose = verbose 484 | self.in_python_code = False 485 | self.buffer = "" 486 | self.output_callback = output_callback 487 | 488 | def process_text(self, text): 489 | """ 490 | 处理输入问题 491 | """ 492 | if self.verbose: 493 | self.output_callback(text) 494 | else: 495 | if text is None: 496 | self.flush() 497 | 
self.output_callback(None) 498 | else: 499 | if not self.in_python_code: 500 | self.buffer += text 501 | self._process_buffer() 502 | 503 | def exit_python_code(self): 504 | """ 505 | 退出python代码块 506 | """ 507 | self.in_python_code = False 508 | 509 | def _process_buffer(self): 510 | format = "```python\n#run code\n" 511 | if self.buffer.endswith(format): 512 | self.in_python_code = True 513 | self.buffer = "" # 清空缓冲区,因为我们不打印```python 514 | elif "```" in self.buffer and not self.in_python_code: 515 | # 清空```之前的内容 516 | index = self.buffer.rfind("```") 517 | if index != -1: 518 | self.output_callback(self.buffer[:index]) 519 | self.buffer = self.buffer[index:] 520 | # 如果缓冲区太大,就表示不是python代码块,直接输出 521 | if len(self.buffer) > len(format): 522 | self.flush() 523 | else: 524 | self.output_callback(self.buffer) 525 | self.buffer = "" 526 | 527 | def flush(self): 528 | if self.buffer: 529 | self.output_callback(self.buffer) 530 | self.buffer = "" 531 | -------------------------------------------------------------------------------- /GeneralAgent/interpreter/__init__.py: -------------------------------------------------------------------------------- 1 | from .interpreter import Interpreter 2 | from .role_interpreter import RoleInterpreter 3 | from .python_interpreter import PythonInterpreter 4 | from .knowledge_interpreter import KnowledgeInterpreter 5 | from .applescript_interpreter import AppleScriptInterpreter 6 | from .shell_interpreter import ShellInterpreter -------------------------------------------------------------------------------- /GeneralAgent/interpreter/applescript_interpreter.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .interpreter import Interpreter 3 | 4 | applescript_prompt = """ 5 | # Run applescript 6 | * Here are the commands 7 | ```applescript 8 | 9 | ``` 10 | * the command will be executed if in macOS computer. 
class AppleScriptInterpreter(Interpreter):
    """Runs ```applescript fenced blocks from LLM output via osascript (macOS)."""

    # Raw string so the regex escapes are explicit (behavior unchanged).
    output_match_pattern = r'```(\n)?applescript(.*?)\n```'

    def prompt(self, messages) -> str:
        return applescript_prompt

    def output_parse(self, string):
        """Extract the applescript block from `string`, execute it, and
        return (output_text, True) — True means the agent should stop."""
        pattern = re.compile(self.output_match_pattern, re.DOTALL)
        match = pattern.search(string)
        assert match is not None
        sys_out = self._run_applescript(match.group(2))
        return sys_out.strip(), True

    def _run_applescript(self, content):
        """Execute `content` through `osascript`. Returns the combined
        stdout/stderr text, or 'run successfully' when there was no output."""
        # Escape double quotes so the script survives shell quoting.
        # NOTE(review): shell=True with interpolated LLM-generated text is
        # injection-prone; kept because the escaping scheme depends on it.
        content = content.replace('"', '\\"')
        import subprocess

        try:
            p = subprocess.Popen(
                'osascript -e "{}"'.format(content),
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            sys_out, _ = p.communicate()
        except OSError as e:
            # Fixed: the original used `except: pass` followed by a `finally`
            # that referenced `p`, raising NameError when Popen itself failed.
            return 'osascript failed to start: {}'.format(e)
        sys_out = sys_out.decode('utf-8').strip()
        if sys_out == '':
            sys_out = 'run successfully'
        return sys_out
class KnowledgeInterpreter(Interpreter):
    """
    Knowledge-base interpreter: grounds answers with context retrieved from a
    llama index built over local/remote files and/or a user-supplied RAG hook.
    """

    def __init__(self, workspace, knowledge_files=None, rag_function=None) -> None:
        """
        @param workspace: working directory (the index lives under <workspace>/llama)
        @param knowledge_files: local paths or http(s) URLs of knowledge files,
               any format the llama library supports
        @param rag_function: callable(messages) -> str returning extra context
        """
        # Fixed: the default [] was a shared mutable default argument.
        knowledge_files = [] if knowledge_files is None else knowledge_files
        self.workspace = workspace
        self.knowledge_files = knowledge_files
        self.rag_function = rag_function
        # True when this interpreter has anything to contribute.
        self.work = len(knowledge_files) > 0 or (rag_function is not None)

        if len(knowledge_files) > 0:
            self._create_index()
        else:
            self.index = None

    def _create_index(self):
        """
        Build (or reload) the llama index under <workspace>/llama. The index
        is rebuilt only when the set of knowledge files has changed.
        """
        llama_dir = os.path.join(self.workspace, 'llama')
        meta_path = os.path.join(llama_dir, 'meta.json')
        data_dir = os.path.join(llama_dir, 'data')
        storage_dir = os.path.join(llama_dir, 'storage')

        # exist_ok avoids the check-then-create race of the original code.
        os.makedirs(llama_dir, exist_ok=True)
        os.makedirs(data_dir, exist_ok=True)
        os.makedirs(storage_dir, exist_ok=True)

        # Decide whether a rebuild is needed: compare the recorded file set
        # with the current one.
        files_change = False
        if os.path.exists(meta_path):
            with open(meta_path, 'r') as f:
                meta = json.load(f)
            if set(meta['knowledge_files']) != set(self.knowledge_files):
                files_change = True
        else:
            files_change = True

        if files_change:
            # Clear the data dir, then copy/download every knowledge file.
            for file in os.listdir(data_dir):
                os.remove(os.path.join(data_dir, file))
            for file in self.knowledge_files:
                if file.startswith('http'):
                    # Remote file: download it into the data dir.
                    import requests
                    # NOTE(review): no timeout and no status-code check — a
                    # dead URL hangs or stores an error page. Confirm this
                    # best-effort behavior is intended.
                    res = requests.get(file)
                    file_name = file.split('/')[-1]
                    with open(os.path.join(data_dir, file_name), 'wb') as f:
                        f.write(res.content)
                else:
                    file_name = os.path.basename(file)
                    shutil.copy(file, os.path.join(data_dir, file_name))
            self.index = create_llamaindex(data_dir, storage_dir)
            with open(meta_path, 'w') as f:
                json.dump({'knowledge_files': self.knowledge_files}, f)
        else:
            self.index = load_llamaindex(storage_dir)

    def prompt(self, messages) -> str:
        """Return 'Background:' context assembled from the index and/or the
        rag_function; empty string when there is nothing to contribute."""
        if len(messages) == 0:
            return ''
        if len(self.knowledge_files) == 0 and self.rag_function is None:
            return ''
        background = 'Background:'
        if len(self.knowledge_files) > 0:
            background += query_llamaindex(self.index, messages)
        if self.rag_function is not None:
            background += '\n' + self.rag_function(messages)
        return background
def get_function_signature(func, module: str = None):
    """Return a one-line description of *func*: name, call signature and docstring.

    @func: the callable to describe
    @module: optional module name; when given the result is prefixed as 'module.name(...)'
    @return: description string, or '' when introspection fails (best effort:
             an undescribable tool must not break prompt building)
    """
    import inspect
    import logging

    try:
        sig_str = str(inspect.signature(func))
        desc = f"{func.__name__}{sig_str}"
        if func.__doc__:
            desc += ": " + func.__doc__.strip()
        if module is not None:
            desc = f"{module}.{desc}"
        # NOTE(review): coroutine functions previously got a no-op '' prefix here
        # (`desc = "" + desc`), probably a lost "async " marker; the dead code was
        # removed without changing behavior — confirm whether an async marker is wanted.
        return desc
    except Exception as e:
        logging.exception(e)
        return ""
74 | - Example: 75 | ```python 76 | #show code 77 | print('Hello, world!') 78 | ``` 79 | """ 80 | 81 | function_tools = [] 82 | 83 | def __init__( 84 | self, 85 | agent=None, 86 | serialize_path: str = None, 87 | libs: str = "", 88 | import_code: str = None, 89 | prompt_append="", 90 | stop_wrong_count=3, 91 | ): 92 | """ 93 | @serialize_path (str): python解释器的序列化路径,如果为None,则不序列化。举例: './python_interpreter.bin' or 'serialized.pkl' 94 | @lib (str, optional): 可以使用的库 95 | @import_code (str, optional): code to import. The tools used should be imported. Defaults to default_import_code. 96 | @prompt_append: append to the prompt, custom prompt can be added here 97 | @stop_wrong_count: stop running when the code is wrong for stop_wrong_count times 98 | """ 99 | self.globals = {} # global variables shared by all code 100 | self.agent = agent 101 | self.python_libs = libs 102 | self.import_code = import_code or default_import_code 103 | self.serialize_path = serialize_path 104 | self.prompt_append = prompt_append 105 | # self.tools = tools or Tools([]) 106 | self.globals = self.load() 107 | # count the number of times the code is wrong, and stop running when it reaches the threshold 108 | self.run_wrong_count = 0 109 | self.stop_wrong_count = stop_wrong_count 110 | 111 | def load(self): 112 | if self.serialize_path is None: 113 | return {} 114 | if os.path.exists(self.serialize_path): 115 | with open(self.serialize_path, "rb") as f: 116 | data = pickle.loads(f.read()) 117 | return data["globals"] 118 | return {} 119 | 120 | def prompt(self, messages) -> str: 121 | funtions = "\n\n".join([get_function_signature(x) for x in self.function_tools]) 122 | variables = { 123 | "python_libs": self.python_libs, 124 | "python_funcs": funtions, 125 | "python_version": get_python_version(), 126 | } 127 | return ( 128 | Template(self.python_prompt_template).render(**variables) 129 | + self.prompt_append 130 | ) 131 | 132 | def save(self): 133 | if self.serialize_path is None: 134 | return 
135 | save_globals = self._remove_unpickleable() 136 | # save 137 | with open(self.serialize_path, "wb") as f: 138 | data = {"globals": save_globals} 139 | f.write(pickle.dumps(data)) 140 | 141 | def _remove_unpickleable(self): 142 | save_globals = self.globals.copy() 143 | if "__builtins__" in save_globals: 144 | save_globals.__delitem__("__builtins__") 145 | keys = list(save_globals.keys()) 146 | for key in keys: 147 | try: 148 | pickle.dumps(save_globals[key]) 149 | except Exception: 150 | save_globals.__delitem__(key) 151 | return save_globals 152 | 153 | def output_parse(self, string) -> (str, bool): 154 | pattern = re.compile(self.output_match_pattern, re.DOTALL) 155 | match = pattern.search(string) 156 | assert match is not None 157 | result, stop = self.run_code(match.group(1)) 158 | result = ( 159 | "\nThe execution of the python code is completed, and the result is as follows:\n" 160 | + result 161 | + "\n" 162 | ) 163 | return result, stop 164 | 165 | def run_code(self, code): 166 | code = self.import_code + "\n" + code 167 | logging.debug(code) 168 | 169 | output = io.StringIO() 170 | sys.stdout = output 171 | 172 | try: 173 | if self.agent is not None: 174 | self.agent.run_level += 1 175 | if self.agent is not None: 176 | self.globals["agent"] = self.agent 177 | for fun in self.function_tools: 178 | # partial function default is remote function 179 | if isinstance(fun, partial): 180 | name = fun.args[0] 181 | else: 182 | name = fun.__name__ 183 | self.globals[name] = fun 184 | result = skills._exec(code, self.globals) 185 | self.run_wrong_count = 0 186 | stop = True 187 | # 出现了自我调用,则判断一下层级,如果层级为1,则停止 188 | if self.agent is not None: 189 | stop = self.agent.run_level >= 1 190 | self.agent.python_run_result = result 191 | if result is None: 192 | result = output.getvalue() 193 | else: 194 | if output.getvalue().strip() != "": 195 | result = output.getvalue() + "\n" + str(result) 196 | return str(result), stop 197 | except Exception as e: 198 | 
logging.exception(e) 199 | import traceback 200 | 201 | error = traceback.format_exc() 202 | self.run_wrong_count += 1 203 | if self.run_wrong_count >= self.stop_wrong_count: 204 | raise e 205 | return error, False 206 | finally: 207 | self.save() 208 | sys.stdout = sys.__stdout__ 209 | if self.agent is not None: 210 | self.agent.run_level -= 1 211 | 212 | def get_variable(self, name): 213 | if name in self.globals: 214 | return self.globals[name] 215 | else: 216 | logging.warning(f"Variable {name} not found") 217 | return None 218 | 219 | def set_variable(self, name, value): 220 | self.globals[name] = value 221 | -------------------------------------------------------------------------------- /GeneralAgent/interpreter/role_interpreter.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from jinja2 import Template 3 | from .interpreter import Interpreter 4 | 5 | default_system_role = """ 6 | Current Time: {{now}} 7 | You are an agent on the computer, tasked with assisting users in resolving their issues. 8 | You have the capability to control the computer and access the internet. 9 | All code in ```python ``` will be automatically executed by the system. So if you don't need to run the code, please don't write it in the code block. 10 | All responses should be formatted using markdown. For file references, use the format [title](a.txt), with all files stored in the './' directory. 11 | When result file is ready, provide it to the user with donwload link. 12 | """ 13 | 14 | class RoleInterpreter(Interpreter): 15 | """ 16 | RoleInterpreter, a interpreter that can change the role of the agent. 17 | Note: This should be the first interpreter in the agent. 18 | """ 19 | 20 | def __init__(self, system_role=None, self_call=False, search_functions=False, role:str=None) -> None: 21 | """ 22 | prompt = system_role | default_system_role + role 23 | @system_role: str, 系统角色. 
如果为None,则使用默认系统角色 24 | @self_call: bool, 是否开启自调用 25 | @search_functions: bool, 是否开启搜索功能 26 | @role: str, 用户角色 27 | """ 28 | self.system_role = system_role 29 | self.self_control = self_call 30 | self.search_functions = search_functions 31 | self.role = role 32 | 33 | def prompt(self, messages) -> str: 34 | if self.system_role is not None: 35 | prompt = self.system_role 36 | else: 37 | prompt = Template(default_system_role).render(now=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')) 38 | if self.role is not None: 39 | prompt += '\n\n' + self.role 40 | return prompt -------------------------------------------------------------------------------- /GeneralAgent/interpreter/shell_interpreter.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .interpreter import Interpreter 3 | 4 | shell_prompt = """ 5 | # Run shell 6 | * format is : ```shell\\nthe_command\\n``` 7 | * the command will be executed 8 | """ 9 | 10 | class ShellInterpreter(Interpreter): 11 | output_match_pattern = '```shell\n(.*?)\n```' 12 | 13 | def __init__(self, workspace='./') -> None: 14 | self.workspace = workspace 15 | 16 | def prompt(self, messages) -> str: 17 | return shell_prompt 18 | 19 | def output_parse(self, string) -> (str, bool): 20 | pattern = re.compile(self.output_match_pattern, re.DOTALL) 21 | match = pattern.search(string) 22 | assert match is not None 23 | output = self._run_bash(match.group(1)) 24 | return output.strip(), True 25 | 26 | def _run_bash(self, content): 27 | sys_out = '' 28 | import subprocess 29 | if 'python ' in content: 30 | content = content.replace('python ', 'python3 ') 31 | try: 32 | p = subprocess.Popen(content, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 33 | except: 34 | pass 35 | finally: 36 | sys_out, err = p.communicate() 37 | sys_out = sys_out.decode('utf-8') 38 | return sys_out -------------------------------------------------------------------------------- 
/GeneralAgent/llamaindex.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import logging 4 | from typing import Any, List 5 | from llama_index.core import Settings 6 | from llama_index.core.embeddings import BaseEmbedding 7 | from llama_index.core import (VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage) 8 | 9 | 10 | # 使用: https://github.com/run-llama/llama_index 库构建知识库索引 11 | # 默认使用 GeneralAgent.skills 中 embedding_texts 函数来embedding,你可以重写 embedding_texts 函数 12 | 13 | # def new_embedding_texts(texts) -> [[float]]: 14 | # """ 15 | # 对文本数组进行embedding 16 | # """ 17 | # import os 18 | # client = _get_openai_client() 19 | # model = os.environ.get('EMBEDDING_MODEL', 'text-embedding-3-small') 20 | # resp = client.embeddings.create(input=texts, model=model) 21 | # result = [x.embedding for x in resp.data] 22 | # return result 23 | # from GeneralAgent import skills 24 | # skills.embedding_texts = new_embedding_texts 25 | 26 | class CustomEmbeddings(BaseEmbedding): 27 | def __init__( 28 | self, 29 | **kwargs: Any, 30 | ) -> None: 31 | super().__init__(**kwargs) 32 | 33 | @classmethod 34 | def class_name(cls) -> str: 35 | return "CustomEmbeddings" 36 | 37 | async def _aget_query_embedding(self, query: str) -> List[float]: 38 | return self._get_query_embedding(query) 39 | 40 | async def _aget_text_embedding(self, text: str) -> List[float]: 41 | return self._get_text_embedding(text) 42 | 43 | def _get_query_embedding(self, query: str) -> List[float]: 44 | from GeneralAgent import skills 45 | return skills.embedding_texts([query])[0] 46 | 47 | def _get_text_embedding(self, text: str) -> List[float]: 48 | from GeneralAgent import skills 49 | return skills.embedding_texts([text])[0] 50 | 51 | def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: 52 | from GeneralAgent import skills 53 | return skills.embedding_texts(texts) 54 | 55 | embed_model = 
CustomEmbeddings(embed_batch_size=16) 56 | Settings.embed_model = embed_model 57 | 58 | 59 | def create_llamaindex(data_dir, storage_dir, limit_count=1000000): 60 | """ 61 | 创建llamaindex索引 62 | @param data_dir: 数据目录 63 | @param storage_dir: 存储目录 64 | @param limit_count: 限制的token数量 65 | """ 66 | documents = SimpleDirectoryReader(data_dir).load_data() 67 | # 限制token数量 68 | total_count = 0 69 | for doc in documents: 70 | total_count += len(doc.get_content()) 71 | # 英文下,一个单词多个字母,所以乘以4 72 | if total_count > limit_count * 4: 73 | return None 74 | index = VectorStoreIndex.from_documents(documents) 75 | index.storage_context.persist(persist_dir=storage_dir) 76 | return index 77 | 78 | 79 | def load_llamaindex(storage_dir): 80 | """ 81 | 从存储中加载索引 82 | """ 83 | storage_context = StorageContext.from_defaults(persist_dir=storage_dir) 84 | index = load_index_from_storage(storage_context) 85 | return index 86 | 87 | 88 | def _get_last_text_query(messages): 89 | if len(messages) == 0: 90 | return '' 91 | for index in range(len(messages) - 1, -1, -1): 92 | content = messages[index]['content'] 93 | if isinstance(content, str): 94 | return content 95 | elif isinstance(content, list): 96 | for item in content: 97 | if item['type'] == 'text': 98 | return item['text'] 99 | return '' 100 | 101 | 102 | def query_llamaindex(index, messages): 103 | query = _get_last_text_query(messages) 104 | nodes = index.as_retriever().retrieve(query) 105 | return '\n\n'.join([node.get_text() for node in nodes]) 106 | 107 | 108 | def retrieve_knowlege(storage_dir, messages) -> list: 109 | """ 110 | 从知识库中检索,返回检索结果 111 | @param query_str: 检索字符串 112 | @return: 检测结果,list of string 113 | """ 114 | if len(messages) == 0: 115 | logging.info('messages is empty') 116 | return '' 117 | if not os.path.exists(storage_dir): 118 | logging.info(f'storage_dir {storage_dir} not exists') 119 | return '' 120 | query = _get_last_text_query(messages) 121 | index = load_llamaindex(storage_dir) 122 | return 
query_llamaindex(index, query) -------------------------------------------------------------------------------- /GeneralAgent/memory/__init__.py: -------------------------------------------------------------------------------- 1 | # import 2 | from .normal_memory import NormalMemory -------------------------------------------------------------------------------- /GeneralAgent/memory/normal_memory.py: -------------------------------------------------------------------------------- 1 | # Memeory 2 | import json 3 | import os 4 | import logging 5 | from GeneralAgent.utils import encode_image 6 | 7 | class NormalMemory: 8 | def __init__(self, serialize_path='./memory.json', messages=[]): 9 | """ 10 | @serialize_path: str, 序列化路径,默认为'./memory.json'。如果为None,则使用内存存储 11 | """ 12 | self.messages = [] 13 | self.serialize_path = serialize_path 14 | if serialize_path is not None: 15 | if os.path.exists(serialize_path): 16 | with open(serialize_path, 'r', encoding='utf-8') as f: 17 | self.messages = json.load(f) 18 | if len(messages) > 0: 19 | self._validate_messages(messages) 20 | # 将 messages 的内容拼到 self.messages 后面 21 | self.messages += messages 22 | 23 | def save(self): 24 | if self.serialize_path is not None: 25 | with open(self.serialize_path, 'w', encoding='utf-8') as f: 26 | json.dump(self.messages, f) 27 | 28 | def push_stack(self): 29 | pass 30 | 31 | def pop_stack(self): 32 | pass 33 | 34 | def add_message(self, role, content): 35 | """ 36 | add a new message 37 | @role: str, 'user' or 'assistant' 38 | @content: str, message content 39 | return message id 40 | """ 41 | assert role in ['user', 'system', 'assistant'] 42 | if isinstance(content, list): 43 | r = [] 44 | for c in content: 45 | if isinstance(c, dict): 46 | if 'image' in c: 47 | r.append({'type': 'image_url', 'image_url': {'url': encode_image(c['image'])}}) 48 | elif 'text' in c: 49 | r.append({'type': 'text', 'text': c['text']}) 50 | else: 51 | raise Exception('message type wrong') 52 | else: 53 | 
r.append({'type': 'text', 'text': c}) 54 | self.messages.append({'role': role, 'content': r}) 55 | else: 56 | self.messages.append({'role': role, 'content': content}) 57 | self.save() 58 | 59 | def append_message(self, role, content, message_id=None): 60 | """ 61 | append a message. when message_id is not None, append to the message with message_id and move it to the end 62 | @role: str, 'user' or 'assistant' 63 | @content: str, message content 64 | return message id 65 | """ 66 | # self.show_messages() 67 | assert role in ['user', 'assistant'] 68 | if message_id is not None: 69 | assert message_id >= 0 and message_id < len(self.messages) 70 | assert self.messages[message_id]['role'] == role 71 | self.messages[message_id]['content'] += '\n' + content 72 | # self.messages.append(self.messages.pop(message_id)) 73 | self.messages = self.messages[:message_id+1] 74 | self.save() 75 | # self.show_messages() 76 | return len(self.messages) - 1 77 | else: 78 | if len(self.messages) > 0 and self.messages[-1]['role'] == role: 79 | self.messages[-1]['content'] += '\n' + content 80 | else: 81 | self.messages.append({'role': role, 'content': content}) 82 | self.save() 83 | # self.show_messages() 84 | return len(self.messages) - 1 85 | 86 | # 恢复 message 数据, [: index] 87 | def recover(self, index): 88 | """ 89 | recover the messages to the index 90 | """ 91 | self.messages = self.messages[:index] 92 | self.save() 93 | 94 | def get_messages(self): 95 | return self.messages 96 | 97 | def __str__(self): 98 | return json.dumps(self.messages, indent=4) 99 | 100 | def show_messages(self): 101 | logging.info('-' * 50 + '' + '-' * 50) 102 | for message in self.messages: 103 | logging.info('[[' + message['role'] + ']]: ' + message['content'][:100]) 104 | logging.info('-' * 50 + '' + '-' * 50) 105 | 106 | def _validate_messages(self, messages): 107 | """ 108 | Validate each message in the messages. 
109 | @messages (list): List of messages where each message is a dict with 'role' and 'content'. 110 | Raises: 111 | AssertionError: If any message does not conform to the required format ('message format wrong'). 112 | """ 113 | for message in messages: 114 | assert isinstance(message, dict), 'message format wrong' 115 | assert 'role' in message, 'message format wrong' 116 | assert 'content' in message, 'message format wrong' 117 | assert message['role'] in ['user', 'assistant'], 'message format wrong' 118 | 119 | 120 | def test_NormalMemory(): 121 | serialize_path = './memory.json' 122 | mem = NormalMemory(serialize_path=serialize_path) 123 | mem.add_message('user', 'hello') 124 | mem.add_message('assistant', 'hi') 125 | mem = NormalMemory(serialize_path=serialize_path) 126 | assert len(mem.get_messages()) == 2 127 | mem.append_message('assistant', 'hi') 128 | assert len(mem.get_messages()) == 2 -------------------------------------------------------------------------------- /GeneralAgent/skills/__init__.py: -------------------------------------------------------------------------------- 1 | # 单列 2 | import os 3 | from codyer import skills 4 | 5 | 6 | def default_output_callback(token): 7 | if token is not None: 8 | print(token, end="", flush=True) 9 | else: 10 | print("\n", end="", flush=True) 11 | 12 | 13 | def default_check(check_content=None): 14 | show = "确认 | 继续 (回车, yes, y, 是, ok) 或者 直接输入你的想法\n" 15 | if check_content is not None: 16 | show = f"{check_content}\n\n{show}" 17 | response = input(show) 18 | if response.lower() in ["", "yes", "y", "是", "ok"]: 19 | return None 20 | else: 21 | return response 22 | 23 | 24 | def load_functions_with_path(python_code_path) -> tuple[list, str]: 25 | """ 26 | Load functions from python file 27 | @param python_code_path: the path of python file 28 | @return: a list of functions and error message (if any, else None) 29 | """ 30 | try: 31 | import importlib.util 32 | import inspect 33 | 34 | # 指定要加载的文件路径和文件名 35 | 
module_name = "skills" 36 | module_file = python_code_path 37 | 38 | # 使用importlib加载文件 39 | spec = importlib.util.spec_from_file_location(module_name, module_file) 40 | module = importlib.util.module_from_spec(spec) 41 | spec.loader.exec_module(module) 42 | 43 | # 获取文件中的所有函数 44 | functions = inspect.getmembers(module, inspect.isfunction) 45 | 46 | # 过滤functions中以下划线开头的函数 47 | functions = filter(lambda f: not f[0].startswith("_"), functions) 48 | 49 | return [f[1] for f in functions], None 50 | except Exception as e: 51 | # 代码可能有错误,加载不起来 52 | import logging 53 | 54 | logging.exception(e) 55 | return [], str(e) 56 | 57 | 58 | def load_functions_with_directory(python_code_dir) -> list: 59 | """ 60 | Load functions from python directory (recursively) 61 | @param python_code_dir: the path of python directory 62 | @return: a list of functions 63 | """ 64 | import os 65 | 66 | total_funs = [] 67 | for file in os.listdir(python_code_dir): 68 | # if file is directory 69 | if os.path.isdir(os.path.join(python_code_dir, file)): 70 | total_funs += load_functions_with_directory( 71 | os.path.join(python_code_dir, file) 72 | ) 73 | else: 74 | # if file is file 75 | if file.endswith(".py") and ( 76 | not file.startswith("__init__") 77 | and not file.startswith("_") 78 | and not file == "main.py" 79 | ): 80 | funcs, error = load_functions_with_path( 81 | os.path.join(python_code_dir, file) 82 | ) 83 | total_funs += funcs 84 | return total_funs 85 | 86 | 87 | def _exec(code, globals_vars={}): 88 | """ 89 | Execute code and return the last expression 90 | """ 91 | import ast 92 | 93 | tree = ast.parse(code) 94 | 95 | try: 96 | last_node = tree.body[-1] 97 | code_body = tree.body[0:-1] 98 | last_expr = ast.unparse(last_node) 99 | 100 | if isinstance(last_node, ast.Assign): 101 | code_body = tree.body 102 | expr_left = last_node.targets[-1] 103 | if isinstance(expr_left, ast.Tuple): 104 | last_expr = f"({', '.join([x.id for x in expr_left.elts])})" 105 | else: 106 | last_expr = 
expr_left.id 107 | 108 | elif isinstance(last_node, ast.AugAssign) or isinstance( 109 | last_node, ast.AnnAssign 110 | ): 111 | code_body = tree.body 112 | last_expr = last_node.target.id 113 | 114 | if len(code_body): 115 | main_code = compile(ast.unparse(code_body), "", "exec") 116 | exec(main_code, globals_vars) 117 | except SyntaxError: 118 | return None 119 | 120 | try: 121 | return eval( 122 | compile(last_expr, "", "eval"), 123 | globals_vars, 124 | ) 125 | except SyntaxError: 126 | return None 127 | 128 | 129 | if len(skills._functions) == 0: 130 | skills._add_function("input", input) 131 | skills._add_function("check", default_check) 132 | skills._add_function("print", default_output_callback) 133 | skills._add_function("output", default_output_callback) 134 | skills._add_function("_exec", _exec) 135 | funcs = load_functions_with_directory(os.path.dirname(__file__)) 136 | for fun in funcs: 137 | skills._add_function(fun.__name__, fun) 138 | -------------------------------------------------------------------------------- /GeneralAgent/skills/file_operation.py: -------------------------------------------------------------------------------- 1 | 2 | def read_pdf_pages(file_path): 3 | """Read the pdf file and return a list of strings on each page of the pdf""" 4 | """读取pdf文件,返回pdf每页字符串的列表""" 5 | import fitz 6 | doc = fitz.open(file_path) 7 | documents = [] 8 | for page in doc: 9 | documents.append(page.get_text()) 10 | return documents 11 | 12 | def read_word_pages(file_path): 13 | """Read the word file and return a list of word paragraph strings""" 14 | """读取word文件,返回word段落字符串的列表""" 15 | # https://zhuanlan.zhihu.com/p/146363527 16 | from docx import Document 17 | # 打开文档 18 | document = Document(file_path) 19 | # 读取标题、段落、列表内容 20 | ps = [ paragraph.text for paragraph in document.paragraphs] 21 | return ps 22 | 23 | def read_ppt(file_path): 24 | import pptx 25 | prs = pptx.Presentation(file_path) 26 | documents = [] 27 | for slide in prs.slides: 28 | for shape 
def read_file_content(file_path):
    """Return the text content of a txt/md/pdf/docx/ppt(x) file.

    @file_path: str, path to the file; dispatch is by extension and anything
        unknown is treated as a plain UTF-8 text file.
    """
    if file_path.endswith('.pdf'):
        return ' '.join(read_pdf_pages(file_path))
    elif file_path.endswith('.docx'):
        return ' '.join(read_word_pages(file_path))
    elif file_path.endswith('.ppt') or file_path.endswith('.pptx'):
        return read_ppt(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            # BUG FIX: the old `'\n'.join(f.readlines())` doubled every newline,
            # because readlines() keeps the trailing '\n' on each line.
            return f.read()
def embedding_texts(texts, model=None) -> list:
    """
    Embed a list of texts and return one embedding vector per text.

    @texts: list of strings to embed
    @model: optional embedding model name; names containing the 'azure_' prefix
        are routed to Azure with the prefix stripped. When omitted, falls back
        to the OPENAI_EMBEDDING_MODEL / EMBEDDING_MODEL environment variables,
        then to 'text-embedding-3-small'.
    @return: list of embedding vectors (list of floats)
    """
    if model is not None and 'azure_' in model:
        client = _get_azure_client()
        model = model.replace('azure_', '')
    else:
        client = _get_openai_client()
        if model is None:
            # BUG FIX: an explicit `model` argument used to be silently overwritten
            # here. Also honor OPENAI_EMBEDDING_MODEL (the name documented in
            # .env.example) while keeping EMBEDDING_MODEL for backward compatibility.
            model = os.environ.get('OPENAI_EMBEDDING_MODEL') or os.environ.get('EMBEDDING_MODEL', 'text-embedding-3-small')
    resp = client.embeddings.create(input=texts, model=model)
    result = [x.embedding for x in resp.data]
    return result
api_key=None, base_url=None, **args): 89 | """ 90 | Run LLM (large language model) inference on the provided messages using the specified model. 91 | 92 | @messages: Input messages for the model, like [{'role': 'system', 'content': 'You are a helpful assistant'}, {'role': 'user', 'content': 'What is your name?'}] 93 | @model: Type of model to use. Options are 'normal', 'smart', 'long' 94 | @stream: Boolean indicating if the function should use streaming inference 95 | @temperature: Sampling temperature to use during inference. Must be a float between 0 and 1. Defaults to 0.5. 96 | @api_key: OpenAI API key. If not provided, the function will use the OPENAI_API_KEY environment variable. 97 | @base_url: Base URL for the OpenAI API. If not provided, the function will use the OPENAI_API_BASE environment variable. 98 | @frequency_penalty: Frequency penalty to use during inference. Must be a float between -2 and 2. Defaults to null. 99 | 100 | Returns: 101 | If stream is True, returns a generator that yields the inference results as they become available. 102 | If stream is False, returns a string containing the inference result. 103 | 104 | Note: 105 | The total number of tokens in the messages and the returned string must be less than 4000 when model_variant is 'normal', and less than 16000 when model_variant is 'long'. 
106 | """ 107 | 108 | logging.debug(messages) 109 | if model == 'smart': 110 | model = 'gpt-4o' 111 | if model == 'long': 112 | model = 'gpt-4o' 113 | if model == 'normal': 114 | model = 'gpt-3.5-turbo' 115 | if 'azure_' in model: 116 | model = model.replace('azure_', '') 117 | client = _get_azure_client(api_key, base_url) 118 | elif 'doubao' in model: 119 | client, model = _get_doubao_client(api_key, base_url) 120 | else: 121 | client = _get_openai_client(api_key, base_url) 122 | messages = _process_message(messages, model) 123 | if stream: 124 | return _llm_inference_with_stream(client, messages, model, **args) 125 | else: 126 | return _llm_inference_without_stream(client, messages, model, **args) 127 | 128 | 129 | def _process_message(messages, model): 130 | if model == "glm-4v": # 避开 GLM-4V 开源模型,开源模型不需要处理 131 | for message in messages: 132 | # remove the base64 prefix in the image_url such as 'data:image/jpeg;base64,' which is for GLM-4V 133 | if 'content' in message and isinstance(message['content'], list): 134 | for item in message['content']: 135 | if item.get('type') == 'image_url' and 'url' in item.get('image_url', {}): 136 | url = item['image_url']['url'] 137 | base64_index = url.find('base64,') 138 | if base64_index != -1: 139 | item['image_url']['url'] = url[base64_index + len('base64,'):] 140 | if model == "yi-vision": # Yi-Vision Not Support System Role 141 | for message in messages: 142 | if message['role'] == 'system': 143 | message['role'] = 'user' 144 | 145 | return messages 146 | 147 | 148 | def _get_doubao_client(api_key=None, base_url=None): 149 | from volcenginesdkarkruntime import Ark 150 | key = api_key or os.environ.get('OPENAI_API_KEY') 151 | client = Ark(api_key=key) 152 | model = base_url or os.environ.get('OPENAI_API_BASE') 153 | return client, model 154 | 155 | 156 | def _update_llm_args(model, args): 157 | if model in ['qwen-vl-max', 'qwen-vl-plus']: 158 | remove_items = ['temperature', 'frequency_penalty'] 159 | return {k: v for k, v 
in args.items() if k not in remove_items} 160 | else: 161 | return args 162 | 163 | 164 | def _llm_inference_with_stream(client, messages, model, **args): 165 | try: 166 | args = _update_llm_args(model, args) 167 | response = client.chat.completions.create( 168 | messages=messages, 169 | model=model, 170 | stream=True, 171 | **args 172 | ) 173 | for chunk in response: 174 | if len(chunk.choices) > 0: 175 | # Compatible with service using Azure API proxies, such as One-API 176 | if chunk.choices[0].delta is None: 177 | continue 178 | token = chunk.choices[0].delta.content 179 | if token is None: 180 | continue 181 | yield token 182 | except Exception as e: 183 | logging.exception(e) 184 | raise ValueError('LLM(Large Languate Model) error, Please check your key or base_url, or network') 185 | 186 | 187 | def _llm_inference_without_stream(client, messages, model, **args): 188 | try: 189 | args = _update_llm_args(model, args) 190 | response = client.chat.completions.create( 191 | messages=messages, 192 | model=model, 193 | stream=False, 194 | **args 195 | ) 196 | result = response.choices[0].message.content 197 | return result 198 | except Exception as e: 199 | logging.exception(e) 200 | raise ValueError('LLM(Large Languate Model) error, Please check your key or base_url, or network') 201 | 202 | def speech_to_text(audio_file_path): 203 | """Convert speech in audio to text, return text""" 204 | from GeneralAgent import skills 205 | audio_file = open(audio_file_path, "rb") 206 | client = _get_openai_client() 207 | content = client.audio.transcriptions.create( 208 | model="whisper-1", 209 | file=audio_file, 210 | response_format="text" 211 | ) 212 | 213 | return content 214 | 215 | def text_to_speech(text, voice='onyx', save_path=None): 216 | """ 217 | 文本转语音,返回音频文件路径。 218 | @param text: 要转换的文本 219 | @param voice: 语音名称, onyx: 男性,nova: 女性 220 | @return: 音频文件路径 221 | """ 222 | from GeneralAgent import skills 223 | 224 | # ['nova', 'shimmer', 'echo', 'onyx', 'fable', 
def get_python_version() -> str:
    """Return the running interpreter's version string, e.g. "3.9.12"."""
    from platform import python_version

    return python_version()
def get_python_code(content: str) -> str:
    """
    Extract the first ```python fenced code block from content.

    Falls back to returning content unchanged when no fenced block is found.
    """
    import re

    match = re.search("```python\n(.*?)\n```", content, re.S)
    return match.group(1) if match else content
def get_function_signature(func, module: str = None):
    """Return a one-line description of func: "name(signature): docstring".

    @param func: the callable to describe
    @param module: optional module name to prefix, yielding "module.name(...)"
    @return: the description string, or '' when the signature cannot be
             inspected (e.g. some builtins / C callables)
    """
    try:
        import inspect

        sig = inspect.signature(func)
        desc = f"{func.__name__}{str(sig)}"
        if func.__doc__:
            desc += ": " + func.__doc__.strip()
        if module is not None:
            desc = f"{module}.{desc}"
        # Removed a no-op branch that prepended an empty string for coroutine
        # functions (desc = "" + desc) — it had no effect on the output.
        return desc
    except Exception as e:
        # Signature inspection can fail for builtins; log and degrade to ''.
        import logging

        logging.exception(e)
        return ""
def unique_name():
    """Generate a unique short hex name, suitable for new files that are kept."""
    # Last segment of a UUID4 is 12 hex characters — unique enough for filenames.
    import uuid
    return str(uuid.uuid4()).split('-')[-1]

def unique_tmp_file_name():
    """Generate a unique file path inside ./tmp for a temporary file.

    The caller is responsible for deleting the file afterwards. The ./tmp
    directory (under the current working directory) is created if missing.
    @return: absolute path of a fresh, not-yet-created file inside ./tmp
    """
    import os
    tmp_dir = os.path.abspath(os.path.join(os.getcwd(), 'tmp'))
    # Create the tmp directory if it does not exist yet.
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)
    # Bug fix: the original returned tmp_dir + unique_name() (no separator),
    # which produced a sibling path like '/cwd/tmpXXXX' outside the tmp dir.
    return os.path.join(tmp_dir, unique_name())
def wikipedia_search(query: str) -> str:
    """
    Search English Wikipedia for query and return a short text summary.

    Behavior (as implemented below):
    - If the search page lists result headings instead of an article,
      returns "Could not find ... Similar: [...]" with up to 5 titles.
    - If the page looks like a disambiguation page ("may refer to:"),
      recurses once with the query wrapped in brackets.
    - Otherwise condenses the article text to its first five sentences.
    - NOTE(review): may return None when no usable text is extracted,
      despite the -> str annotation — confirm callers handle None.
    Requires network access and the bs4 package.
    """
    import requests
    from bs4 import BeautifulSoup

    def get_page_obs(page):
        # Condense page text: split into paragraphs, then sentences,
        # and keep only the first five sentences as the observation.
        # find all paragraphs
        paragraphs = page.split("\n")
        paragraphs = [p.strip() for p in paragraphs if p.strip()]

        # find all sentences (naive split on '. ')
        sentences = []
        for p in paragraphs:
            sentences += p.split('. ')
        sentences = [s.strip() + '.' for s in sentences if s.strip()]
        return ' '.join(sentences[:5])

    def clean_str(s):
        # Replace non-breaking spaces and newlines with plain spaces.
        return s.replace("\xa0", " ").replace("\n", " ")

    entity = query.replace(" ", "+")
    search_url = f"https://en.wikipedia.org/w/index.php?search={entity}"
    response_text = requests.get(search_url).text
    soup = BeautifulSoup(response_text, features="html.parser")
    # Result headings only appear when the search did NOT land on an article.
    result_divs = soup.find_all("div", {"class": "mw-search-result-heading"})
    if result_divs:
        result_titles = [clean_str(div.get_text().strip()) for div in result_divs]
        obs = f"Could not find {query}. Similar: {result_titles[:5]}."
    else:
        # Landed on an article page: collect paragraph and list text.
        page = [p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")]
        if any("may refer to:" in p for p in page):
            # Disambiguation page: retry once with a bracketed query.
            obs = wikipedia_search("[" + query + "]")
        else:
            page_content = ""
            for p in page:
                # Skip fragments of one or two words (navigation noise).
                if len(p.split(" ")) > 2:
                    page_content += ' ' + clean_str(p)
                    if not p.endswith("\n"):
                        page_content += "\n"
            obs = get_page_obs(page_content)
            if not obs:
                obs = None
    return obs
def _web_driver_get_html(driver) -> str:
    """
    Return cleaned HTML content (without script, style and comment) for the
    page currently loaded in the Selenium 4 driver; the driver should be ready.

    Cleaning steps: drop script/style tags, HTML comments and a long list of
    non-content tags; strip all attributes from div/span; absolutize relative
    href/src links against the current page URL.
    """
    # Current page URL, used below to absolutize relative links.
    from bs4 import BeautifulSoup, Comment
    from urllib.parse import urljoin
    url = driver.current_url
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    # Remove script and style tags entirely.
    for script_or_style in soup(['script', 'style']):
        script_or_style.decompose() # Remove the tag from the soup
    # Remove HTML comments.
    # NOTE(review): soup(text=...) is the legacy bs4 keyword (now string=...) —
    # works but emits a deprecation warning on recent bs4 versions.
    for comment in soup(text=lambda text: isinstance(text, Comment)):
        comment.extract()
    # Remove non-content tags (metadata, media, form controls, ...).
    for tag in soup(['head', 'meta', 'link', 'title', 'noscript', 'iframe', 'svg', 'canvas', 'audio', 'video', 'embed', 'object', 'param', 'source', 'track', 'map', 'area', 'base', 'basefont', 'bdi', 'bdo', 'br', 'col', 'colgroup', 'datalist', 'details', 'dialog', 'hr', 'img', 'input', 'keygen', 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'script', 'style', 'comment']):
        tag.decompose()
    # Clear all attributes on div and span tags.
    for tag in soup(['div', 'span']):
        tag.attrs = {}
    # Absolutize anchor targets.
    for a in soup.find_all('a', href=True):
        a['href'] = urljoin(url, a['href'])
    # Absolutize image sources.
    # NOTE(review): <img> tags were decomposed in the removal list above, so
    # this loop appears to be a no-op — confirm whether images should be kept.
    for img in soup.find_all('img', src=True):
        img['src'] = urljoin(url, img['src'])
    # Serialize the cleaned soup back to an HTML string.
    html = str(soup)
    return html
def set_logging_level():
    """Configure root logging from the AGENT_LOG environment variable.

    Recognized values (case-insensitive): debug, info, warning, error;
    anything else falls back to ERROR. Default when unset is 'info'.
    """
    # Drop any previously installed handlers so basicConfig reconfigures.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    level_by_name = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
    }
    configured = os.environ.get('AGENT_LOG', 'info').lower()
    level = level_by_name.get(configured, logging.ERROR)
    # Log format shows the absolute file path and line number of each record.
    logging.basicConfig(
        level=level,
        format='%(asctime)s %(pathname)s [line:%(lineno)d] %(levelname)s %(funcName)s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
def messages_token_count(messages):
    "Calculate and return the total number of tokens in the provided messages."
    # Approximation following the OpenAI chat-format token-counting recipe,
    # using the cl100k_base tokenizer; not an exact billing count.
    import tiktoken
    encoding = tiktoken.get_encoding("cl100k_base")
    tokens_per_message = 4  # per-message framing overhead
    tokens_per_name = 1  # extra token when a 'name' field is present
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            if isinstance(value, str):
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += tokens_per_name
            if isinstance(value, list):
                # Multimodal content: list of {'type': 'text'|'image_url', ...} items.
                for item in value:
                    if item["type"] == "text":
                        num_tokens += len(encoding.encode(item["text"]))
                    if item["type"] == "image_url":
                        num_tokens += (85 + 170 * 2 * 2) # estimated with the simplest mode: 85 base tokens + four 170-token tiles
    num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
    return num_tokens

4 | CN doc 5 | EN doc 6 |

7 | 8 | GeneralAgent是一个Python原生的Agent框架,旨在将大型语言模型 与 Python 无缝集成。 9 | 10 | 11 | **主要特性** 12 | 13 | * 工具调用:GeneralAgent 不依赖大模型的 function call,通过python代码解释器来调用工具 14 | 15 | * 序列化:GeneralAgent 支持序列化,包括记忆和python执行状态,随用随启 16 | 17 | * 快速配置角色、函数和知识库,创建Agent 18 | 19 | * 执行稳定的复杂业务流程,协调多个Agent完成任务 20 | 21 | * 使用 `agent.run` 函数执行命令并产生结构化输出,超越简单的文本响应 22 | 23 | * 使用 `agent.user_input` 函数与用户进行动态交互 24 | 25 | * 自我调用(探索):GeneralAgent通过自我调用和堆栈记忆,最小化大模型的调用次数,来高效处理复杂任务。更多详情请见我们的 [论文](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 26 | 27 | 28 | 29 | ## 安装 30 | 31 | ```bash 32 | pip install GeneralAgent 33 | ``` 34 | 35 | 36 | 37 | ## 配置 38 | ### 方式一:使用环境变量(推荐) 39 | 1. 安装依赖: 40 | ```bash 41 | pip install python-dotenv 42 | ``` 43 | 44 | 2. 参考 [.env.example](./.env.example) 文件,创建并配置 .env 文件: 45 | ```bash 46 | OPENAI_API_KEY=your_openai_api_key 47 | # OPENAI_API_BASE=your_openai_base_url 48 | ``` 49 | 50 | 3. 在代码中加载环境变量: 51 | ```python 52 | from dotenv import load_dotenv 53 | from GeneralAgent import Agent 54 | 55 | load_dotenv() 56 | agent = Agent('You are a helpful agent.') 57 | ``` 58 | 59 | ### 方式二:直接在代码中配置 60 | 61 | ```python 62 | from GeneralAgent import Agent 63 | agent = Agent('You are a helpful agent.', api_key='sk-xxx') 64 | ``` 65 | 66 | 67 | 68 | ## 使用 69 | 70 | ### 快速开始 71 | 72 | ```python 73 | from GeneralAgent import Agent 74 | 75 | agent = Agent('你是一个AI助手') 76 | while True: 77 | query = input() 78 | agent.user_input(query) 79 | print('-'*50) 80 | ``` 81 | 82 | 83 | 84 | ### 函数调用 85 | 86 | ```python 87 | # 函数调用 88 | from GeneralAgent import Agent 89 | 90 | # 函数: 获取天气信息 91 | def get_weather(city: str) -> str: 92 | """ 93 | get weather information 94 | @city: str, city name 95 | @return: str, weather information 96 | """ 97 | return f"{city} weather: sunny" 98 | 99 | 100 | agent = Agent('你是一个天气小助手', functions=[get_weather]) 101 | agent.user_input('成都天气怎么样?') 102 | ``` 103 | 104 | 105 | 106 | ### 知识库 107 | 108 | ```python 109 | # 知识库 110 | from GeneralAgent import 
Agent 111 | 112 | knowledge_files = ['../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf'] 113 | agent = Agent('你是AI助手,用中文回复。', workspace='9_knowledge_files', knowledge_files=knowledge_files) 114 | agent.user_input('Self call 是什么意思?') 115 | ``` 116 | 117 | 知识库默认使用 GeneralAgent.skills 中 embedding_texts 函数来对文本进行 embedding (默认是OpenAI的text-embedding-3-small模型) 118 | 119 | 你可以重写 embedding_texts 函数,使用其他厂商 或者 本地的 embedding 方法,具体如下: 120 | 121 | ```python 122 | def new_embedding_texts(texts) -> [[float]]: 123 | """ 124 | 对文本数组进行embedding 125 | """ 126 | # 你的embedding方法 127 | return result 128 | from GeneralAgent import skills 129 | skills.embedding_texts = new_embedding_texts 130 | ``` 131 | 132 | 133 | 134 | ### 序列化 135 | 136 | ```python 137 | # 序列化 138 | from GeneralAgent import Agent 139 | 140 | # agent序列化位置,运行过程中会自动保存LLM的messages和python解析器的状态 141 | workspace='./5_serialize' 142 | 143 | role = 'You are a helpful agent.' 144 | agent = Agent(workspace=workspace) 145 | agent.user_input('My name is Shadow.') 146 | 147 | agent = None 148 | agent = Agent(role, workspace=workspace) 149 | agent.user_input('What is my name?') 150 | # Output: Your name is Shadow. How can I help you today, Shadow? 151 | 152 | # agent: 清除记忆 + python序列化状态 153 | agent.clear() 154 | 155 | agent.user_input('What is my name?') 156 | # Output: I'm sorry, but I don't have access to your personal information, including your name. How can I assist you today? 
157 | 158 | import shutil 159 | shutil.rmtree(workspace) 160 | ``` 161 | 162 | 163 | 164 | ### 写小说 165 | 166 | ```python 167 | # 写小说 168 | from GeneralAgent import Agent 169 | from GeneralAgent import skills 170 | 171 | # 步骤0: 定义Agent 172 | agent = Agent('你是一个小说家') 173 | 174 | # 步骤1: 从用户处获取小说的名称和主题 175 | # topic = skills.input('请输入小说的名称和主题: ') 176 | topic = '小白兔吃糖不刷牙的故事' 177 | 178 | # 步骤2: 小说的概要 179 | summary = agent.run(f'小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。') 180 | 181 | # 步骤3: 小说的章节名称和概要列表 182 | chapters = agent.run('输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]', return_type=list) 183 | 184 | # 步骤4: 生成小说每一章节的详细内容 185 | contents = [] 186 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 187 | content = agent.run(f'对于章节: {chapter_title}\n{chapter_summary}. \n输出章节的详细内容,注意只返回内容,不要标题。') 188 | content = '\n'.join([x.strip() for x in content.split('\n')]) 189 | contents.append(content) 190 | 191 | # 步骤5: 将小说格式化写入文件 192 | with open('novel.md', 'w') as f: 193 | for index in range(len(chapters)): 194 | f.write(f'### {chapters[index][0]}\n') 195 | f.write(f'{contents[index]}\n\n') 196 | 197 | # 步骤6(可选): 将markdown文件转换为pdf文件 198 | 199 | # 步骤7: 输出小说文件给用户 200 | skills.output('你的小说已经生成[novel.md](novel.md)\n') 201 | ``` 202 | 203 | 204 | 205 | ### 多Agent 206 | 207 | ```python 208 | # 多Agent配合完成任务 209 | from GeneralAgent import Agent 210 | story_writer = Agent('你是一个故事创作家,根据大纲要求或者故事梗概,返回一个更加详细的故事内容。') 211 | humor_enhancer = Agent('你是一个润色作家,将一个故事进行诙谐润色,增加幽默元素。直接输出润色后的故事') 212 | 213 | # 禁用Python运行 214 | story_writer.disable_python_run = True 215 | humor_enhancer.disable_python_run = True 216 | 217 | # topic = skills.input('请输入小说的大纲要求或者故事梗概: ') 218 | topic = '写个小白兔吃糖不刷牙的故事,有教育意义。' 219 | initial_story = story_writer.run(topic) 220 | enhanced_story = humor_enhancer.run(initial_story) 221 | print(enhanced_story) 222 | ``` 223 | 224 | 225 | 226 | 227 | ### 多模态输入 228 | 229 | user_input 的 input 参数,和 run 的 command 参数,支持字符串或者数组。 230 | 231 | 
数组时支持多模态,格式为最简模式: ['text_content', {'image': 'path/to/image'}, ...] 232 | 233 | ```python 234 | # 支持多模态: 图片输入 235 | from GeneralAgent import Agent 236 | 237 | agent = Agent('You are a helpful assistant.') 238 | agent.user_input(['what is in the image?', {'image': '../docs/images/self_call.png'}]) 239 | ``` 240 | 241 | 242 | 243 | ### 大模型切换 244 | 245 | #### OpenAI SDK 246 | 247 | 得益于GeneralAgent框架不依赖大模型厂商的 function call 能力实现了函数调用,可以无缝切换不同的大模型实现相同的能力。 248 | 249 | GeneralAgent框架使用OpenAI Python SDK 来支持其他大模型。 250 | 251 | ```python 252 | from GeneralAgent import Agent 253 | 254 | agent = Agent('You are a helpful agent.', model='deepseek-chat', token_limit=32000, api_key='sk-xxx', base_url='https://api.deepseek.com/v1') 255 | agent.user_input('介绍一下成都') 256 | ``` 257 | 258 | 详情见: [examples/8_multi_model.py](./examples/8_multi_model.py) 259 | 260 | 261 | #### Azure OpenAI 262 | 263 | ```python 264 | from GeneralAgent import Agent 265 | 266 | # api_key = os.getenv("OPENAI_API_KEY") 267 | # base_url = os.getenv("OPENAI_API_BASE") 268 | api_key = '8ef0b4df45e444079cd5xxx' # Azure API Key or use OPENAI_API_KEY environment variable 269 | base_url = 'https://xxxx.openai.azure.com/' # Azure API Base URL or use OPENAI_API_BASE environment variable 270 | model = 'azure_cpgpt4' # azure_ with model name, e.g. azure_cpgpt4 271 | # azure api_version is default to '2024-05-01-preview'. 
You can set by environment variable AZURE_API_VERSION 272 | 273 | agent = Agent('You are a helpful assistant', api_key=api_key, base_url=base_url, model=model) 274 | while True: 275 | query = input('Please input your query:') 276 | agent.user_input(query) 277 | print('-'*50) 278 | ``` 279 | 280 | 281 | #### OneAPI 282 | 283 | 如果其他大模型不支持OpenAI SDK,可以通过 https://github.com/songquanpeng/one-api 来支持。 284 | 285 | 286 | #### 自定义大模型 287 | 288 | 或者重写 GeneralAgent.skills 中 llm_inference 函数来使用其他大模型。 289 | 290 | ```python 291 | from GeneralAgent import skills 292 | def new_llm_inference(messages, model, stream=False, temperature=None, api_key=None, base_url=None): 293 | """ 294 | 使用大模型进行推理 295 | """ 296 | pass 297 | skills.llm_inference = new_llm_inference 298 | ``` 299 | 300 | 301 | 302 | ### 禁用Python运行 303 | 304 | 默认 GeneralAgent 自动运行 LLM 输出的python代码。 305 | 306 | 某些场景下,如果不希望自动运行,设置 `disable_python_run` 为 `True` 即可。 307 | 308 | ```python 309 | from GeneralAgent import Agent 310 | 311 | agent = Agent('你是一个python专家,辅助用户解决python问题。') 312 | agent.disable_python_run = True 313 | agent.user_input('用python实现一个读取文件的函数') 314 | ``` 315 | 316 | ### 隐藏python运行 317 | 318 | 在正式的业务场景中,不希望用户看到python代码的运行,而只是看到最终结果,可以设置 `hide_python_code` 为 `True`。 319 | 320 | ```python 321 | from GeneralAgent import Agent 322 | agent = Agent('You are a helpful assistant.', hide_python_code=True) 323 | agent.user_input('caculate 0.999 ** 1000') 324 | ``` 325 | 326 | 327 | 328 | ### AI搜索 329 | 330 | ```python 331 | # AI搜索 332 | # 运行前置条件: 333 | # 1. 请先配置环境变量 SERPER_API_KEY (https://serper.dev/ 的API KEY); 334 | # 2. 
安装 selenium 库: pip install selenium 335 | 336 | from GeneralAgent import Agent 337 | from GeneralAgent import skills 338 | 339 | google_results = [] 340 | 341 | # 步骤1: 第一次google搜索 342 | question = input('请输入问题,进行 AI 搜索: ') 343 | # question = '周鸿祎卖车' 344 | content1 = skills.google_search(question) 345 | google_results.append(content1) 346 | 347 | # 步骤2: 第二次google搜索: 根据第一次搜索结构,获取继续搜索的问题 348 | agent = Agent('你是一个AI搜索助手。') 349 | querys = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{content1}\n\n。请问可以帮助用户,需要继续搜索的关键短语有哪些(最多3个,且和问题本身不太重合)?返回关键短语列表变量([query1, query2])', return_type=list) 350 | print(querys) 351 | for query in querys: 352 | content = skills.google_search(query) 353 | google_results.append(content) 354 | 355 | # 步骤3: 提取重点网页内容 356 | agent.clear() 357 | web_contents = [] 358 | google_result = '\n\n'.join(google_results) 359 | urls = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n。哪些网页对于用户问题比较有帮助?请返回最重要的不超过5个的网页url列表变量([url1, url2, ...])', return_type=list) 360 | for url in urls: 361 | content = skills.web_get_text(url, wait_time=2) 362 | web_contents.append(content) 363 | 364 | # 步骤4: 输出结果 365 | agent.clear() 366 | web_content = '\n\n'.join(web_contents) 367 | agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n部分网页内容: \n{web_content}\n\n。请根据用户问题,搜索引擎结果,网页内容,给出用户详细的回答,要求按一定目录结构来输出,并且使用markdown格式。') 368 | ``` 369 | 370 | ### 更多 371 | 372 | 更多例子请见 [examples](./examples) 373 | 374 | 375 | 376 | ## API 377 | 378 | ### 基础使用 379 | 380 | **Agent.\__init__(self, role: str, workspace: str = None, functions: List[Callable] = [], knowledge_files: List[str] = None)** 381 | 382 | 初始化一个Agent实例。 383 | 384 | - role (str): Agent的角色。 385 | - workspace (str, 可选): Agent的工作空间。默认值为None(不序列化)。如果指定了目录,Agent会自动保存状态并在下次初始化时重新加载。 386 | - functions (List[Callable], 可选): Agent可以调用的函数列表。 387 | - knowledge_files (List[str], 可选): Agent知识库文件路径列表。 388 | - messages (List[str], 可选): Agent的历史消息列表, 消息字段中必须包含 'role', 'content' 字段。 389 | 390 | **Agent.run(self, command: 
Union[str, List[Union[str, Dict[str, str]]]], return_type: str = str, display: bool = False)** 391 | 392 | 执行命令并返回指定类型的结果。 393 | 394 | - command (Union[str, List[Union[str, Dict[str, str]]]]): 要执行的命令。例如:'describe chengdu' 或 ['what is in image?', {'image': 'path/to/image'}]。 395 | - return_type (str, 可选): 结果的返回类型。默认值为str。 396 | - display (bool, 可选): 是否显示LLM生成的中间内容。默认值为False。 397 | 398 | **Agent.user_input(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 399 | 400 | 响应用户输入,并始终显示LLM生成的中间内容。 401 | 402 | - input (Union[str, List[Union[str, Dict[str, str]]]]): 用户输入。 403 | 404 | **Agent.temporary_context(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 405 | 406 | 对话产生的数据,不进入 agent memory 中。 407 | 408 | - input (Union[str, List[Union[str, Dict[str, str]]]]): 用户输入。 409 | 410 | ```python 411 | from GeneralAgent import Agent 412 | 413 | agent = Agent('You are a helpful assistant.') 414 | with agent.temporary_context(): 415 | agent.user_input('My name is Henry.') 416 | agent.user_input("What's my name?") 417 | ``` 418 | 419 | **Agent.clear(self)** 420 | 421 | 清除Agent的状态。 422 | 423 | ### 高级使用 424 | 425 | [ ] # TODO 426 | 427 | 428 | 429 | 430 | ## 论文 431 | 432 | [General Agent:Self Call and Stack Memory](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 433 | 434 | 435 | 436 | 437 | 438 | ## 加入我们👏🏻 439 | 440 | 使用微信扫描下方二维码,加入微信群聊,或参与贡献。 441 | 442 |

443 | wechat 444 |

-------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- 1 | # GeneralAgent: From LLM to Agent 2 | 3 |

4 | CN doc 5 | EN doc 6 |

7 | 8 | GeneralAgent is a Python-native Agent framework that aims to seamlessly integrate large language models with Python. 9 | 10 | **Main features** 11 | 12 | * Quickly configure role, functions, and knowledge bases to create Agent. 13 | 14 | * Execute stable and complex business processes and coordinate multiple Agents to complete tasks. 15 | 16 | * Use the `agent.run` function to execute commands and produce structured output, beyond simple text responses. 17 | 18 | * Use the `agent.user_input` function to dynamically interact with the user. 19 | 20 | * Tool call: GeneralAgent does not rely on the function call of large models, but calls tools through the python code interpreter. 21 | 22 | * Serialization: GeneralAgent supports serialization, including memory and python execution status, and is ready to use 23 | 24 | * Self-call(experimental): GeneralAgent minimizes the number of calls to large models through self-call and stack memory to efficiently handle complex tasks. For more details, please see our [paper](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 25 | 26 | 27 | 28 | ## Installation 29 | 30 | ```bash 31 | pip install GeneralAgent 32 | ``` 33 | 34 | 35 | 36 | ## Configuration 37 | 38 | ### Method 1: Using Environment Variables (Recommended) 39 | 1. Install dependency: 40 | ```bash 41 | pip install python-dotenv 42 | ``` 43 | 44 | 2. Refer to [.env.example](./.env.example) file to create and configure .env file: 45 | ```bash 46 | OPENAI_API_KEY=your_openai_api_key 47 | # OPENAI_API_BASE=your_openai_base_url 48 | ``` 49 | 50 | 3. 
Load environment variables in code: 51 | ```python 52 | from dotenv import load_dotenv 53 | from GeneralAgent import Agent 54 | 55 | load_dotenv() 56 | agent = Agent('You are a helpful agent.') 57 | ``` 58 | 59 | ### Method 2: Configure in Code 60 | 61 | ```python 62 | from GeneralAgent import Agent 63 | agent = Agent('You are a helpful agent.', api_key='sk-xxx') 64 | ``` 65 | 66 | 67 | 68 | ## Usage 69 | 70 | ### Quick Start 71 | 72 | ```python 73 | from GeneralAgent import Agent 74 | 75 | # Streaming output of intermediate results 76 | def output_callback(token): 77 | token = token or '\n' 78 | print(token, end='', flush=True) 79 | 80 | agent = Agent('You are an AI assistant, reply in Chinese.', output_callback=output_callback) 81 | while True: 82 | query = input('Please enter: ') 83 | agent.user_input(query) 84 | print('-'*50) 85 | ``` 86 | 87 | 88 | 89 | ### Function call 90 | 91 | ```python 92 | # Function call 93 | from GeneralAgent import Agent 94 | 95 | # Function: Get weather information 96 | def get_weather(city: str) -> str: 97 | """ 98 | get weather information 99 | @city: str, city name 100 | @return: str, weather information 101 | """ 102 | return f"{city} weather: sunny" 103 | 104 | agent = Agent('You are a weather assistant', functions=[get_weather]) 105 | agent.user_input('What is the weather like in Chengdu?') 106 | 107 | # Output 108 | # ```python 109 | # city = "Chengdu" 110 | # weather_info = get_weather(city) 111 | # weather_info 112 | # ``` 113 | # The weather in Chengdu is sunny. 114 | # Is there anything else I can help with? 
115 | ``` 116 | 117 | 118 | 119 | ### Knowledge Base 120 | 121 | ```python 122 | # Knowledge Base 123 | from GeneralAgent import Agent 124 | 125 | knowledge_files = ['../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf'] 126 | agent = Agent('You are an AI assistant, reply in Chinese.', workspace='9_knowledge_files', knowledge_files=knowledge_files) 127 | agent.user_input('What does Self call mean?') 128 | ``` 129 | 130 | The knowledge base uses the embedding_texts function in GeneralAgent.skills to embed text by default (the default is OpenAI's text-embedding-3-small model) 131 | 132 | You can rewrite the embedding_texts function to use other manufacturers or local embedding methods, as follows: 133 | 134 | ```python 135 | def new_embedding_texts(texts) -> [[float]]: 136 | """ 137 | Embedding text arrays 138 | """ 139 | # Your embedding method 140 | return result 141 | from GeneralAgent import skills 142 | skills.embedding_texts = new_embedding_texts 143 | ``` 144 | 145 | 146 | 147 | ### Serialization 148 | 149 | ```python 150 | # Serialization 151 | from GeneralAgent import Agent 152 | 153 | # Agent serialization location, LLM messages and python parser status will be automatically saved during operation 154 | workspace='./5_serialize' 155 | 156 | role = 'You are a helpful agent.' 157 | agent = Agent(workspace=workspace) 158 | agent.user_input('My name is Shadow.') 159 | 160 | agent = None 161 | agent = Agent(role, workspace=workspace) 162 | agent.user_input('What is my name?') 163 | # Output: Your name is Shadow. How can I help you today, Shadow? 164 | 165 | # agent: Clear memory + python serialization status 166 | agent.clear() 167 | 168 | agent.user_input('What is my name?') 169 | # Output: I'm sorry, but I don't have access to your personal information, including your name. How can I assist you today? 
170 | 171 | import shutil 172 | shutil.rmtree(workspace) 173 | ``` 174 | 175 | ### Write a novel 176 | 177 | ```python 178 | # Write a novel 179 | from GeneralAgent import Agent 180 | from GeneralAgent import skills 181 | 182 | # Step 0: Define Agent 183 | agent = Agent('You are a novelist') 184 | 185 | # Step 1: Get the name and topic of the novel from the user 186 | # topic = skills.input('Please enter the name and topic of the novel: ') 187 | topic = 'The story of the little white rabbit eating candy without brushing its teeth' 188 | 189 | # Step 2: Summary of the novel 190 | summary = agent.run(f'The name and topic of the novel are: {topic}, expand and improve the summary of the novel. It is required to be literary, educational, and entertaining. ') 191 | 192 | # Step 3: List of chapter names and summaries of the novel 193 | chapters = agent.run('Output the chapter names of the novel and the summary of each chapter, return a list [(chapter_title, chapter_summary), ....]', return_type=list) 194 | 195 | # Step 4: Generate detailed content of each chapter of the novel 196 | contents = [] 197 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 198 | content = agent.run(f'For chapters: {chapter_title}\n{chapter_summary}. 
\nOutput detailed content of the chapter, note that only the content is returned, not the title.') 199 | content = '\n'.join([x.strip() for x in content.split('\n')]) 200 | contents.append(content) 201 | 202 | # Step 5: Format the novel and write it to a file 203 | with open('novel.md', 'w') as f: 204 | for index in range(len(chapters)): 205 | f.write(f'### {chapters[index][0]}\n') 206 | f.write(f'{contents[index]}\n\n') 207 | 208 | # Step 6 (optional): Convert markdown file to pdf file 209 | 210 | # Step 7: Output novel file to user 211 | skills.output('Your novel has been generated [novel.md](novel.md)\n') 212 | ``` 213 | 214 | ### Multi-Agent 215 | 216 | ```python 217 | # Multi-Agent cooperates to complete the task 218 | from GeneralAgent import Agent 219 | story_writer = Agent('You are a story writer. According to the outline requirements or story outline, return a more detailed story content.') 220 | humor_enhancer = Agent('You are a polisher. Make a story humorous and add humorous elements. Directly output the polished story') 221 | 222 | # Disable Python running 223 | story_writer.disable_python_run = True 224 | humor_enhancer.disable_python_run = True 225 | 226 | # topic = skills.input('Please enter the outline requirements or story summary of the novel: ') 227 | topic = 'Write a story about a little white rabbit eating candy without brushing its teeth. It has educational significance. ' 228 | initial_story = story_writer.run(topic) 229 | enhanced_story = humor_enhancer.run(initial_story) 230 | print(enhanced_story) 231 | ``` 232 | 233 | 234 | 235 | ### Multimodal input 236 | 237 | The input parameter of user_input and the command parameter of run support strings or arrays. 238 | 239 | Multimodal is supported when the array is used. The format is the simplest mode: ['text_content', {'image': 'path/to/image'}, ...] 
240 | 241 | ```python 242 | # Multimodal support: Image input 243 | from GeneralAgent import Agent 244 | 245 | agent = Agent('You are a helpful assistant.') 246 | agent.user_input(['what is in the image?', {'image': '../docs/images/self_call.png'}]) 247 | ``` 248 | 249 | 250 | 251 | 252 | ### LLM switching 253 | 254 | #### OpenAI SDK 255 | 256 | Thanks to the GeneralAgent framework's independent function call capability of large model vendors, it can seamlessly switch between different large models to achieve the same capabilities. 257 | 258 | The GeneralAgent framework uses the OpenAI Python SDK to support other large models. 259 | 260 | ```python 261 | from GeneralAgent import Agent 262 | 263 | agent = Agent('You are a helpful agent.', model='deepseek-chat', token_limit=32000, api_key='sk-xxx', base_url='https://api.deepseek.com/v1') 264 | agent.user_input('Introduce Chengdu') 265 | ``` 266 | 267 | For details, see: [examples/8_multi_model.py](./examples/8_multi_model.py) 268 | 269 | #### Azure OpenAI 270 | 271 | ```python 272 | from GeneralAgent import Agent 273 | 274 | # api_key = os.getenv("OPENAI_API_KEY") 275 | # base_url = os.getenv("OPENAI_API_BASE") 276 | api_key = '8ef0b4df45e444079cd5xxx' # Azure API Key or use OPENAI_API_KEY environment variable 277 | base_url = 'https://xxxx.openai.azure.com/' # Azure API Base URL or use OPENAI_API_BASE environment variable 278 | model = 'azure_cpgpt4' # azure_ with model name, e.g. azure_cpgpt4 279 | # azure api_version is default to '2024-05-01-preview'. You can set by environment variable AZURE_API_VERSION 280 | 281 | agent = Agent('You are a helpful assistant', api_key=api_key, base_url=base_url, model=model) 282 | while True: 283 | query = input('Please input your query:') 284 | agent.user_input(query) 285 | print('-'*50) 286 | ``` 287 | 288 | 289 | #### One API 290 | 291 | If other large models do not support OpenAI SDK, they can be supported through https://github.com/songquanpeng/one-api. 
292 | 293 | 294 | #### Custom large model 295 | 296 | Or rewrite the llm_inference function in GeneralAgent.skills to use other large models. 297 | 298 | ```python 299 | from GeneralAgent import skills 300 | def new_llm_inference(messages, model, stream=False, temperature=None, api_key=None, base_url=None): 301 | """ 302 | Use the large model for inference 303 | """ 304 | pass 305 | skills.llm_inference = new_llm_inference 306 | ``` 307 | 308 | 309 | 310 | ### Disable Python run 311 | 312 | By default, GeneralAgent automatically runs the python code output by LLM. 313 | 314 | In some scenarios, if you do not want to run automatically, set `disable_python_run` to `True`. 315 | 316 | ```python 317 | from GeneralAgent import Agent 318 | 319 | agent = Agent('You are a python expert, helping users solve python problems.') 320 | agent.disable_python_run = True 321 | agent.user_input('Use python to implement a function to read files') 322 | ``` 323 | 324 | 325 | 326 | ### Hide Python Run 327 | 328 | In formal business scenarios, if you do not want users to see the running of Python code but only the final result, you can set `hide_python_code` to `True`. 329 | 330 | ```python 331 | from GeneralAgent import Agent 332 | agent = Agent('You are a helpful assistant.', hide_python_code=True) 333 | agent.user_input('caculate 0.999 ** 1000') 334 | ``` 335 | 336 | 337 | 338 | ### AI search 339 | 340 | ```python 341 | # AI search 342 | # Prerequisites: 343 | # 1. Please configure the environment variable SERPER_API_KEY (https://serper.dev/'s API KEY); 344 | # 2. 
Install the selenium library: pip install selenium 345 | 346 | from GeneralAgent import Agent 347 | from GeneralAgent import skills 348 | 349 | google_results = [] 350 | 351 | # Step 1: First Google search 352 | question = input('Please enter a question and proceed AI search: ') 353 | content1 = skills.google_search(question) 354 | google_results.append(content1) 355 | 356 | # Step 2: Second Google search: According to the first search structure, get the question to continue searching 357 | agent = Agent('You are an AI search assistant.') 358 | queries = agent.run(f'User question: \n{question}\n\nSearch engine results: \n{content1}\n\n. Can you help users, what are the key phrases that need to be searched (up to 3, and not too overlapping with the question itself)? Return the key phrase list variable ([query1, query2])', return_type=list) 359 | print(queries) 360 | for query in queries: 361 | content = skills.google_search(query) 362 | google_results.append(content) 363 | 364 | # Step 3: Extract key web page content 365 | agent.clear() 366 | web_contents = [] 367 | google_result = '\n\n'.join(google_results) 368 | urls = agent.run(f'User question: \n{question}\n\nSearch engine result: \n{google_result}\n\n. Which web pages are more helpful for user questions? Please return the most important webpage url list variable ([url1, url2, ...])', return_type=list) 369 | for url in urls: 370 | content = skills.web_get_text(url, wait_time=2) 371 | web_contents.append(content) 372 | 373 | # Step 4: Output results 374 | agent.clear() 375 | web_content = '\n\n'.join(web_contents) 376 | agent.run(f'User question: \n{question}\n\nSearch engine results: \n{google_result}\n\nPart of the webpage content: \n{web_content}\n\n. Please give the user a detailed answer based on the user's question, search engine results, and webpage content. 
It is required to be output according to a certain directory structure and use markdown format.') 377 | ``` 378 | 379 | 380 | 381 | ### More 382 | 383 | For more examples, see [examples](./examples) 384 | 385 | 386 | ## API 387 | 388 | ### Basic Usage 389 | 390 | **Agent.\__init__(self, role: str, workspace: str = None, functions: List[Callable] = [], knowledge_files: List[str] = None)** 391 | 392 | Initializes an Agent instance. 393 | 394 | - role (str): The role of the agent. 395 | - workspace (str, optional): The agent's workspace. Default is None (not serialized). If a directory is specified, the agent will automatically save the agent's state and reload it upon the next initialization. 396 | - functions (List[Callable], optional): A list of functions that the agent can call. 397 | - knowledge_files (List[str], optional): A list of file paths for the agent's knowledge base. 398 | - messages (List[str], optional): A list of Agent's historical messages, where each message must contain the 'role' and 'content' fields. 399 | 400 | 401 | **Agent.run(self, command: Union[str, List[Union[str, Dict[str, str]]]], return_type: str = str, display: bool = False)** 402 | 403 | Executes a command and returns the result in the specified return type. 404 | 405 | - command (Union[str, List[Union[str, Dict[str, str]]]]): The command to execute. Examples: 'describe chengdu' or ['what is in image?', {'image': 'path/to/image'}]. 406 | - return_type (str, optional): The return type of the result. Default is str. 407 | - display (bool, optional): Whether to display the intermediate content generated by the LLM. Default is False. 408 | 409 | 410 | **Agent.user_input(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 411 | 412 | Responds to user input and always displays the intermediate content generated by the LLM. 413 | 414 | - input (Union[str, List[Union[str, Dict[str, str]]]]): The user input. 
415 | 416 | **Agent.temporary_context(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 417 | 418 | The data generated by the conversation does not enter the agent memory. 419 | - input (Union[str, List[Union[str, Dict[str, str]]]]): The user input. 420 | 421 | ```python 422 | from GeneralAgent import Agent 423 | 424 | agent = Agent('You are a helpful assistant.') 425 | with agent.temporary_context(): 426 | agent.user_input('My name is Henry.') 427 | agent.user_input("What's my name?") 428 | ``` 429 | 430 | **Agent.clear(self)** 431 | 432 | Clears the agent's state. 433 | 434 | 435 | 436 | ### Advanced Usage 437 | 438 | [] # TODO 439 | 440 | 441 | 442 | 443 | ## Paper 444 | 445 | [General Agent: Self Call and Stack Memory](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 446 | 447 | 448 | 449 | ## Join us👏🏻 450 | 451 | Use WeChat to scan the QR code below, join the WeChat group chat, or participate in the contribution. 452 | 453 |

454 | wechat 455 |

-------------------------------------------------------------------------------- /docs/develop.md: -------------------------------------------------------------------------------- 1 | # 发布 2 | 3 | ```bash 4 | # 发布pip库 5 | poetry build -f sdist 6 | poetry publish 7 | ``` 8 | 9 | # 测试 10 | 11 | ```shell 12 | # 新建python环境 13 | python -m venv ga 14 | source ga/bin/activate 15 | 16 | # 临时取消python别名 (如果有) 17 | unalias python 18 | 19 | # 安装依赖 20 | pip install . 21 | 22 | # 导出环境变量 23 | export $(grep -v '^#' .env | sed 's/^export //g' | xargs) 24 | 25 | # 测试 26 | cd test 27 | pytest -s -v 28 | ``` -------------------------------------------------------------------------------- /docs/images/2023.11.15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/2023.11.15.jpg -------------------------------------------------------------------------------- /docs/images/2023_11_27_builder_agent.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/2023_11_27_builder_agent.jpg -------------------------------------------------------------------------------- /docs/images/2023_11_27_image_creator.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/2023_11_27_image_creator.jpg -------------------------------------------------------------------------------- /docs/images/Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/Architecture.png 
-------------------------------------------------------------------------------- /docs/images/Architecture_2023.11.15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/Architecture_2023.11.15.png -------------------------------------------------------------------------------- /docs/images/general_agent_2024.01.16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/general_agent_2024.01.16.png -------------------------------------------------------------------------------- /docs/images/self_call.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/self_call.png -------------------------------------------------------------------------------- /docs/images/stack_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/stack_memory.png -------------------------------------------------------------------------------- /docs/images/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/wechat.jpg -------------------------------------------------------------------------------- /docs/images/wechat_company.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/wechat_company.jpg 
-------------------------------------------------------------------------------- /docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf -------------------------------------------------------------------------------- /examples/0_base_usage.py: -------------------------------------------------------------------------------- 1 | from GeneralAgent import Agent 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | # agent = Agent('You are a helpful assistant.', temperature=0.5, frequency_penalty=2) 6 | agent = Agent('You are a helpful assistant.') 7 | while True: 8 | query = input('>: ') 9 | agent.user_input(query) 10 | print('-'*50) -------------------------------------------------------------------------------- /examples/10_rag_function.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | load_dotenv() 4 | # RAG function 5 | 6 | # 设置日志级别 7 | import os 8 | os.environ['AGENT_LOG'] = 'debug' 9 | 10 | from GeneralAgent import Agent 11 | 12 | def rag_function(messages): 13 | input = messages[-1]['content'] 14 | print('user input:', input) 15 | # TODO: 根据input或者messages更多信息,返回相关的背景知识 16 | return 'Background: GeneralAgent is a Python library for building AI assistants. It provides a simple API for building conversational agents.' 
17 | 18 | agent = Agent('You are a helpful assistant', rag_function=rag_function) 19 | agent.user_input('What is GeneralAgent?') -------------------------------------------------------------------------------- /examples/11_collection_and_store.py: -------------------------------------------------------------------------------- 1 | # 多轮对话搜集信息 & 保存 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | role = """ 8 | 你是一个专业的诊前护士。 9 | 你的主要工作: 和病人沟通,确认详细的病情,保存病历。 10 | 11 | # 1、病情沟通例子 12 | 用户: 我眼睛疼 13 | 你: 疼多旧了? 14 | 用户: 2天 15 | 你: 你还可以看见东西吗?视力有没有影响? 16 | 用户: 还能看见 17 | 你: .... 18 | 19 | 当病情确认,直接输出python代码,使用 save_medical_record 函数保存病历详情。 20 | 21 | medical_record = \"\"\" 22 | 主诉: 眼睛干涩 23 | 现病史: 最近长时间使用电子设备 24 | 既往史: 无特殊情况 25 | 过敏史: 无过敏史 26 | 家族史: 无家族史 27 | 个人史: 生活环境比较潮湿,未使用任何眼睛滴剂或药物缓解症状 28 | \"\"\" 29 | save_medical_record(medical_record) 30 | 31 | """ 32 | 33 | stop = False 34 | # 保存病历函数 35 | def save_medical_record(medical_record): 36 | """ 37 | 保存病历 38 | @param medical_record: 病历内容 39 | """ 40 | # print(medical_record) 41 | with open('medical_record.txt', 'a') as f: 42 | f.write(medical_record) 43 | global stop 44 | stop = True 45 | return "病历已保存" 46 | 47 | 48 | agent = Agent(role, functions=[save_medical_record], hide_python_code=True) 49 | agent.user_input('你可以做什么?') 50 | while not stop: 51 | query = input('请输入: ') 52 | agent.user_input(query) -------------------------------------------------------------------------------- /examples/12_hide_python_code.py: -------------------------------------------------------------------------------- 1 | # 快速开始 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | agent = Agent('You are a helpful assistant.', hide_python_code=True) 7 | agent.user_input('caculate 0.999 ** 1000') -------------------------------------------------------------------------------- /examples/13_image_input.py: 
-------------------------------------------------------------------------------- 1 | # 支持多模态: 图片输入 2 | # 格式为自定最简模式,: ['text_content', {'image': 'path/to/image'}, ...] 3 | from GeneralAgent.utils import set_logging_level 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | set_logging_level() 8 | 9 | from GeneralAgent import Agent 10 | 11 | agent = Agent('You are a helpful assistant.') 12 | agent.user_input(['what is in the image?', {'image': '../docs/images/self_call.png'}]) -------------------------------------------------------------------------------- /examples/14_doubao_llm.py: -------------------------------------------------------------------------------- 1 | # 使用豆包大模型 2 | # 使用豆包模型,需要先安装库: pip install 'volcengine-python-sdk[ark]' 3 | # model设置为doubao,区分大模型链接库volcengine 4 | # 豆包由于接口上模型是Endpoint。所以使用base_url来指定Endpoint(即哪种模型) 5 | 6 | from GeneralAgent import Agent 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | api_key = 'your_api_key' 12 | endpoint = 'your_endpoint_id' 13 | agent = Agent('You are a helpful assistant', model='doubao', api_key=api_key, base_url=endpoint) 14 | agent.user_input('介绍一下成都') -------------------------------------------------------------------------------- /examples/15_run_check.py: -------------------------------------------------------------------------------- 1 | # agent.run命令的时候,核对生成内容是否合适 2 | from GeneralAgent import Agent 3 | from GeneralAgent import skills 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | # 步骤0: 定义Agent 9 | agent = Agent('你是一个小说家') 10 | 11 | # 步骤1: 从用户处获取小说的名称和主题 12 | # topic = skills.input('请输入小说的名称和主题: ') 13 | topic = '小白兔吃糖不刷牙的故事' 14 | 15 | # 步骤2: 小说的概要 16 | summary = agent.run(f'小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。') 17 | 18 | # 步骤3: 小说的章节名称和概要列表 19 | chapters = agent.run('输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]', return_type=list, user_check=True) 20 | 21 | # 步骤4: 生成小说每一章节的详细内容 22 | agent.disable_python() 23 | contents = [] 24 | for 
index, (chapter_title, chapter_summary) in enumerate(chapters): 25 | content = agent.run(f'对于章节: {chapter_title}\n概要: {chapter_summary}. \n写小说这个章节的详细内容,注意只返回内容,不要标题。') 26 | content = '\n'.join([x.strip() for x in content.split('\n')]) 27 | contents.append(content) 28 | 29 | # 步骤5: 将小说格式化写入文件 30 | with open('novel.md', 'w') as f: 31 | for index in range(len(chapters)): 32 | f.write(f'### {chapters[index][0]}\n') 33 | f.write(f'{contents[index]}\n\n') 34 | 35 | # 步骤6(可选): 将markdown文件转换为pdf文件 36 | 37 | # 步骤7: 输出小说文件给用户 38 | skills.output('你的小说已经生成[novel.md](novel.md)\n') -------------------------------------------------------------------------------- /examples/16_test_azure.py: -------------------------------------------------------------------------------- 1 | # 测试Azure Open AI 2 | import os 3 | from GeneralAgent import Agent 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | # api_key = os.getenv("OPENAI_API_KEY") 9 | # base_url = os.getenv("OPENAI_API_BASE") 10 | api_key = '8ef0b4df45e444079cd5xxx' # Azure API Key or use OPENAI_API_KEY environment variable 11 | base_url = 'https://xxxx.openai.azure.com/' # Azure API Base URL or use OPENAI_API_BASE environment variable 12 | model = 'azure_cpgpt4' # azure_ with model name, e.g. azure_cpgpt4 13 | # azure api_version is default to '2024-05-01-preview'. 
You can set by environment variable AZURE_API_VERSION 14 | 15 | agent = Agent('You are a helpful assistant', api_key=api_key, base_url=base_url, model=model) 16 | while True: 17 | query = input('Please input your query:') 18 | agent.user_input(query) 19 | print('-'*50) 20 | -------------------------------------------------------------------------------- /examples/17_qwen.py: -------------------------------------------------------------------------------- 1 | # 测试阿里千问 2 | api_key = 'sk-xxxx' 3 | base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" 4 | model = 'qwen-vl-max' 5 | 6 | from GeneralAgent import Agent 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | agent = Agent('You are a helpful assistant.', model=model, api_key=api_key, base_url=base_url, temperature=0.5, max_tokens=1000, top_p=0.9, frequency_penalty=1) 12 | agent.run(['what is in the image?', {'image': '../docs/images/self_call.png'}], display=True) -------------------------------------------------------------------------------- /examples/18_translate_agent.py: -------------------------------------------------------------------------------- 1 | # 翻译Agent 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | 6 | def split_text(text, max_token=3000, separators='\n'): 7 | """ 8 | Split the text into paragraphs, each paragraph has less than max_token tokens. 
9 | """ 10 | import re 11 | from GeneralAgent import skills 12 | pattern = "[" + re.escape(separators) + "]" 13 | paragraphs = list(re.split(pattern, text)) 14 | # print(len(paragraphs)) 15 | result = [] 16 | current = '' 17 | for paragraph in paragraphs: 18 | if skills.string_token_count(current) + skills.string_token_count(paragraph) > max_token: 19 | result.append(current) 20 | current = '' 21 | current += paragraph + '\n' 22 | if len(current) > 0: 23 | result.append(current) 24 | new_result = [] 25 | for x in result: 26 | if skills.string_token_count(x) > max_token: 27 | new_result.extend(split_text(x, max_token=max_token, separators=",。,.;;")) 28 | else: 29 | new_result.append(x) 30 | new_result = [x.strip() for x in new_result if len(x.strip()) > 0] 31 | return new_result 32 | 33 | 34 | def translate_text(text, language, worker=1, reflection_mode=False): 35 | """ 36 | Translates the given text into the specified language, e.g. translate_text('I love china', 'chinese') 37 | @param text: The text to be translated 38 | @param language: The target language 39 | @param worker: The number of threads to use 40 | @param reflection_mode: Whether to enable reflection mode. If True, the agent will reflect on the translation result and make improvements. 41 | """ 42 | from GeneralAgent import skills 43 | from GeneralAgent import Agent 44 | from concurrent.futures import ThreadPoolExecutor 45 | segments = split_text(text, 600) 46 | 47 | def _translate(index, content, language): 48 | role = f"You are an expert linguist, specializing in translation text to {language}." 49 | rules = [ 50 | "翻译结果不要包含在```里面", 51 | "表格、代码、数学公式、图片地址、参考文献等不需要翻译,保持原样", 52 | "只返回翻译和保留的全文,不要任何解释和描述。", 53 | "确保翻译的准确性、流畅性和风格一致性", 54 | "使用目标语言的语法、拼写和标点规则", 55 | "确保术语使用一致并反映源文本领域", 56 | "如果有文化背景,请考虑文化背景" 57 | ] 58 | role += '# rules: ' + '\n\n'.join([f'{i+1}. 
{rule}' for i, rule in enumerate(rules)]) 59 | agent = Agent(role) 60 | result = agent.run(f'请将以下内容翻译成{language}:\n\n{content}') 61 | if reflection_mode: 62 | reflection_prompt = f"""Give constructive criticism and helpful suggestions to improve the translation. 63 | When writing suggestions, pay attention to whether there are ways to improve the translation's 64 | (i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text), 65 | (ii) fluency (by applying {language} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions), 66 | (iii) style (by ensuring the translations reflect the style of the source text and take into account any cultural context), 67 | (iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by only ensuring you use equivalent idioms {language}). 68 | Write a list of specific, helpful and constructive suggestions for improving the translation. 69 | Each suggestion should address one specific part of the translation. 
70 | Output only the suggestions and nothing else.""" 71 | agent.run(reflection_prompt) 72 | result = agent.run(f'根据反思的结果,对上面的翻译结果进行修改,并只输出修改后的翻译结果。') 73 | return index, result 74 | 75 | with ThreadPoolExecutor(worker) as executor: 76 | futures = [executor.submit(_translate, index, content, language) for index, content in enumerate(segments)] 77 | results = [future.result() for future in futures] 78 | results.sort(key=lambda x: x[0]) 79 | return '\n\n'.join([x[1] for x in results]) 80 | 81 | if __name__ == '__main__': 82 | result = translate_text('I love china', 'chinese') 83 | print(result) -------------------------------------------------------------------------------- /examples/19_temporary_context.py: -------------------------------------------------------------------------------- 1 | # 演示临时上下文的用法 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | agent = Agent('You are a helpful assistant.') 8 | with agent.temporary_context(): 9 | agent.user_input('My name is Henry.') 10 | agent.user_input("What's my name?") 11 | 12 | # Expect: I don't know your name. How can I help you today? 
-------------------------------------------------------------------------------- /examples/1_function_call.py: -------------------------------------------------------------------------------- 1 | # 函数调用 2 | import logging 3 | 4 | logging.basicConfig( 5 | level=logging.DEBUG, 6 | format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s", 7 | handlers=[logging.StreamHandler()], 8 | ) 9 | from GeneralAgent import Agent 10 | from dotenv import load_dotenv 11 | 12 | load_dotenv() 13 | 14 | 15 | # 函数: 获取天气信息 16 | def get_weather(city: str) -> str: 17 | """ 18 | get weather information 19 | @city: str, city name 20 | @return: str, weather information 21 | """ 22 | # return f"{city} weather: sunny" 23 | weather = "sunny" 24 | print(f"{city} weather: {weather}") 25 | return weather 26 | 27 | 28 | # agent = Agent('你是一个天气小助手', functions=[get_weather], model='deepseek-chat') 29 | agent = Agent("你是一个天气小助手", functions=[get_weather]) 30 | agent.user_input("成都天气怎么样?") 31 | 32 | # 输出 33 | # ```python 34 | # city = "成都" 35 | # weather_info = get_weather(city) 36 | # weather_info 37 | # ``` 38 | # 成都的天气是晴天。 39 | # 请问还有什么我可以帮忙的吗? 40 | -------------------------------------------------------------------------------- /examples/20_load_memory.py: -------------------------------------------------------------------------------- 1 | # load messages 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | 8 | messages = [ 9 | {"role": "user", "content": "My name is Yummy."}, 10 | {"role": "assistant", "content": "Hello, Yummy! 
How can I assist you today?"}, 11 | ] 12 | agent = Agent('You are a helpful assistant.', messages=messages) 13 | response = agent.user_input("What's my name?") 14 | 15 | # Expect: Yummy in response 16 | -------------------------------------------------------------------------------- /examples/21_market_search.py: -------------------------------------------------------------------------------- 1 | # 市场信息搜集 2 | # 运行前置条件: 3 | # 1. 安装 BeautifulSoup 库:pip install beautifulsoup4 4 | # 2. 安装 playwright 库: pip install playwright 5 | from GeneralAgent import Agent 6 | from dotenv import load_dotenv 7 | from playwright.sync_api import sync_playwright 8 | from bs4 import BeautifulSoup 9 | from urllib.parse import quote 10 | import time 11 | 12 | def get_baidu_search_url(keyword): 13 | """生成百度搜索URL,只处理关键词和时间戳""" 14 | current_timestamp = int(time.time()) 15 | past_timestamp = current_timestamp - (24 * 3600) # 24小时前 16 | base_url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd={}&fenlei=256&rqlang=cn&rsv_dl=tb&rsv_enter=1&rsv_btype=i&tfflag=1&gpc=stf%3D{}%2C{}|stftype%3D1" 17 | return base_url.format(quote(keyword), past_timestamp, current_timestamp) 18 | 19 | def extract_news_articles(url): 20 | """提取网页中的新闻文章和URL""" 21 | with sync_playwright() as p: 22 | browser = p.chromium.launch() 23 | page = browser.new_page() 24 | try: 25 | page.goto(url) 26 | page.wait_for_load_state('networkidle') 27 | 28 | content = page.content() 29 | soup = BeautifulSoup(content, 'html.parser') 30 | 31 | articles = [] 32 | if 'baidu.com' in url: 33 | # 百度搜索结果处理 34 | search_results = soup.find_all('div', class_=['result-op', 'result']) 35 | for result in search_results: 36 | title_elem = result.find('h3') 37 | if title_elem: 38 | link = title_elem.find('a') 39 | if link: 40 | articles.append({ 41 | 'title': title_elem.get_text().strip(), 42 | 'url': link.get('href', ''), 43 | 'source': '百度搜索' 44 | }) 45 | else: 46 | # 懂车帝处理(保持原来的逻辑) 47 | links = soup.find_all('a', href=True) 48 |
base_url = "https://www.dongchedi.com" 49 | 50 | for link in links: 51 | href = link.get('href', '') 52 | title = link.get_text().strip() 53 | if title and href: # 保留所有可能的文章,让LLM判断 54 | full_url = href if href.startswith('http') else base_url + href 55 | articles.append({ 56 | 'title': title, 57 | 'url': full_url, 58 | 'source': url 59 | }) 60 | 61 | return articles 62 | 63 | except Exception as e: 64 | return f"提取文章时出错: {str(e)}" 65 | finally: 66 | browser.close() 67 | 68 | def process_single_url(url: str, keyword: str, search_description: str, agent: Agent): 69 | """处理单个URL的文章""" 70 | if 'baidu.com' in url: 71 | url = get_baidu_search_url(keyword) 72 | 73 | articles = extract_news_articles(url) 74 | if not isinstance(articles, list): 75 | return f"处理URL {url} 时出错: {articles}" 76 | 77 | if not articles: 78 | return f"URL {url} 未找到任何文章" 79 | 80 | prompt = f""" 81 | 请从以下文章列表中严格筛选出仅与"{keyword}"直接相关的最新新闻。 82 | 83 | 文章列表: 84 | {articles} 85 | 86 | 筛选标准: 87 | 1. 必须在标题中直接提到"{keyword}"或与{keyword}直接相关的产品/事件 88 | 2. 必须是最新的新闻内容,不要选择普通的产品介绍页面 89 | 3. 新闻必须具有时效性和重要性 90 | 91 | 请按照以下格式整理符合条件的文章: 92 | 标题,网址 93 | 94 | 要求: 95 | 1. 使用逗号分隔字段 96 | 2. 每行一篇文章 97 | 3. 第一行为表头 98 | 4. 如果标题包含逗号,用双引号括起来 99 | 5. 按相关性和重要性排序 100 | 6. 
只输出100%确定与{keyword}直接相关的文章 101 | """ 102 | return agent.run(prompt, display=False) 103 | 104 | def process_articles_with_command(urls: list, keyword: str, search_description: str = None): 105 | """处理所有URL的文章""" 106 | load_dotenv() 107 | 108 | if not search_description: 109 | search_description = f"寻找与{keyword}相关的最新资讯" 110 | 111 | agent = Agent(f'''你是一个专业的资讯分析助手。 112 | 你的任务是找出与用户需求相关的文章。 113 | 用户搜索需求:{search_description} 114 | ''') 115 | 116 | try: 117 | print(f"\n搜索关键词: {keyword}") 118 | print(f"搜索需求: {search_description}\n") 119 | 120 | all_results = [] 121 | for url in urls: 122 | print(f"\n处理URL: {url}") 123 | result = process_single_url(url, keyword, search_description, agent) 124 | all_results.append(f"\n来自 {url} 的结果:\n{result}") 125 | 126 | return "\n".join(all_results) 127 | 128 | except Exception as e: 129 | return f"处理出错: {str(e)}" 130 | 131 | # 使用示例 132 | if __name__ == "__main__": 133 | keyword = "新能源汽车" 134 | description = "寻找所有和新能源汽车可能相关的动态,只找和新能源汽车最直接相关的最新重要信息(企业,行业政策等)" 135 | 136 | urls = [ 137 | "https://www.dongchedi.com/", 138 | "https://www.baidu.com/s", 139 | "https://36kr.com/", 140 | ] 141 | 142 | result = process_articles_with_command(urls, keyword, description) 143 | print(result) 144 | -------------------------------------------------------------------------------- /examples/2_write_novel.py: -------------------------------------------------------------------------------- 1 | # 工作流: 写小说 2 | from GeneralAgent import Agent 3 | from GeneralAgent import skills 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | # 步骤0: 定义Agent 8 | agent = Agent('你是一个小说家') 9 | 10 | # 步骤1: 从用户处获取小说的名称和主题 11 | # topic = skills.input('请输入小说的名称和主题: ') 12 | topic = '小白兔吃糖不刷牙的故事' 13 | 14 | # 步骤2: 小说的概要 15 | summary = agent.run(f'小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。') 16 | 17 | # 步骤3: 小说的章节名称和概要列表 18 | chapters = agent.run('输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]', return_type=list) 19 | 20 | # 步骤4: 生成小说每一章节的详细内容 21 | contents 
= [] 22 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 23 | content = agent.run(f'对于章节: {chapter_title}\n{chapter_summary}. \n输出章节的详细内容,注意只返回内容,不要标题。') 24 | content = '\n'.join([x.strip() for x in content.split('\n')]) 25 | contents.append(content) 26 | 27 | # 步骤5: 将小说格式化写入文件 28 | with open('novel.md', 'w') as f: 29 | for index in range(len(chapters)): 30 | f.write(f'### {chapters[index][0]}\n') 31 | f.write(f'{contents[index]}\n\n') 32 | 33 | # 步骤6(可选): 将markdown文件转换为pdf文件 34 | 35 | # 步骤7: 输出小说文件给用户 36 | skills.output('你的小说已经生成[novel.md](novel.md)\n') -------------------------------------------------------------------------------- /examples/3_ai_search.py: -------------------------------------------------------------------------------- 1 | # AI搜索 2 | # 运行前置条件: 3 | # 1. 请先配置环境变量 SERPER_API_KEY (https://serper.dev/ 的API KEY); 4 | # 2. 安装 selenium 库: pip install selenium 5 | 6 | from GeneralAgent import Agent 7 | from GeneralAgent import skills 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | google_results = [] 12 | 13 | # 步骤1: 第一次google搜索 14 | question = input('请输入问题,进行 AI 搜索: ') 15 | # question = '周鸿祎卖车' 16 | content1 = skills.google_search(question) 17 | google_results.append(content1) 18 | 19 | # 步骤2: 第二次google搜索: 根据第一次搜索结构,获取继续搜索的问题 20 | agent = Agent('你是一个AI搜索助手。') 21 | querys = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{content1}\n\n。请问可以帮助用户,需要继续搜索的关键短语有哪些(最多3个,且和问题本身不太重合)?返回关键短语列表变量([query1, query2])', return_type=list) 22 | print(querys) 23 | for query in querys: 24 | content = skills.google_search(query) 25 | google_results.append(content) 26 | 27 | # 步骤3: 提取重点网页内容 28 | agent.clear() 29 | web_contents = [] 30 | google_result = '\n\n'.join(google_results) 31 | urls = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n。哪些网页对于用户问题比较有帮助?请返回最重要的不超过5个的网页url列表变量([url1, url2, ...])', return_type=list) 32 | for url in urls: 33 | print(url) 34 | content = skills.web_get_text(url, wait_time=2) 35 | 
web_contents.append(content) 36 | 37 | # 步骤4: 输出结果 38 | agent.clear() 39 | web_content = '\n\n'.join(web_contents) 40 | agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n部分网页内容: \n{web_content}\n\n。请根据用户问题,搜索引擎结果,网页内容,给出用户详细的回答,要求按一定目录结构来输出,并且使用markdown格式。') -------------------------------------------------------------------------------- /examples/3_ai_search_simple.py: -------------------------------------------------------------------------------- 1 | # def main(messages, input, files, output_callback, event=None, workspace='./'): 2 | 3 | # question = input('') 4 | question = '周鸿祎卖车' 5 | from GeneralAgent import Agent 6 | from GeneralAgent import skills 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | agent = Agent('You are an AI search assistant.') 11 | 12 | # Google search 13 | google_result = skills.google_search(question) 14 | 15 | # Get important web 16 | urls = agent.run(f'User question: {question}\nSearch results: {google_result}\nReturn up to 5 most relevant URLs.', return_type=list) 17 | web_content = '\n\n'.join([skills.web_get_text(url, wait_time=2) for url in urls]) 18 | 19 | # Display the answer 20 | agent.clear() 21 | agent.run(f'User question: {question}\nSearch results: {google_result}\nWeb content: {web_content}\nProvide a detailed answer in markdown format.', display=True) -------------------------------------------------------------------------------- /examples/4_multi_agents.py: -------------------------------------------------------------------------------- 1 | # 多Agent配合完成任务 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | story_writer = Agent('你是一个故事创作家,根据大纲要求或者故事梗概,返回一个更加详细的故事内容。') 7 | humor_enhancer = Agent('你是一个润色作家,将一个故事进行诙谐润色,增加幽默元素。直接输出润色后的故事') 8 | 9 | # 禁用Python运行 10 | story_writer.disable_python_run = True 11 | humor_enhancer.disable_python_run = True 12 | 13 | # topic = skills.input('请输入小说的大纲要求或者故事梗概: ') 14 | topic = '写个小白兔吃糖不刷牙的故事,有教育意义。' 15 | initial_story = 
story_writer.run(topic) 16 | enhanced_story = humor_enhancer.run(initial_story) 17 | print(enhanced_story) -------------------------------------------------------------------------------- /examples/5_serialize.py: -------------------------------------------------------------------------------- 1 | # 序列化 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | # agent序列化位置,运行过程中会自动保存LLM的messages和python解析器的状态 8 | workspace='./5_serialize' 9 | 10 | role = 'You are a helpful agent.' 11 | agent = Agent(role, workspace=workspace) 12 | agent.user_input('My name is Shadow.') 13 | 14 | agent = None 15 | agent = Agent(role, workspace=workspace) 16 | agent.user_input('What is my name?') 17 | 18 | # Output: Your name is Shadow. How can I help you today, Shadow? 19 | 20 | # agent: 清除记忆 + python序列化状态 21 | agent.clear() 22 | 23 | agent.user_input('What is my name?') 24 | # I'm sorry, but I don't have access to your personal information, including your name. How can I assist you today? 25 | 26 | import shutil 27 | shutil.rmtree(workspace) -------------------------------------------------------------------------------- /examples/6_disable_python_run.py: -------------------------------------------------------------------------------- 1 | # Disable Python Run 2 | # 默认情况下,GeneralAgent会运行用户输入的Python代码。如果你不希望GeneralAgent运行Python代码,可以通过将 `disable_python_run` 属性设置为 `True` 来禁用Python运行。 3 | from GeneralAgent import Agent 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | agent = Agent('你是一个python专家,辅助用户解决python问题。') 9 | agent.disable_python_run = True 10 | agent.user_input('用python实现一个读取文件的函数') 11 | 12 | # 当然,这里是一个用Python实现的读取文件内容的函数: 13 | 14 | # ```python 15 | # def read_file(file_path): 16 | # try: 17 | # with open(file_path, 'r', encoding='utf-8') as file: 18 | # content = file.read() 19 | # return content 20 | # except FileNotFoundError: 21 | # return "File not found." 
22 | # except Exception as e: 23 | # return f"An error occurred: {e}" 24 | 25 | # # 示例用法 26 | # file_content = read_file('example.txt') 27 | # file_content 28 | # ``` 29 | 30 | # 这个函数 `read_file` 接受一个文件路径作为参数,尝试以UTF-8编码读取文件内容,并返回读取到的内容。如果文件未找到或发生其他错误,则返回相应的错误信息。 -------------------------------------------------------------------------------- /examples/7_hide_stream.py: -------------------------------------------------------------------------------- 1 | # 输出流控制:display=False 时隐藏输出流,不显示给用户;本例 display=True,会实时显示输出 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | agent = Agent('You are a helpful agent.', model='gpt-3.5-turbo') 8 | chengdu_description = agent.run('介绍一下成都', display=True) 9 | print(chengdu_description) -------------------------------------------------------------------------------- /examples/8_multi_model.py: -------------------------------------------------------------------------------- 1 | # 通过OpenAI Python SDK 支持其他大模型 2 | # 或者通过 https://github.com/songquanpeng/one-api 支持其他大模型 3 | from GeneralAgent import Agent 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | models = [ 9 | ('deepseek-chat', 32000, 'sk-xxx', 'https://api.deepseek.com/v1'), # DeepSeek官方支持 10 | ('moonshot-v1-128k', 128000, '$MOONSHOT_API_KEY', 'https://api.moonshot.cn/v1'), # Moonshot官方支持 11 | ('SparkDesk-v3.5', 4000, None, None), 12 | ('glm-4v', 128000, None, None), 13 | ('ERNIE-4.0-8K', 8000, None, None), 14 | ('qwen-turbo', 6000, None, None), 15 | ('hunyuan', 8000, None, None), 16 | ] 17 | 18 | for model, token_limit, api_key, base_url in models: 19 | agent = Agent('You are a helpful agent.', model=model, token_limit=token_limit, api_key=api_key, base_url=base_url) 20 | agent.user_input('介绍一下成都') -------------------------------------------------------------------------------- /examples/9_knowledge_files.py: -------------------------------------------------------------------------------- 1 | # 知识库 2 | from GeneralAgent import Agent 3 | from dotenv import 
load_dotenv 4 | 5 | load_dotenv() 6 | 7 | files = ['../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf'] 8 | workspace = '9_knowledge_files' 9 | agent = Agent('你是AI助手,用中文回复。', workspace=workspace, knowledge_files=files) 10 | agent.user_input(['Self call 是什么意思?']) 11 | 12 | # 清理掉 13 | import shutil 14 | shutil.rmtree(workspace) 15 | 16 | 17 | # 知识库默认使用 GeneralAgent.skills 中 embedding_texts 函数来对文本进行 embedding (默认是OpenAI的text-embedding-3-small模型) 18 | # 你可以重写 embedding_texts 函数,使用其他厂商 或者 本地的 embedding 方法,具体如下: 19 | 20 | # def new_embedding_texts(texts) -> [[float]]: 21 | # """ 22 | # 对文本数组进行embedding 23 | # """ 24 | # # 你的embedding方法 25 | # return result 26 | # from GeneralAgent import skills 27 | # skills.embedding_texts = new_embedding_texts -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "GeneralAgent" 3 | version = "0.3.29" 4 | description = "General Agent: From LLM to Agent" 5 | authors = ["Chen Li "] 6 | license = "Apache 2.0" 7 | readme = "README.md" 8 | repository = "https://github.com/CosmosShadow/GeneralAgent" 9 | packages = [ 10 | { include = "GeneralAgent" }, 11 | ] 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.8.1" 15 | requests = ">=2.31.0" 16 | tinydb = ">=4.8.0" 17 | openai = ">=1.3.3" 18 | jinja2 = ">=3.1.2" 19 | numpy = ">=1.24.4" 20 | tiktoken = ">=0.5.1" 21 | llama-index =">=0.10.44" 22 | codyer = ">=0.0.1" 23 | 24 | [tool.poetry.group.dev.dependencies] 25 | pytest = "^7.4.3" 26 | pytest-asyncio = "^0.21.1" 27 | pymupdf = "1.24.13" 28 | 29 | 30 | [[tool.poetry.source]] 31 | name = "PyPI" 32 | priority="primary" 33 | 34 | 35 | [build-system] 36 | requires = ["poetry-core"] 37 | build-backend = "poetry.core.masonry.api" 38 | 39 | 40 | [tool.poetry.scripts] 41 | GeneralAgent= 'GeneralAgent.cli:main' 
-------------------------------------------------------------------------------- /test/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/.gitkeep -------------------------------------------------------------------------------- /test/data/Nougat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/Nougat.pdf -------------------------------------------------------------------------------- /test/data/Nougat_piece.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/Nougat_piece.pdf -------------------------------------------------------------------------------- /test/data/a.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/a.py -------------------------------------------------------------------------------- /test/data/hello.py: -------------------------------------------------------------------------------- 1 | print('hello world') -------------------------------------------------------------------------------- /test/data/test.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/test.jpeg -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | asyncio: asyncio mark 
-------------------------------------------------------------------------------- /test/test_agent.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from GeneralAgent import Agent 3 | 4 | 5 | def test_math(): 6 | """数学计算测试. 使用run直接返回python表达式的值""" 7 | agent = Agent() 8 | result = agent.run("calculate 0.99 ** 1000", return_type=float) 9 | assert 4.317124741065786e-05 == result 10 | 11 | 12 | def test_function(): 13 | """函数调用测试""" 14 | 15 | def get_weather(city: str) -> str: 16 | """ 17 | get weather information 18 | @city: str, city name 19 | @return: str, weather information 20 | """ 21 | return f"{city} weather: sunny" 22 | 23 | agent = Agent("你是一个天气小助手", functions=[get_weather]) 24 | result = agent.user_input("成都天气怎么样?") 25 | assert "晴" in result or "sunny" in result 26 | 27 | 28 | def test_write_novel(): 29 | # 工作流: 写小说 30 | novel_path = "novel.md" 31 | # 清理掉已经有的小说 32 | import os 33 | 34 | if os.path.exists(novel_path): 35 | os.remove(novel_path) 36 | try: 37 | 38 | # 步骤0: 定义Agent 39 | agent = Agent("你是一个小说家") 40 | 41 | # 步骤1: 从用户处获取小说的名称和主题 42 | # topic = skills.input('请输入小说的名称和主题: ') 43 | topic = "小白兔吃糖不刷牙的故事" 44 | 45 | # 步骤2: 小说的概要 46 | summary = agent.run( 47 | f"小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。" 48 | ) 49 | 50 | # 步骤3: 小说的章节名称和概要列表 51 | chapters = agent.run( 52 | "输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]", 53 | return_type=list, 54 | ) 55 | 56 | # 步骤4: 生成小说每一章节的详细内容 57 | contents = [] 58 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 59 | content = agent.run( 60 | f"对于章节: {chapter_title}\n{chapter_summary}. 
\n输出章节的详细内容,注意只返回内容,不要标题。" 61 | ) 62 | content = "\n".join([x.strip() for x in content.split("\n")]) 63 | contents.append(content) 64 | 65 | # 步骤5: 将小说格式化写入文件 66 | with open(novel_path, "w") as f: 67 | for index in range(len(chapters)): 68 | f.write(f"### {chapters[index][0]}\n") 69 | f.write(f"{contents[index]}\n\n") 70 | 71 | except Exception as e: 72 | pass 73 | finally: 74 | # 验证小说存在,而且内容不为空 75 | assert os.path.exists(novel_path) 76 | with open(novel_path, "r") as f: 77 | content = f.read() 78 | assert content != "" 79 | assert "### " in content 80 | # 清理掉 81 | if os.path.exists(novel_path): 82 | os.remove(novel_path) 83 | 84 | 85 | def test_knowledge(): 86 | # 知识库 87 | workspace = "9_knowledge_files" 88 | try: 89 | files = ["../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf"] 90 | agent = Agent( 91 | "你是AI助手,用中文回复。", workspace=workspace, knowledge_files=files 92 | ) 93 | result = agent.user_input("Self call 是什么意思?") 94 | assert "LLM" in result 95 | except Exception as e: 96 | raise e 97 | finally: 98 | # 清理掉 99 | import shutil 100 | 101 | shutil.rmtree(workspace) 102 | 103 | 104 | def test_with_query_clear_data_0(): 105 | workspace = "test_with_query_clear_data_0" 106 | import os 107 | 108 | if os.path.exists(workspace): 109 | import shutil 110 | 111 | shutil.rmtree(workspace) 112 | agent = Agent("You are a helpful assistant.", workspace=workspace) 113 | with agent.temporary_context(): 114 | agent.user_input("My name is Henry.") 115 | import json 116 | 117 | with open(f"{workspace}/memory.json", "r") as f: 118 | memory = json.load(f) 119 | assert len(memory) == 0 120 | 121 | 122 | def test_with_query_clear_data_1(): 123 | agent = Agent("You are a helpful assistant.", hide_python_code=True) 124 | with agent.temporary_context(): 125 | agent.user_input("My name is Henry.") 126 | response = agent.user_input("What's my name?") 127 | assert "Henry" not in response 128 | 129 | 130 | def test_with_query_save_data(): 131 | workspace = 
"test_with_query_save_data" 132 | import os 133 | 134 | if os.path.exists(workspace): 135 | import shutil 136 | 137 | shutil.rmtree(workspace) 138 | agent = Agent("You are a helpful assistant.", workspace=workspace) 139 | agent.user_input("My name is Henry.") 140 | with agent.temporary_context(): 141 | agent.user_input("My name is Jimmy.") 142 | agent.user_input("My name is Yummy.") 143 | import json 144 | 145 | with open(f"{workspace}/memory.json", "r") as f: 146 | memory = json.load(f) 147 | assert len(memory) == 4 148 | 149 | 150 | def test_with_query_clear_data_with_exception_0(): 151 | workspace = "test_with_query_clear_data_with_exception_0" 152 | import os 153 | 154 | if os.path.exists(workspace): 155 | import shutil 156 | 157 | shutil.rmtree(workspace) 158 | try: 159 | agent = Agent("You are a helpful assistant.", workspace=workspace) 160 | with agent.temporary_context(): 161 | agent.user_input("My name is Henry.") 162 | raise Exception("test exception") 163 | except Exception: 164 | ... 165 | finally: 166 | import json 167 | 168 | with open(f"{workspace}/memory.json", "r") as f: 169 | memory = json.load(f) 170 | assert len(memory) == 0 171 | 172 | 173 | def test_with_query_clear_data_with_exception_1(): 174 | workspace = "test_with_query_clear_data_with_exception_1" 175 | import os 176 | 177 | if os.path.exists(workspace): 178 | import shutil 179 | 180 | shutil.rmtree(workspace) 181 | try: 182 | agent = Agent("You are a helpful assistant.", workspace=workspace) 183 | agent.user_input("My name is Yummy.") 184 | with agent.temporary_context(): # no_memory() 185 | agent.user_input("My name is Henry.") 186 | raise Exception("test exception") 187 | except Exception: 188 | ... 
189 | finally: 190 | import json 191 | 192 | with open(f"{workspace}/memory.json", "r") as f: 193 | memory = json.load(f) 194 | assert len(memory) == 2 195 | 196 | 197 | def test_load_error_messages(): 198 | messages = [ 199 | {"role": "user", "text": "My name is Yummy."}, 200 | {"role": "assistant", "content": "Hello, Yummy! How can I assist you today?"}, 201 | ] 202 | with pytest.raises(AssertionError, match="message format wrong"): 203 | agent = Agent("You are a helpful assistant.", messages=messages) 204 | agent.user_input("What's my name?") 205 | 206 | 207 | def test_load_messages(): 208 | messages = [ 209 | {"role": "user", "content": "My name is Yummy."}, 210 | {"role": "assistant", "content": "Hello, Yummy! How can I assist you today?"}, 211 | ] 212 | agent = Agent("You are a helpful assistant.", messages=messages) 213 | response = agent.user_input("What's my name?") 214 | assert "Yummy" in response 215 | -------------------------------------------------------------------------------- /test/test_examples.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from unittest.mock import patch 4 | from GeneralAgent import Agent, skills 5 | 6 | 7 | def test_base_usage(): 8 | agent = Agent("You are a helpful assistant.") 9 | with patch("builtins.print") as mock_print: 10 | response = agent.user_input('Your name is "Tom". 
Who are you?') 11 | assert "Tom" in response 12 | 13 | 14 | def test_function_call(): 15 | def get_weather(city: str) -> str: 16 | print(f"{city} weather: 晴天") 17 | 18 | agent = Agent("你是一个天气小助手", functions=[get_weather]) 19 | response = agent.user_input("成都天气怎么样?") 20 | assert "晴天" in response 21 | 22 | 23 | def test_write_novel(): 24 | agent = Agent("你是一个小说家") 25 | topic = "小白兔吃糖不刷牙的故事" 26 | summary = agent.run( 27 | f"小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。" 28 | ) 29 | chapters = agent.run( 30 | "输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]", 31 | return_type=list, 32 | ) 33 | contents = [] 34 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 35 | content = agent.run( 36 | f"对于章节: {chapter_title}\n{chapter_summary}. \n输出章节的详细内容,注意只返回内容,不要标题。" 37 | ) 38 | content = "\n".join([x.strip() for x in content.split("\n")]) 39 | contents.append(content) 40 | with open("novel.md", "w") as f: 41 | for index in range(len(chapters)): 42 | f.write(f"### {chapters[index][0]}\n") 43 | f.write(f"{contents[index]}\n\n") 44 | skills.output("你的小说已经生成[novel.md](novel.md)\n") 45 | # 判断文件是否存在 46 | assert os.path.exists("novel.md") 47 | # 判断文件字符数量是否大于 200 48 | with open("novel.md", "r") as f: 49 | assert len(f.read()) > 200 50 | # 删除文件 51 | os.remove("novel.md") 52 | 53 | 54 | def test_multi_agents(): 55 | from GeneralAgent import Agent 56 | 57 | story_writer = Agent( 58 | "你是一个故事创作家,根据大纲要求或者故事梗概,返回一个更加详细的故事内容。" 59 | ) 60 | humor_enhancer = Agent( 61 | "你是一个润色作家,将一个故事进行诙谐润色,增加幽默元素。直接输出润色后的故事" 62 | ) 63 | story_writer.disable_python_run = True 64 | humor_enhancer.disable_python_run = True 65 | topic = "写个小白兔吃糖不刷牙的故事,有教育意义。" 66 | initial_story = story_writer.run(topic) 67 | assert "小白兔" in initial_story 68 | enhanced_story = humor_enhancer.run(initial_story) 69 | assert "小白兔" in enhanced_story 70 | 71 | 72 | def test_serialize(): 73 | workspace = "./5_serialize" 74 | # 如果文件存在则删除 75 | if os.path.exists(workspace): 76 | 
shutil.rmtree(workspace) 77 | role = "You are a helpful agent." 78 | agent = Agent(role, workspace=workspace) 79 | agent.user_input("My name is Shadow.") 80 | agent = Agent(role, workspace=workspace) 81 | response = agent.user_input("What is my name?") 82 | assert "Shadow" in response 83 | agent.clear() 84 | response = agent.user_input("What is my name?") 85 | assert "Shadow" not in response 86 | shutil.rmtree(workspace) 87 | 88 | 89 | def test_disable_python_run(): 90 | # 在当前目录下创建 a.txt 并写入 “My name is Henry.” 91 | # 如果文件存在则删除 92 | if os.path.exists("a.txt"): 93 | os.remove("a.txt") 94 | with open("a.txt", "w") as f: 95 | f.write("My name is Henry.") 96 | agent = Agent("You are a helpful assistant.") 97 | agent.disable_python_run = True 98 | response = agent.user_input("帮我读取 ./a.txt 中的内容") 99 | assert "Henry" not in response 100 | 101 | 102 | def test_enable_python_run(): 103 | # 在当前目录下创建 a.txt 并写入 “My name is Henry.” 104 | # 如果文件存在则删除 105 | if os.path.exists("a.txt"): 106 | os.remove("a.txt") 107 | with open("a.txt", "w") as f: 108 | f.write("My name is Henry.") 109 | agent = Agent("You are a helpful assistant.") 110 | agent.disable_python_run = False 111 | response = agent.user_input("帮我读取 ./a.txt 中的内容") 112 | assert "Henry" in response 113 | 114 | 115 | def test_hide_stream(capsys): 116 | agent = Agent("You are a helpful assistant.") 117 | agent.hide_stream = False 118 | agent.run("一句话介绍成都", display=False) 119 | captured = capsys.readouterr() 120 | assert len(captured.out) == 0 121 | 122 | 123 | def test_show_stream(capsys): 124 | agent = Agent("You are a helpful assistant.") 125 | agent.hide_stream = False 126 | agent.run("一句话介绍成都", display=True) 127 | captured = capsys.readouterr() 128 | assert len(captured.out) > 0 129 | 130 | 131 | def test_deepseek_chat(): 132 | model = "deepseek-chat" 133 | token_limit = 32000 134 | api_key = os.environ.get("DEEPSEEK_API_KEY") 135 | base_url = "https://api.deepseek.com/v1" 136 | agent = Agent( 137 | "You are a helpful 
agent.", 138 | model=model, 139 | token_limit=token_limit, 140 | api_key=api_key, 141 | base_url=base_url, 142 | ) 143 | response = agent.run("一句话介绍成都", display=False) 144 | print(response) 145 | assert "成都" in response 146 | 147 | 148 | def test_add_knowledge_files(): 149 | workspace = "./knowledge_files" 150 | if os.path.exists(workspace): 151 | shutil.rmtree(workspace) 152 | file_name = "test_knowledge_file.txt" 153 | with open(file_name, "w") as f: 154 | f.write("My name is Henry") 155 | files = [ 156 | file_name, 157 | ] 158 | agent = Agent( 159 | "你是AI助手,用中文回复。", workspace=workspace, knowledge_files=files 160 | ) 161 | response = agent.user_input(["我叫什么名字?"]) 162 | shutil.rmtree(workspace) 163 | os.remove(file_name) 164 | assert "Henry" in response 165 | 166 | 167 | def test_rag_function(): 168 | def rag_function(messages): 169 | input = messages[-1]["content"] 170 | print("user input:", input) 171 | return "Background: GeneralAgent is a Python library for building AI assistants. It provides a simple API for building conversational agents." 172 | 173 | agent = Agent("You are a helpful assistant", rag_function=rag_function) 174 | response = agent.user_input("What is GeneralAgent?") 175 | assert "GeneralAgent is a Python library" in response 176 | 177 | 178 | def test_collection_and_store(): 179 | role = """ 180 | 你是一个专业的导游。 181 | 你的主要工作: 和游客讲解城市的景点。 182 | 183 | # 1、旅游沟通例子 184 | 用户: 我想去成都玩 185 | 你: 成都是一所宜居的城市,安逸的很 186 | 用户: 成都有什么好吃的? 
187 | 你: 火锅 188 | 189 | 当城市确认,直接输出python代码,使用 save_travel_guide_record 函数保存旅游攻略。 190 | 191 | 192 | travel_guide_record = \"\"\" 193 | 城市: 成都 194 | 美食: 火锅 195 | \"\"\" 196 | save_travel_guide_record(travel_guide_record) 197 | 198 | """ 199 | 200 | stop = False 201 | 202 | def save_travel_guide_record(medical_record): 203 | with open("test_collection.txt", "a") as f: 204 | f.write(medical_record) 205 | global stop 206 | stop = True 207 | return "旅行攻略已保存" 208 | 209 | # 删除文件 210 | if os.path.exists("test_collection.txt"): 211 | os.remove("test_collection.txt") 212 | agent = Agent(role, functions=[save_travel_guide_record], hide_python_code=True) 213 | agent.user_input("你想去哪玩?") 214 | agent.user_input("成都") 215 | agent.user_input("") 216 | with open("test_collection.txt", "r") as f: 217 | content = f.read() 218 | assert "成都" in content 219 | 220 | 221 | def test_image_input(): 222 | agent = Agent("You are a helpful assistant.") 223 | response = agent.user_input( 224 | ["What animal in the picture?", {"image": "test/data/test.jpeg"}] 225 | ) 226 | assert "dog" in response 227 | 228 | 229 | def test_temporary_context(): 230 | agent = Agent("You are a helpful assistant.") 231 | with agent.temporary_context(): 232 | agent.user_input("My name is Henry.") 233 | response = agent.user_input("What's my name?") 234 | assert "Henry" not in response 235 | 236 | 237 | def test_load_messages(): 238 | messages = [ 239 | {"role": "user", "content": "My name is Yummy."}, 240 | {"role": "assistant", "content": "Hello, Yummy! 
How can I assist you today?"}, 241 | ] 242 | agent = Agent("You are a helpful assistant.", messages=messages) 243 | response = agent.user_input("What's my name?") 244 | assert "Yummy" in response 245 | -------------------------------------------------------------------------------- /test/test_interpreter_python.py: -------------------------------------------------------------------------------- 1 | import os 2 | from GeneralAgent.interpreter import PythonInterpreter 3 | 4 | 5 | def test_python_interpreter(): 6 | # test run 7 | serialize_path = "test/data/test_interpreter.bin" 8 | if os.path.exists(serialize_path): 9 | os.remove(serialize_path) 10 | 11 | interpreter = PythonInterpreter(serialize_path=serialize_path) 12 | result, is_stop = interpreter.output_parse( 13 | '```python\n#run code\n"hello world"\n```' 14 | ) 15 | print(result) 16 | assert "hello world" in result.strip() 17 | # assert is_stop is False 18 | 19 | # test aug assignment 20 | interpreter.set_variable("a", 10) 21 | result, is_stop = interpreter.output_parse("```python\n#run code\na += 1\n```") 22 | a = interpreter.get_variable("a") 23 | assert a == 11 24 | 25 | result, is_stop = interpreter.output_parse("```python\n#run code\na += 1\n```") 26 | a = interpreter.get_variable("a") 27 | assert a == 12 28 | 29 | # test ann assignment 30 | result, is_stop = interpreter.output_parse("```python\n#run code\na: int = 1\n```") 31 | a = interpreter.get_variable("a") 32 | assert a == 1 33 | 34 | # test normal assignment 35 | result, is_stop = interpreter.output_parse("```python\n#run code\nb = 1\n```") 36 | b = interpreter.get_variable("b") 37 | assert b == 1 38 | 39 | # test multiline code 40 | result, is_stop = interpreter.output_parse( 41 | "```python\n#run code\n[\n 1,\n 2,\n 3\n]\n```" 42 | ) 43 | assert "[1, 2, 3]" == result.split("\n")[-2] 44 | 45 | # test multiple assignment 46 | result, is_stop = interpreter.output_parse("```python\n#run code\na, b = 1, 2\n```") 47 | a = interpreter.get_variable("a") 
48 | b = interpreter.get_variable("b") 49 | assert a == 1 50 | assert b == 2 51 | assert "(1, 2)" == result.split("\n")[-2] 52 | 53 | 54 | def test_stack_code(): 55 | serialize_path = "test/data/test_interpreter.bin" 56 | if os.path.exists(serialize_path): 57 | os.remove(serialize_path) 58 | interpreter = PythonInterpreter(serialize_path=serialize_path) 59 | code = """ 60 | ```python 61 | #run code 62 | a = 10 63 | code = "```python\\na += 1\\n```" 64 | interpreter.output_parse(code) 65 | a 66 | ``` 67 | """ 68 | interpreter.set_variable("interpreter", interpreter) 69 | result, is_stop = interpreter.output_parse(code) 70 | # print(result) 71 | assert "11" in result.strip() 72 | 73 | 74 | # output: 75 | # 11 76 | # python runs result: 77 | # run successfully 78 | 79 | 80 | def test_run_code(): 81 | code = """ 82 | def test(): 83 | return "hello world" 84 | test() 85 | """ 86 | interpreter = PythonInterpreter() 87 | result, is_stop = interpreter.run_code(code) 88 | # print(result) 89 | assert "hello world" in result.strip() 90 | -------------------------------------------------------------------------------- /test/test_link_memory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fitz 3 | import pytest 4 | import asyncio 5 | 6 | 7 | @pytest.mark.skip(reason="removed temporarily") 8 | def test_read_paper(): 9 | from GeneralAgent.memory import LinkMemory 10 | 11 | serialize_path = "./summary_memory.json" 12 | if os.path.exists(serialize_path): 13 | os.remove(serialize_path) 14 | memory = LinkMemory(serialize_path=serialize_path) 15 | file_path = "./data/Nougat_piece.pdf" 16 | doc = fitz.open(file_path) 17 | content = "" 18 | for page in doc: 19 | content += "\n" + page.get_text() 20 | memory.add_memory(content, output_callback=None) 21 | spark = memory.get_memory() 22 | # print(f'-----------\n{spark}\n-----------') 23 | assert "Introduction" in spark 24 | 25 | messages = [ 26 | {"role": "user", "content": 
"论文有哪些贡献?"}, 27 | ] 28 | spark = memory.get_memory(messages) 29 | print(f"-----------\n{spark}\n-----------") 30 | assert "pdf" in spark.lower() 31 | -------------------------------------------------------------------------------- /test/test_skills.py: -------------------------------------------------------------------------------- 1 | from GeneralAgent.skills.python_envs import python_line_is_variable_expression 2 | 3 | 4 | def test_python_line_is_variable_expression(): 5 | assert python_line_is_variable_expression("a") 6 | assert python_line_is_variable_expression("a, b") 7 | assert python_line_is_variable_expression("a + b") 8 | assert python_line_is_variable_expression("vars[0]") 9 | assert python_line_is_variable_expression('scrape_web("https://www.baidu.com")[0]') 10 | 11 | assert python_line_is_variable_expression(" vars[0]") is False 12 | assert python_line_is_variable_expression("print(a)") is False 13 | assert python_line_is_variable_expression("x = a + b") is False 14 | -------------------------------------------------------------------------------- /test/test_skills_llm_inference.py: -------------------------------------------------------------------------------- 1 | from GeneralAgent import skills 2 | 3 | 4 | def test_embedding_texts(): 5 | texts = ["我爱唱歌", "I love singing"] 6 | embeddings = skills.embedding_texts(texts) 7 | a, b = embeddings[0], embeddings[1] 8 | assert skills.cos_sim(a, a) >= 0.999 9 | assert skills.cos_sim(a, b) > 0.7 10 | 11 | 12 | def test_llm_inference(): 13 | messages = [ 14 | {"role": "system", "content": "you are a helpful assistant"}, 15 | {"role": "user", "content": "1 + 1 = ?"}, 16 | ] 17 | result = "" 18 | for x in skills.llm_inference(messages, stream=True): 19 | if x is None: 20 | break 21 | result += x 22 | assert "2" in result 23 | -------------------------------------------------------------------------------- /test/test_skills_memory_utils.py: 
# --------------------------------------------------------------------------
# test/test_skills_memory_utils.py (continued)
# --------------------------------------------------------------------------
import pytest
import asyncio


# Raw text of the opening pages of the Nougat paper; input fixture for the
# segmentation / summarization tests below.
content = """
Nougat: Neural Optical Understanding for Academic Documents
Lukas Blecher⇤ Guillem Cucurull Thomas Scialom Robert Stojnic Meta AI
Abstract
Scientific knowledge is predominantly stored in books and scientific journals, often in the form of PDFs. However, the PDF format leads to a loss of semantic information, particularly for mathematical expressions. We propose Nougat (Neural Optical Understanding for Academic Documents), a Visual Transformer model that performs an Optical Character Recognition (OCR) task for processing scientific documents into a markup language, and demonstrate the effectiveness of our model on a new dataset of scientific documents. The proposed approach offers a promising solution to enhance the accessibility of scientific knowledge in the digital age, by bridging the gap between human- readable documents and machine-readable text. We release the models and code to accelerate future work on scientific text recognition.
1 Introduction
The majority of scientific knowledge is stored in books or published in scientific journals, most commonly in the Portable Document Format (PDF). Next to HTML, PDFs are the second most prominent data format on the internet, making up 2.4% of common crawl [1]. However, the information stored in these files is very difficult to extract into any other formats. This is especially true for highly specialized documents, such as scientific research papers, where the semantic information of mathematical expressions is lost.
Existing Optical Character Recognition (OCR) engines, such as Tesseract OCR [2], excel at detecting and classifying individual characters and words in an image, but fail to understand the relationship between them due to their line-by-line approach. This means that they treat superscripts and subscripts in the same way as the surrounding text, which is a significant drawback for mathematical expressions. In mathematical notations like fractions, exponents, and matrices, relative positions of characters are crucial.
Converting academic research papers into machine-readable text also enables accessibility and searchability of science as a whole. The information of millions of academic papers can not be fully accessed because they are locked behind an unreadable format. Existing corpora, such as the S2ORC dataset [3], capture the text of 12M2 papers using GROBID [4], but are missing meaningful representations of the mathematical equations.
To this end, we introduce Nougat, a transformer based model that can convert images of document pages to formatted markup text.
The primary contributions in this paper are
• Release of a pre-trained model capable of converting a PDF to a lightweight markup language. We release the code and the model on GitHub3
• We introduce a pipeline to create dataset for pairing PDFs to source code
• Our method is only dependent on the image of a page, allowing access to scanned papers and books
⇤Correspondence to: lblecher@meta.com
2The paper reports 8.1M papers but the authors recently updated the numbers on the GitHub page https://github.com/allenai/s2orc 3 https://github.com/facebookresearch/nougat
"""

# Pre-summarized "background" segments in the memory-utils format:
# "#NN <summary> Detail in <<link>>, <<link>>, ...".
# NOTE(review): the link titles inside the "Detail in" lists appear to have
# been stripped down to empty "<>" by a markup-removing export — confirm
# against repository history. test_extract_info's assertions do not depend
# on these titles, so the fixture is left as found.
background = """
#01 Nougat is a Visual Transformer model that performs Optical Character Recognition (OCR) on scientific documents, converting them into a markup language. It aims to enhance the accessibility and searchability of scientific knowledge by bridging the gap between human-readable documents and machine-readable text. The model has been released along with the code for future work on scientific text recognition. Detail in <>, <>, <>, <>, <>, <>
#02 The content discusses the process of splitting a document into pages and predicting the page numbers for each paragraph. It also mentions the use of fuzzy matching to find the exact position within a paragraph. The content acknowledges that there may be artifacts and missing elements in the ground truth data. The results and evaluation section mentions the metrics used to evaluate the model's performance. Detail in <>, <>, <>, <>, <>, <>
#03 The model presented, Nougat, is an end-to-end trainable encoder-decoder transformer-based model for converting document pages to markup. It relies solely on the rasterized document page and does not rely on OCR or embedded text representations. The model has shown potential for extracting text from digital-born PDFs and converting scanned papers and textbooks. The model's utility is limited by factors such as repetitions and the need for improvements in handling different document styles. The model's generation speed is slower compared to classical approaches but can correctly parse mathematical expressions. Future work includes addressing the tendency for the model to collapse into a repeating loop and improving the handling of inconsistencies across the document. Detail in <>, <>, <>, <>, <>, <>
#04 Training systems for handwritten mathematical expression recognition, generating LaTeX sequences from math formula images using deep neural networks, and pre-training models for document understanding and image recognition. Detail in <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>
#05 The content discusses various topics including a dataset composition, examples of text generation, derivative rules, pressure calculations, gas mixtures, and molecular Hamiltonian. Detail in <>, <>, <>, <>, <>, <>
#06 jf (t) is a mathematical function. Detail in <>, <>
#07 The content discusses various models and their performance in the field of VQA (Visual Question Answering). It also includes information about a proposed cycle-consistent training framework and its impact on model performance. Additionally, there is mention of evaluation metrics and the effect of hierarchical generation on story generation models. Detail in <>, <>, <>, <>, <>, <>
"""


def test_parse_segment_llm_result():
    """_parse_segment_llm_result turns '<<Title>>\\nstart: end' blocks into a
    {title: (start, end)} mapping."""
    from GeneralAgent.skills.memory_utils import _parse_segment_llm_result

    # NOTE(review): the original fixture's titles were stripped to "<>" by a
    # markup-removing export; only "Abstract" and the node count are pinned
    # by the assertions below, so the other three titles are reconstructed
    # placeholders — confirm against repository history.
    string = (
        "<<Nougat: Neural Optical Understanding for Academic Documents>>\n0: 15\n\n"
        "<<Abstract>>\n6: 15\n\n"
        "<<1 Introduction>>\n17: 32\n\n"
        "<<2 Related Work>>\n34: 38"
    )
    nodes = _parse_segment_llm_result(string)
    assert nodes["Abstract"] == (6, 15)
    assert len(nodes) == 4


def test_segment_text():
    """segment_text splits the paper text into at least one titled node."""
    from GeneralAgent import skills

    nodes = skills.segment_text(content)
    assert len(nodes) > 0
    # assert 'Abstract' in ' '.join(nodes.keys())


def test_summarize_text():
    """summarize_text must return something strictly shorter than its input."""
    from GeneralAgent import skills

    summary = skills.summarize_text(content)
    # print(summary)
    assert len(summary) < len(content)


def test_extract_info():
    """extract_info answers from the background when relevant and yields
    '[Nothing]' for an unrelated question (here: asking about the weather)."""
    from GeneralAgent import skills

    task = "今天天气怎么样?"
    info = skills.extract_info(background, task)
    assert "[Nothing]" in info

    task = "论文有哪贡献?"
    info = skills.extract_info(background, task)
    print(info)

    task = "论文有哪些限制?"
    info = skills.extract_info(background, task)
    print(info)


def test_parse_extract_info():
    """parse_extract_info returns the referenced segment numbers and every
    '<<Title>>' link that appears, in document order."""
    # Renamed from `content` to avoid shadowing the module-level fixture.
    # NOTE(review): the '<<...>>' titles were stripped to '<>' by a
    # markup-removing export; they are reconstructed here 1:1 from the
    # expected-titles assertion below (1 under #01, 6 under the first #03,
    # 2 in the final 'Detail in' list) — confirm against repository history.
    llm_output = """
#01
<<Nougat: Neural Optical Understanding for Academic Documents>>

#03
<<Numbers and Punctuation>>
<<Math and Plain Text Scores>>
<<Results and Format of GROBID>>
<<Comparison of Approaches>>
<<Repetition Detection and Inference>>
<<Limitations and Future Work>>
#03 The model's utility is limited by factors such as repetitions and the need for improvements in handling different document styles. The model's generation speed is slower compared to classical approaches but can correctly parse mathematical expressions. Future work includes addressing the tendency for the model to collapse into a repeating loop and improving the handling of inconsistencies across the document. Detail in <<Repetition Detection and Inference>>, <<Limitations and Future Work>>
"""
    from GeneralAgent import skills

    numbers, titles = skills.parse_extract_info(llm_output)
    assert numbers == [1, 3, 3]
    # print(numbers)
    assert titles == [
        "Nougat: Neural Optical Understanding for Academic Documents",
        "Numbers and Punctuation",
        "Math and Plain Text Scores",
        "Results and Format of GROBID",
        "Comparison of Approaches",
        "Repetition Detection and Inference",
        "Limitations and Future Work",
        "Repetition Detection and Inference",
        "Limitations and Future Work",
    ]


# --------------------------------------------------------------------------
# test/test_stack_memory.py
# --------------------------------------------------------------------------
import os
import pytest
from GeneralAgent.utils import set_logging_level

set_logging_level()


@pytest.mark.skip(reason="removed temporarily")
def test_memory():
    """Build a small StackMemory tree, then verify its structure survives
    serialization to disk and reloading."""
    from GeneralAgent.memory import StackMemory, StackMemoryNode

    # Start from a clean serialized state.
    serialize_path = "./data/memory.json"
    if os.path.exists(serialize_path):
        os.remove(serialize_path)
    memory = StackMemory(serialize_path=serialize_path)
    node1 = StackMemoryNode(role="user", content="node1")
    node2 = StackMemoryNode(role="system", content="node2")
    node3 = StackMemoryNode(role="system", content="node3")
    node4 = StackMemoryNode(role="system", content="node4")
    memory.add_node(node1)
    memory.add_node_in(node1, node2)       # node2 becomes a child of node1
    memory.add_node_after(node2, node3)    # node3 is node2's sibling
    memory.add_node_after(node3, node4)    # node4 is node3's sibling
    # Resulting tree: [node1 [node2, node3, node4] ]

    def _assert_init_state(memory):
        # Re-fetch every node by id and check parent/children links.
        node1 = memory.get_node(1)
        node2 = memory.get_node(2)
        node3 = memory.get_node(3)
        node4 = memory.get_node(4)
        assert node1.role == "user"
        assert node1.childrens == [2, 3, 4]
        assert node2.parent == 1
        assert node3.parent == 1
        assert node4.parent == 1
        assert node2.childrens == []
        assert node3.childrens == []
        assert node4.childrens == []
        assert memory.get_node(1).childrens == [2, 3, 4]

    # first assert
    _assert_init_state(memory)
    description_1 = str(memory)

    # load from serialized file: structure and description must round-trip
    memory = None
    memory = StackMemory(serialize_path=serialize_path)
    _assert_init_state(memory)
    description_2 = str(memory)
    assert description_1 == description_2

    # test get node
    tmp_node = memory.get_node(3)
    assert tmp_node.content == "node3"

    # # get todo node
    # todo_node = memory.get_todo_node()
    # assert todo_node.node_id == 2

    # # success node
    # memory.success_node(todo_node)
    # assert memory.get_todo_node().node_id == 3

    if os.path.exists(serialize_path):
        os.remove(serialize_path)