├── .env.example ├── .gitignore ├── GeneralAgent ├── __init__.py ├── agent │ ├── __init__.py │ └── agent.py ├── interpreter │ ├── __init__.py │ ├── applescript_interpreter.py │ ├── interpreter.py │ ├── knowledge_interpreter.py │ ├── python_interpreter.py │ ├── role_interpreter.py │ └── shell_interpreter.py ├── llamaindex.py ├── memory │ ├── __init__.py │ └── normal_memory.py ├── skills │ ├── __init__.py │ ├── file_operation.py │ ├── openai_model.py │ ├── python_envs.py │ ├── token_count.py │ ├── unique_name.py │ └── web_tools.py └── utils.py ├── README.md ├── README_EN.md ├── docs ├── develop.md ├── images │ ├── 2023.11.15.jpg │ ├── 2023_11_27_builder_agent.jpg │ ├── 2023_11_27_image_creator.jpg │ ├── Architecture.png │ ├── Architecture_2023.11.15.png │ ├── general_agent_2024.01.16.png │ ├── self_call.png │ ├── stack_memory.png │ ├── wechat.jpg │ └── wechat_company.jpg └── paper │ └── General_Agent__Self_Call_And_Stack_Memory.pdf ├── examples ├── 0_base_usage.py ├── 10_rag_function.py ├── 11_collection_and_store.py ├── 12_hide_python_code.py ├── 13_image_input.py ├── 14_doubao_llm.py ├── 15_run_check.py ├── 16_test_azure.py ├── 17_qwen.py ├── 18_translate_agent.py ├── 19_temporary_context.py ├── 1_function_call.py ├── 20_load_memory.py ├── 21_market_search.py ├── 2_write_novel.py ├── 3_ai_search.py ├── 3_ai_search_simple.py ├── 4_multi_agents.py ├── 5_serialize.py ├── 6_disable_python_run.py ├── 7_hide_stream.py ├── 8_multi_model.py └── 9_knowledge_files.py ├── pyproject.toml └── test ├── data ├── .gitkeep ├── Nougat.pdf ├── Nougat_piece.pdf ├── a.py ├── hello.py └── test.jpeg ├── pytest.ini ├── test_agent.py ├── test_examples.py ├── test_interpreter_python.py ├── test_link_memory.py ├── test_skills.py ├── test_skills_llm_inference.py ├── test_skills_memory_utils.py └── test_stack_memory.py /.env.example: -------------------------------------------------------------------------------- 1 | # OPENAI API Key or 兼容OpenAI Python SDK 的其他国产大模型API Key 2 | 
OPENAI_API_KEY='your_openai_api_key' 3 | 4 | # OPENAI API 访问地址 or 代理地址 or 兼容OpenAI Python SDK 的其他国产大模型API地址 5 | OPENAI_API_BASE='https://api.openai.com/v1' 6 | 7 | # 默认大模型 8 | DEFAULT_LLM_MODEL='gpt-4o' 9 | 10 | # embedding模型 11 | OPENAI_EMBEDDING_MODEL='text-embedding-3-small' 12 | 13 | # 默认大模型的温度 14 | LLM_TEMPERATURE='0.5' 15 | 16 | AGENT_LOG='info' 17 | 18 | # SERPER API Key(可选) ./examples/3_ai_search.py 需要用到 19 | SERPER_API_KEY='your_serper_api_key' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /knowledge_files 2 | /.vscode 3 | 4 | *.pyc 5 | GeneralAgent/.env 6 | GeneralAgent/*.json 7 | 8 | test/data_0.json 9 | test/data/test_workspace/* 10 | test/tetris.py 11 | test/tetris.py.bak 12 | GeneralAgent/cache_json 13 | test/data/plan_memory.json 14 | test/data/test_interpreter.bin 15 | test/data/b.txt 16 | test/data/a.txt 17 | test/data/hello.pptx 18 | test/.env 19 | 20 | build/* 21 | dist/* 22 | GeneralAgent.egg-info* 23 | test/multi_lines_input/* 24 | test/multi_lines_input/* 25 | .env 26 | examples/memory.json 27 | test/link_memory.json 28 | test/memory.json 29 | test/llm_cache.json 30 | test/summary_memory.json 31 | */llm_cache.json 32 | test/test_skills/data/* 33 | test/test_skills/llm_cache.json 34 | webui/server/server/applications/test_application_id/bot.json 35 | webui/server/server/applications/test_application_id/main.py 36 | data/* 37 | 38 | .idea/* 39 | test/test_skills/code/* 40 | test/data/ui/* 41 | test/code/* 42 | test/data/read_interpreter/* 43 | webui/server/server/ts_builder/src/lib/index.tsx 44 | GeneralAgent/skills/a.jpg 45 | examples/code.bin 46 | examples/normal_memory.json 47 | examples/novel.md 48 | examples/9_knowledge_files/* 49 | ga/* 50 | html/* 51 | .history 52 | -------------------------------------------------------------------------------- /GeneralAgent/__init__.py: 
def default_check(check_content=None):
    """Ask the user on stdin to confirm or override.

    Returns None when the user confirms (empty input / yes / y / 是 / ok),
    otherwise returns the raw text the user typed.
    """
    prompt = "确认 | 继续 (回车, yes, y, 是, ok) 或者 直接输入你的想法\n"
    if check_content is not None:
        prompt = f"{check_content}\n\n{prompt}"
    answer = input(prompt)
    return None if answer.lower() in ("", "yes", "y", "是", "ok") else answer
@hide_python_code: bool, hide python code in output 43 | memory = None 44 | interpreters = [] 45 | output_callback = None 46 | python_run_result = None 47 | run_level = 0 48 | continue_run = True 49 | disable_python_run = False 50 | hide_python_code = False 51 | 52 | def __init__( 53 | self, 54 | role: str = None, 55 | functions: list = [], 56 | knowledge_files=[], 57 | rag_function=None, 58 | workspace: str = None, 59 | model=None, 60 | token_limit=None, 61 | api_key=None, 62 | base_url=None, 63 | self_call=False, 64 | continue_run=False, 65 | output_callback=default_output_callback, 66 | disable_python_run=False, 67 | hide_python_code=False, 68 | messages=[], 69 | **args, 70 | ): 71 | """ 72 | @role: str, Agent角色描述,例如"你是一个小说家",默认为None 73 | 74 | @functions: list, Agent可用的函数(工具)列表,默认为[] 75 | 76 | @knowledge_files: list, 知识库文件列表。当执行delete()函数时,不会删除构建好的知识库(embedding). 77 | 78 | @rag_function: function, RAG function,用于自定义RAG函数,输入参数为chat模式的messages(包含最近一次输入),返回值为字符串. 79 | 80 | @workspace: str, Agent序列化目录地址,如果目录不存在会自动创建,如果workspace不为None,则会从workspace中加载序列化的memory和python代码。默认None表示不序列化,不加载。当knowledge_files不为空时, workspace必须提供 81 | 82 | @model: str, 模型类型,比如"gpt-3.5-turbo", "gpt-4o"等 83 | 84 | @token_limit: int, 模型token限制. None: gpt3.5: 16*1000, gpt4: 128*1000, 其他: 16*1000 85 | 86 | @api_key: str, OpenAI or other LLM API KEY 87 | 88 | @base_url: str, OpenAI or other LLM API BASE URL 89 | 90 | @self_call: bool, 是否开启自我调用(Agent可以写代码来自我调用完成复杂任务), 默认为False. 91 | 92 | @continue_run: bool, 是否自动继续执行。Agent在任务没有完成时,是否自动执行。默认为True. 
93 | 94 | @output_callback: function, 输出回调函数,用于输出Agent的流式输出结果,默认为None,表示使用默认输出函数(skills.output==print) 95 | 96 | @disable_python_run (deprecated) : bool, 是否禁用python运行,默认为False 97 | 98 | @hide_python_code (deprecated) : bool, 是否隐藏python代码,默认为False 99 | 100 | @messages: list, 历史对话列表 101 | 102 | @args: 其他LLM对话参数 103 | 104 | temperature: float, 采样温度 105 | 106 | frequency_penalty: float, 频率惩罚, 在 -2 和 2 之间 107 | 108 | """ 109 | if workspace is None and len(knowledge_files) > 0: 110 | raise Exception( 111 | "workspace must be provided when knowledge_files is not empty" 112 | ) 113 | if workspace is not None and not os.path.exists(workspace): 114 | os.makedirs(workspace) 115 | self.workspace = workspace 116 | self.disable_python_run = disable_python_run 117 | self.hide_python_code = hide_python_code 118 | self.memory = NormalMemory(serialize_path=self._memory_path, messages=messages) 119 | self.role_interpreter = RoleInterpreter(role=role, self_call=self_call) 120 | self.python_interpreter = PythonInterpreter( 121 | self, serialize_path=self._python_path 122 | ) 123 | self.python_interpreter.function_tools = functions 124 | self.model = model or os.environ.get("DEFAULT_LLM_MODEL", "gpt-4o") 125 | self.token_limit = token_limit or 64 * 1000 126 | self.api_key = api_key 127 | self.base_url = base_url 128 | # self.temperature = temperature 129 | # self.frequency_penalty = frequency_penalty 130 | self.llm_args = args 131 | self.continue_run = continue_run 132 | self.knowledge_interpreter = KnowledgeInterpreter( 133 | workspace, knowledge_files=knowledge_files, rag_function=rag_function 134 | ) 135 | self.interpreters = [ 136 | self.role_interpreter, 137 | self.python_interpreter, 138 | self.knowledge_interpreter, 139 | ] 140 | self.enter_index = None # 进入 with 语句时 self.memory.messages 的索引 141 | self.output_callback = output_callback 142 | 143 | def __enter__(self): 144 | self.enter_index = len( 145 | self.memory.get_messages() 146 | ) # Record the index of self.messages 147 | 
    def __exit__(self, exc_type, exc_val, exc_tb):
        # Context-manager exit: roll back messages added since __enter__.
        if exc_type:
            self.clear_temporary_messages()
            # NOTE(review): `handle_exception` is not defined anywhere in this
            # file — unless it is provided elsewhere, this line raises
            # AttributeError. Confirm it exists on Agent.
            self.handle_exception(exc_type, exc_val, exc_tb)
        self.clear_temporary_messages()
        # NOTE(review): on the exception path clear_temporary_messages() runs
        # twice; the second call hits `assert self.enter_index is not None`
        # and raises AssertionError. Presumably the unconditional call belongs
        # in an `else:` branch — confirm the intended rollback semantics.
        return False  # do not suppress the exception
223 | 224 | def enable_python(self): 225 | """ 226 | 启用python运行 227 | """ 228 | self.disable_python_run = False 229 | 230 | def run( 231 | self, 232 | command: Union[str, list], 233 | return_type=str, 234 | display=False, 235 | verbose=True, 236 | user_check=False, 237 | check_render=None, 238 | ): 239 | """ 240 | 执行command命令,并返回return_type类型的结果 241 | 242 | @command: 命令内容, str or list. list: [{'type': 'text', 'text': 'hello world'}, {'type': 'image_url', 'image_url': 'xxxx.jpg'}] 243 | 244 | @return_type: type, 返回类型,默认str. 可以是任意的python类型。 245 | 246 | @display: bool, 是否显示流输出 247 | 248 | @verbose: bool, 是否显示详细输出 249 | 250 | @user_check: bool, 是否需要用户确认命令执行后的结果,默认不需要 251 | 252 | @check_render: function, 检查渲染函数,用于渲染显示给用户的check内容: check_render(result:return_type) -> str 253 | 254 | """ 255 | # 代码调用agent执行,直接run_level+1 256 | self.run_level += 1 257 | if not display: 258 | self.disable_output_callback() 259 | try: 260 | result = self._run(command, return_type=return_type, verbose=verbose) 261 | return result 262 | except Exception as e: 263 | logging.exception(e) 264 | return str(e) 265 | finally: 266 | self.run_level -= 1 267 | if not display: 268 | self.enable_output_callback() 269 | 270 | def user_input(self, input: Union[str, list], verbose=True): 271 | """ 272 | Agent接收用户输入 273 | 274 | :input: 用户输入内容, str类型 or list: [{'type': 'text', 'text': 'hello world'}, {'type': 'image_url', 'image_url': 'xxxx.jpg'}] 275 | """ 276 | from GeneralAgent import skills 277 | 278 | result = self._run(input, verbose=verbose) 279 | if self.continue_run and self.run_level == 0: 280 | # 判断是否继续执行 281 | messages = self.memory.get_messages() 282 | messages = cut_messages(messages, 2 * 1000) 283 | the_prompt = "对于当前状态,无需用户输入或者确认,继续执行任务,请回复yes,其他情况回复no" 284 | messages += [{"role": "system", "content": the_prompt}] 285 | response = skills.llm_inference( 286 | messages, 287 | model="smart", 288 | stream=False, 289 | api_key=self.api_key, 290 | base_url=self.base_url, 291 | **self.llm_args, 292 | 
) 293 | if "yes" in response.lower(): 294 | result = self.run("ok") 295 | return result 296 | 297 | def _run(self, input, return_type=str, verbose=False): 298 | """ 299 | agent run: parse input -> get llm messages -> run LLM and parse output 300 | 301 | @input: str, user's new input, None means continue to run where it stopped 302 | 303 | @return_type: type, return type, default str 304 | 305 | @verbose: bool, verbose mode 306 | """ 307 | 308 | result = "" 309 | 310 | def local_output(token): 311 | nonlocal result 312 | if token is not None: 313 | result += token 314 | else: 315 | result += "\n" 316 | if self.output_callback is not None: 317 | self.output_callback(token) 318 | 319 | if self.run_level != 0: 320 | if return_type == str: 321 | add_content = "Directly answer the question, no need to run python\n" 322 | # add_content 在前面 323 | if isinstance(input, list): 324 | input = [add_content] + input 325 | else: 326 | input = add_content + input 327 | else: 328 | add_content = ( 329 | "\nYou should return python values in type " 330 | + str(return_type) 331 | + " by run python code(```python\n#run code\nxxx\n).\n" 332 | ) 333 | # add_content 在后面 334 | if isinstance(input, list): 335 | input = input + [add_content] 336 | else: 337 | input = input + add_content 338 | self._memory_add_input(input) 339 | 340 | try_count = 0 341 | while True: 342 | messages = self._get_llm_messages() 343 | output_stop = self._llm_and_parse_output(messages, local_output, verbose) 344 | if output_stop: 345 | local_output(None) 346 | if self.python_run_result is not None: 347 | result = self.python_run_result 348 | self.python_run_result = None 349 | if return_type == str: 350 | return result 351 | if type(result) != return_type and try_count < 1: 352 | logging.info("return type should be: return_type") 353 | try_count += 1 354 | self._memory_add_input("return type should be " + str(return_type)) 355 | result = "" 356 | continue 357 | return result 358 | 359 | def _memory_add_input(self, 
input): 360 | # 记忆添加用户输入 361 | self.memory.add_message("user", input) 362 | 363 | def _get_llm_messages(self): 364 | # 获取记忆 + prompt 365 | messages = self.memory.get_messages() 366 | if self.disable_python_run: 367 | prompt = "\n\n".join( 368 | [ 369 | interpreter.prompt(messages) 370 | for interpreter in self.interpreters 371 | if interpreter.__class__ != PythonInterpreter 372 | ] 373 | ) 374 | else: 375 | prompt = "\n\n".join( 376 | [interpreter.prompt(messages) for interpreter in self.interpreters] 377 | ) 378 | # 动态调整记忆长度 379 | prompt_count = string_token_count(prompt) 380 | left_count = int(self.token_limit * 0.9) - prompt_count 381 | messages = cut_messages(messages, left_count) 382 | # 组合messages 383 | messages = [{"role": "system", "content": prompt}] + messages 384 | return messages 385 | 386 | def _llm_and_parse_output(self, messages, output_callback, verbose): 387 | outputer = _PythonCodeFilter(output_callback, verbose) 388 | from GeneralAgent import skills 389 | 390 | try: 391 | result = "" 392 | is_stop = True 393 | is_break = False 394 | response = skills.llm_inference( 395 | messages, 396 | model=self.model, 397 | stream=True, 398 | api_key=self.api_key, 399 | base_url=self.base_url, 400 | **self.llm_args, 401 | ) 402 | message_id = None 403 | for token in response: 404 | if token is None: 405 | break 406 | result += token 407 | outputer.process_text(token) 408 | interpreter: Interpreter = None 409 | for interpreter in self.interpreters: 410 | if ( 411 | self.disable_python_run 412 | and interpreter.__class__ == PythonInterpreter 413 | ): 414 | continue 415 | if interpreter.output_match(result): 416 | logging.debug("interpreter: " + interpreter.__class__.__name__) 417 | message_id = self.memory.add_message("assistant", result) 418 | self.memory.push_stack() 419 | output, is_stop = interpreter.output_parse(result) 420 | if self.python_run_result is not None: 421 | output = output.strip() 422 | if len(output) > 50000: 423 | output = output[:50000] + 
"..." 424 | self.memory.pop_stack() 425 | message_id = self.memory.append_message( 426 | "assistant", "\n" + output + "\n", message_id=message_id 427 | ) 428 | result = "" 429 | # if is_stop: 430 | outputer.process_text(None) 431 | outputer.process_text("```output\n" + output + "\n```\n") 432 | if interpreter.__class__ == PythonInterpreter: 433 | outputer.exit_python_code() 434 | is_break = True 435 | break 436 | if is_break: 437 | break 438 | if len(result) > 0: 439 | message_id = self.memory.add_message("assistant", result) 440 | outputer.flush() 441 | return is_stop 442 | except Exception as e: 443 | logging.exception(e) 444 | outputer.process_text(str(e)) 445 | outputer.flush() 446 | return True 447 | 448 | def clear(self): 449 | """ 450 | 清除: 删除memory和python序列化文件。不会删除workspace和知识库。 451 | """ 452 | if self._memory_path is not None and os.path.exists(self._memory_path): 453 | os.remove(self._memory_path) 454 | if self._python_path is not None and os.path.exists(self._python_path): 455 | os.remove(self._python_path) 456 | self.memory = NormalMemory(serialize_path=self._memory_path) 457 | self.python_interpreter = PythonInterpreter( 458 | self, serialize_path=self._python_path 459 | ) 460 | 461 | def clear_temporary_messages(self): 462 | """ 463 | 清除: 临时产生的数据 464 | """ 465 | assert self.enter_index is not None 466 | self.memory.recover(self.enter_index) 467 | self.enter_index = None 468 | 469 | 470 | class _PythonCodeFilter: 471 | """ 472 | Python代码过滤器,用于隐藏Python代码块 473 | """ 474 | 475 | def __init__(self, output_callback, verbose): 476 | """ 477 | 构造函数 478 | 479 | @output_callback: 输出回调函数 480 | 481 | @verbose: 是否显示详细输出 482 | """ 483 | self.verbose = verbose 484 | self.in_python_code = False 485 | self.buffer = "" 486 | self.output_callback = output_callback 487 | 488 | def process_text(self, text): 489 | """ 490 | 处理输入问题 491 | """ 492 | if self.verbose: 493 | self.output_callback(text) 494 | else: 495 | if text is None: 496 | self.flush() 497 | 
self.output_callback(None) 498 | else: 499 | if not self.in_python_code: 500 | self.buffer += text 501 | self._process_buffer() 502 | 503 | def exit_python_code(self): 504 | """ 505 | 退出python代码块 506 | """ 507 | self.in_python_code = False 508 | 509 | def _process_buffer(self): 510 | format = "```python\n#run code\n" 511 | if self.buffer.endswith(format): 512 | self.in_python_code = True 513 | self.buffer = "" # 清空缓冲区,因为我们不打印```python 514 | elif "```" in self.buffer and not self.in_python_code: 515 | # 清空```之前的内容 516 | index = self.buffer.rfind("```") 517 | if index != -1: 518 | self.output_callback(self.buffer[:index]) 519 | self.buffer = self.buffer[index:] 520 | # 如果缓冲区太大,就表示不是python代码块,直接输出 521 | if len(self.buffer) > len(format): 522 | self.flush() 523 | else: 524 | self.output_callback(self.buffer) 525 | self.buffer = "" 526 | 527 | def flush(self): 528 | if self.buffer: 529 | self.output_callback(self.buffer) 530 | self.buffer = "" 531 | -------------------------------------------------------------------------------- /GeneralAgent/interpreter/__init__.py: -------------------------------------------------------------------------------- 1 | from .interpreter import Interpreter 2 | from .role_interpreter import RoleInterpreter 3 | from .python_interpreter import PythonInterpreter 4 | from .knowledge_interpreter import KnowledgeInterpreter 5 | from .applescript_interpreter import AppleScriptInterpreter 6 | from .shell_interpreter import ShellInterpreter -------------------------------------------------------------------------------- /GeneralAgent/interpreter/applescript_interpreter.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .interpreter import Interpreter 3 | 4 | applescript_prompt = """ 5 | # Run applescript 6 | * Here are the commands 7 | ```applescript 8 | 9 | ``` 10 | * the command will be executed if in macOS computer. 
class AppleScriptInterpreter(Interpreter):
    """Runs ```applescript fenced blocks from LLM output via osascript (macOS)."""

    # Raw string so the regex escapes are explicit (behavior unchanged).
    output_match_pattern = r'```(\n)?applescript(.*?)\n```'

    def prompt(self, messages) -> str:
        return applescript_prompt

    def output_parse(self, string):
        """Extract the applescript block from `string`, execute it, and
        return (output_text, True) — True means the agent should stop."""
        pattern = re.compile(self.output_match_pattern, re.DOTALL)
        match = pattern.search(string)
        assert match is not None
        sys_out = self._run_applescript(match.group(2))
        return sys_out.strip(), True

    def _run_applescript(self, content):
        """Execute `content` through `osascript`. Returns the combined
        stdout/stderr text, or 'run successfully' when there was no output."""
        # Escape double quotes so the script survives shell quoting.
        # NOTE(review): shell=True with interpolated LLM-generated text is
        # injection-prone; kept because the escaping scheme depends on it.
        content = content.replace('"', '\\"')
        import subprocess

        try:
            p = subprocess.Popen(
                'osascript -e "{}"'.format(content),
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            sys_out, _ = p.communicate()
        except OSError as e:
            # Fixed: the original used `except: pass` followed by a `finally`
            # that referenced `p`, raising NameError when Popen itself failed.
            return 'osascript failed to start: {}'.format(e)
        sys_out = sys_out.decode('utf-8').strip()
        if sys_out == '':
            sys_out = 'run successfully'
        return sys_out
class KnowledgeInterpreter(Interpreter):
    """
    Knowledge-base interpreter: grounds answers with context retrieved from a
    llama index built over local/remote files and/or a user-supplied RAG hook.
    """

    def __init__(self, workspace, knowledge_files=None, rag_function=None) -> None:
        """
        @param workspace: working directory (the index lives under <workspace>/llama)
        @param knowledge_files: local paths or http(s) URLs of knowledge files,
               any format the llama library supports
        @param rag_function: callable(messages) -> str returning extra context
        """
        # Fixed: the default [] was a shared mutable default argument.
        knowledge_files = [] if knowledge_files is None else knowledge_files
        self.workspace = workspace
        self.knowledge_files = knowledge_files
        self.rag_function = rag_function
        # True when this interpreter has anything to contribute.
        self.work = len(knowledge_files) > 0 or (rag_function is not None)

        if len(knowledge_files) > 0:
            self._create_index()
        else:
            self.index = None

    def _create_index(self):
        """
        Build (or reload) the llama index under <workspace>/llama. The index
        is rebuilt only when the set of knowledge files has changed.
        """
        llama_dir = os.path.join(self.workspace, 'llama')
        meta_path = os.path.join(llama_dir, 'meta.json')
        data_dir = os.path.join(llama_dir, 'data')
        storage_dir = os.path.join(llama_dir, 'storage')

        # exist_ok avoids the check-then-create race of the original code.
        os.makedirs(llama_dir, exist_ok=True)
        os.makedirs(data_dir, exist_ok=True)
        os.makedirs(storage_dir, exist_ok=True)

        # Decide whether a rebuild is needed: compare the recorded file set
        # with the current one.
        files_change = False
        if os.path.exists(meta_path):
            with open(meta_path, 'r') as f:
                meta = json.load(f)
            if set(meta['knowledge_files']) != set(self.knowledge_files):
                files_change = True
        else:
            files_change = True

        if files_change:
            # Clear the data dir, then copy/download every knowledge file.
            for file in os.listdir(data_dir):
                os.remove(os.path.join(data_dir, file))
            for file in self.knowledge_files:
                if file.startswith('http'):
                    # Remote file: download it into the data dir.
                    import requests
                    # NOTE(review): no timeout and no status-code check — a
                    # dead URL hangs or stores an error page. Confirm this
                    # best-effort behavior is intended.
                    res = requests.get(file)
                    file_name = file.split('/')[-1]
                    with open(os.path.join(data_dir, file_name), 'wb') as f:
                        f.write(res.content)
                else:
                    file_name = os.path.basename(file)
                    shutil.copy(file, os.path.join(data_dir, file_name))
            self.index = create_llamaindex(data_dir, storage_dir)
            with open(meta_path, 'w') as f:
                json.dump({'knowledge_files': self.knowledge_files}, f)
        else:
            self.index = load_llamaindex(storage_dir)

    def prompt(self, messages) -> str:
        """Return 'Background:' context assembled from the index and/or the
        rag_function; empty string when there is nothing to contribute."""
        if len(messages) == 0:
            return ''
        if len(self.knowledge_files) == 0 and self.rag_function is None:
            return ''
        background = 'Background:'
        if len(self.knowledge_files) > 0:
            background += query_llamaindex(self.index, messages)
        if self.rag_function is not None:
            background += '\n' + self.rag_function(messages)
        return background
def get_function_signature(func, module: str = None):
    """Return a one-line description of *func*: name, call signature and docstring.

    @func: the callable to describe
    @module: optional module name; when given the result is prefixed as 'module.name(...)'
    @return: description string, or '' when introspection fails (best effort:
             an undescribable tool must not break prompt building)
    """
    import inspect
    import logging

    try:
        sig_str = str(inspect.signature(func))
        desc = f"{func.__name__}{sig_str}"
        if func.__doc__:
            desc += ": " + func.__doc__.strip()
        if module is not None:
            desc = f"{module}.{desc}"
        # NOTE(review): coroutine functions previously got a no-op '' prefix here
        # (`desc = "" + desc`), probably a lost "async " marker; the dead code was
        # removed without changing behavior — confirm whether an async marker is wanted.
        return desc
    except Exception as e:
        logging.exception(e)
        return ""
74 | - Example: 75 | ```python 76 | #show code 77 | print('Hello, world!') 78 | ``` 79 | """ 80 | 81 | function_tools = [] 82 | 83 | def __init__( 84 | self, 85 | agent=None, 86 | serialize_path: str = None, 87 | libs: str = "", 88 | import_code: str = None, 89 | prompt_append="", 90 | stop_wrong_count=3, 91 | ): 92 | """ 93 | @serialize_path (str): python解释器的序列化路径,如果为None,则不序列化。举例: './python_interpreter.bin' or 'serialized.pkl' 94 | @lib (str, optional): 可以使用的库 95 | @import_code (str, optional): code to import. The tools used should be imported. Defaults to default_import_code. 96 | @prompt_append: append to the prompt, custom prompt can be added here 97 | @stop_wrong_count: stop running when the code is wrong for stop_wrong_count times 98 | """ 99 | self.globals = {} # global variables shared by all code 100 | self.agent = agent 101 | self.python_libs = libs 102 | self.import_code = import_code or default_import_code 103 | self.serialize_path = serialize_path 104 | self.prompt_append = prompt_append 105 | # self.tools = tools or Tools([]) 106 | self.globals = self.load() 107 | # count the number of times the code is wrong, and stop running when it reaches the threshold 108 | self.run_wrong_count = 0 109 | self.stop_wrong_count = stop_wrong_count 110 | 111 | def load(self): 112 | if self.serialize_path is None: 113 | return {} 114 | if os.path.exists(self.serialize_path): 115 | with open(self.serialize_path, "rb") as f: 116 | data = pickle.loads(f.read()) 117 | return data["globals"] 118 | return {} 119 | 120 | def prompt(self, messages) -> str: 121 | funtions = "\n\n".join([get_function_signature(x) for x in self.function_tools]) 122 | variables = { 123 | "python_libs": self.python_libs, 124 | "python_funcs": funtions, 125 | "python_version": get_python_version(), 126 | } 127 | return ( 128 | Template(self.python_prompt_template).render(**variables) 129 | + self.prompt_append 130 | ) 131 | 132 | def save(self): 133 | if self.serialize_path is None: 134 | return 
135 | save_globals = self._remove_unpickleable() 136 | # save 137 | with open(self.serialize_path, "wb") as f: 138 | data = {"globals": save_globals} 139 | f.write(pickle.dumps(data)) 140 | 141 | def _remove_unpickleable(self): 142 | save_globals = self.globals.copy() 143 | if "__builtins__" in save_globals: 144 | save_globals.__delitem__("__builtins__") 145 | keys = list(save_globals.keys()) 146 | for key in keys: 147 | try: 148 | pickle.dumps(save_globals[key]) 149 | except Exception: 150 | save_globals.__delitem__(key) 151 | return save_globals 152 | 153 | def output_parse(self, string) -> (str, bool): 154 | pattern = re.compile(self.output_match_pattern, re.DOTALL) 155 | match = pattern.search(string) 156 | assert match is not None 157 | result, stop = self.run_code(match.group(1)) 158 | result = ( 159 | "\nThe execution of the python code is completed, and the result is as follows:\n" 160 | + result 161 | + "\n" 162 | ) 163 | return result, stop 164 | 165 | def run_code(self, code): 166 | code = self.import_code + "\n" + code 167 | logging.debug(code) 168 | 169 | output = io.StringIO() 170 | sys.stdout = output 171 | 172 | try: 173 | if self.agent is not None: 174 | self.agent.run_level += 1 175 | if self.agent is not None: 176 | self.globals["agent"] = self.agent 177 | for fun in self.function_tools: 178 | # partial function default is remote function 179 | if isinstance(fun, partial): 180 | name = fun.args[0] 181 | else: 182 | name = fun.__name__ 183 | self.globals[name] = fun 184 | result = skills._exec(code, self.globals) 185 | self.run_wrong_count = 0 186 | stop = True 187 | # 出现了自我调用,则判断一下层级,如果层级为1,则停止 188 | if self.agent is not None: 189 | stop = self.agent.run_level >= 1 190 | self.agent.python_run_result = result 191 | if result is None: 192 | result = output.getvalue() 193 | else: 194 | if output.getvalue().strip() != "": 195 | result = output.getvalue() + "\n" + str(result) 196 | return str(result), stop 197 | except Exception as e: 198 | 
logging.exception(e) 199 | import traceback 200 | 201 | error = traceback.format_exc() 202 | self.run_wrong_count += 1 203 | if self.run_wrong_count >= self.stop_wrong_count: 204 | raise e 205 | return error, False 206 | finally: 207 | self.save() 208 | sys.stdout = sys.__stdout__ 209 | if self.agent is not None: 210 | self.agent.run_level -= 1 211 | 212 | def get_variable(self, name): 213 | if name in self.globals: 214 | return self.globals[name] 215 | else: 216 | logging.warning(f"Variable {name} not found") 217 | return None 218 | 219 | def set_variable(self, name, value): 220 | self.globals[name] = value 221 | -------------------------------------------------------------------------------- /GeneralAgent/interpreter/role_interpreter.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from jinja2 import Template 3 | from .interpreter import Interpreter 4 | 5 | default_system_role = """ 6 | Current Time: {{now}} 7 | You are an agent on the computer, tasked with assisting users in resolving their issues. 8 | You have the capability to control the computer and access the internet. 9 | All code in ```python ``` will be automatically executed by the system. So if you don't need to run the code, please don't write it in the code block. 10 | All responses should be formatted using markdown. For file references, use the format [title](a.txt), with all files stored in the './' directory. 11 | When result file is ready, provide it to the user with donwload link. 12 | """ 13 | 14 | class RoleInterpreter(Interpreter): 15 | """ 16 | RoleInterpreter, a interpreter that can change the role of the agent. 17 | Note: This should be the first interpreter in the agent. 18 | """ 19 | 20 | def __init__(self, system_role=None, self_call=False, search_functions=False, role:str=None) -> None: 21 | """ 22 | prompt = system_role | default_system_role + role 23 | @system_role: str, 系统角色. 
如果为None,则使用默认系统角色 24 | @self_call: bool, 是否开启自调用 25 | @search_functions: bool, 是否开启搜索功能 26 | @role: str, 用户角色 27 | """ 28 | self.system_role = system_role 29 | self.self_control = self_call 30 | self.search_functions = search_functions 31 | self.role = role 32 | 33 | def prompt(self, messages) -> str: 34 | if self.system_role is not None: 35 | prompt = self.system_role 36 | else: 37 | prompt = Template(default_system_role).render(now=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')) 38 | if self.role is not None: 39 | prompt += '\n\n' + self.role 40 | return prompt -------------------------------------------------------------------------------- /GeneralAgent/interpreter/shell_interpreter.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .interpreter import Interpreter 3 | 4 | shell_prompt = """ 5 | # Run shell 6 | * format is : ```shell\\nthe_command\\n``` 7 | * the command will be executed 8 | """ 9 | 10 | class ShellInterpreter(Interpreter): 11 | output_match_pattern = '```shell\n(.*?)\n```' 12 | 13 | def __init__(self, workspace='./') -> None: 14 | self.workspace = workspace 15 | 16 | def prompt(self, messages) -> str: 17 | return shell_prompt 18 | 19 | def output_parse(self, string) -> (str, bool): 20 | pattern = re.compile(self.output_match_pattern, re.DOTALL) 21 | match = pattern.search(string) 22 | assert match is not None 23 | output = self._run_bash(match.group(1)) 24 | return output.strip(), True 25 | 26 | def _run_bash(self, content): 27 | sys_out = '' 28 | import subprocess 29 | if 'python ' in content: 30 | content = content.replace('python ', 'python3 ') 31 | try: 32 | p = subprocess.Popen(content, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 33 | except: 34 | pass 35 | finally: 36 | sys_out, err = p.communicate() 37 | sys_out = sys_out.decode('utf-8') 38 | return sys_out -------------------------------------------------------------------------------- 
/GeneralAgent/llamaindex.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import logging 4 | from typing import Any, List 5 | from llama_index.core import Settings 6 | from llama_index.core.embeddings import BaseEmbedding 7 | from llama_index.core import (VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage) 8 | 9 | 10 | # 使用: https://github.com/run-llama/llama_index 库构建知识库索引 11 | # 默认使用 GeneralAgent.skills 中 embedding_texts 函数来embedding,你可以重写 embedding_texts 函数 12 | 13 | # def new_embedding_texts(texts) -> [[float]]: 14 | # """ 15 | # 对文本数组进行embedding 16 | # """ 17 | # import os 18 | # client = _get_openai_client() 19 | # model = os.environ.get('EMBEDDING_MODEL', 'text-embedding-3-small') 20 | # resp = client.embeddings.create(input=texts, model=model) 21 | # result = [x.embedding for x in resp.data] 22 | # return result 23 | # from GeneralAgent import skills 24 | # skills.embedding_texts = new_embedding_texts 25 | 26 | class CustomEmbeddings(BaseEmbedding): 27 | def __init__( 28 | self, 29 | **kwargs: Any, 30 | ) -> None: 31 | super().__init__(**kwargs) 32 | 33 | @classmethod 34 | def class_name(cls) -> str: 35 | return "CustomEmbeddings" 36 | 37 | async def _aget_query_embedding(self, query: str) -> List[float]: 38 | return self._get_query_embedding(query) 39 | 40 | async def _aget_text_embedding(self, text: str) -> List[float]: 41 | return self._get_text_embedding(text) 42 | 43 | def _get_query_embedding(self, query: str) -> List[float]: 44 | from GeneralAgent import skills 45 | return skills.embedding_texts([query])[0] 46 | 47 | def _get_text_embedding(self, text: str) -> List[float]: 48 | from GeneralAgent import skills 49 | return skills.embedding_texts([text])[0] 50 | 51 | def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: 52 | from GeneralAgent import skills 53 | return skills.embedding_texts(texts) 54 | 55 | embed_model = 
CustomEmbeddings(embed_batch_size=16) 56 | Settings.embed_model = embed_model 57 | 58 | 59 | def create_llamaindex(data_dir, storage_dir, limit_count=1000000): 60 | """ 61 | 创建llamaindex索引 62 | @param data_dir: 数据目录 63 | @param storage_dir: 存储目录 64 | @param limit_count: 限制的token数量 65 | """ 66 | documents = SimpleDirectoryReader(data_dir).load_data() 67 | # 限制token数量 68 | total_count = 0 69 | for doc in documents: 70 | total_count += len(doc.get_content()) 71 | # 英文下,一个单词多个字母,所以乘以4 72 | if total_count > limit_count * 4: 73 | return None 74 | index = VectorStoreIndex.from_documents(documents) 75 | index.storage_context.persist(persist_dir=storage_dir) 76 | return index 77 | 78 | 79 | def load_llamaindex(storage_dir): 80 | """ 81 | 从存储中加载索引 82 | """ 83 | storage_context = StorageContext.from_defaults(persist_dir=storage_dir) 84 | index = load_index_from_storage(storage_context) 85 | return index 86 | 87 | 88 | def _get_last_text_query(messages): 89 | if len(messages) == 0: 90 | return '' 91 | for index in range(len(messages) - 1, -1, -1): 92 | content = messages[index]['content'] 93 | if isinstance(content, str): 94 | return content 95 | elif isinstance(content, list): 96 | for item in content: 97 | if item['type'] == 'text': 98 | return item['text'] 99 | return '' 100 | 101 | 102 | def query_llamaindex(index, messages): 103 | query = _get_last_text_query(messages) 104 | nodes = index.as_retriever().retrieve(query) 105 | return '\n\n'.join([node.get_text() for node in nodes]) 106 | 107 | 108 | def retrieve_knowlege(storage_dir, messages) -> list: 109 | """ 110 | 从知识库中检索,返回检索结果 111 | @param query_str: 检索字符串 112 | @return: 检测结果,list of string 113 | """ 114 | if len(messages) == 0: 115 | logging.info('messages is empty') 116 | return '' 117 | if not os.path.exists(storage_dir): 118 | logging.info(f'storage_dir {storage_dir} not exists') 119 | return '' 120 | query = _get_last_text_query(messages) 121 | index = load_llamaindex(storage_dir) 122 | return 
query_llamaindex(index, query) -------------------------------------------------------------------------------- /GeneralAgent/memory/__init__.py: -------------------------------------------------------------------------------- 1 | # import 2 | from .normal_memory import NormalMemory -------------------------------------------------------------------------------- /GeneralAgent/memory/normal_memory.py: -------------------------------------------------------------------------------- 1 | # Memeory 2 | import json 3 | import os 4 | import logging 5 | from GeneralAgent.utils import encode_image 6 | 7 | class NormalMemory: 8 | def __init__(self, serialize_path='./memory.json', messages=[]): 9 | """ 10 | @serialize_path: str, 序列化路径,默认为'./memory.json'。如果为None,则使用内存存储 11 | """ 12 | self.messages = [] 13 | self.serialize_path = serialize_path 14 | if serialize_path is not None: 15 | if os.path.exists(serialize_path): 16 | with open(serialize_path, 'r', encoding='utf-8') as f: 17 | self.messages = json.load(f) 18 | if len(messages) > 0: 19 | self._validate_messages(messages) 20 | # 将 messages 的内容拼到 self.messages 后面 21 | self.messages += messages 22 | 23 | def save(self): 24 | if self.serialize_path is not None: 25 | with open(self.serialize_path, 'w', encoding='utf-8') as f: 26 | json.dump(self.messages, f) 27 | 28 | def push_stack(self): 29 | pass 30 | 31 | def pop_stack(self): 32 | pass 33 | 34 | def add_message(self, role, content): 35 | """ 36 | add a new message 37 | @role: str, 'user' or 'assistant' 38 | @content: str, message content 39 | return message id 40 | """ 41 | assert role in ['user', 'system', 'assistant'] 42 | if isinstance(content, list): 43 | r = [] 44 | for c in content: 45 | if isinstance(c, dict): 46 | if 'image' in c: 47 | r.append({'type': 'image_url', 'image_url': {'url': encode_image(c['image'])}}) 48 | elif 'text' in c: 49 | r.append({'type': 'text', 'text': c['text']}) 50 | else: 51 | raise Exception('message type wrong') 52 | else: 53 | 
r.append({'type': 'text', 'text': c}) 54 | self.messages.append({'role': role, 'content': r}) 55 | else: 56 | self.messages.append({'role': role, 'content': content}) 57 | self.save() 58 | 59 | def append_message(self, role, content, message_id=None): 60 | """ 61 | append a message. when message_id is not None, append to the message with message_id and move it to the end 62 | @role: str, 'user' or 'assistant' 63 | @content: str, message content 64 | return message id 65 | """ 66 | # self.show_messages() 67 | assert role in ['user', 'assistant'] 68 | if message_id is not None: 69 | assert message_id >= 0 and message_id < len(self.messages) 70 | assert self.messages[message_id]['role'] == role 71 | self.messages[message_id]['content'] += '\n' + content 72 | # self.messages.append(self.messages.pop(message_id)) 73 | self.messages = self.messages[:message_id+1] 74 | self.save() 75 | # self.show_messages() 76 | return len(self.messages) - 1 77 | else: 78 | if len(self.messages) > 0 and self.messages[-1]['role'] == role: 79 | self.messages[-1]['content'] += '\n' + content 80 | else: 81 | self.messages.append({'role': role, 'content': content}) 82 | self.save() 83 | # self.show_messages() 84 | return len(self.messages) - 1 85 | 86 | # 恢复 message 数据, [: index] 87 | def recover(self, index): 88 | """ 89 | recover the messages to the index 90 | """ 91 | self.messages = self.messages[:index] 92 | self.save() 93 | 94 | def get_messages(self): 95 | return self.messages 96 | 97 | def __str__(self): 98 | return json.dumps(self.messages, indent=4) 99 | 100 | def show_messages(self): 101 | logging.info('-' * 50 + '' + '-' * 50) 102 | for message in self.messages: 103 | logging.info('[[' + message['role'] + ']]: ' + message['content'][:100]) 104 | logging.info('-' * 50 + '' + '-' * 50) 105 | 106 | def _validate_messages(self, messages): 107 | """ 108 | Validate each message in the messages. 
109 | @messages (list): List of messages where each message is a dict with 'role' and 'content'. 110 | Raises: 111 | AssertionError: If any message does not conform to the required format ('message format wrong'). 112 | """ 113 | for message in messages: 114 | assert isinstance(message, dict), 'message format wrong' 115 | assert 'role' in message, 'message format wrong' 116 | assert 'content' in message, 'message format wrong' 117 | assert message['role'] in ['user', 'assistant'], 'message format wrong' 118 | 119 | 120 | def test_NormalMemory(): 121 | serialize_path = './memory.json' 122 | mem = NormalMemory(serialize_path=serialize_path) 123 | mem.add_message('user', 'hello') 124 | mem.add_message('assistant', 'hi') 125 | mem = NormalMemory(serialize_path=serialize_path) 126 | assert len(mem.get_messages()) == 2 127 | mem.append_message('assistant', 'hi') 128 | assert len(mem.get_messages()) == 2 -------------------------------------------------------------------------------- /GeneralAgent/skills/__init__.py: -------------------------------------------------------------------------------- 1 | # 单列 2 | import os 3 | from codyer import skills 4 | 5 | 6 | def default_output_callback(token): 7 | if token is not None: 8 | print(token, end="", flush=True) 9 | else: 10 | print("\n", end="", flush=True) 11 | 12 | 13 | def default_check(check_content=None): 14 | show = "确认 | 继续 (回车, yes, y, 是, ok) 或者 直接输入你的想法\n" 15 | if check_content is not None: 16 | show = f"{check_content}\n\n{show}" 17 | response = input(show) 18 | if response.lower() in ["", "yes", "y", "是", "ok"]: 19 | return None 20 | else: 21 | return response 22 | 23 | 24 | def load_functions_with_path(python_code_path) -> tuple[list, str]: 25 | """ 26 | Load functions from python file 27 | @param python_code_path: the path of python file 28 | @return: a list of functions and error message (if any, else None) 29 | """ 30 | try: 31 | import importlib.util 32 | import inspect 33 | 34 | # 指定要加载的文件路径和文件名 35 | 
module_name = "skills" 36 | module_file = python_code_path 37 | 38 | # 使用importlib加载文件 39 | spec = importlib.util.spec_from_file_location(module_name, module_file) 40 | module = importlib.util.module_from_spec(spec) 41 | spec.loader.exec_module(module) 42 | 43 | # 获取文件中的所有函数 44 | functions = inspect.getmembers(module, inspect.isfunction) 45 | 46 | # 过滤functions中以下划线开头的函数 47 | functions = filter(lambda f: not f[0].startswith("_"), functions) 48 | 49 | return [f[1] for f in functions], None 50 | except Exception as e: 51 | # 代码可能有错误,加载不起来 52 | import logging 53 | 54 | logging.exception(e) 55 | return [], str(e) 56 | 57 | 58 | def load_functions_with_directory(python_code_dir) -> list: 59 | """ 60 | Load functions from python directory (recursively) 61 | @param python_code_dir: the path of python directory 62 | @return: a list of functions 63 | """ 64 | import os 65 | 66 | total_funs = [] 67 | for file in os.listdir(python_code_dir): 68 | # if file is directory 69 | if os.path.isdir(os.path.join(python_code_dir, file)): 70 | total_funs += load_functions_with_directory( 71 | os.path.join(python_code_dir, file) 72 | ) 73 | else: 74 | # if file is file 75 | if file.endswith(".py") and ( 76 | not file.startswith("__init__") 77 | and not file.startswith("_") 78 | and not file == "main.py" 79 | ): 80 | funcs, error = load_functions_with_path( 81 | os.path.join(python_code_dir, file) 82 | ) 83 | total_funs += funcs 84 | return total_funs 85 | 86 | 87 | def _exec(code, globals_vars={}): 88 | """ 89 | Execute code and return the last expression 90 | """ 91 | import ast 92 | 93 | tree = ast.parse(code) 94 | 95 | try: 96 | last_node = tree.body[-1] 97 | code_body = tree.body[0:-1] 98 | last_expr = ast.unparse(last_node) 99 | 100 | if isinstance(last_node, ast.Assign): 101 | code_body = tree.body 102 | expr_left = last_node.targets[-1] 103 | if isinstance(expr_left, ast.Tuple): 104 | last_expr = f"({', '.join([x.id for x in expr_left.elts])})" 105 | else: 106 | last_expr = 
expr_left.id 107 | 108 | elif isinstance(last_node, ast.AugAssign) or isinstance( 109 | last_node, ast.AnnAssign 110 | ): 111 | code_body = tree.body 112 | last_expr = last_node.target.id 113 | 114 | if len(code_body): 115 | main_code = compile(ast.unparse(code_body), "", "exec") 116 | exec(main_code, globals_vars) 117 | except SyntaxError: 118 | return None 119 | 120 | try: 121 | return eval( 122 | compile(last_expr, "", "eval"), 123 | globals_vars, 124 | ) 125 | except SyntaxError: 126 | return None 127 | 128 | 129 | if len(skills._functions) == 0: 130 | skills._add_function("input", input) 131 | skills._add_function("check", default_check) 132 | skills._add_function("print", default_output_callback) 133 | skills._add_function("output", default_output_callback) 134 | skills._add_function("_exec", _exec) 135 | funcs = load_functions_with_directory(os.path.dirname(__file__)) 136 | for fun in funcs: 137 | skills._add_function(fun.__name__, fun) 138 | -------------------------------------------------------------------------------- /GeneralAgent/skills/file_operation.py: -------------------------------------------------------------------------------- 1 | 2 | def read_pdf_pages(file_path): 3 | """Read the pdf file and return a list of strings on each page of the pdf""" 4 | """读取pdf文件,返回pdf每页字符串的列表""" 5 | import fitz 6 | doc = fitz.open(file_path) 7 | documents = [] 8 | for page in doc: 9 | documents.append(page.get_text()) 10 | return documents 11 | 12 | def read_word_pages(file_path): 13 | """Read the word file and return a list of word paragraph strings""" 14 | """读取word文件,返回word段落字符串的列表""" 15 | # https://zhuanlan.zhihu.com/p/146363527 16 | from docx import Document 17 | # 打开文档 18 | document = Document(file_path) 19 | # 读取标题、段落、列表内容 20 | ps = [ paragraph.text for paragraph in document.paragraphs] 21 | return ps 22 | 23 | def read_ppt(file_path): 24 | import pptx 25 | prs = pptx.Presentation(file_path) 26 | documents = [] 27 | for slide in prs.slides: 28 | for shape 
def read_file_content(file_path):
    """Return the text content of a txt/md/pdf/docx/ppt(x) file.

    @file_path: str, path to the file; dispatch is by extension and anything
        unknown is treated as a plain UTF-8 text file.
    """
    if file_path.endswith('.pdf'):
        return ' '.join(read_pdf_pages(file_path))
    elif file_path.endswith('.docx'):
        return ' '.join(read_word_pages(file_path))
    elif file_path.endswith('.ppt') or file_path.endswith('.pptx'):
        return read_ppt(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            # BUG FIX: the old `'\n'.join(f.readlines())` doubled every newline,
            # because readlines() keeps the trailing '\n' on each line.
            return f.read()
def embedding_texts(texts, model=None) -> list:
    """
    Embed a list of texts and return one embedding vector per text.

    @texts: list of strings to embed
    @model: optional embedding model name; names containing the 'azure_' prefix
        are routed to Azure with the prefix stripped. When omitted, falls back
        to the OPENAI_EMBEDDING_MODEL / EMBEDDING_MODEL environment variables,
        then to 'text-embedding-3-small'.
    @return: list of embedding vectors (list of floats)
    """
    if model is not None and 'azure_' in model:
        client = _get_azure_client()
        model = model.replace('azure_', '')
    else:
        client = _get_openai_client()
        if model is None:
            # BUG FIX: an explicit `model` argument used to be silently overwritten
            # here. Also honor OPENAI_EMBEDDING_MODEL (the name documented in
            # .env.example) while keeping EMBEDDING_MODEL for backward compatibility.
            model = os.environ.get('OPENAI_EMBEDDING_MODEL') or os.environ.get('EMBEDDING_MODEL', 'text-embedding-3-small')
    resp = client.embeddings.create(input=texts, model=model)
    result = [x.embedding for x in resp.data]
    return result
api_key=None, base_url=None, **args): 89 | """ 90 | Run LLM (large language model) inference on the provided messages using the specified model. 91 | 92 | @messages: Input messages for the model, like [{'role': 'system', 'content': 'You are a helpful assistant'}, {'role': 'user', 'content': 'What is your name?'}] 93 | @model: Type of model to use. Options are 'normal', 'smart', 'long' 94 | @stream: Boolean indicating if the function should use streaming inference 95 | @temperature: Sampling temperature to use during inference. Must be a float between 0 and 1. Defaults to 0.5. 96 | @api_key: OpenAI API key. If not provided, the function will use the OPENAI_API_KEY environment variable. 97 | @base_url: Base URL for the OpenAI API. If not provided, the function will use the OPENAI_API_BASE environment variable. 98 | @frequency_penalty: Frequency penalty to use during inference. Must be a float between -2 and 2. Defaults to null. 99 | 100 | Returns: 101 | If stream is True, returns a generator that yields the inference results as they become available. 102 | If stream is False, returns a string containing the inference result. 103 | 104 | Note: 105 | The total number of tokens in the messages and the returned string must be less than 4000 when model_variant is 'normal', and less than 16000 when model_variant is 'long'. 
106 | """ 107 | 108 | logging.debug(messages) 109 | if model == 'smart': 110 | model = 'gpt-4o' 111 | if model == 'long': 112 | model = 'gpt-4o' 113 | if model == 'normal': 114 | model = 'gpt-3.5-turbo' 115 | if 'azure_' in model: 116 | model = model.replace('azure_', '') 117 | client = _get_azure_client(api_key, base_url) 118 | elif 'doubao' in model: 119 | client, model = _get_doubao_client(api_key, base_url) 120 | else: 121 | client = _get_openai_client(api_key, base_url) 122 | messages = _process_message(messages, model) 123 | if stream: 124 | return _llm_inference_with_stream(client, messages, model, **args) 125 | else: 126 | return _llm_inference_without_stream(client, messages, model, **args) 127 | 128 | 129 | def _process_message(messages, model): 130 | if model == "glm-4v": # 避开 GLM-4V 开源模型,开源模型不需要处理 131 | for message in messages: 132 | # remove the base64 prefix in the image_url such as 'data:image/jpeg;base64,' which is for GLM-4V 133 | if 'content' in message and isinstance(message['content'], list): 134 | for item in message['content']: 135 | if item.get('type') == 'image_url' and 'url' in item.get('image_url', {}): 136 | url = item['image_url']['url'] 137 | base64_index = url.find('base64,') 138 | if base64_index != -1: 139 | item['image_url']['url'] = url[base64_index + len('base64,'):] 140 | if model == "yi-vision": # Yi-Vision Not Support System Role 141 | for message in messages: 142 | if message['role'] == 'system': 143 | message['role'] = 'user' 144 | 145 | return messages 146 | 147 | 148 | def _get_doubao_client(api_key=None, base_url=None): 149 | from volcenginesdkarkruntime import Ark 150 | key = api_key or os.environ.get('OPENAI_API_KEY') 151 | client = Ark(api_key=key) 152 | model = base_url or os.environ.get('OPENAI_API_BASE') 153 | return client, model 154 | 155 | 156 | def _update_llm_args(model, args): 157 | if model in ['qwen-vl-max', 'qwen-vl-plus']: 158 | remove_items = ['temperature', 'frequency_penalty'] 159 | return {k: v for k, v 
in args.items() if k not in remove_items} 160 | else: 161 | return args 162 | 163 | 164 | def _llm_inference_with_stream(client, messages, model, **args): 165 | try: 166 | args = _update_llm_args(model, args) 167 | response = client.chat.completions.create( 168 | messages=messages, 169 | model=model, 170 | stream=True, 171 | **args 172 | ) 173 | for chunk in response: 174 | if len(chunk.choices) > 0: 175 | # Compatible with service using Azure API proxies, such as One-API 176 | if chunk.choices[0].delta is None: 177 | continue 178 | token = chunk.choices[0].delta.content 179 | if token is None: 180 | continue 181 | yield token 182 | except Exception as e: 183 | logging.exception(e) 184 | raise ValueError('LLM(Large Languate Model) error, Please check your key or base_url, or network') 185 | 186 | 187 | def _llm_inference_without_stream(client, messages, model, **args): 188 | try: 189 | args = _update_llm_args(model, args) 190 | response = client.chat.completions.create( 191 | messages=messages, 192 | model=model, 193 | stream=False, 194 | **args 195 | ) 196 | result = response.choices[0].message.content 197 | return result 198 | except Exception as e: 199 | logging.exception(e) 200 | raise ValueError('LLM(Large Languate Model) error, Please check your key or base_url, or network') 201 | 202 | def speech_to_text(audio_file_path): 203 | """Convert speech in audio to text, return text""" 204 | from GeneralAgent import skills 205 | audio_file = open(audio_file_path, "rb") 206 | client = _get_openai_client() 207 | content = client.audio.transcriptions.create( 208 | model="whisper-1", 209 | file=audio_file, 210 | response_format="text" 211 | ) 212 | 213 | return content 214 | 215 | def text_to_speech(text, voice='onyx', save_path=None): 216 | """ 217 | 文本转语音,返回音频文件路径。 218 | @param text: 要转换的文本 219 | @param voice: 语音名称, onyx: 男性,nova: 女性 220 | @return: 音频文件路径 221 | """ 222 | from GeneralAgent import skills 223 | 224 | # ['nova', 'shimmer', 'echo', 'onyx', 'fable', 
def get_python_version() -> str:
    """Return the running interpreter's version string, e.g. "3.9.12"."""
    from platform import python_version

    return python_version()
def get_python_code(content: str) -> str:
    """
    Extract the first ```python fenced code block from content.

    Falls back to returning content unchanged when no fenced block is found.
    """
    import re

    match = re.search("```python\n(.*?)\n```", content, re.S)
    return match.group(1) if match else content
def get_function_signature(func, module: str = None):
    """Return a one-line description of func: "name(signature): docstring".

    @param func: the callable to describe
    @param module: optional module name to prefix, yielding "module.name(...)"
    @return: the description string, or '' when the signature cannot be
             inspected (e.g. some builtins / C callables)
    """
    try:
        import inspect

        sig = inspect.signature(func)
        desc = f"{func.__name__}{str(sig)}"
        if func.__doc__:
            desc += ": " + func.__doc__.strip()
        if module is not None:
            desc = f"{module}.{desc}"
        # Removed a no-op branch that prepended an empty string for coroutine
        # functions (desc = "" + desc) — it had no effect on the output.
        return desc
    except Exception as e:
        # Signature inspection can fail for builtins; log and degrade to ''.
        import logging

        logging.exception(e)
        return ""
def unique_name():
    """Generate a unique short hex name, suitable for new files that are kept."""
    # Last segment of a UUID4 is 12 hex characters — unique enough for filenames.
    import uuid
    return str(uuid.uuid4()).split('-')[-1]

def unique_tmp_file_name():
    """Generate a unique file path inside ./tmp for a temporary file.

    The caller is responsible for deleting the file afterwards. The ./tmp
    directory (under the current working directory) is created if missing.
    @return: absolute path of a fresh, not-yet-created file inside ./tmp
    """
    import os
    tmp_dir = os.path.abspath(os.path.join(os.getcwd(), 'tmp'))
    # Create the tmp directory if it does not exist yet.
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)
    # Bug fix: the original returned tmp_dir + unique_name() (no separator),
    # which produced a sibling path like '/cwd/tmpXXXX' outside the tmp dir.
    return os.path.join(tmp_dir, unique_name())
def wikipedia_search(query: str) -> str:
    """
    Search English Wikipedia for query and return a short text summary.

    Behavior (as implemented below):
    - If the search page lists result headings instead of an article,
      returns "Could not find ... Similar: [...]" with up to 5 titles.
    - If the page looks like a disambiguation page ("may refer to:"),
      recurses once with the query wrapped in brackets.
    - Otherwise condenses the article text to its first five sentences.
    - NOTE(review): may return None when no usable text is extracted,
      despite the -> str annotation — confirm callers handle None.
    Requires network access and the bs4 package.
    """
    import requests
    from bs4 import BeautifulSoup

    def get_page_obs(page):
        # Condense page text: split into paragraphs, then sentences,
        # and keep only the first five sentences as the observation.
        # find all paragraphs
        paragraphs = page.split("\n")
        paragraphs = [p.strip() for p in paragraphs if p.strip()]

        # find all sentences (naive split on '. ')
        sentences = []
        for p in paragraphs:
            sentences += p.split('. ')
        sentences = [s.strip() + '.' for s in sentences if s.strip()]
        return ' '.join(sentences[:5])

    def clean_str(s):
        # Replace non-breaking spaces and newlines with plain spaces.
        return s.replace("\xa0", " ").replace("\n", " ")

    entity = query.replace(" ", "+")
    search_url = f"https://en.wikipedia.org/w/index.php?search={entity}"
    response_text = requests.get(search_url).text
    soup = BeautifulSoup(response_text, features="html.parser")
    # Result headings only appear when the search did NOT land on an article.
    result_divs = soup.find_all("div", {"class": "mw-search-result-heading"})
    if result_divs:
        result_titles = [clean_str(div.get_text().strip()) for div in result_divs]
        obs = f"Could not find {query}. Similar: {result_titles[:5]}."
    else:
        # Landed on an article page: collect paragraph and list text.
        page = [p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")]
        if any("may refer to:" in p for p in page):
            # Disambiguation page: retry once with a bracketed query.
            obs = wikipedia_search("[" + query + "]")
        else:
            page_content = ""
            for p in page:
                # Skip fragments of one or two words (navigation noise).
                if len(p.split(" ")) > 2:
                    page_content += ' ' + clean_str(p)
                    if not p.endswith("\n"):
                        page_content += "\n"
            obs = get_page_obs(page_content)
            if not obs:
                obs = None
    return obs
def _web_driver_get_html(driver) -> str:
    """
    Return cleaned HTML content (without script, style and comment) for the
    page currently loaded in the Selenium 4 driver; the driver should be ready.

    Cleaning steps: drop script/style tags, HTML comments and a long list of
    non-content tags; strip all attributes from div/span; absolutize relative
    href/src links against the current page URL.
    """
    # Current page URL, used below to absolutize relative links.
    from bs4 import BeautifulSoup, Comment
    from urllib.parse import urljoin
    url = driver.current_url
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    # Remove script and style tags entirely.
    for script_or_style in soup(['script', 'style']):
        script_or_style.decompose() # Remove the tag from the soup
    # Remove HTML comments.
    # NOTE(review): soup(text=...) is the legacy bs4 keyword (now string=...) —
    # works but emits a deprecation warning on recent bs4 versions.
    for comment in soup(text=lambda text: isinstance(text, Comment)):
        comment.extract()
    # Remove non-content tags (metadata, media, form controls, ...).
    for tag in soup(['head', 'meta', 'link', 'title', 'noscript', 'iframe', 'svg', 'canvas', 'audio', 'video', 'embed', 'object', 'param', 'source', 'track', 'map', 'area', 'base', 'basefont', 'bdi', 'bdo', 'br', 'col', 'colgroup', 'datalist', 'details', 'dialog', 'hr', 'img', 'input', 'keygen', 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'script', 'style', 'comment']):
        tag.decompose()
    # Clear all attributes on div and span tags.
    for tag in soup(['div', 'span']):
        tag.attrs = {}
    # Absolutize anchor targets.
    for a in soup.find_all('a', href=True):
        a['href'] = urljoin(url, a['href'])
    # Absolutize image sources.
    # NOTE(review): <img> tags were decomposed in the removal list above, so
    # this loop appears to be a no-op — confirm whether images should be kept.
    for img in soup.find_all('img', src=True):
        img['src'] = urljoin(url, img['src'])
    # Serialize the cleaned soup back to an HTML string.
    html = str(soup)
    return html
def set_logging_level():
    """Configure root logging from the AGENT_LOG environment variable.

    Recognized values (case-insensitive): debug, info, warning, error;
    anything else falls back to ERROR. Default when unset is 'info'.
    """
    # Drop any previously installed handlers so basicConfig reconfigures.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    level_by_name = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
    }
    configured = os.environ.get('AGENT_LOG', 'info').lower()
    level = level_by_name.get(configured, logging.ERROR)
    # Log format shows the absolute file path and line number of each record.
    logging.basicConfig(
        level=level,
        format='%(asctime)s %(pathname)s [line:%(lineno)d] %(levelname)s %(funcName)s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
def messages_token_count(messages):
    "Calculate and return the total number of tokens in the provided messages."
    # Approximation following the OpenAI chat-format token-counting recipe,
    # using the cl100k_base tokenizer; not an exact billing count.
    import tiktoken
    encoding = tiktoken.get_encoding("cl100k_base")
    tokens_per_message = 4  # per-message framing overhead
    tokens_per_name = 1  # extra token when a 'name' field is present
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            if isinstance(value, str):
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += tokens_per_name
            if isinstance(value, list):
                # Multimodal content: list of {'type': 'text'|'image_url', ...} items.
                for item in value:
                    if item["type"] == "text":
                        num_tokens += len(encoding.encode(item["text"]))
                    if item["type"] == "image_url":
                        num_tokens += (85 + 170 * 2 * 2) # estimated with the simplest mode: 85 base tokens + four 170-token tiles
    num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
    return num_tokens

4 | CN doc 5 | EN doc 6 |

7 | 8 | GeneralAgent是一个Python原生的Agent框架,旨在将大型语言模型 与 Python 无缝集成。 9 | 10 | 11 | **主要特性** 12 | 13 | * 工具调用:GeneralAgent 不依赖大模型的 function call,通过python代码解释器来调用工具 14 | 15 | * 序列化:GeneralAgent 支持序列化,包括记忆和python执行状态,随用随启 16 | 17 | * 快速配置角色、函数和知识库,创建Agent 18 | 19 | * 执行稳定的复杂业务流程,协调多个Agent完成任务 20 | 21 | * 使用 `agent.run` 函数执行命令并产生结构化输出,超越简单的文本响应 22 | 23 | * 使用 `agent.user_input` 函数与用户进行动态交互 24 | 25 | * 自我调用(探索):GeneralAgent通过自我调用和堆栈记忆,最小化大模型的调用次数,来高效处理复杂任务。更多详情请见我们的 [论文](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 26 | 27 | 28 | 29 | ## 安装 30 | 31 | ```bash 32 | pip install GeneralAgent 33 | ``` 34 | 35 | 36 | 37 | ## 配置 38 | ### 方式一:使用环境变量(推荐) 39 | 1. 安装依赖: 40 | ```bash 41 | pip install python-dotenv 42 | ``` 43 | 44 | 2. 参考 [.env.example](./.env.example) 文件,创建并配置 .env 文件: 45 | ```bash 46 | OPENAI_API_KEY=your_openai_api_key 47 | # OPENAI_API_BASE=your_openai_base_url 48 | ``` 49 | 50 | 3. 在代码中加载环境变量: 51 | ```python 52 | from dotenv import load_dotenv 53 | from GeneralAgent import Agent 54 | 55 | load_dotenv() 56 | agent = Agent('You are a helpful agent.') 57 | ``` 58 | 59 | ### 方式二:直接在代码中配置 60 | 61 | ```python 62 | from GeneralAgent import Agent 63 | agent = Agent('You are a helpful agent.', api_key='sk-xxx') 64 | ``` 65 | 66 | 67 | 68 | ## 使用 69 | 70 | ### 快速开始 71 | 72 | ```python 73 | from GeneralAgent import Agent 74 | 75 | agent = Agent('你是一个AI助手') 76 | while True: 77 | query = input() 78 | agent.user_input(query) 79 | print('-'*50) 80 | ``` 81 | 82 | 83 | 84 | ### 函数调用 85 | 86 | ```python 87 | # 函数调用 88 | from GeneralAgent import Agent 89 | 90 | # 函数: 获取天气信息 91 | def get_weather(city: str) -> str: 92 | """ 93 | get weather information 94 | @city: str, city name 95 | @return: str, weather information 96 | """ 97 | return f"{city} weather: sunny" 98 | 99 | 100 | agent = Agent('你是一个天气小助手', functions=[get_weather]) 101 | agent.user_input('成都天气怎么样?') 102 | ``` 103 | 104 | 105 | 106 | ### 知识库 107 | 108 | ```python 109 | # 知识库 110 | from GeneralAgent import 
Agent 111 | 112 | knowledge_files = ['../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf'] 113 | agent = Agent('你是AI助手,用中文回复。', workspace='9_knowledge_files', knowledge_files=knowledge_files) 114 | agent.user_input('Self call 是什么意思?') 115 | ``` 116 | 117 | 知识库默认使用 GeneralAgent.skills 中 embedding_texts 函数来对文本进行 embedding (默认是OpenAI的text-embedding-3-small模型) 118 | 119 | 你可以重写 embedding_texts 函数,使用其他厂商 或者 本地的 embedding 方法,具体如下: 120 | 121 | ```python 122 | def new_embedding_texts(texts) -> [[float]]: 123 | """ 124 | 对文本数组进行embedding 125 | """ 126 | # 你的embedding方法 127 | return result 128 | from GeneralAgent import skills 129 | skills.embedding_texts = new_embedding_texts 130 | ``` 131 | 132 | 133 | 134 | ### 序列化 135 | 136 | ```python 137 | # 序列化 138 | from GeneralAgent import Agent 139 | 140 | # agent序列化位置,运行过程中会自动保存LLM的messages和python解析器的状态 141 | workspace='./5_serialize' 142 | 143 | role = 'You are a helpful agent.' 144 | agent = Agent(workspace=workspace) 145 | agent.user_input('My name is Shadow.') 146 | 147 | agent = None 148 | agent = Agent(role, workspace=workspace) 149 | agent.user_input('What is my name?') 150 | # Output: Your name is Shadow. How can I help you today, Shadow? 151 | 152 | # agent: 清除记忆 + python序列化状态 153 | agent.clear() 154 | 155 | agent.user_input('What is my name?') 156 | # Output: I'm sorry, but I don't have access to your personal information, including your name. How can I assist you today? 
157 | 158 | import shutil 159 | shutil.rmtree(workspace) 160 | ``` 161 | 162 | 163 | 164 | ### 写小说 165 | 166 | ```python 167 | # 写小说 168 | from GeneralAgent import Agent 169 | from GeneralAgent import skills 170 | 171 | # 步骤0: 定义Agent 172 | agent = Agent('你是一个小说家') 173 | 174 | # 步骤1: 从用户处获取小说的名称和主题 175 | # topic = skills.input('请输入小说的名称和主题: ') 176 | topic = '小白兔吃糖不刷牙的故事' 177 | 178 | # 步骤2: 小说的概要 179 | summary = agent.run(f'小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。') 180 | 181 | # 步骤3: 小说的章节名称和概要列表 182 | chapters = agent.run('输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]', return_type=list) 183 | 184 | # 步骤4: 生成小说每一章节的详细内容 185 | contents = [] 186 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 187 | content = agent.run(f'对于章节: {chapter_title}\n{chapter_summary}. \n输出章节的详细内容,注意只返回内容,不要标题。') 188 | content = '\n'.join([x.strip() for x in content.split('\n')]) 189 | contents.append(content) 190 | 191 | # 步骤5: 将小说格式化写入文件 192 | with open('novel.md', 'w') as f: 193 | for index in range(len(chapters)): 194 | f.write(f'### {chapters[index][0]}\n') 195 | f.write(f'{contents[index]}\n\n') 196 | 197 | # 步骤6(可选): 将markdown文件转换为pdf文件 198 | 199 | # 步骤7: 输出小说文件给用户 200 | skills.output('你的小说已经生成[novel.md](novel.md)\n') 201 | ``` 202 | 203 | 204 | 205 | ### 多Agent 206 | 207 | ```python 208 | # 多Agent配合完成任务 209 | from GeneralAgent import Agent 210 | story_writer = Agent('你是一个故事创作家,根据大纲要求或者故事梗概,返回一个更加详细的故事内容。') 211 | humor_enhancer = Agent('你是一个润色作家,将一个故事进行诙谐润色,增加幽默元素。直接输出润色后的故事') 212 | 213 | # 禁用Python运行 214 | story_writer.disable_python_run = True 215 | humor_enhancer.disable_python_run = True 216 | 217 | # topic = skills.input('请输入小说的大纲要求或者故事梗概: ') 218 | topic = '写个小白兔吃糖不刷牙的故事,有教育意义。' 219 | initial_story = story_writer.run(topic) 220 | enhanced_story = humor_enhancer.run(initial_story) 221 | print(enhanced_story) 222 | ``` 223 | 224 | 225 | 226 | 227 | ### 多模态输入 228 | 229 | user_input 的 input 参数,和 run 的 command 参数,支持字符串或者数组。 230 | 231 | 
数组时支持多模态,格式为最简模式: ['text_content', {'image': 'path/to/image'}, ...] 232 | 233 | ```python 234 | # 支持多模态: 图片输入 235 | from GeneralAgent import Agent 236 | 237 | agent = Agent('You are a helpful assistant.') 238 | agent.user_input(['what is in the image?', {'image': '../docs/images/self_call.png'}]) 239 | ``` 240 | 241 | 242 | 243 | ### 大模型切换 244 | 245 | #### OpenAI SDK 246 | 247 | 得益于GeneralAgent框架不依赖大模型厂商的 function call 能力实现了函数调用,可以无缝切换不同的大模型实现相同的能力。 248 | 249 | GeneralAgent框架使用OpenAI Python SDK 来支持其他大模型。 250 | 251 | ```python 252 | from GeneralAgent import Agent 253 | 254 | agent = Agent('You are a helpful agent.', model='deepseek-chat', token_limit=32000, api_key='sk-xxx', base_url='https://api.deepseek.com/v1') 255 | agent.user_input('介绍一下成都') 256 | ``` 257 | 258 | 详情见: [examples/8_multi_model.py](./examples/8_multi_model.py) 259 | 260 | 261 | #### Azure OpenAI 262 | 263 | ```python 264 | from GeneralAgent import Agent 265 | 266 | # api_key = os.getenv("OPENAI_API_KEY") 267 | # base_url = os.getenv("OPENAI_API_BASE") 268 | api_key = '8ef0b4df45e444079cd5xxx' # Azure API Key or use OPENAI_API_KEY environment variable 269 | base_url = 'https://xxxx.openai.azure.com/' # Azure API Base URL or use OPENAI_API_BASE environment variable 270 | model = 'azure_cpgpt4' # azure_ with model name, e.g. azure_cpgpt4 271 | # azure api_version is default to '2024-05-01-preview'. 
You can set by environment variable AZURE_API_VERSION 272 | 273 | agent = Agent('You are a helpful assistant', api_key=api_key, base_url=base_url, model=model) 274 | while True: 275 | query = input('Please input your query:') 276 | agent.user_input(query) 277 | print('-'*50) 278 | ``` 279 | 280 | 281 | #### OneAPI 282 | 283 | 如果其他大模型不支持OpenAI SDK,可以通过 https://github.com/songquanpeng/one-api 来支持。 284 | 285 | 286 | #### 自定义大模型 287 | 288 | 或者重写 GeneralAgent.skills 中 llm_inference 函数来使用其他大模型。 289 | 290 | ```python 291 | from GeneralAgent import skills 292 | def new_llm_inference(messages, model, stream=False, temperature=None, api_key=None, base_url=None): 293 | """ 294 | 使用大模型进行推理 295 | """ 296 | pass 297 | skills.llm_inference = new_llm_inference 298 | ``` 299 | 300 | 301 | 302 | ### 禁用Python运行 303 | 304 | 默认 GeneralAgent 自动运行 LLM 输出的python代码。 305 | 306 | 某些场景下,如果不希望自动运行,设置 `disable_python_run` 为 `True` 即可。 307 | 308 | ```python 309 | from GeneralAgent import Agent 310 | 311 | agent = Agent('你是一个python专家,辅助用户解决python问题。') 312 | agent.disable_python_run = True 313 | agent.user_input('用python实现一个读取文件的函数') 314 | ``` 315 | 316 | ### 隐藏python运行 317 | 318 | 在正式的业务场景中,不希望用户看到python代码的运行,而只是看到最终结果,可以设置 `hide_python_code` 为 `True`。 319 | 320 | ```python 321 | from GeneralAgent import Agent 322 | agent = Agent('You are a helpful assistant.', hide_python_code=True) 323 | agent.user_input('caculate 0.999 ** 1000') 324 | ``` 325 | 326 | 327 | 328 | ### AI搜索 329 | 330 | ```python 331 | # AI搜索 332 | # 运行前置条件: 333 | # 1. 请先配置环境变量 SERPER_API_KEY (https://serper.dev/ 的API KEY); 334 | # 2. 
安装 selenium 库: pip install selenium 335 | 336 | from GeneralAgent import Agent 337 | from GeneralAgent import skills 338 | 339 | google_results = [] 340 | 341 | # 步骤1: 第一次google搜索 342 | question = input('请输入问题,进行 AI 搜索: ') 343 | # question = '周鸿祎卖车' 344 | content1 = skills.google_search(question) 345 | google_results.append(content1) 346 | 347 | # 步骤2: 第二次google搜索: 根据第一次搜索结构,获取继续搜索的问题 348 | agent = Agent('你是一个AI搜索助手。') 349 | querys = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{content1}\n\n。请问可以帮助用户,需要继续搜索的关键短语有哪些(最多3个,且和问题本身不太重合)?返回关键短语列表变量([query1, query2])', return_type=list) 350 | print(querys) 351 | for query in querys: 352 | content = skills.google_search(query) 353 | google_results.append(content) 354 | 355 | # 步骤3: 提取重点网页内容 356 | agent.clear() 357 | web_contents = [] 358 | google_result = '\n\n'.join(google_results) 359 | urls = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n。哪些网页对于用户问题比较有帮助?请返回最重要的不超过5个的网页url列表变量([url1, url2, ...])', return_type=list) 360 | for url in urls: 361 | content = skills.web_get_text(url, wait_time=2) 362 | web_contents.append(content) 363 | 364 | # 步骤4: 输出结果 365 | agent.clear() 366 | web_content = '\n\n'.join(web_contents) 367 | agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n部分网页内容: \n{web_content}\n\n。请根据用户问题,搜索引擎结果,网页内容,给出用户详细的回答,要求按一定目录结构来输出,并且使用markdown格式。') 368 | ``` 369 | 370 | ### 更多 371 | 372 | 更多例子请见 [examples](./examples) 373 | 374 | 375 | 376 | ## API 377 | 378 | ### 基础使用 379 | 380 | **Agent.\__init__(self, role: str, workspace: str = None, functions: List[Callable] = [], knowledge_files: List[str] = None)** 381 | 382 | 初始化一个Agent实例。 383 | 384 | - role (str): Agent的角色。 385 | - workspace (str, 可选): Agent的工作空间。默认值为None(不序列化)。如果指定了目录,Agent会自动保存状态并在下次初始化时重新加载。 386 | - functions (List[Callable], 可选): Agent可以调用的函数列表。 387 | - knowledge_files (List[str], 可选): Agent知识库文件路径列表。 388 | - messages (List[str], 可选): Agent的历史消息列表, 消息字段中必须包含 'role', 'content' 字段。 389 | 390 | **Agent.run(self, command: 
Union[str, List[Union[str, Dict[str, str]]]], return_type: str = str, display: bool = False)** 391 | 392 | 执行命令并返回指定类型的结果。 393 | 394 | - command (Union[str, List[Union[str, Dict[str, str]]]]): 要执行的命令。例如:'describe chengdu' 或 ['what is in image?', {'image': 'path/to/image'}]。 395 | - return_type (str, 可选): 结果的返回类型。默认值为str。 396 | - display (bool, 可选): 是否显示LLM生成的中间内容。默认值为False。 397 | 398 | **Agent.user_input(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 399 | 400 | 响应用户输入,并始终显示LLM生成的中间内容。 401 | 402 | - input (Union[str, List[Union[str, Dict[str, str]]]]): 用户输入。 403 | 404 | **Agent.temporary_context(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 405 | 406 | 对话产生的数据,不进入 agent memory 中。 407 | 408 | - input (Union[str, List[Union[str, Dict[str, str]]]]): 用户输入。 409 | 410 | ```python 411 | from GeneralAgent import Agent 412 | 413 | agent = Agent('You are a helpful assistant.') 414 | with agent.temporary_context(): 415 | agent.user_input('My name is Henry.') 416 | agent.user_input("What's my name?") 417 | ``` 418 | 419 | **Agent.clear(self)** 420 | 421 | 清除Agent的状态。 422 | 423 | ### 高级使用 424 | 425 | [ ] # TODO 426 | 427 | 428 | 429 | 430 | ## 论文 431 | 432 | [General Agent:Self Call and Stack Memory](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 433 | 434 | 435 | 436 | 437 | 438 | ## 加入我们👏🏻 439 | 440 | 使用微信扫描下方二维码,加入微信群聊,或参与贡献。 441 | 442 |

443 | wechat 444 |

-------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- 1 | # GeneralAgent: From LLM to Agent 2 | 3 |

4 | CN doc 5 | EN doc 6 |

7 | 8 | GeneralAgent is a Python-native Agent framework that aims to seamlessly integrate large language models with Python. 9 | 10 | **Main features** 11 | 12 | * Quickly configure role, functions, and knowledge bases to create Agent. 13 | 14 | * Execute stable and complex business processes and coordinate multiple Agents to complete tasks. 15 | 16 | * Use the `agent.run` function to execute commands and produce structured output, beyond simple text responses. 17 | 18 | * Use the `agent.user_input` function to dynamically interact with the user. 19 | 20 | * Tool call: GeneralAgent does not rely on the function call of large models, but calls tools through the python code interpreter. 21 | 22 | * Serialization: GeneralAgent supports serialization, including memory and python execution status, and is ready to use 23 | 24 | * Self-call(experimental): GeneralAgent minimizes the number of calls to large models through self-call and stack memory to efficiently handle complex tasks. For more details, please see our [paper](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 25 | 26 | 27 | 28 | ## Installation 29 | 30 | ```bash 31 | pip install GeneralAgent 32 | ``` 33 | 34 | 35 | 36 | ## Configuration 37 | 38 | ### Method 1: Using Environment Variables (Recommended) 39 | 1. Install dependency: 40 | ```bash 41 | pip install python-dotenv 42 | ``` 43 | 44 | 2. Refer to [.env.example](./.env.example) file to create and configure .env file: 45 | ```bash 46 | OPENAI_API_KEY=your_openai_api_key 47 | # OPENAI_API_BASE=your_openai_base_url 48 | ``` 49 | 50 | 3. 
Load environment variables in code: 51 | ```python 52 | from dotenv import load_dotenv 53 | from GeneralAgent import Agent 54 | 55 | load_dotenv() 56 | agent = Agent('You are a helpful agent.') 57 | ``` 58 | 59 | ### Method 2: Configure in Code 60 | 61 | ```python 62 | from GeneralAgent import Agent 63 | agent = Agent('You are a helpful agent.', api_key='sk-xxx') 64 | ``` 65 | 66 | 67 | 68 | ## Usage 69 | 70 | ### Quick Start 71 | 72 | ```python 73 | from GeneralAgent import Agent 74 | 75 | # Streaming output of intermediate results 76 | def output_callback(token): 77 | token = token or '\n' 78 | print(token, end='', flush=True) 79 | 80 | agent = Agent('You are an AI assistant, reply in Chinese.', output_callback=output_callback) 81 | while True: 82 | query = input('Please enter: ') 83 | agent.user_input(query) 84 | print('-'*50) 85 | ``` 86 | 87 | 88 | 89 | ### Function call 90 | 91 | ```python 92 | # Function call 93 | from GeneralAgent import Agent 94 | 95 | # Function: Get weather information 96 | def get_weather(city: str) -> str: 97 | """ 98 | get weather information 99 | @city: str, city name 100 | @return: str, weather information 101 | """ 102 | return f"{city} weather: sunny" 103 | 104 | agent = Agent('You are a weather assistant', functions=[get_weather]) 105 | agent.user_input('What is the weather like in Chengdu?') 106 | 107 | # Output 108 | # ```python 109 | # city = "Chengdu" 110 | # weather_info = get_weather(city) 111 | # weather_info 112 | # ``` 113 | # The weather in Chengdu is sunny. 114 | # Is there anything else I can help with? 
115 | ``` 116 | 117 | 118 | 119 | ### Knowledge Base 120 | 121 | ```python 122 | # Knowledge Base 123 | from GeneralAgent import Agent 124 | 125 | knowledge_files = ['../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf'] 126 | agent = Agent('You are an AI assistant, reply in Chinese.', workspace='9_knowledge_files', knowledge_files=knowledge_files) 127 | agent.user_input('What does Self call mean?') 128 | ``` 129 | 130 | The knowledge base uses the embedding_texts function in GeneralAgent.skills to embed text by default (the default is OpenAI's text-embedding-3-small model) 131 | 132 | You can rewrite the embedding_texts function to use other manufacturers or local embedding methods, as follows: 133 | 134 | ```python 135 | def new_embedding_texts(texts) -> [[float]]: 136 | """ 137 | Embedding text arrays 138 | """ 139 | # Your embedding method 140 | return result 141 | from GeneralAgent import skills 142 | skills.embedding_texts = new_embedding_texts 143 | ``` 144 | 145 | 146 | 147 | ### Serialization 148 | 149 | ```python 150 | # Serialization 151 | from GeneralAgent import Agent 152 | 153 | # Agent serialization location, LLM messages and python parser status will be automatically saved during operation 154 | workspace='./5_serialize' 155 | 156 | role = 'You are a helpful agent.' 157 | agent = Agent(workspace=workspace) 158 | agent.user_input('My name is Shadow.') 159 | 160 | agent = None 161 | agent = Agent(role, workspace=workspace) 162 | agent.user_input('What is my name?') 163 | # Output: Your name is Shadow. How can I help you today, Shadow? 164 | 165 | # agent: Clear memory + python serialization status 166 | agent.clear() 167 | 168 | agent.user_input('What is my name?') 169 | # Output: I'm sorry, but I don't have access to your personal information, including your name. How can I assist you today? 
170 | 171 | import shutil 172 | shutil.rmtree(workspace) 173 | ``` 174 | 175 | ### Write a novel 176 | 177 | ```python 178 | # Write a novel 179 | from GeneralAgent import Agent 180 | from GeneralAgent import skills 181 | 182 | # Step 0: Define Agent 183 | agent = Agent('You are a novelist') 184 | 185 | # Step 1: Get the name and topic of the novel from the user 186 | # topic = skills.input('Please enter the name and topic of the novel: ') 187 | topic = 'The story of the little white rabbit eating candy without brushing its teeth' 188 | 189 | # Step 2: Summary of the novel 190 | summary = agent.run(f'The name and topic of the novel are: {topic}, expand and improve the summary of the novel. It is required to be literary, educational, and entertaining. ') 191 | 192 | # Step 3: List of chapter names and summaries of the novel 193 | chapters = agent.run('Output the chapter names of the novel and the summary of each chapter, return a list [(chapter_title, chapter_summary), ....]', return_type=list) 194 | 195 | # Step 4: Generate detailed content of each chapter of the novel 196 | contents = [] 197 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 198 | content = agent.run(f'For chapters: {chapter_title}\n{chapter_summary}. 
\nOutput detailed content of the chapter, note that only the content is returned, not the title.') 199 | content = '\n'.join([x.strip() for x in content.split('\n')]) 200 | contents.append(content) 201 | 202 | # Step 5: Format the novel and write it to a file 203 | with open('novel.md', 'w') as f: 204 | for index in range(len(chapters)): 205 | f.write(f'### {chapters[index][0]}\n') 206 | f.write(f'{contents[index]}\n\n') 207 | 208 | # Step 6 (optional): Convert markdown file to pdf file 209 | 210 | # Step 7: Output novel file to user 211 | skills.output('Your novel has been generated [novel.md](novel.md)\n') 212 | ``` 213 | 214 | ### Multi-Agent 215 | 216 | ```python 217 | # Multi-Agent cooperates to complete the task 218 | from GeneralAgent import Agent 219 | story_writer = Agent('You are a story writer. According to the outline requirements or story outline, return a more detailed story content.') 220 | humor_enhancer = Agent('You are a polisher. Make a story humorous and add humorous elements. Directly output the polished story') 221 | 222 | # Disable Python running 223 | story_writer.disable_python_run = True 224 | humor_enhancer.disable_python_run = True 225 | 226 | # topic = skills.input('Please enter the outline requirements or story summary of the novel: ') 227 | topic = 'Write a story about a little white rabbit eating candy without brushing its teeth. It has educational significance. ' 228 | initial_story = story_writer.run(topic) 229 | enhanced_story = humor_enhancer.run(initial_story) 230 | print(enhanced_story) 231 | ``` 232 | 233 | 234 | 235 | ### Multimodal input 236 | 237 | The input parameter of user_input and the command parameter of run support strings or arrays. 238 | 239 | Multimodal is supported when the array is used. The format is the simplest mode: ['text_content', {'image': 'path/to/image'}, ...] 
240 | 241 | ```python 242 | # Multimodal support: Image input 243 | from GeneralAgent import Agent 244 | 245 | agent = Agent('You are a helpful assistant.') 246 | agent.user_input(['what is in the image?', {'image': '../docs/images/self_call.png'}]) 247 | ``` 248 | 249 | 250 | 251 | 252 | ### LLM switching 253 | 254 | #### OpenAI SDK 255 | 256 | Thanks to the GeneralAgent framework's independent function call capability of large model vendors, it can seamlessly switch between different large models to achieve the same capabilities. 257 | 258 | The GeneralAgent framework uses the OpenAI Python SDK to support other large models. 259 | 260 | ```python 261 | from GeneralAgent import Agent 262 | 263 | agent = Agent('You are a helpful agent.', model='deepseek-chat', token_limit=32000, api_key='sk-xxx', base_url='https://api.deepseek.com/v1') 264 | agent.user_input('Introduce Chengdu') 265 | ``` 266 | 267 | For details, see: [examples/8_multi_model.py](./examples/8_multi_model.py) 268 | 269 | #### Azure OpenAI 270 | 271 | ```python 272 | from GeneralAgent import Agent 273 | 274 | # api_key = os.getenv("OPENAI_API_KEY") 275 | # base_url = os.getenv("OPENAI_API_BASE") 276 | api_key = '8ef0b4df45e444079cd5xxx' # Azure API Key or use OPENAI_API_KEY environment variable 277 | base_url = 'https://xxxx.openai.azure.com/' # Azure API Base URL or use OPENAI_API_BASE environment variable 278 | model = 'azure_cpgpt4' # azure_ with model name, e.g. azure_cpgpt4 279 | # azure api_version is default to '2024-05-01-preview'. You can set by environment variable AZURE_API_VERSION 280 | 281 | agent = Agent('You are a helpful assistant', api_key=api_key, base_url=base_url, model=model) 282 | while True: 283 | query = input('Please input your query:') 284 | agent.user_input(query) 285 | print('-'*50) 286 | ``` 287 | 288 | 289 | #### One API 290 | 291 | If other large models do not support OpenAI SDK, they can be supported through https://github.com/songquanpeng/one-api. 
292 | 293 | 294 | #### Custom large model 295 | 296 | Or rewrite the llm_inference function in GeneralAgent.skills to use other large models. 297 | 298 | ```python 299 | from GeneralAgent import skills 300 | def new_llm_inference(messages, model, stream=False, temperature=None, api_key=None, base_url=None): 301 | """ 302 | Use the large model for inference 303 | """ 304 | pass 305 | skills.llm_inference = new_llm_inference 306 | ``` 307 | 308 | 309 | 310 | ### Disable Python run 311 | 312 | By default, GeneralAgent automatically runs the python code output by LLM. 313 | 314 | In some scenarios, if you do not want to run automatically, set `disable_python_run` to `True`. 315 | 316 | ```python 317 | from GeneralAgent import Agent 318 | 319 | agent = Agent('You are a python expert, helping users solve python problems.') 320 | agent.disable_python_run = True 321 | agent.user_input('Use python to implement a function to read files') 322 | ``` 323 | 324 | 325 | 326 | ### Hide Python Run 327 | 328 | In formal business scenarios, if you do not want users to see the running of Python code but only the final result, you can set `hide_python_code` to `True`. 329 | 330 | ```python 331 | from GeneralAgent import Agent 332 | agent = Agent('You are a helpful assistant.', hide_python_code=True) 333 | agent.user_input('caculate 0.999 ** 1000') 334 | ``` 335 | 336 | 337 | 338 | ### AI search 339 | 340 | ```python 341 | # AI search 342 | # Prerequisites: 343 | # 1. Please configure the environment variable SERPER_API_KEY (https://serper.dev/'s API KEY); 344 | # 2. 
Install the selenium library: pip install selenium 345 | 346 | from GeneralAgent import Agent 347 | from GeneralAgent import skills 348 | 349 | google_results = [] 350 | 351 | # Step 1: First Google search 352 | question = input('Please enter a question and proceed AI search: ') 353 | content1 = skills.google_search(question) 354 | google_results.append(content1) 355 | 356 | # Step 2: Second Google search: According to the first search structure, get the question to continue searching 357 | agent = Agent('You are an AI search assistant.') 358 | queries = agent.run(f'User question: \n{question}\n\nSearch engine results: \n{content1}\n\n. Can you help users, what are the key phrases that need to be searched (up to 3, and not too overlapping with the question itself)? Return the key phrase list variable ([query1, query2])', return_type=list) 359 | print(queries) 360 | for query in queries: 361 | content = skills.google_search(query) 362 | google_results.append(content) 363 | 364 | # Step 3: Extract key web page content 365 | agent.clear() 366 | web_contents = [] 367 | google_result = '\n\n'.join(google_results) 368 | urls = agent.run(f'User question: \n{question}\n\nSearch engine result: \n{google_result}\n\n. Which web pages are more helpful for user questions? Please return the most important webpage url list variable ([url1, url2, ...])', return_type=list) 369 | for url in urls: 370 | content = skills.web_get_text(url, wait_time=2) 371 | web_contents.append(content) 372 | 373 | # Step 4: Output results 374 | agent.clear() 375 | web_content = '\n\n'.join(web_contents) 376 | agent.run(f'User question: \n{question}\n\nSearch engine results: \n{google_result}\n\nPart of the webpage content: \n{web_content}\n\n. Please give the user a detailed answer based on the user's question, search engine results, and webpage content. 
It is required to be output according to a certain directory structure and use markdown format.') 377 | ``` 378 | 379 | 380 | 381 | ### More 382 | 383 | For more examples, see [examples](./examples) 384 | 385 | 386 | ## API 387 | 388 | ### Basic Usage 389 | 390 | **Agent.\__init__(self, role: str, workspace: str = None, functions: List[Callable] = [], knowledge_files: List[str] = None)** 391 | 392 | Initializes an Agent instance. 393 | 394 | - role (str): The role of the agent. 395 | - workspace (str, optional): The agent's workspace. Default is None (not serialized). If a directory is specified, the agent will automatically save the agent's state and reload it upon the next initialization. 396 | - functions (List[Callable], optional): A list of functions that the agent can call. 397 | - knowledge_files (List[str], optional): A list of file paths for the agent's knowledge base. 398 | - messages (List[str], optional): A list of Agent's historical messages, where each message must contain the 'role' and 'content' fields. 399 | 400 | 401 | **Agent.run(self, command: Union[str, List[Union[str, Dict[str, str]]]], return_type: str = str, display: bool = False)** 402 | 403 | Executes a command and returns the result in the specified return type. 404 | 405 | - command (Union[str, List[Union[str, Dict[str, str]]]]): The command to execute. Examples: 'describe chengdu' or ['what is in image?', {'image': 'path/to/image'}]. 406 | - return_type (str, optional): The return type of the result. Default is str. 407 | - display (bool, optional): Whether to display the intermediate content generated by the LLM. Default is False. 408 | 409 | 410 | **Agent.user_input(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 411 | 412 | Responds to user input and always displays the intermediate content generated by the LLM. 413 | 414 | - input (Union[str, List[Union[str, Dict[str, str]]]]): The user input. 
415 | 416 | **Agent.temporary_context(self, input: Union[str, List[Union[str, Dict[str, str]]]])** 417 | 418 | The data generated by the conversation does not enter the agent memory. 419 | - input (Union[str, List[Union[str, Dict[str, str]]]]): The user input. 420 | 421 | ```python 422 | from GeneralAgent import Agent 423 | 424 | agent = Agent('You are a helpful assistant.') 425 | with agent.temporary_context(): 426 | agent.user_input('My name is Henry.') 427 | agent.user_input("What's my name?") 428 | ``` 429 | 430 | **Agent.clear(self)** 431 | 432 | Clears the agent's state. 433 | 434 | 435 | 436 | ### Advanced Usage 437 | 438 | [] # TODO 439 | 440 | 441 | 442 | 443 | ## Paper 444 | 445 | [General Agent: Self Call and Stack Memory](./docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf) 446 | 447 | 448 | 449 | ## Join us👏🏻 450 | 451 | Use WeChat to scan the QR code below, join the WeChat group chat, or participate in the contribution. 452 | 453 |

454 | wechat 455 |

-------------------------------------------------------------------------------- /docs/develop.md: -------------------------------------------------------------------------------- 1 | # 发布 2 | 3 | ```bash 4 | # 发布pip库 5 | poetry build -f sdist 6 | poetry publish 7 | ``` 8 | 9 | # 测试 10 | 11 | ```shell 12 | # 新建python环境 13 | python -m venv ga 14 | source ga/bin/activate 15 | 16 | # 临时取消python别名 (如果有) 17 | unalias python 18 | 19 | # 安装依赖 20 | pip install . 21 | 22 | # 导出环境变量 23 | export $(grep -v '^#' .env | sed 's/^export //g' | xargs) 24 | 25 | # 测试 26 | cd test 27 | pytest -s -v 28 | ``` -------------------------------------------------------------------------------- /docs/images/2023.11.15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/2023.11.15.jpg -------------------------------------------------------------------------------- /docs/images/2023_11_27_builder_agent.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/2023_11_27_builder_agent.jpg -------------------------------------------------------------------------------- /docs/images/2023_11_27_image_creator.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/2023_11_27_image_creator.jpg -------------------------------------------------------------------------------- /docs/images/Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/Architecture.png 
-------------------------------------------------------------------------------- /docs/images/Architecture_2023.11.15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/Architecture_2023.11.15.png -------------------------------------------------------------------------------- /docs/images/general_agent_2024.01.16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/general_agent_2024.01.16.png -------------------------------------------------------------------------------- /docs/images/self_call.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/self_call.png -------------------------------------------------------------------------------- /docs/images/stack_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/stack_memory.png -------------------------------------------------------------------------------- /docs/images/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/wechat.jpg -------------------------------------------------------------------------------- /docs/images/wechat_company.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/images/wechat_company.jpg 
-------------------------------------------------------------------------------- /docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf -------------------------------------------------------------------------------- /examples/0_base_usage.py: -------------------------------------------------------------------------------- 1 | from GeneralAgent import Agent 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | # agent = Agent('You are a helpful assistant.', temperature=0.5, frequency_penalty=2) 6 | agent = Agent('You are a helpful assistant.') 7 | while True: 8 | query = input('>: ') 9 | agent.user_input(query) 10 | print('-'*50) -------------------------------------------------------------------------------- /examples/10_rag_function.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | load_dotenv() 4 | # RAG function 5 | 6 | # 设置日志级别 7 | import os 8 | os.environ['AGENT_LOG'] = 'debug' 9 | 10 | from GeneralAgent import Agent 11 | 12 | def rag_function(messages): 13 | input = messages[-1]['content'] 14 | print('user input:', input) 15 | # TODO: 根据input或者messages更多信息,返回相关的背景知识 16 | return 'Background: GeneralAgent is a Python library for building AI assistants. It provides a simple API for building conversational agents.' 
17 | 18 | agent = Agent('You are a helpful assistant', rag_function=rag_function) 19 | agent.user_input('What is GeneralAgent?') -------------------------------------------------------------------------------- /examples/11_collection_and_store.py: -------------------------------------------------------------------------------- 1 | # 多轮对话搜集信息 & 保存 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | role = """ 8 | 你是一个专业的诊前护士。 9 | 你的主要工作: 和病人沟通,确认详细的病情,保存病历。 10 | 11 | # 1、病情沟通例子 12 | 用户: 我眼睛疼 13 | 你: 疼多旧了? 14 | 用户: 2天 15 | 你: 你还可以看见东西吗?视力有没有影响? 16 | 用户: 还能看见 17 | 你: .... 18 | 19 | 当病情确认,直接输出python代码,使用 save_medical_record 函数保存病历详情。 20 | 21 | medical_record = \"\"\" 22 | 主诉: 眼睛干涩 23 | 现病史: 最近长时间使用电子设备 24 | 既往史: 无特殊情况 25 | 过敏史: 无过敏史 26 | 家族史: 无家族史 27 | 个人史: 生活环境比较潮湿,未使用任何眼睛滴剂或药物缓解症状 28 | \"\"\" 29 | save_medical_record(medical_record) 30 | 31 | """ 32 | 33 | stop = False 34 | # 保存病历函数 35 | def save_medical_record(medical_record): 36 | """ 37 | 保存病历 38 | @param medical_record: 病历内容 39 | """ 40 | # print(medical_record) 41 | with open('medical_record.txt', 'a') as f: 42 | f.write(medical_record) 43 | global stop 44 | stop = True 45 | return "病历已保存" 46 | 47 | 48 | agent = Agent(role, functions=[save_medical_record], hide_python_code=True) 49 | agent.user_input('你可以做什么?') 50 | while not stop: 51 | query = input('请输入: ') 52 | agent.user_input(query) -------------------------------------------------------------------------------- /examples/12_hide_python_code.py: -------------------------------------------------------------------------------- 1 | # 快速开始 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | agent = Agent('You are a helpful assistant.', hide_python_code=True) 7 | agent.user_input('caculate 0.999 ** 1000') -------------------------------------------------------------------------------- /examples/13_image_input.py: 
-------------------------------------------------------------------------------- 1 | # 支持多模态: 图片输入 2 | # 格式为自定最简模式,: ['text_content', {'image': 'path/to/image'}, ...] 3 | from GeneralAgent.utils import set_logging_level 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | set_logging_level() 8 | 9 | from GeneralAgent import Agent 10 | 11 | agent = Agent('You are a helpful assistant.') 12 | agent.user_input(['what is in the image?', {'image': '../docs/images/self_call.png'}]) -------------------------------------------------------------------------------- /examples/14_doubao_llm.py: -------------------------------------------------------------------------------- 1 | # 使用豆包大模型 2 | # 使用豆包模型,需要先安装库: pip install 'volcengine-python-sdk[ark]' 3 | # model设置为doubao,区分大模型链接库volcengine 4 | # 豆包由于接口上模型是Endpoint。所以使用base_url来指定Endpoint(即哪种模型) 5 | 6 | from GeneralAgent import Agent 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | api_key = 'your_api_key' 12 | endpoint = 'your_endpoint_id' 13 | agent = Agent('You are a helpful assistant', model='doubao', api_key=api_key, base_url=endpoint) 14 | agent.user_input('介绍一下成都') -------------------------------------------------------------------------------- /examples/15_run_check.py: -------------------------------------------------------------------------------- 1 | # agent.run命令的时候,核对生成内容是否合适 2 | from GeneralAgent import Agent 3 | from GeneralAgent import skills 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | # 步骤0: 定义Agent 9 | agent = Agent('你是一个小说家') 10 | 11 | # 步骤1: 从用户处获取小说的名称和主题 12 | # topic = skills.input('请输入小说的名称和主题: ') 13 | topic = '小白兔吃糖不刷牙的故事' 14 | 15 | # 步骤2: 小说的概要 16 | summary = agent.run(f'小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。') 17 | 18 | # 步骤3: 小说的章节名称和概要列表 19 | chapters = agent.run('输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]', return_type=list, user_check=True) 20 | 21 | # 步骤4: 生成小说每一章节的详细内容 22 | agent.disable_python() 23 | contents = [] 24 | for 
index, (chapter_title, chapter_summary) in enumerate(chapters): 25 | content = agent.run(f'对于章节: {chapter_title}\n概要: {chapter_summary}. \n写小说这个章节的详细内容,注意只返回内容,不要标题。') 26 | content = '\n'.join([x.strip() for x in content.split('\n')]) 27 | contents.append(content) 28 | 29 | # 步骤5: 将小说格式化写入文件 30 | with open('novel.md', 'w') as f: 31 | for index in range(len(chapters)): 32 | f.write(f'### {chapters[index][0]}\n') 33 | f.write(f'{contents[index]}\n\n') 34 | 35 | # 步骤6(可选): 将markdown文件转换为pdf文件 36 | 37 | # 步骤7: 输出小说文件给用户 38 | skills.output('你的小说已经生成[novel.md](novel.md)\n') -------------------------------------------------------------------------------- /examples/16_test_azure.py: -------------------------------------------------------------------------------- 1 | # 测试Azure Open AI 2 | import os 3 | from GeneralAgent import Agent 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | # api_key = os.getenv("OPENAI_API_KEY") 9 | # base_url = os.getenv("OPENAI_API_BASE") 10 | api_key = '8ef0b4df45e444079cd5xxx' # Azure API Key or use OPENAI_API_KEY environment variable 11 | base_url = 'https://xxxx.openai.azure.com/' # Azure API Base URL or use OPENAI_API_BASE environment variable 12 | model = 'azure_cpgpt4' # azure_ with model name, e.g. azure_cpgpt4 13 | # azure api_version is default to '2024-05-01-preview'. 
You can set by environment variable AZURE_API_VERSION 14 | 15 | agent = Agent('You are a helpful assistant', api_key=api_key, base_url=base_url, model=model) 16 | while True: 17 | query = input('Please input your query:') 18 | agent.user_input(query) 19 | print('-'*50) 20 | -------------------------------------------------------------------------------- /examples/17_qwen.py: -------------------------------------------------------------------------------- 1 | # 测试阿里千问 2 | api_key = 'sk-xxxx' 3 | base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" 4 | model = 'qwen-vl-max' 5 | 6 | from GeneralAgent import Agent 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | agent = Agent('You are a helpful assistant.', model=model, api_key=api_key, base_url=base_url, temperature=0.5, max_tokens=1000, top_p=0.9, frequency_penalty=1) 12 | agent.run(['what is in the image?', {'image': '../docs/images/self_call.png'}], display=True) -------------------------------------------------------------------------------- /examples/18_translate_agent.py: -------------------------------------------------------------------------------- 1 | # 翻译Agent 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | 6 | def split_text(text, max_token=3000, separators='\n'): 7 | """ 8 | Split the text into paragraphs, each paragraph has less than max_token tokens. 
9 | """ 10 | import re 11 | from GeneralAgent import skills 12 | pattern = "[" + re.escape(separators) + "]" 13 | paragraphs = list(re.split(pattern, text)) 14 | # print(len(paragraphs)) 15 | result = [] 16 | current = '' 17 | for paragraph in paragraphs: 18 | if skills.string_token_count(current) + skills.string_token_count(paragraph) > max_token: 19 | result.append(current) 20 | current = '' 21 | current += paragraph + '\n' 22 | if len(current) > 0: 23 | result.append(current) 24 | new_result = [] 25 | for x in result: 26 | if skills.string_token_count(x) > max_token: 27 | new_result.extend(split_text(x, max_token=max_token, separators=",。,.;;")) 28 | else: 29 | new_result.append(x) 30 | new_result = [x.strip() for x in new_result if len(x.strip()) > 0] 31 | return new_result 32 | 33 | 34 | def translate_text(text, language, worker=1, reflection_mode=False): 35 | """ 36 | Translates the given text into the specified language, e.g. translate_text('I love china', 'chinese') 37 | @param text: The text to be translated 38 | @param language: The target language 39 | @param worker: The number of threads to use 40 | @param reflection_mode: Whether to enable reflection mode. If True, the agent will reflect on the translation result and make improvements. 41 | """ 42 | from GeneralAgent import skills 43 | from GeneralAgent import Agent 44 | from concurrent.futures import ThreadPoolExecutor 45 | segments = split_text(text, 600) 46 | 47 | def _translate(index, content, language): 48 | role = f"You are an expert linguist, specializing in translation text to {language}." 49 | rules = [ 50 | "翻译结果不要包含在```里面", 51 | "表格、代码、数学公式、图片地址、参考文献等不需要翻译,保持原样", 52 | "只返回翻译和保留的全文,不要任何解释和描述。", 53 | "确保翻译的准确性、流畅性和风格一致性", 54 | "使用目标语言的语法、拼写和标点规则", 55 | "确保术语使用一致并反映源文本领域", 56 | "如果有文化背景,请考虑文化背景" 57 | ] 58 | role += '# rules: ' + '\n\n'.join([f'{i+1}. 
{rule}' for i, rule in enumerate(rules)]) 59 | agent = Agent(role) 60 | result = agent.run(f'请将以下内容翻译成{language}:\n\n{content}') 61 | if reflection_mode: 62 | reflection_prompt = f"""Give constructive criticism and helpful suggestions to improve the translation. 63 | When writing suggestions, pay attention to whether there are ways to improve the translation's 64 | (i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text), 65 | (ii) fluency (by applying {language} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions), 66 | (iii) style (by ensuring the translations reflect the style of the source text and take into account any cultural context), 67 | (iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by only ensuring you use equivalent idioms {language}). 68 | Write a list of specific, helpful and constructive suggestions for improving the translation. 69 | Each suggestion should address one specific part of the translation. 
70 | Output only the suggestions and nothing else.""" 71 | agent.run(reflection_prompt) 72 | result = agent.run(f'根据反思的结果,对上面的翻译结果进行修改,并只输出修改后的翻译结果。') 73 | return index, result 74 | 75 | with ThreadPoolExecutor(worker) as executor: 76 | futures = [executor.submit(_translate, index, content, language) for index, content in enumerate(segments)] 77 | results = [future.result() for future in futures] 78 | results.sort(key=lambda x: x[0]) 79 | return '\n\n'.join([x[1] for x in results]) 80 | 81 | if __name__ == '__main__': 82 | result = translate_text('I love china', 'chinese') 83 | print(result) -------------------------------------------------------------------------------- /examples/19_temporary_context.py: -------------------------------------------------------------------------------- 1 | # 演示临时上下文的用法 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | agent = Agent('You are a helpful assistant.') 8 | with agent.temporary_context(): 9 | agent.user_input('My name is Henry.') 10 | agent.user_input("What's my name?") 11 | 12 | # Expect: I don't know your name. How can I help you today? 
-------------------------------------------------------------------------------- /examples/1_function_call.py: -------------------------------------------------------------------------------- 1 | # 函数调用 2 | import logging 3 | 4 | logging.basicConfig( 5 | level=logging.DEBUG, 6 | format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s", 7 | handlers=[logging.StreamHandler()], 8 | ) 9 | from GeneralAgent import Agent 10 | from dotenv import load_dotenv 11 | 12 | load_dotenv() 13 | 14 | 15 | # 函数: 获取天气信息 16 | def get_weather(city: str) -> str: 17 | """ 18 | get weather information 19 | @city: str, city name 20 | @return: str, weather information 21 | """ 22 | # return f"{city} weather: sunny" 23 | weather = "sunny" 24 | print(f"{city} weather: {weather}") 25 | return weather 26 | 27 | 28 | # agent = Agent('你是一个天气小助手', functions=[get_weather], model='deepseek-chat') 29 | agent = Agent("你是一个天气小助手", functions=[get_weather]) 30 | agent.user_input("成都天气怎么样?") 31 | 32 | # 输出 33 | # ```python 34 | # city = "成都" 35 | # weather_info = get_weather(city) 36 | # weather_info 37 | # ``` 38 | # 成都的天气是晴天。 39 | # 请问还有什么我可以帮忙的吗? 40 | -------------------------------------------------------------------------------- /examples/20_load_memory.py: -------------------------------------------------------------------------------- 1 | # load messages 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | 8 | messages = [ 9 | {"role": "user", "content": "My name is Yummy."}, 10 | {"role": "assistant", "content": "Hello, Yummy! 
How can I assist you today?"}, 11 | ] 12 | agent = Agent('You are a helpful assistant.', messages=messages) 13 | response = agent.user_input("What's my name?") 14 | 15 | # Expect: Yummy in response 16 | -------------------------------------------------------------------------------- /examples/21_market_search.py: -------------------------------------------------------------------------------- 1 | # 市场信息搜集 2 | # 运行前置条件: 3 | # 1. 安装 BeautifulSoup 库:pip install beautifulsoup4 4 | # 2. 安装 playwright 库: pip install playwright 5 | from GeneralAgent import Agent 6 | from dotenv import load_dotenv 7 | from playwright.sync_api import sync_playwright 8 | from bs4 import BeautifulSoup 9 | from urllib.parse import quote 10 | import time 11 | 12 | def get_baidu_search_url(keyword): 13 | """生成百度搜索URL,只处理关键词和时间戳""" 14 | current_timestamp = int(time.time()) 15 | past_timestamp = current_timestamp - (24 * 3600) # 24小时前 16 | base_url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd={}&fenlei=256&rqlang=cn&rsv_dl=tb&rsv_enter=1&rsv_btype=i&tfflag=1&gpc=stf%3D{}%2C{}|stftype%3D1" 17 | return base_url.format(quote(keyword), past_timestamp, current_timestamp) 18 | 19 | def extract_news_articles(url): 20 | """提取网页中的新闻文章和URL""" 21 | with sync_playwright() as p: 22 | browser = p.chromium.launch() 23 | page = browser.new_page() 24 | try: 25 | page.goto(url) 26 | page.wait_for_load_state('networkidle') 27 | 28 | content = page.content() 29 | soup = BeautifulSoup(content, 'html.parser') 30 | 31 | articles = [] 32 | if 'baidu.com' in url: 33 | # 百度搜索结果处理 34 | search_results = soup.find_all('div', class_=['result-op', 'result']) 35 | for result in search_results: 36 | title_elem = result.find('h3') 37 | if title_elem: 38 | link = title_elem.find('a') 39 | if link: 40 | articles.append({ 41 | 'title': title_elem.get_text().strip(), 42 | 'url': link.get('href', ''), 43 | 'source': '百度搜索' 44 | }) 45 | else: 46 | # 懂车帝处理(保持原来的逻辑) 47 | links = soup.find_all('a', href=True) 48 |
base_url = "https://www.dongchedi.com" 49 | 50 | for link in links: 51 | href = link.get('href', '') 52 | title = link.get_text().strip() 53 | if title and href: # 保留所有可能的文章,让LLM判断 54 | full_url = href if href.startswith('http') else base_url + href 55 | articles.append({ 56 | 'title': title, 57 | 'url': full_url, 58 | 'source': url 59 | }) 60 | 61 | return articles 62 | 63 | except Exception as e: 64 | return f"提取文章时出错: {str(e)}" 65 | finally: 66 | browser.close() 67 | 68 | def process_single_url(url: str, keyword: str, search_description: str, agent: Agent): 69 | """处理单个URL的文章""" 70 | if 'baidu.com' in url: 71 | url = get_baidu_search_url(keyword) 72 | 73 | articles = extract_news_articles(url) 74 | if not isinstance(articles, list): 75 | return f"处理URL {url} 时出错: {articles}" 76 | 77 | if not articles: 78 | return f"URL {url} 未找到任何文章" 79 | 80 | prompt = f""" 81 | 请从以下文章列表中严格筛选出仅与"{keyword}"直接相关的最新新闻。 82 | 83 | 文章列表: 84 | {articles} 85 | 86 | 筛选标准: 87 | 1. 必须在标题中直接提到"{keyword}"或与{keyword}直接相关的产品/事件 88 | 2. 必须是最新的新闻内容,不要选择普通的产品介绍页面 89 | 3. 新闻必须具有时效性和重要性 90 | 91 | 请按照以下格式整理符合条件的文章: 92 | 标题,网址 93 | 94 | 要求: 95 | 1. 使用逗号分隔字段 96 | 2. 每行一篇文章 97 | 3. 第一行为表头 98 | 4. 如果标题包含逗号,用双引号括起来 99 | 5. 按相关性和重要性排序 100 | 6. 
只输出100%确定与{keyword}直接相关的文章 101 | """ 102 | return agent.run(prompt, display=False) 103 | 104 | def process_articles_with_command(urls: list, keyword: str, search_description: str = None): 105 | """处理所有URL的文章""" 106 | load_dotenv() 107 | 108 | if not search_description: 109 | search_description = f"寻找与{keyword}相关的最新资讯" 110 | 111 | agent = Agent(f'''你是一个专业的资讯分析助手。 112 | 你的任务是找出与用户需求相关的文章。 113 | 用户搜索需求:{search_description} 114 | ''') 115 | 116 | try: 117 | print(f"\n搜索关键词: {keyword}") 118 | print(f"搜索需求: {search_description}\n") 119 | 120 | all_results = [] 121 | for url in urls: 122 | print(f"\n处理URL: {url}") 123 | result = process_single_url(url, keyword, search_description, agent) 124 | all_results.append(f"\n来自 {url} 的结果:\n{result}") 125 | 126 | return "\n".join(all_results) 127 | 128 | except Exception as e: 129 | return f"处理出错: {str(e)}" 130 | 131 | # 使用示例 132 | if __name__ == "__main__": 133 | keyword = "新能源汽车" 134 | description = "寻找所有和新能源汽车可能相关的动态,只找和新能源汽车最直接相关的最新重要信息(企业,行业政策等)" 135 | 136 | urls = [ 137 | "https://www.dongchedi.com/", 138 | "https://www.baidu.com/s", 139 | "https://36kr.com/", 140 | ] 141 | 142 | result = process_articles_with_command(urls, keyword, description) 143 | print(result) 144 | -------------------------------------------------------------------------------- /examples/2_write_novel.py: -------------------------------------------------------------------------------- 1 | # 工作流: 写小说 2 | from GeneralAgent import Agent 3 | from GeneralAgent import skills 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | # 步骤0: 定义Agent 8 | agent = Agent('你是一个小说家') 9 | 10 | # 步骤1: 从用户处获取小说的名称和主题 11 | # topic = skills.input('请输入小说的名称和主题: ') 12 | topic = '小白兔吃糖不刷牙的故事' 13 | 14 | # 步骤2: 小说的概要 15 | summary = agent.run(f'小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。') 16 | 17 | # 步骤3: 小说的章节名称和概要列表 18 | chapters = agent.run('输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]', return_type=list) 19 | 20 | # 步骤4: 生成小说每一章节的详细内容 21 | contents 
= [] 22 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 23 | content = agent.run(f'对于章节: {chapter_title}\n{chapter_summary}. \n输出章节的详细内容,注意只返回内容,不要标题。') 24 | content = '\n'.join([x.strip() for x in content.split('\n')]) 25 | contents.append(content) 26 | 27 | # 步骤5: 将小说格式化写入文件 28 | with open('novel.md', 'w') as f: 29 | for index in range(len(chapters)): 30 | f.write(f'### {chapters[index][0]}\n') 31 | f.write(f'{contents[index]}\n\n') 32 | 33 | # 步骤6(可选): 将markdown文件转换为pdf文件 34 | 35 | # 步骤7: 输出小说文件给用户 36 | skills.output('你的小说已经生成[novel.md](novel.md)\n') -------------------------------------------------------------------------------- /examples/3_ai_search.py: -------------------------------------------------------------------------------- 1 | # AI搜索 2 | # 运行前置条件: 3 | # 1. 请先配置环境变量 SERPER_API_KEY (https://serper.dev/ 的API KEY); 4 | # 2. 安装 selenium 库: pip install selenium 5 | 6 | from GeneralAgent import Agent 7 | from GeneralAgent import skills 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | google_results = [] 12 | 13 | # 步骤1: 第一次google搜索 14 | question = input('请输入问题,进行 AI 搜索: ') 15 | # question = '周鸿祎卖车' 16 | content1 = skills.google_search(question) 17 | google_results.append(content1) 18 | 19 | # 步骤2: 第二次google搜索: 根据第一次搜索结构,获取继续搜索的问题 20 | agent = Agent('你是一个AI搜索助手。') 21 | querys = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{content1}\n\n。请问可以帮助用户,需要继续搜索的关键短语有哪些(最多3个,且和问题本身不太重合)?返回关键短语列表变量([query1, query2])', return_type=list) 22 | print(querys) 23 | for query in querys: 24 | content = skills.google_search(query) 25 | google_results.append(content) 26 | 27 | # 步骤3: 提取重点网页内容 28 | agent.clear() 29 | web_contents = [] 30 | google_result = '\n\n'.join(google_results) 31 | urls = agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n。哪些网页对于用户问题比较有帮助?请返回最重要的不超过5个的网页url列表变量([url1, url2, ...])', return_type=list) 32 | for url in urls: 33 | print(url) 34 | content = skills.web_get_text(url, wait_time=2) 35 | 
web_contents.append(content) 36 | 37 | # 步骤4: 输出结果 38 | agent.clear() 39 | web_content = '\n\n'.join(web_contents) 40 | agent.run(f'用户问题: \n{question}\n\n搜索引擎结果: \n{google_result}\n\n部分网页内容: \n{web_content}\n\n。请根据用户问题,搜索引擎结果,网页内容,给出用户详细的回答,要求按一定目录结构来输出,并且使用markdown格式。') -------------------------------------------------------------------------------- /examples/3_ai_search_simple.py: -------------------------------------------------------------------------------- 1 | # def main(messages, input, files, output_callback, event=None, workspace='./'): 2 | 3 | # question = input('') 4 | question = '周鸿祎卖车' 5 | from GeneralAgent import Agent 6 | from GeneralAgent import skills 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | agent = Agent('You are an AI search assistant.') 11 | 12 | # Google search 13 | google_result = skills.google_search(question) 14 | 15 | # Get important web 16 | urls = agent.run(f'User question: {question}\nSearch results: {google_result}\nReturn up to 5 most relevant URLs.', return_type=list) 17 | web_content = '\n\n'.join([skills.web_get_text(url, wait_time=2) for url in urls]) 18 | 19 | # Display the answer 20 | agent.clear() 21 | agent.run(f'User question: {question}\nSearch results: {google_result}\nWeb content: {web_content}\nProvide a detailed answer in markdown format.', display=True) -------------------------------------------------------------------------------- /examples/4_multi_agents.py: -------------------------------------------------------------------------------- 1 | # 多Agent配合完成任务 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | story_writer = Agent('你是一个故事创作家,根据大纲要求或者故事梗概,返回一个更加详细的故事内容。') 7 | humor_enhancer = Agent('你是一个润色作家,将一个故事进行诙谐润色,增加幽默元素。直接输出润色后的故事') 8 | 9 | # 禁用Python运行 10 | story_writer.disable_python_run = True 11 | humor_enhancer.disable_python_run = True 12 | 13 | # topic = skills.input('请输入小说的大纲要求或者故事梗概: ') 14 | topic = '写个小白兔吃糖不刷牙的故事,有教育意义。' 15 | initial_story = 
story_writer.run(topic) 16 | enhanced_story = humor_enhancer.run(initial_story) 17 | print(enhanced_story) -------------------------------------------------------------------------------- /examples/5_serialize.py: -------------------------------------------------------------------------------- 1 | # 序列化 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | # agent序列化位置,运行过程中会自动保存LLM的messages和python解析器的状态 8 | workspace='./5_serialize' 9 | 10 | role = 'You are a helpful agent.' 11 | agent = Agent(role, workspace=workspace) 12 | agent.user_input('My name is Shadow.') 13 | 14 | agent = None 15 | agent = Agent(role, workspace=workspace) 16 | agent.user_input('What is my name?') 17 | 18 | # Output: Your name is Shadow. How can I help you today, Shadow? 19 | 20 | # agent: 清除记忆 + python序列化状态 21 | agent.clear() 22 | 23 | agent.user_input('What is my name?') 24 | # I'm sorry, but I don't have access to your personal information, including your name. How can I assist you today? 25 | 26 | import shutil 27 | shutil.rmtree(workspace) -------------------------------------------------------------------------------- /examples/6_disable_python_run.py: -------------------------------------------------------------------------------- 1 | # Disable Python Run 2 | # 默认情况下,GeneralAgent会运行用户输入的Python代码。如果你不希望GeneralAgent运行Python代码,可以通过将 `disable_python_run` 属性设置为 `True` 来禁用Python运行。 3 | from GeneralAgent import Agent 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | agent = Agent('你是一个python专家,辅助用户解决python问题。') 9 | agent.disable_python_run = True 10 | agent.user_input('用python实现一个读取文件的函数') 11 | 12 | # 当然,这里是一个用Python实现的读取文件内容的函数: 13 | 14 | # ```python 15 | # def read_file(file_path): 16 | # try: 17 | # with open(file_path, 'r', encoding='utf-8') as file: 18 | # content = file.read() 19 | # return content 20 | # except FileNotFoundError: 21 | # return "File not found." 
22 | # except Exception as e: 23 | # return f"An error occurred: {e}" 24 | 25 | # # 示例用法 26 | # file_content = read_file('example.txt') 27 | # file_content 28 | # ``` 29 | 30 | # 这个函数 `read_file` 接受一个文件路径作为参数,尝试以UTF-8编码读取文件内容,并返回读取到的内容。如果文件未找到或发生其他错误,则返回相应的错误信息。 -------------------------------------------------------------------------------- /examples/7_hide_stream.py: -------------------------------------------------------------------------------- 1 | # 输出流控制:display=False 时隐藏输出流,不显示给用户;本例 display=True,会实时显示输出 2 | from GeneralAgent import Agent 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | agent = Agent('You are a helpful agent.', model='gpt-3.5-turbo') 8 | chengdu_description = agent.run('介绍一下成都', display=True) 9 | print(chengdu_description) -------------------------------------------------------------------------------- /examples/8_multi_model.py: -------------------------------------------------------------------------------- 1 | # 通过OpenAI Python SDK 支持其他大模型 2 | # 或者通过 https://github.com/songquanpeng/one-api 支持其他大模型 3 | from GeneralAgent import Agent 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | models = [ 9 | ('deepseek-chat', 32000, 'sk-xxx', 'https://api.deepseek.com/v1'), # DeepSeek官方支持 10 | ('moonshot-v1-128k', 128000, '$MOONSHOT_API_KEY', 'https://api.moonshot.cn/v1'), # Moonshot官方支持 11 | ('SparkDesk-v3.5', 4000, None, None), 12 | ('glm-4v', 128000, None, None), 13 | ('ERNIE-4.0-8K', 8000, None, None), 14 | ('qwen-turbo', 6000, None, None), 15 | ('hunyuan', 8000, None, None), 16 | ] 17 | 18 | for model, token_limit, api_key, base_url in models: 19 | agent = Agent('You are a helpful agent.', model=model, token_limit=token_limit, api_key=api_key, base_url=base_url) 20 | agent.user_input('介绍一下成都') -------------------------------------------------------------------------------- /examples/9_knowledge_files.py: -------------------------------------------------------------------------------- 1 | # 知识库 2 | from GeneralAgent import Agent 3 | from dotenv import 
load_dotenv 4 | 5 | load_dotenv() 6 | 7 | files = ['../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf'] 8 | workspace = '9_knowledge_files' 9 | agent = Agent('你是AI助手,用中文回复。', workspace=workspace, knowledge_files=files) 10 | agent.user_input(['Self call 是什么意思?']) 11 | 12 | # 清理掉 13 | import shutil 14 | shutil.rmtree(workspace) 15 | 16 | 17 | # 知识库默认使用 GeneralAgent.skills 中 embedding_texts 函数来对文本进行 embedding (默认是OpenAI的text-embedding-3-small模型) 18 | # 你可以重写 embedding_texts 函数,使用其他厂商 或者 本地的 embedding 方法,具体如下: 19 | 20 | # def new_embedding_texts(texts) -> [[float]]: 21 | # """ 22 | # 对文本数组进行embedding 23 | # """ 24 | # # 你的embedding方法 25 | # return result 26 | # from GeneralAgent import skills 27 | # skills.embedding_texts = new_embedding_texts -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "GeneralAgent" 3 | version = "0.3.29" 4 | description = "General Agent: From LLM to Agent" 5 | authors = ["Chen Li "] 6 | license = "Apache 2.0" 7 | readme = "README.md" 8 | repository = "https://github.com/CosmosShadow/GeneralAgent" 9 | packages = [ 10 | { include = "GeneralAgent" }, 11 | ] 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.8.1" 15 | requests = ">=2.31.0" 16 | tinydb = ">=4.8.0" 17 | openai = ">=1.3.3" 18 | jinja2 = ">=3.1.2" 19 | numpy = ">=1.24.4" 20 | tiktoken = ">=0.5.1" 21 | llama-index =">=0.10.44" 22 | codyer = ">=0.0.1" 23 | 24 | [tool.poetry.group.dev.dependencies] 25 | pytest = "^7.4.3" 26 | pytest-asyncio = "^0.21.1" 27 | pymupdf = "1.24.13" 28 | 29 | 30 | [[tool.poetry.source]] 31 | name = "PyPI" 32 | priority="primary" 33 | 34 | 35 | [build-system] 36 | requires = ["poetry-core"] 37 | build-backend = "poetry.core.masonry.api" 38 | 39 | 40 | [tool.poetry.scripts] 41 | GeneralAgent= 'GeneralAgent.cli:main' 
-------------------------------------------------------------------------------- /test/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/.gitkeep -------------------------------------------------------------------------------- /test/data/Nougat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/Nougat.pdf -------------------------------------------------------------------------------- /test/data/Nougat_piece.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/Nougat_piece.pdf -------------------------------------------------------------------------------- /test/data/a.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/a.py -------------------------------------------------------------------------------- /test/data/hello.py: -------------------------------------------------------------------------------- 1 | print('hello world') -------------------------------------------------------------------------------- /test/data/test.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CosmosShadow/GeneralAgent/9d8519c39968db55f1fb980e33e0bc544fcaf30f/test/data/test.jpeg -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | asyncio: asyncio mark 
-------------------------------------------------------------------------------- /test/test_agent.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from GeneralAgent import Agent 3 | 4 | 5 | def test_math(): 6 | """数学计算测试. 使用run直接返回python表达式的值""" 7 | agent = Agent() 8 | result = agent.run("calculate 0.99 ** 1000", return_type=float) 9 | assert 4.317124741065786e-05 == result 10 | 11 | 12 | def test_function(): 13 | """函数调用测试""" 14 | 15 | def get_weather(city: str) -> str: 16 | """ 17 | get weather information 18 | @city: str, city name 19 | @return: str, weather information 20 | """ 21 | return f"{city} weather: sunny" 22 | 23 | agent = Agent("你是一个天气小助手", functions=[get_weather]) 24 | result = agent.user_input("成都天气怎么样?") 25 | assert "晴" in result or "sunny" in result 26 | 27 | 28 | def test_write_novel(): 29 | # 工作流: 写小说 30 | novel_path = "novel.md" 31 | # 清理掉已经有的小说 32 | import os 33 | 34 | if os.path.exists(novel_path): 35 | os.remove(novel_path) 36 | try: 37 | 38 | # 步骤0: 定义Agent 39 | agent = Agent("你是一个小说家") 40 | 41 | # 步骤1: 从用户处获取小说的名称和主题 42 | # topic = skills.input('请输入小说的名称和主题: ') 43 | topic = "小白兔吃糖不刷牙的故事" 44 | 45 | # 步骤2: 小说的概要 46 | summary = agent.run( 47 | f"小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。" 48 | ) 49 | 50 | # 步骤3: 小说的章节名称和概要列表 51 | chapters = agent.run( 52 | "输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]", 53 | return_type=list, 54 | ) 55 | 56 | # 步骤4: 生成小说每一章节的详细内容 57 | contents = [] 58 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 59 | content = agent.run( 60 | f"对于章节: {chapter_title}\n{chapter_summary}. 
\n输出章节的详细内容,注意只返回内容,不要标题。" 61 | ) 62 | content = "\n".join([x.strip() for x in content.split("\n")]) 63 | contents.append(content) 64 | 65 | # 步骤5: 将小说格式化写入文件 66 | with open(novel_path, "w") as f: 67 | for index in range(len(chapters)): 68 | f.write(f"### {chapters[index][0]}\n") 69 | f.write(f"{contents[index]}\n\n") 70 | 71 | except Exception as e: 72 | pass 73 | finally: 74 | # 验证小说存在,而且内容不为空 75 | assert os.path.exists(novel_path) 76 | with open(novel_path, "r") as f: 77 | content = f.read() 78 | assert content != "" 79 | assert "### " in content 80 | # 清理掉 81 | if os.path.exists(novel_path): 82 | os.remove(novel_path) 83 | 84 | 85 | def test_knowledge(): 86 | # 知识库 87 | workspace = "9_knowledge_files" 88 | try: 89 | files = ["../docs/paper/General_Agent__Self_Call_And_Stack_Memory.pdf"] 90 | agent = Agent( 91 | "你是AI助手,用中文回复。", workspace=workspace, knowledge_files=files 92 | ) 93 | result = agent.user_input("Self call 是什么意思?") 94 | assert "LLM" in result 95 | except Exception as e: 96 | raise e 97 | finally: 98 | # 清理掉 99 | import shutil 100 | 101 | shutil.rmtree(workspace) 102 | 103 | 104 | def test_with_query_clear_data_0(): 105 | workspace = "test_with_query_clear_data_0" 106 | import os 107 | 108 | if os.path.exists(workspace): 109 | import shutil 110 | 111 | shutil.rmtree(workspace) 112 | agent = Agent("You are a helpful assistant.", workspace=workspace) 113 | with agent.temporary_context(): 114 | agent.user_input("My name is Henry.") 115 | import json 116 | 117 | with open(f"{workspace}/memory.json", "r") as f: 118 | memory = json.load(f) 119 | assert len(memory) == 0 120 | 121 | 122 | def test_with_query_clear_data_1(): 123 | agent = Agent("You are a helpful assistant.", hide_python_code=True) 124 | with agent.temporary_context(): 125 | agent.user_input("My name is Henry.") 126 | response = agent.user_input("What's my name?") 127 | assert "Henry" not in response 128 | 129 | 130 | def test_with_query_save_data(): 131 | workspace = 
"test_with_query_save_data" 132 | import os 133 | 134 | if os.path.exists(workspace): 135 | import shutil 136 | 137 | shutil.rmtree(workspace) 138 | agent = Agent("You are a helpful assistant.", workspace=workspace) 139 | agent.user_input("My name is Henry.") 140 | with agent.temporary_context(): 141 | agent.user_input("My name is Jimmy.") 142 | agent.user_input("My name is Yummy.") 143 | import json 144 | 145 | with open(f"{workspace}/memory.json", "r") as f: 146 | memory = json.load(f) 147 | assert len(memory) == 4 148 | 149 | 150 | def test_with_query_clear_data_with_exception_0(): 151 | workspace = "test_with_query_clear_data_with_exception_0" 152 | import os 153 | 154 | if os.path.exists(workspace): 155 | import shutil 156 | 157 | shutil.rmtree(workspace) 158 | try: 159 | agent = Agent("You are a helpful assistant.", workspace=workspace) 160 | with agent.temporary_context(): 161 | agent.user_input("My name is Henry.") 162 | raise Exception("test exception") 163 | except Exception: 164 | ... 165 | finally: 166 | import json 167 | 168 | with open(f"{workspace}/memory.json", "r") as f: 169 | memory = json.load(f) 170 | assert len(memory) == 0 171 | 172 | 173 | def test_with_query_clear_data_with_exception_1(): 174 | workspace = "test_with_query_clear_data_with_exception_1" 175 | import os 176 | 177 | if os.path.exists(workspace): 178 | import shutil 179 | 180 | shutil.rmtree(workspace) 181 | try: 182 | agent = Agent("You are a helpful assistant.", workspace=workspace) 183 | agent.user_input("My name is Yummy.") 184 | with agent.temporary_context(): # no_memory() 185 | agent.user_input("My name is Henry.") 186 | raise Exception("test exception") 187 | except Exception: 188 | ... 
189 | finally: 190 | import json 191 | 192 | with open(f"{workspace}/memory.json", "r") as f: 193 | memory = json.load(f) 194 | assert len(memory) == 2 195 | 196 | 197 | def test_load_error_messages(): 198 | messages = [ 199 | {"role": "user", "text": "My name is Yummy."}, 200 | {"role": "assistant", "content": "Hello, Yummy! How can I assist you today?"}, 201 | ] 202 | with pytest.raises(AssertionError, match="message format wrong"): 203 | agent = Agent("You are a helpful assistant.", messages=messages) 204 | agent.user_input("What's my name?") 205 | 206 | 207 | def test_load_messages(): 208 | messages = [ 209 | {"role": "user", "content": "My name is Yummy."}, 210 | {"role": "assistant", "content": "Hello, Yummy! How can I assist you today?"}, 211 | ] 212 | agent = Agent("You are a helpful assistant.", messages=messages) 213 | response = agent.user_input("What's my name?") 214 | assert "Yummy" in response 215 | -------------------------------------------------------------------------------- /test/test_examples.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from unittest.mock import patch 4 | from GeneralAgent import Agent, skills 5 | 6 | 7 | def test_base_usage(): 8 | agent = Agent("You are a helpful assistant.") 9 | with patch("builtins.print") as mock_print: 10 | response = agent.user_input('Your name is "Tom". 
Who are you?') 11 | assert "Tom" in response 12 | 13 | 14 | def test_function_call(): 15 | def get_weather(city: str) -> str: 16 | print(f"{city} weather: 晴天") 17 | 18 | agent = Agent("你是一个天气小助手", functions=[get_weather]) 19 | response = agent.user_input("成都天气怎么样?") 20 | assert "晴天" in response 21 | 22 | 23 | def test_write_novel(): 24 | agent = Agent("你是一个小说家") 25 | topic = "小白兔吃糖不刷牙的故事" 26 | summary = agent.run( 27 | f"小说的名称和主题是: {topic},扩展和完善一下小说概要。要求具备文艺性、教育性、娱乐性。" 28 | ) 29 | chapters = agent.run( 30 | "输出小说的章节名称和每个章节的概要,返回列表 [(chapter_title, chapter_summary), ....]", 31 | return_type=list, 32 | ) 33 | contents = [] 34 | for index, (chapter_title, chapter_summary) in enumerate(chapters): 35 | content = agent.run( 36 | f"对于章节: {chapter_title}\n{chapter_summary}. \n输出章节的详细内容,注意只返回内容,不要标题。" 37 | ) 38 | content = "\n".join([x.strip() for x in content.split("\n")]) 39 | contents.append(content) 40 | with open("novel.md", "w") as f: 41 | for index in range(len(chapters)): 42 | f.write(f"### {chapters[index][0]}\n") 43 | f.write(f"{contents[index]}\n\n") 44 | skills.output("你的小说已经生成[novel.md](novel.md)\n") 45 | # 判断文件是否存在 46 | assert os.path.exists("novel.md") 47 | # 判断文件字符数量是否大于 200 48 | with open("novel.md", "r") as f: 49 | assert len(f.read()) > 200 50 | # 删除文件 51 | os.remove("novel.md") 52 | 53 | 54 | def test_multi_agents(): 55 | from GeneralAgent import Agent 56 | 57 | story_writer = Agent( 58 | "你是一个故事创作家,根据大纲要求或者故事梗概,返回一个更加详细的故事内容。" 59 | ) 60 | humor_enhancer = Agent( 61 | "你是一个润色作家,将一个故事进行诙谐润色,增加幽默元素。直接输出润色后的故事" 62 | ) 63 | story_writer.disable_python_run = True 64 | humor_enhancer.disable_python_run = True 65 | topic = "写个小白兔吃糖不刷牙的故事,有教育意义。" 66 | initial_story = story_writer.run(topic) 67 | assert "小白兔" in initial_story 68 | enhanced_story = humor_enhancer.run(initial_story) 69 | assert "小白兔" in enhanced_story 70 | 71 | 72 | def test_serialize(): 73 | workspace = "./5_serialize" 74 | # 如果文件存在则删除 75 | if os.path.exists(workspace): 76 | 
shutil.rmtree(workspace) 77 | role = "You are a helpful agent." 78 | agent = Agent(role, workspace=workspace) 79 | agent.user_input("My name is Shadow.") 80 | agent = Agent(role, workspace=workspace) 81 | response = agent.user_input("What is my name?") 82 | assert "Shadow" in response 83 | agent.clear() 84 | response = agent.user_input("What is my name?") 85 | assert "Shadow" not in response 86 | shutil.rmtree(workspace) 87 | 88 | 89 | def test_disable_python_run(): 90 | # 在当前目录下创建 a.txt 并写入 “My name is Henry.” 91 | # 如果文件存在则删除 92 | if os.path.exists("a.txt"): 93 | os.remove("a.txt") 94 | with open("a.txt", "w") as f: 95 | f.write("My name is Henry.") 96 | agent = Agent("You are a helpful assistant.") 97 | agent.disable_python_run = True 98 | response = agent.user_input("帮我读取 ./a.txt 中的内容") 99 | assert "Henry" not in response 100 | 101 | 102 | def test_enable_python_run(): 103 | # 在当前目录下创建 a.txt 并写入 “My name is Henry.” 104 | # 如果文件存在则删除 105 | if os.path.exists("a.txt"): 106 | os.remove("a.txt") 107 | with open("a.txt", "w") as f: 108 | f.write("My name is Henry.") 109 | agent = Agent("You are a helpful assistant.") 110 | agent.disable_python_run = False 111 | response = agent.user_input("帮我读取 ./a.txt 中的内容") 112 | assert "Henry" in response 113 | 114 | 115 | def test_hide_stream(capsys): 116 | agent = Agent("You are a helpful assistant.") 117 | agent.hide_stream = False 118 | agent.run("一句话介绍成都", display=False) 119 | captured = capsys.readouterr() 120 | assert len(captured.out) == 0 121 | 122 | 123 | def test_show_stream(capsys): 124 | agent = Agent("You are a helpful assistant.") 125 | agent.hide_stream = False 126 | agent.run("一句话介绍成都", display=True) 127 | captured = capsys.readouterr() 128 | assert len(captured.out) > 0 129 | 130 | 131 | def test_deepseek_chat(): 132 | model = "deepseek-chat" 133 | token_limit = 32000 134 | api_key = os.environ.get("DEEPSEEK_API_KEY") 135 | base_url = "https://api.deepseek.com/v1" 136 | agent = Agent( 137 | "You are a helpful 
agent.", 138 | model=model, 139 | token_limit=token_limit, 140 | api_key=api_key, 141 | base_url=base_url, 142 | ) 143 | response = agent.run("一句话介绍成都", display=False) 144 | print(response) 145 | assert "成都" in response 146 | 147 | 148 | def test_add_knowledge_files(): 149 | workspace = "./knowledge_files" 150 | if os.path.exists(workspace): 151 | shutil.rmtree(workspace) 152 | file_name = "test_knowledge_file.txt" 153 | with open(file_name, "w") as f: 154 | f.write("My name is Henry") 155 | files = [ 156 | file_name, 157 | ] 158 | agent = Agent( 159 | "你是AI助手,用中文回复。", workspace=workspace, knowledge_files=files 160 | ) 161 | response = agent.user_input(["我叫什么名字?"]) 162 | shutil.rmtree(workspace) 163 | os.remove(file_name) 164 | assert "Henry" in response 165 | 166 | 167 | def test_rag_function(): 168 | def rag_function(messages): 169 | input = messages[-1]["content"] 170 | print("user input:", input) 171 | return "Background: GeneralAgent is a Python library for building AI assistants. It provides a simple API for building conversational agents." 172 | 173 | agent = Agent("You are a helpful assistant", rag_function=rag_function) 174 | response = agent.user_input("What is GeneralAgent?") 175 | assert "GeneralAgent is a Python library" in response 176 | 177 | 178 | def test_collection_and_store(): 179 | role = """ 180 | 你是一个专业的导游。 181 | 你的主要工作: 和游客讲解城市的景点。 182 | 183 | # 1、旅游沟通例子 184 | 用户: 我想去成都玩 185 | 你: 成都是一所宜居的城市,安逸的很 186 | 用户: 成都有什么好吃的? 
187 | 你: 火锅 188 | 189 | 当城市确认,直接输出python代码,使用 save_travel_guide_record 函数保存旅游攻略。 190 | 191 | 192 | travel_guide_record = \"\"\" 193 | 城市: 成都 194 | 美食: 火锅 195 | \"\"\" 196 | save_travel_guide_record(travel_guide_record) 197 | 198 | """ 199 | 200 | stop = False 201 | 202 | def save_travel_guide_record(medical_record): 203 | with open("test_collection.txt", "a") as f: 204 | f.write(medical_record) 205 | global stop 206 | stop = True 207 | return "旅行攻略已保存" 208 | 209 | # 删除文件 210 | if os.path.exists("test_collection.txt"): 211 | os.remove("test_collection.txt") 212 | agent = Agent(role, functions=[save_travel_guide_record], hide_python_code=True) 213 | agent.user_input("你想去哪玩?") 214 | agent.user_input("成都") 215 | agent.user_input("") 216 | with open("test_collection.txt", "r") as f: 217 | content = f.read() 218 | assert "成都" in content 219 | 220 | 221 | def test_image_input(): 222 | agent = Agent("You are a helpful assistant.") 223 | response = agent.user_input( 224 | ["What animal in the picture?", {"image": "test/data/test.jpeg"}] 225 | ) 226 | assert "dog" in response 227 | 228 | 229 | def test_temporary_context(): 230 | agent = Agent("You are a helpful assistant.") 231 | with agent.temporary_context(): 232 | agent.user_input("My name is Henry.") 233 | response = agent.user_input("What's my name?") 234 | assert "Henry" not in response 235 | 236 | 237 | def test_load_messages(): 238 | messages = [ 239 | {"role": "user", "content": "My name is Yummy."}, 240 | {"role": "assistant", "content": "Hello, Yummy! 
How can I assist you today?"}, 241 | ] 242 | agent = Agent("You are a helpful assistant.", messages=messages) 243 | response = agent.user_input("What's my name?") 244 | assert "Yummy" in response 245 | -------------------------------------------------------------------------------- /test/test_interpreter_python.py: -------------------------------------------------------------------------------- 1 | import os 2 | from GeneralAgent.interpreter import PythonInterpreter 3 | 4 | 5 | def test_python_interpreter(): 6 | # test run 7 | serialize_path = "test/data/test_interpreter.bin" 8 | if os.path.exists(serialize_path): 9 | os.remove(serialize_path) 10 | 11 | interpreter = PythonInterpreter(serialize_path=serialize_path) 12 | result, is_stop = interpreter.output_parse( 13 | '```python\n#run code\n"hello world"\n```' 14 | ) 15 | print(result) 16 | assert "hello world" in result.strip() 17 | # assert is_stop is False 18 | 19 | # test aug assignment 20 | interpreter.set_variable("a", 10) 21 | result, is_stop = interpreter.output_parse("```python\n#run code\na += 1\n```") 22 | a = interpreter.get_variable("a") 23 | assert a == 11 24 | 25 | result, is_stop = interpreter.output_parse("```python\n#run code\na += 1\n```") 26 | a = interpreter.get_variable("a") 27 | assert a == 12 28 | 29 | # test ann assignment 30 | result, is_stop = interpreter.output_parse("```python\n#run code\na: int = 1\n```") 31 | a = interpreter.get_variable("a") 32 | assert a == 1 33 | 34 | # test normal assignment 35 | result, is_stop = interpreter.output_parse("```python\n#run code\nb = 1\n```") 36 | b = interpreter.get_variable("b") 37 | assert b == 1 38 | 39 | # test multiline code 40 | result, is_stop = interpreter.output_parse( 41 | "```python\n#run code\n[\n 1,\n 2,\n 3\n]\n```" 42 | ) 43 | assert "[1, 2, 3]" == result.split("\n")[-2] 44 | 45 | # test multiple assignment 46 | result, is_stop = interpreter.output_parse("```python\n#run code\na, b = 1, 2\n```") 47 | a = interpreter.get_variable("a") 
48 | b = interpreter.get_variable("b") 49 | assert a == 1 50 | assert b == 2 51 | assert "(1, 2)" == result.split("\n")[-2] 52 | 53 | 54 | def test_stack_code(): 55 | serialize_path = "test/data/test_interpreter.bin" 56 | if os.path.exists(serialize_path): 57 | os.remove(serialize_path) 58 | interpreter = PythonInterpreter(serialize_path=serialize_path) 59 | code = """ 60 | ```python 61 | #run code 62 | a = 10 63 | code = "```python\\na += 1\\n```" 64 | interpreter.output_parse(code) 65 | a 66 | ``` 67 | """ 68 | interpreter.set_variable("interpreter", interpreter) 69 | result, is_stop = interpreter.output_parse(code) 70 | # print(result) 71 | assert "11" in result.strip() 72 | 73 | 74 | # output: 75 | # 11 76 | # python runs result: 77 | # run successfully 78 | 79 | 80 | def test_run_code(): 81 | code = """ 82 | def test(): 83 | return "hello world" 84 | test() 85 | """ 86 | interpreter = PythonInterpreter() 87 | result, is_stop = interpreter.run_code(code) 88 | # print(result) 89 | assert "hello world" in result.strip() 90 | -------------------------------------------------------------------------------- /test/test_link_memory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fitz 3 | import pytest 4 | import asyncio 5 | 6 | 7 | @pytest.mark.skip(reason="removed temporarily") 8 | def test_read_paper(): 9 | from GeneralAgent.memory import LinkMemory 10 | 11 | serialize_path = "./summary_memory.json" 12 | if os.path.exists(serialize_path): 13 | os.remove(serialize_path) 14 | memory = LinkMemory(serialize_path=serialize_path) 15 | file_path = "./data/Nougat_piece.pdf" 16 | doc = fitz.open(file_path) 17 | content = "" 18 | for page in doc: 19 | content += "\n" + page.get_text() 20 | memory.add_memory(content, output_callback=None) 21 | spark = memory.get_memory() 22 | # print(f'-----------\n{spark}\n-----------') 23 | assert "Introduction" in spark 24 | 25 | messages = [ 26 | {"role": "user", "content": 
"论文有哪些贡献?"}, 27 | ] 28 | spark = memory.get_memory(messages) 29 | print(f"-----------\n{spark}\n-----------") 30 | assert "pdf" in spark.lower() 31 | -------------------------------------------------------------------------------- /test/test_skills.py: -------------------------------------------------------------------------------- 1 | from GeneralAgent.skills.python_envs import python_line_is_variable_expression 2 | 3 | 4 | def test_python_line_is_variable_expression(): 5 | assert python_line_is_variable_expression("a") 6 | assert python_line_is_variable_expression("a, b") 7 | assert python_line_is_variable_expression("a + b") 8 | assert python_line_is_variable_expression("vars[0]") 9 | assert python_line_is_variable_expression('scrape_web("https://www.baidu.com")[0]') 10 | 11 | assert python_line_is_variable_expression(" vars[0]") is False 12 | assert python_line_is_variable_expression("print(a)") is False 13 | assert python_line_is_variable_expression("x = a + b") is False 14 | -------------------------------------------------------------------------------- /test/test_skills_llm_inference.py: -------------------------------------------------------------------------------- 1 | from GeneralAgent import skills 2 | 3 | 4 | def test_embedding_texts(): 5 | texts = ["我爱唱歌", "I love singing"] 6 | embeddings = skills.embedding_texts(texts) 7 | a, b = embeddings[0], embeddings[1] 8 | assert skills.cos_sim(a, a) >= 0.999 9 | assert skills.cos_sim(a, b) > 0.7 10 | 11 | 12 | def test_llm_inference(): 13 | messages = [ 14 | {"role": "system", "content": "you are a helpful assistant"}, 15 | {"role": "user", "content": "1 + 1 = ?"}, 16 | ] 17 | result = "" 18 | for x in skills.llm_inference(messages, stream=True): 19 | if x is None: 20 | break 21 | result += x 22 | assert "2" in result 23 | -------------------------------------------------------------------------------- /test/test_skills_memory_utils.py: 
# --------------------------------------------------------------------------
# test/test_skills_memory_utils.py (continued)
# --------------------------------------------------------------------------
import pytest
import asyncio


# Raw text of the opening pages of the Nougat paper; input fixture for the
# segmentation / summarization tests below.
content = """
Nougat: Neural Optical Understanding for Academic Documents
Lukas Blecher⇤ Guillem Cucurull Thomas Scialom Robert Stojnic Meta AI
Abstract
Scientific knowledge is predominantly stored in books and scientific journals, often in the form of PDFs. However, the PDF format leads to a loss of semantic information, particularly for mathematical expressions. We propose Nougat (Neural Optical Understanding for Academic Documents), a Visual Transformer model that performs an Optical Character Recognition (OCR) task for processing scientific documents into a markup language, and demonstrate the effectiveness of our model on a new dataset of scientific documents. The proposed approach offers a promising solution to enhance the accessibility of scientific knowledge in the digital age, by bridging the gap between human- readable documents and machine-readable text. We release the models and code to accelerate future work on scientific text recognition.
1 Introduction
The majority of scientific knowledge is stored in books or published in scientific journals, most commonly in the Portable Document Format (PDF). Next to HTML, PDFs are the second most prominent data format on the internet, making up 2.4% of common crawl [1]. However, the information stored in these files is very difficult to extract into any other formats. This is especially true for highly specialized documents, such as scientific research papers, where the semantic information of mathematical expressions is lost.
Existing Optical Character Recognition (OCR) engines, such as Tesseract OCR [2], excel at detecting and classifying individual characters and words in an image, but fail to understand the relationship between them due to their line-by-line approach. This means that they treat superscripts and subscripts in the same way as the surrounding text, which is a significant drawback for mathematical expressions. In mathematical notations like fractions, exponents, and matrices, relative positions of characters are crucial.
Converting academic research papers into machine-readable text also enables accessibility and searchability of science as a whole. The information of millions of academic papers can not be fully accessed because they are locked behind an unreadable format. Existing corpora, such as the S2ORC dataset [3], capture the text of 12M2 papers using GROBID [4], but are missing meaningful representations of the mathematical equations.
To this end, we introduce Nougat, a transformer based model that can convert images of document pages to formatted markup text.
The primary contributions in this paper are
• Release of a pre-trained model capable of converting a PDF to a lightweight markup language. We release the code and the model on GitHub3
• We introduce a pipeline to create dataset for pairing PDFs to source code
• Our method is only dependent on the image of a page, allowing access to scanned papers and books
⇤Correspondence to: lblecher@meta.com
2The paper reports 8.1M papers but the authors recently updated the numbers on the GitHub page https://github.com/allenai/s2orc 3 https://github.com/facebookresearch/nougat
"""

# Pre-summarized "background" segments in the memory-utils format:
# "#NN <summary> Detail in <<link>>, <<link>>, ...".
# NOTE(review): the link titles inside the "Detail in" lists appear to have
# been stripped down to empty "<>" by a markup-removing export — confirm
# against repository history. test_extract_info's assertions do not depend
# on these titles, so the fixture is left as found.
background = """
#01 Nougat is a Visual Transformer model that performs Optical Character Recognition (OCR) on scientific documents, converting them into a markup language. It aims to enhance the accessibility and searchability of scientific knowledge by bridging the gap between human-readable documents and machine-readable text. The model has been released along with the code for future work on scientific text recognition. Detail in <>, <>, <>, <>, <>, <>
#02 The content discusses the process of splitting a document into pages and predicting the page numbers for each paragraph. It also mentions the use of fuzzy matching to find the exact position within a paragraph. The content acknowledges that there may be artifacts and missing elements in the ground truth data. The results and evaluation section mentions the metrics used to evaluate the model's performance. Detail in <>, <>, <>, <>, <>, <>
#03 The model presented, Nougat, is an end-to-end trainable encoder-decoder transformer-based model for converting document pages to markup. It relies solely on the rasterized document page and does not rely on OCR or embedded text representations. The model has shown potential for extracting text from digital-born PDFs and converting scanned papers and textbooks. The model's utility is limited by factors such as repetitions and the need for improvements in handling different document styles. The model's generation speed is slower compared to classical approaches but can correctly parse mathematical expressions. Future work includes addressing the tendency for the model to collapse into a repeating loop and improving the handling of inconsistencies across the document. Detail in <>, <>, <>, <>, <>, <>
#04 Training systems for handwritten mathematical expression recognition, generating LaTeX sequences from math formula images using deep neural networks, and pre-training models for document understanding and image recognition. Detail in <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>
#05 The content discusses various topics including a dataset composition, examples of text generation, derivative rules, pressure calculations, gas mixtures, and molecular Hamiltonian. Detail in <>, <>, <>, <>, <>, <>
#06 jf (t) is a mathematical function. Detail in <>, <>
#07 The content discusses various models and their performance in the field of VQA (Visual Question Answering). It also includes information about a proposed cycle-consistent training framework and its impact on model performance. Additionally, there is mention of evaluation metrics and the effect of hierarchical generation on story generation models. Detail in <>, <>, <>, <>, <>, <>
"""


def test_parse_segment_llm_result():
    """_parse_segment_llm_result turns '<<Title>>\\nstart: end' blocks into a
    {title: (start, end)} mapping."""
    from GeneralAgent.skills.memory_utils import _parse_segment_llm_result

    # NOTE(review): the original fixture's titles were stripped to "<>" by a
    # markup-removing export; only "Abstract" and the node count are pinned
    # by the assertions below, so the other three titles are reconstructed
    # placeholders — confirm against repository history.
    string = (
        "<<Nougat: Neural Optical Understanding for Academic Documents>>\n0: 15\n\n"
        "<<Abstract>>\n6: 15\n\n"
        "<<1 Introduction>>\n17: 32\n\n"
        "<<2 Related Work>>\n34: 38"
    )
    nodes = _parse_segment_llm_result(string)
    assert nodes["Abstract"] == (6, 15)
    assert len(nodes) == 4


def test_segment_text():
    """segment_text splits the paper text into at least one titled node."""
    from GeneralAgent import skills

    nodes = skills.segment_text(content)
    assert len(nodes) > 0
    # assert 'Abstract' in ' '.join(nodes.keys())


def test_summarize_text():
    """summarize_text must return something strictly shorter than its input."""
    from GeneralAgent import skills

    summary = skills.summarize_text(content)
    # print(summary)
    assert len(summary) < len(content)


def test_extract_info():
    """extract_info answers from the background when relevant and yields
    '[Nothing]' for an unrelated question (here: asking about the weather)."""
    from GeneralAgent import skills

    task = "今天天气怎么样?"
    info = skills.extract_info(background, task)
    assert "[Nothing]" in info

    task = "论文有哪贡献?"
    info = skills.extract_info(background, task)
    print(info)

    task = "论文有哪些限制?"
    info = skills.extract_info(background, task)
    print(info)


def test_parse_extract_info():
    """parse_extract_info returns the referenced segment numbers and every
    '<<Title>>' link that appears, in document order."""
    # Renamed from `content` to avoid shadowing the module-level fixture.
    # NOTE(review): the '<<...>>' titles were stripped to '<>' by a
    # markup-removing export; they are reconstructed here 1:1 from the
    # expected-titles assertion below (1 under #01, 6 under the first #03,
    # 2 in the final 'Detail in' list) — confirm against repository history.
    llm_output = """
#01
<<Nougat: Neural Optical Understanding for Academic Documents>>

#03
<<Numbers and Punctuation>>
<<Math and Plain Text Scores>>
<<Results and Format of GROBID>>
<<Comparison of Approaches>>
<<Repetition Detection and Inference>>
<<Limitations and Future Work>>
#03 The model's utility is limited by factors such as repetitions and the need for improvements in handling different document styles. The model's generation speed is slower compared to classical approaches but can correctly parse mathematical expressions. Future work includes addressing the tendency for the model to collapse into a repeating loop and improving the handling of inconsistencies across the document. Detail in <<Repetition Detection and Inference>>, <<Limitations and Future Work>>
"""
    from GeneralAgent import skills

    numbers, titles = skills.parse_extract_info(llm_output)
    assert numbers == [1, 3, 3]
    # print(numbers)
    assert titles == [
        "Nougat: Neural Optical Understanding for Academic Documents",
        "Numbers and Punctuation",
        "Math and Plain Text Scores",
        "Results and Format of GROBID",
        "Comparison of Approaches",
        "Repetition Detection and Inference",
        "Limitations and Future Work",
        "Repetition Detection and Inference",
        "Limitations and Future Work",
    ]


# --------------------------------------------------------------------------
# test/test_stack_memory.py
# --------------------------------------------------------------------------
import os
import pytest
from GeneralAgent.utils import set_logging_level

set_logging_level()


@pytest.mark.skip(reason="removed temporarily")
def test_memory():
    """Build a small StackMemory tree, then verify its structure survives
    serialization to disk and reloading."""
    from GeneralAgent.memory import StackMemory, StackMemoryNode

    # Start from a clean serialized state.
    serialize_path = "./data/memory.json"
    if os.path.exists(serialize_path):
        os.remove(serialize_path)
    memory = StackMemory(serialize_path=serialize_path)
    node1 = StackMemoryNode(role="user", content="node1")
    node2 = StackMemoryNode(role="system", content="node2")
    node3 = StackMemoryNode(role="system", content="node3")
    node4 = StackMemoryNode(role="system", content="node4")
    memory.add_node(node1)
    memory.add_node_in(node1, node2)       # node2 becomes a child of node1
    memory.add_node_after(node2, node3)    # node3 is node2's sibling
    memory.add_node_after(node3, node4)    # node4 is node3's sibling
    # Resulting tree: [node1 [node2, node3, node4] ]

    def _assert_init_state(memory):
        # Re-fetch every node by id and check parent/children links.
        node1 = memory.get_node(1)
        node2 = memory.get_node(2)
        node3 = memory.get_node(3)
        node4 = memory.get_node(4)
        assert node1.role == "user"
        assert node1.childrens == [2, 3, 4]
        assert node2.parent == 1
        assert node3.parent == 1
        assert node4.parent == 1
        assert node2.childrens == []
        assert node3.childrens == []
        assert node4.childrens == []
        assert memory.get_node(1).childrens == [2, 3, 4]

    # first assert
    _assert_init_state(memory)
    description_1 = str(memory)

    # load from serialized file: structure and description must round-trip
    memory = None
    memory = StackMemory(serialize_path=serialize_path)
    _assert_init_state(memory)
    description_2 = str(memory)
    assert description_1 == description_2

    # test get node
    tmp_node = memory.get_node(3)
    assert tmp_node.content == "node3"

    # # get todo node
    # todo_node = memory.get_todo_node()
    # assert todo_node.node_id == 2

    # # success node
    # memory.success_node(todo_node)
    # assert memory.get_todo_node().node_id == 3

    if os.path.exists(serialize_path):
        os.remove(serialize_path)