├── .env
├── .gitignore
├── Agent
    ├── Action.py
    ├── AutoGPT.py
    ├── __init__.py
    ├── __pycache__
    │   ├── Action.cpython-39.pyc
    │   ├── AutoGPT.cpython-39.pyc
    │   └── __init__.cpython-39.pyc
    └── image_summarize.py
├── README.md
├── Tools
    ├── EmailTool.py
    ├── ExcelTool.py
    ├── FileQATool.py
    ├── FileTool.py
    ├── PythonTool.py
    ├── Tools.py
    ├── WriterTool.py
    ├── __init__.py
    ├── __pycache__
    │   ├── EmailTool.cpython-39.pyc
    │   ├── ExcelTool.cpython-39.pyc
    │   ├── FileQATool.cpython-39.pyc
    │   ├── FileTool.cpython-39.pyc
    │   ├── PythonTool.cpython-39.pyc
    │   ├── Tools.cpython-39.pyc
    │   ├── WriterTool.cpython-39.pyc
    │   ├── __init__.cpython-39.pyc
    │   └── openimage.cpython-39.pyc
    └── openimage.py
├── Utils
    ├── CallbackHandlers.py
    ├── PrintUtils.py
    ├── __init__.py
    └── __pycache__
    │   ├── CallbackHandlers.cpython-39.pyc
    │   ├── PrintUtils.cpython-39.pyc
    │   └── __init__.cpython-39.pyc
├── base64_images.txt
├── data
    ├── 2023年8月-9月销售记录.xlsx
    ├── test_image.png
    ├── 供应商名录.xlsx
    └── 供应商资格要求.pdf
├── examples.txt
├── full_messages.json
├── image
    ├── chat.png
    ├── inspectExcel.png
    ├── run_code.png
    ├── thought0.png
    ├── tool0.png
    ├── tool1.png
    └── tool_all.png
├── main.py
├── prompts
    ├── main
    │   ├── final_step.txt
    │   └── main.txt
    └── tools
    │   └── excel_analyser.txt
├── readme_CN.md
├── requirements.txt
├── test.py
└── tools_description.txt


/.env:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=""
2 | OPENAI_BASE_URL="https://api.fe8.cn/v1"


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | ### Python template
  2 | # Byte-compiled / optimized / DLL files
  3 | __pycache__/
  4 | *.py[cod]
  5 | *$py.class
  6 | 
  7 | # C extensions
  8 | *.so
  9 | 
 10 | # Distribution / packaging
 11 | .Python
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | cover/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | .pybuilder/
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | #   For a library or package, you might want to ignore these files since the code is
 88 | #   intended to run in multiple environments; otherwise, check them in:
 89 | # .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # poetry
 99 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
101 | #   commonly ignored for libraries.
102 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 | 
105 | # pdm
106 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | #   in version control.
110 | #   https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 | 
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 | 
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 | 
120 | # SageMath parsed files
121 | *.sage.py
122 | 
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 | 
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 | 
136 | # Rope project settings
137 | .ropeproject
138 | 
139 | # mkdocs documentation
140 | /site
141 | 
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 | 
147 | # Pyre type checker
148 | .pyre/
149 | 
150 | # pytype static type analyzer
151 | .pytype/
152 | 
153 | # Cython debug symbols
154 | cython_debug/
155 | 
156 | # PyCharm
157 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
160 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 | 
163 | 


--------------------------------------------------------------------------------
/Agent/Action.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, Field
2 | from typing import List, Optional, Dict, Any
3 | 
4 | 
class Action(BaseModel):
    """One agent step: the tool or command to invoke and its arguments.

    The ``description`` strings are runtime data (they become part of the
    model's schema), so they are kept verbatim.
    """

    # Name of the tool or command selected by the model.
    name: str = Field(description="工具或指令名称")
    # Arguments for the tool, keyed by parameter name. The explicit
    # ``default=None`` keeps the field optional under every pydantic major
    # version, so an action without arguments still validates when the key
    # is omitted.
    args: Optional[Dict[str, Any]] = Field(
        default=None,
        description="工具或指令参数，由参数名称和参数值组成",
    )
8 | 


--------------------------------------------------------------------------------
/Agent/AutoGPT.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from typing import List, Optional, Tuple
  3 | 
  4 | from langchain.memory.chat_memory import BaseChatMemory
  5 | from langchain.tools.render import render_text_description
  6 | from langchain_core.language_models.chat_models import BaseChatModel
  7 | from langchain.memory import ConversationTokenBufferMemory, VectorStoreRetrieverMemory
  8 | from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
  9 | from langchain.schema.output_parser import StrOutputParser
 10 | from langchain.tools.base import BaseTool
 11 | from langchain.vectorstores.base import VectorStoreRetriever
 12 | from langchain_core.memory import BaseMemory
 13 | from langchain_core.prompts import PromptTemplate
 14 | from pydantic import ValidationError
 15 | from langchain.prompts import PromptTemplate
 16 | from langchain.schema.runnable import RunnableLambda
 17 | 
 18 | from langchain_core.messages import HumanMessage
 19 | from Agent.Action import Action
 20 | import jinja2
 21 | from Utils.CallbackHandlers import *
 22 | 
 23 | 
 24 | class AutoGPT:
 25 |     """AutoGPT：基于Langchain实现"""
 26 | 
 27 |     @staticmethod
 28 |     def __chinese_friendly(string) -> str:
 29 |         lines = string.split('\n')
 30 |         for i, line in enumerate(lines):
 31 |             if line.startswith('{') and line.endswith('}'):
 32 |                 try:
 33 |                     lines[i] = json.dumps(json.loads(line), ensure_ascii=False)
 34 |                 except:
 35 |                     pass
 36 |         return '\n'.join(lines)
 37 | 
 38 |     @staticmethod
 39 |     def __format_long_term_memory(task_description: str, memory: BaseChatMemory) -> str:
 40 |         return memory.load_memory_variables(
 41 |             {"prompt": task_description}
 42 |         )["history"]
 43 | 
 44 |     @staticmethod
 45 |     def __format_short_term_memory(memory: BaseChatMemory) -> str:
 46 |         messages = memory.chat_memory.messages
 47 |         string_messages = [messages[i].content for i in range(1, len(messages))]
 48 |         return "\n".join(string_messages)
 49 | 
 50 |     def __init__(
 51 |             self,
 52 |             llm: BaseChatModel,
 53 |             tools: List[BaseTool],
 54 |             work_dir: str = "./data",
 55 |             main_prompt_file: str = "./prompts/main/main.json",
 56 |             final_prompt_file: str = "./prompts/main/final_step.json",
 57 |             max_thought_steps: Optional[int] = 10,
 58 |             memery_retriever: Optional[VectorStoreRetriever] = None,
 59 |     ):
 60 |         self.llm = llm
 61 |         self.tools = tools
 62 |         self.work_dir = work_dir
 63 |         self.max_thought_steps = max_thought_steps
 64 |         self.memery_retriever = memery_retriever
 65 |         self.data_dict = {
 66 |             "context": {
 67 |                 "images": []
 68 |             },
 69 |             "task_description": {},
 70 |             "short_term_memory": {},
 71 |             "long_term_memory": {}
 72 |         }
 73 |         self.use_final_prompt = False
 74 |         # OutputFixingParser： 如果输出格式不正确，尝试修复
 75 |         self.output_parser = PydanticOutputParser(pydantic_object=Action)
 76 |         self.robust_parser = OutputFixingParser.from_llm(parser=self.output_parser, llm=self.llm)
 77 | 
 78 |         self.main_prompt_file = main_prompt_file
 79 |         self.final_prompt_file = final_prompt_file
 80 | 
 81 |         self.__init_prompt_templates()
 82 |         self.__init_chains()
 83 | 
 84 |         self.verbose_handler = ColoredPrintHandler(color=THOUGHT_COLOR)
 85 | 
 86 |     def __init_prompt_templates(self):
 87 | 
 88 |         self.main_prompt = PromptTemplate.from_file(
 89 |             self.main_prompt_file,
 90 |         ).partial(
 91 |             work_dir=self.work_dir,
 92 |             tools=render_text_description(self.tools),
 93 |             format_instructions=self.__chinese_friendly(
 94 |                 self.output_parser.get_format_instructions(),
 95 |             )
 96 |         )
 97 |         self.final_prompt = PromptTemplate.from_file(
 98 |             self.final_prompt_file, 
 99 |         )
100 | 
101 |     def __init_chains(self):
102 |         # 主流程的chain
103 |         #self.main_chain = (self.main_prompt | self.llm | StrOutputParser())
104 |         self.main_chain = (RunnableLambda(self.handle_messages) | self.llm | StrOutputParser())
105 |         # 最终一步的chain
106 |         #self.final_chain = (self.final_prompt | self.llm | StrOutputParser())
107 |         self.final_chain = (RunnableLambda(self.handle_messages) | self.llm | StrOutputParser())
108 |         
109 |     def handle_messages(self, data_dict):
110 |         """
111 |         Generate a structured message containing the prompt text and any additional elements like images.
112 |         """
113 |         
114 |         # 使用 main_prompt 生成基于当前状态的文本
115 |         if self.use_final_prompt:
116 |             formatted_texts = self.final_prompt.format(**data_dict)
117 |         else:
118 |             formatted_texts = self.main_prompt.format(**data_dict)
119 |        # print("data_dict之后的formatted_texts:", formatted_texts)
120 |         messages = [{
121 |             "type": "text",
122 |             "text": (
123 |                 f"{formatted_texts}"
124 |             )
125 |         }]
126 |         
127 | 
128 |         # # 检查是否有图片并添加到消息中
129 |         # if "images" in data_dict.get("context", {}):
130 |         #     for image in data_dict["context"]["images"]:
131 |         #         image_message = {
132 |         #             "type": "image_url",
133 |         #             "image_url": {"url": f"data:image/jpeg;base64,{image}"}
134 |         #         }
135 |         #         messages.append(image_message)
136 |                 
137 |                 
138 |                 
139 |         if "images" in data_dict.get("context", {}):
140 |             with open('base64_images.txt', 'w') as file:  # 打开一个文本文件用于写入图像的 base64 编码
141 |                 for image in data_dict["context"]["images"]:
142 |                     image_message = {
143 |                         "type": "image_url",
144 |                         "image_url": {"url": f"data:image/jpeg;base64,{image}"}
145 |                     }
146 |                     messages.append(image_message)
147 |                     file.write(image + '\n')  # 将base64编码写入文件，每个图像编码后跟一个换行符
148 | 
149 |             #print("Base64 encoded images have been written to 'base64_images.txt'")
150 | 
151 |         # 将所有消息以完整的 JSON 格式写入另一个文件
152 | 
153 |         with open('full_messages.json', 'w', encoding='utf-8') as msg_file:
154 |             json.dump(messages, msg_file, ensure_ascii=False, indent=4)
155 | 
156 |        # print("Complete messages have been written to 'full_messages.json'")
157 |         #print("Messages before sending:", messages)
158 |         # for message in messages:
159 |         #     if message['type'] == 'image_url':
160 |         #         print("Found image with URL starting:", message['image_url']['url'][:30])
161 |         #     else:
162 |         #         print("Found text message")
163 | 
164 |         return [HumanMessage(content=messages)]
165 | 
166 | 
167 | 
168 |     def __find_tool(self, tool_name: str) -> Optional[BaseTool]:
169 |         for tool in self.tools:
170 |             if tool.name == tool_name:
171 |                 return tool
172 |         return None
173 | 
174 |     def __step(self,
175 |                task_description,
176 |                short_term_memory,
177 |                long_term_memory,data_dict,verbose=False) -> Tuple[Action, str]:
178 |         """执行一步思考"""
179 |         # 添加和格式化必要的信息到data_dict
180 |         if 'task_description' in data_dict:
181 |             data_dict['task_description'] = task_description
182 |         if 'short_term_memory' in data_dict:
183 |             data_dict['short_term_memory'] = self.__format_short_term_memory(short_term_memory)
184 |         if 'long_term_memory' in data_dict:
185 |             data_dict['long_term_memory'] = self.__format_long_term_memory(task_description, long_term_memory) if long_term_memory is not None else ""
186 |             
187 |         response = ''
188 |         #print("Before invoking:", data_dict)
189 |         # 使用invoke替代stream来获取响应
190 |         response = self.main_chain.invoke(data_dict)
191 |         
192 |         if response is None or 'choices' not in response:
193 |             raise ValueError("API调用未返回有效的响应或响应结构不符合预期")
194 |         print("After invoking, response:", response)
195 |         
196 |         action = self.robust_parser.parse(response)
197 |         # if verbose:
198 |         #     print("思考过程 :", response)
199 |         return action, response,data_dict
200 | 
201 |     def __final_step(self, short_term_memory, task_description,data_dict) -> str:
202 |         """最后一步, 生成最终的输出"""
203 |         self.use_final_prompt = True
204 |         
205 |         if 'task_description' in data_dict:
206 |             data_dict['task_description'] = task_description
207 |         if 'short_term_memory' in data_dict:
208 |             data_dict['formatted_short_term_memory'] = self.__format_short_term_memory(short_term_memory)
209 |             
210 |         response = self.final_chain.invoke(data_dict)
211 |         self.use_final_prompt = False
212 |         
213 |         return response
214 | 
215 |     def __exec_action(self, action: Action) -> str:
216 |         # 查找工具
217 |         tool = self.__find_tool(action.name)
218 |         if tool is None:
219 |             observation = (
220 |                 f"Error: 找不到工具或指令 '{action.name}'. "
221 |                 f"请从提供的工具/指令列表中选择，请确保按对顶格式输出。"
222 |             )
223 |         else:
224 |             try:
225 |                 # 执行工具
226 |                 observation = tool.run(action.args)
227 |             except ValidationError as e:
228 |                 # 工具的入参异常
229 |                 observation = (
230 |                     f"Validation Error in args: {str(e)}, args: {action.args}"
231 |                 )
232 |             except Exception as e:
233 |                 # 工具执行异常
234 |                 observation = f"Error: {str(e)}, {type(e).__name__}, args: {action.args}"
235 | 
236 |         return observation
237 | 
238 |     def __init_short_term_memory(self) -> BaseChatMemory:
239 |         short_term_memory = ConversationTokenBufferMemory(
240 |             llm=self.llm,
241 |             max_token_limit=4000,
242 |         )
243 |         short_term_memory.save_context(
244 |             {"input": "\n初始化"},
245 |             {"output": "\n开始"}
246 |         )
247 |         return short_term_memory
248 | 
249 |     def __connect_long_term_memory(self) -> BaseMemory:
250 |         if self.memery_retriever is not None:
251 |             long_term_memory = VectorStoreRetrieverMemory(
252 |                 retriever=self.memery_retriever,
253 |             )
254 |         else:
255 |             long_term_memory = None
256 |         return long_term_memory
257 | 
258 |     @staticmethod
259 |     def __update_short_term_memory(
260 |             short_term_memory: BaseChatMemory,
261 |             response: str,
262 |             observation: str,
263 |             
264 |             
265 |     ):
266 |         short_term_memory.save_context(
267 |             {"input": response},
268 |             
269 |             {"output": "\n此工具的返回结果:\n" + observation}
270 |         )
271 | 
272 |     @staticmethod
273 |     def __update_long_term_memory(
274 |             long_term_memory: BaseMemory,
275 |             task_description: str,
276 |             final_reply: str
277 |     ):
278 |         if long_term_memory is not None:
279 |             long_term_memory.save_context(
280 |                 {"input": task_description},
281 |                 {"output": final_reply}
282 |             )
283 | 
284 |     @staticmethod
285 |     def __show_observation(observation: str, verbose: bool):
286 |         if verbose:
287 |             color_print(f"----\n结果:\n{observation}", OBSERVATION_COLOR)
288 | 
289 |     def run(self, task_description, verbose=False) -> str:
290 |         # 初始化短时记忆
291 |         short_term_memory = self.__init_short_term_memory()
292 |         # 连接长时记忆（如果有）
293 |         long_term_memory = self.__connect_long_term_memory()
294 | 
295 |         # 思考步数
296 |         thought_step_count = 0
297 | 
298 |         # 开始逐步思考
299 |         while thought_step_count < self.max_thought_steps:
300 |             if verbose:
301 |                 color_print(f">>>>Round: {thought_step_count}<<<<", ROUND_COLOR)
302 | 
303 |             # 执行一步思考
304 |             action, response,data_dict = self.__step(
305 |                 task_description=task_description,
306 |                 short_term_memory=short_term_memory,
307 |                 long_term_memory=long_term_memory,data_dict=self.data_dict,verbose=True
308 |             )
309 |             self.data_dict =data_dict
310 |             # 如果是结束指令，执行最后一步
311 |             if action.name == "FINISH":
312 |                 break
313 |             action_name =str(action.name)
314 |             # 执行动作
315 |             response += "\n本轮对话调用的工具为：{}".format(action_name)
316 |             observation = self.__exec_action(action)
317 |             # 检查observation类型，并相应处理
318 |             if isinstance(observation, list):
319 |                 # 假设observation是图像的base64列表
320 |                 self.data_dict["context"]["images"].extend(observation)
321 |                 observation = "<image>"
322 |                 self.__update_short_term_memory(
323 |                     short_term_memory, response, observation
324 |                 )
325 |             else:
326 |                 # 如果observation是字符串，显示并继续处理
327 |                 self.__show_observation(observation, verbose)
328 |                 # 更新短时记忆
329 |                 self.__update_short_term_memory(
330 |                     short_term_memory, response, observation
331 |                 )
332 | 
333 |             thought_step_count += 1
334 | 
335 |         if thought_step_count >= self.max_thought_steps:
336 |             # 如果思考步数达到上限，返回错误信息
337 |             reply = "抱歉，我没能完成您的任务。"
338 |             self.data_dict = {
339 |             "context": {
340 |                 "images": []
341 |             },
342 |             "task_description": {},
343 |             "short_term_memory": {},
344 |             "long_term_memory": {}
345 |         }
346 |         else:
347 |             # 否则，执行最后一步
348 |             
349 |             reply = self.__final_step(short_term_memory, task_description,data_dict=self.data_dict)
350 |             self.data_dict = {
351 |             "context": {
352 |                 "images": []
353 |             },
354 |             "task_description": {},
355 |             "short_term_memory": {},
356 |             "long_term_memory": {}
357 |         }
358 |         # 更新长时记忆
359 |         self.__update_long_term_memory(long_term_memory, task_description, reply)
360 | 
361 |         return reply
362 | 


--------------------------------------------------------------------------------
/Agent/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Agent/__init__.py


--------------------------------------------------------------------------------
/Agent/__pycache__/Action.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Agent/__pycache__/Action.cpython-39.pyc


--------------------------------------------------------------------------------
/Agent/__pycache__/AutoGPT.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Agent/__pycache__/AutoGPT.cpython-39.pyc


--------------------------------------------------------------------------------
/Agent/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Agent/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/Agent/image_summarize.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import os
 3 | 
 4 | from langchain_community.chat_models import ChatOpenAI
 5 | from langchain_core.messages import HumanMessage
 6 | # 加载环境变量
 7 | from dotenv import load_dotenv, find_dotenv
 8 | _ = load_dotenv(find_dotenv())
 9 | 
10 | #本文是用于 多模态RAG的 图像提取 ，提取成base64码和图像总结文本，用于多模态向量库的
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
15 | 
16 | 
def image_summarize(img_base64, prompt):
    """Ask the vision model to summarize one image.

    Args:
        img_base64: Base64-encoded image data; it is embedded in a
            ``data:image/jpeg;base64,`` URL.
        prompt: Instruction text sent along with the image.

    Returns:
        The content of the model's reply.
    """
    model = ChatOpenAI(model="gpt-4-vision-preview", max_tokens=4096)

    message = HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
            },
        ]
    )
    # Calling the model object directly is deprecated in LangChain; invoke()
    # is the supported entry point and likewise returns the reply message.
    reply = model.invoke([message])
    return reply.content
35 | 
def generate_img_summaries(path):
    """
    Generate summaries and base64 encoded strings for the PNG images in a directory.

    path: Directory to scan. Files whose name ends in ``.png`` (any letter
        case) are processed in sorted filename order.

    Returns a tuple ``(img_base64_list, image_summaries)`` with one entry per
    image, in the same order. Prints a notice when no image was found.
    """
    # Base64-encoded images.
    img_base64_list = []

    # Summaries, index-aligned with img_base64_list.
    image_summaries = []

    # Prompt
    prompt = """You are an assistant tasked with summarizing images for retrieval. \
    These summaries will be embedded and used to retrieve the raw image. \
    Give a concise summary of the image that is well optimized for retrieval.用中文描述"""

    for img_file in sorted(os.listdir(path)):
        if not img_file.lower().endswith(".png"):
            continue
        base64_image = encode_image(os.path.join(path, img_file))
        img_base64_list.append(base64_image)
        image_summaries.append(image_summarize(base64_image, prompt))

    # An empty result list means no image file was found.
    if not img_base64_list:
        print("No images found.")

    return img_base64_list, image_summaries
69 | 
# NOTE: this is module-level code, so it runs whenever the module is executed
# or imported.
fpath = "./Agent"
# Encode and summarize every image found in the directory above.
img_base64_list, image_summaries = generate_img_summaries(fpath)
# Print each summary, or a notice when there is none.
if not image_summaries:
    print("No summaries found.")
else:
    for summary in image_summaries:
        print(summary)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GPT Automated Multimodal Task Project
 2 | **选择中文:** [中文](/readme_CN.md)
 3 | **Language:** [![Chinese](https://img.shields.io/badge/Language-中文-blue)](/readme_CN.md)
 4 | This project demonstrates how to utilize GPT to automatically retrieve files (such as PDF, XLS, Word, etc.) within a repository and complete multimodal tasks. By sending video frames from home cameras into the repository, it can automate the judgment of whether there are dangerous situations at home (leveraging the large model's understanding of the world).
 5 | 
 6 | ## Simple Operation Demonstration
 7 | ### Usage Example
 8 | ![Command Window Dialogue](image/chat.png)
  9 | After running `main.py`, you can interact with the agent in the command window. The agent can break down key concepts such as "sales not meeting targets" and, using chain-of-thought reasoning, decide on executable actions (such as calling tools). The main prompt that drives this thought process can be found in `prompts\main\main.txt`. The natural-language output makes the agent's thinking concrete: human thinking is sometimes more abstract, whereas the agent thinks purely in natural language.
10 | ![Thought Process](image/thought0.png)
11 | 
12 | ### Multi-round Dialogue Task Completion Example
13 | Using the `ListDirectory` tool, as shown, the correct JSON format is output to call the tool and list all files in the repository, which leads to the next round of thinking.
14 | ![Correct JSON Output for Tool Call](image/tool0.png)
15 | For example, executing the tool "AskDocument" to query another large model to obtain the desired document content.
16 | ![Executing the AskDocument Tool](image/tool1.png)
17 | 
18 | ### Current Tools List
19 | ![All Tools](image/tool_all.png)
20 | The agent, as the main dispatcher, can call multiple AIs to perform tasks such as sending emails, querying document contents, etc. This method is more suitable for autoagent scenarios than RAG.
21 | - **openimage**: Used to open images in the repository and add them to the dialogue, giving the agent the ability to query images.
22 | - **ExcelTool**: Can retrieve all columns and the first three rows of an Excel sheet, allowing the agent to judge whether the table content is what is needed.
23 | ![Inspect Excel Sheet](image/inspectExcel.png)
24 | - **PythonTool**: Defines an AI that accepts the agent's query (including the path of the Excel file to be analyzed), allowing the AI to write code and run it to calculate and sum variables in the table for analysis.
25 | ![Run Code](image/run_code.png)
26 | 
 27 | Because the agent acts as the dispatcher, the work it delegates does not take up tokens in the main prompt. This saves tokens and keeps the agent from forgetting its ultimate goal of answering the user's query while it handles several interleaved tasks.
28 | 
29 | ### Future Plans
30 | - Add UI to allow image input through the chat window.
31 | - Add parsing for Word or web graphic formats.
32 | 
33 | ## Environment Setup
34 | 
35 | ### Step 1: Set up .env
36 | Configure your own API key in the `.env` file in the project.
37 | `OPENAI_API_KEY=sk-xxxx`
38 | You need to purchase the API key from [here](https://devcto.com/).
39 | 
40 | ### Step 2: Set Local Libraries
41 | Execute the following command:
42 | `export HNSWLIB_NO_NATIVE=1`
43 | 
44 | ### Step 3: Install Dependencies
45 | Execute the following command:
46 | `pip install -r requirements.txt`
47 | 
48 | 
49 | ### Step 4: Run the Project
50 | 
51 | ##### Run the main.py File
52 | 
53 | ##### Then input the question in the interface:
54 | 🤖: How can I help you?
55 | 👨: What is the sales figure for September? (You need to input this yourself)
56 | >>>>Round: 0<<<<
57 | 
58 | ##### Example Questions:
59 | * What is the sales figure for September?
60 | * What is the best-selling product?
61 | * Identify suppliers with sales not meeting targets.
62 | * Send an email to these two suppliers notifying them of this.
63 | * Compare the sales performance of August and September and write a report.
64 | * Summarize the contents of the warehouse images.
65 | 
 66 | This is a basic version of the multimodal capabilities; further updates are needed to keep up with LangChain version changes.
67 | 


--------------------------------------------------------------------------------
/Tools/EmailTool.py:
--------------------------------------------------------------------------------
 1 | import webbrowser
 2 | import urllib.parse
 3 | import re
 4 | 
 5 | 
 6 | def _is_valid_email(email: str) -> bool:
 7 |     receivers = email.split(';')
 8 |     # 正则表达式匹配电子邮件
 9 |     pattern = r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$'
10 |     for receiver in receivers:
11 |         if not bool(re.match(pattern, receiver.strip())):
12 |             return False
13 |     return True
14 | 
15 | 
def send_email(
        to: str,
        subject: str,
        body: str,
        cc: str = None,
        bcc: str = None,
) -> str:
    """Open a pre-filled e-mail draft through a ``mailto:`` link.

    The recipient list is validated first. Subject, body and the optional
    cc / bcc values are URL-encoded into the link, which is then handed to
    ``webbrowser.open``.

    Returns:
        A status message: an error when ``to`` is not valid, otherwise a
        success note naming the recipient and the subject.
    """
    if not _is_valid_email(to):
        return f"电子邮件地址 {to} 不合法"

    # URL-encode each query field of the mailto link.
    query_parts = [
        f'subject={urllib.parse.quote(subject)}',
        f'body={urllib.parse.quote(body)}',
    ]
    if cc is not None:
        query_parts.append(f'cc={urllib.parse.quote(cc)}')
    if bcc is not None:
        query_parts.append(f'bcc={urllib.parse.quote(bcc)}')

    mailto_url = f'mailto:{to}?' + '&'.join(query_parts)
    webbrowser.open(mailto_url)

    return f"状态: 成功\n备注: 已发送邮件给 {to}, 标题: {subject}"
44 | 


--------------------------------------------------------------------------------
/Tools/ExcelTool.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | 
 4 | def get_sheet_names(
 5 |         filename: str
 6 | ) -> str:
 7 |     """获取 Excel 文件的工作表名称"""
 8 |     excel_file = pd.ExcelFile(filename)
 9 |     sheet_names = excel_file.sheet_names
10 |     return f"这是 '{filename}' 文件的工作表名称：\n\n{sheet_names}"
11 | 
12 | '''get_sheet_names 函数
13 | 
14 | 目的：获取并返回一个 Excel 文件中所有工作表（sheet）的名称。
15 | 逻辑：
16 | 使用 pandas 库的 ExcelFile 类来加载 Excel 文件。
17 | 通过 sheet_names 属性获取所有工作表的名称。
18 | 将获取到的工作表名称格式化为字符串，并返回。'''
19 | 
def get_column_names(
        filename: str
) -> str:
    """Return a message listing the column names of the first worksheet.

    Args:
        filename: Path of the Excel file to inspect.

    Returns:
        A message with one column name per line.
    """
    # sheet_name=0 selects the first worksheet.
    df = pd.read_excel(filename, sheet_name=0)

    # Headers are not always strings (e.g. numeric or datetime headers);
    # str.join only accepts strings, so convert each label explicitly.
    column_names = '\n'.join(str(label) for label in df.columns)

    return f"这是 '{filename}' 文件第一个工作表的列名：\n\n{column_names}"
36 | 
def get_first_n_rows(
        filename: str,
        n: int = 3
) -> str:
    """Return sheet names, column names and the first *n* rows of an Excel file.

    The three sections are produced in that order and separated by a blank
    line. The row sample comes from the first worksheet and is rendered with
    its header and without the index column.
    """
    sections = [
        get_sheet_names(filename),
        get_column_names(filename),
    ]

    # sheet_name=0 selects the first worksheet.
    first_sheet = pd.read_excel(filename, sheet_name=0)
    preview = first_sheet.head(n).to_string(index=False, header=True)

    sections.append(f"这是 '{filename}' 文件第一个工作表的前{n}行样例：\n\n{preview}")
    return "\n\n".join(sections)
# Specify the file path.
# NOTE(review): the functions visible in this module take their own `filename`
# parameter, so this module-level value does not appear to be read here —
# confirm whether anything imports it before removing.
filename = r'.\auto-gpt-work\data\2023年8月-9月销售记录.xlsx'
58 | 
59 | 


--------------------------------------------------------------------------------
/Tools/FileQATool.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from langchain.embeddings.openai import OpenAIEmbeddings
 3 | from langchain.schema import Document
 4 | from langchain.text_splitter import RecursiveCharacterTextSplitter
 5 | from langchain.vectorstores import Chroma
 6 | from langchain.document_loaders import PyPDFLoader
 7 | from langchain.document_loaders.word_document import UnstructuredWordDocumentLoader
 8 | from langchain.chains import RetrievalQA
 9 | from langchain.llms import OpenAI
10 | 
11 | 
class FileLoadFactory:
    """Choose a document loader based on a file's extension."""

    @staticmethod
    def get_loader(filename: str):
        """Return a loader instance for *filename*.

        ``pdf`` files get a ``PyPDFLoader``; ``docx``/``doc`` files get an
        ``UnstructuredWordDocumentLoader``. The extension is compared
        case-insensitively so names such as ``REPORT.PDF`` are accepted.

        Raises:
            NotImplementedError: If the extension is not one of the above.
        """
        ext = get_file_extension(filename)
        normalized = ext.lower()
        if normalized == "pdf":
            return PyPDFLoader(filename)
        if normalized in ("docx", "doc"):
            return UnstructuredWordDocumentLoader(filename)
        raise NotImplementedError(f"File extension {ext} not supported.")
22 | 
23 | 
def get_file_extension(filename: str) -> str:
    """Return the extension of *filename* without the leading dot.

    Uses ``os.path.splitext`` so that only the final path component is
    considered: a name without an extension yields ``""`` (instead of the
    whole name), and a dot inside a directory name is not mistaken for an
    extension separator.
    """
    import os  # local import: this module does not import os at file level

    return os.path.splitext(filename)[1][1:]
26 | 
27 | 
def load_docs(filename: str) -> List[Document]:
    """Load *filename* with the loader chosen by FileLoadFactory and split it.

    Returns whatever the loader's ``load_and_split()`` produces.
    """
    return FileLoadFactory.get_loader(filename).load_and_split()
32 | 
33 | 
def ask_docment(
        filename: str,
        query: str,
) -> str:
    """Answer a question using the content of a document file.

    The file is loaded through ``load_docs``, re-split into 200-character
    chunks with a 100-character overlap, indexed in a Chroma vector store and
    queried through a ``RetrievalQA`` chain. A suffix asking for a Chinese
    answer is appended to the query.

    Returns:
        The chain's answer, or a fixed message when the document yields no
        pages or no chunks.
    """
    pages = load_docs(filename)
    if len(pages) == 0:
        return "抱歉，文档内容为空"

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=200,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    chunks = splitter.split_documents(pages)
    if not chunks:
        return "无法读取文档内容"

    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    vector_store = Chroma.from_documents(chunks, embeddings)

    # Language model used to compose the final answer.
    llm = OpenAI(
        temperature=0,
        model_kwargs={
            "seed": 42
        },
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",  # how retrieved chunks are arranged into the prompt
        retriever=vector_store.as_retriever()  # retriever over the indexed chunks
    )
    return qa_chain.run(query + "(请用中文回答)")
65 | 
66 | 
# Manual smoke test: ask one question against a sample Word document and
# print the answer when this module is run as a script.
if __name__ == "__main__":
    filename = "../data/2023年10月份销售计划.docx"
    query = "销售额达标的标准是多少？"
    response = ask_docment(filename, query)
    print(response)
72 | 


--------------------------------------------------------------------------------
/Tools/FileTool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | def list_files_in_directory(path: str) -> str:
 5 |     """List all file names in the directory"""
 6 |     file_names = os.listdir(path)
 7 | 
 8 |     # Join the file names into a single string, separated by a newline
 9 |     return "\n".join(file_names)
10 | 


--------------------------------------------------------------------------------
/Tools/PythonTool.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from langchain.tools import StructuredTool
  3 | from langchain_core.output_parsers import BaseOutputParser, StrOutputParser
  4 | from langchain_core.prompts import PromptTemplate
  5 | 
  6 | from Utils.CallbackHandlers import ColoredPrintHandler
  7 | from Utils.PrintUtils import CODE_COLOR
  8 | from langchain_openai import ChatOpenAI
  9 | from .ExcelTool import get_first_n_rows, get_column_names
 10 | from langchain_experimental.utilities import PythonREPL
 11 | 
 12 | 
 13 | class PythonCodeParser(BaseOutputParser):
 14 |     """从OpenAI返回的文本中提取Python代码。"""
 15 | 
 16 |     @staticmethod
 17 |     def __remove_marked_lines(input_str: str) -> str:
 18 |         lines = input_str.strip().split('\n')
 19 |         if lines and lines[0].strip().startswith('```'):
 20 |             del lines[0]
 21 |         if lines and lines[-1].strip().startswith('```'):
 22 |             del lines[-1]
 23 | 
 24 |         ans = '\n'.join(lines)
 25 |         return ans
 26 | 
 27 |     def parse(self, text: str) -> str:
 28 |         # 使用正则表达式找到所有的Python代码块
 29 |         python_code_blocks = re.findall(r'```python\n(.*?)\n```', text, re.DOTALL)
 30 |         # 从re返回结果提取出Python代码文本
 31 |         python_code = None
 32 |         if len(python_code_blocks) > 0:
 33 |             python_code = python_code_blocks[0]
 34 |             python_code = self.__remove_marked_lines(python_code)
 35 |         return python_code
 36 | 
 37 | 
 38 | class ExcelAnalyser:
 39 |     """
 40 |     通过程序脚本分析一个结构化文件（例如excel文件）的内容。
 41 |     输人中必须包含文件的完整路径和具体分析方式和分析依据，阈值常量等。
 42 |     """
 43 | 
 44 |     def __init__(self, prompt_file="./prompts/tools/excel_analyser.txt", verbose=False):
 45 |         self.prompt = PromptTemplate.from_file(prompt_file)
 46 |         self.verbose = verbose
 47 |         self.verbose_handler = ColoredPrintHandler(CODE_COLOR)
 48 | 
 49 |     def analyse(self, query, filename):
 50 | 
 51 |         """分析一个结构化文件（例如excel文件）的内容。"""
 52 | 
 53 |         # columns = get_column_names(filename)
 54 |         inspections = get_first_n_rows(filename, 3)
 55 | 
 56 |         llm = ChatOpenAI(
 57 |             model="gpt-4-1106-preview",
 58 |             temperature=0,
 59 |             model_kwargs={
 60 |                 "seed": 42
 61 |             },
 62 |         )
 63 | 
 64 |         code_parser = PythonCodeParser()
 65 |         chain = self.prompt | llm | StrOutputParser()
 66 | 
 67 |         response = ""
 68 | 
 69 |         for c in chain.stream({
 70 |             "query": query,
 71 |             "filename": filename,
 72 |             "inspections": inspections
 73 |         }, config={
 74 |             "callbacks": [
 75 |                 self.verbose_handler
 76 |             ] if self.verbose else []
 77 |         }):
 78 |             response += c
 79 | 
 80 |         code = code_parser.parse(response)
 81 | 
 82 |         if code:
 83 |             ans = query+"\n"+PythonREPL().run(code)
 84 |             return ans
 85 |         else:
 86 |             return "没有找到可执行的Python代码"
 87 | 
 88 |     def as_tool(self):
 89 |         return StructuredTool.from_function(
 90 |             func=self.analyse,
 91 |             name="AnalyseExcel",
 92 |             description=self.__class__.__doc__.replace("\n", ""),
 93 |         )
 94 | 
 95 | 
# Manual smoke test: run one analysis query against the sample sales workbook
# and print the result when this module is run as a script.
if __name__ == "__main__":
    print(ExcelAnalyser().analyse(
        query="8月销售额",
        filename="../data/2023年8月-9月销售记录.xlsx"
    ))
101 | 


--------------------------------------------------------------------------------
/Tools/Tools.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | warnings.filterwarnings("ignore")
 4 | 
 5 | from langchain.tools import StructuredTool
 6 | from .FileQATool import ask_docment
 7 | from .WriterTool import write
 8 | from .EmailTool import send_email
 9 | from .ExcelTool import get_first_n_rows
10 | from .FileTool import list_files_in_directory
11 | from .openimage import image_to_base64_tool
12 | 
# Tool: answer a question from the content of a Word or PDF document.
document_qa_tool = StructuredTool.from_function(
    func=ask_docment,
    name="AskDocument",
    description="根据一个Word或PDF文档的内容，回答一个问题。考虑上下文信息，确保问题对相关概念的定义表述完整。",
)

# Tool: open an image file and return it base64-encoded.
image_open = StructuredTool.from_function(
    func=image_to_base64_tool,
    name="openimage",
    description="输入图像地址链接返回base64编码图像，即打开图像文件",
)


# Tool: generate a formal document from a requirement description.
document_generation_tool = StructuredTool.from_function(
    func=write,
    name="GenerateDocument",
    description="根据需求描述生成一篇正式文档",
)

# Tool: send an e-mail; multiple addresses are separated by ';'.
email_tool = StructuredTool.from_function(
    func=send_email,
    name="SendEmail",
    description="给指定的邮箱发送邮件。确保邮箱地址是xxx@xxx.xxx的格式。多个邮箱地址以';'分割。",
)

# Tool: inspect a spreadsheet's structure (column names and first n rows).
excel_inspection_tool = StructuredTool.from_function(
    func=get_first_n_rows,
    name="InspectExcel",
    description="探查表格文件的内容和结构，只展示它的列名和前n行，n默认为3",
)

# Tool: list the entries of a directory.
directory_inspection_tool = StructuredTool.from_function(
    func=list_files_in_directory,
    name="ListDirectory",
    description="探查文件夹的内容和结构，展示它的文件名和文件夹名",
)

# Placeholder tool that does nothing; its name is used to signal that the
# task is finished.
finish_placeholder = StructuredTool.from_function(
    func=lambda: None,
    name="FINISH",
    description="用于表示任务完成的占位符工具"
)
55 | 


--------------------------------------------------------------------------------
/Tools/WriterTool.py:
--------------------------------------------------------------------------------
 1 | from langchain.prompts import ChatPromptTemplate
 2 | from langchain.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
 3 | from langchain_core.output_parsers import StrOutputParser
 4 | from langchain_core.runnables import RunnablePassthrough
 5 | from langchain_openai import ChatOpenAI
 6 | 
 7 | 
 8 | def write(query: str, verbose=False):
 9 |     """按用户要求撰写文档"""
10 |     template = ChatPromptTemplate.from_messages(
11 |         [
12 |             SystemMessagePromptTemplate.from_template(
13 |                 "你是专业的文档写手。你根据客户的要求，写一份文档。输出中文。"),
14 |             HumanMessagePromptTemplate.from_template("{query}"),
15 |         ]
16 |     )
17 | 
18 |     chain = {"query": RunnablePassthrough()} | template | ChatOpenAI() | StrOutputParser()
19 | 
20 |     return chain.invoke(query)
21 | 
22 | 
# Manual smoke test: generate one short document and print it when this
# module is run as a script.
if __name__ == "__main__":
    print(write("写一封邮件给张三，内容是：你好，我是李四。"))
25 | 


--------------------------------------------------------------------------------
/Tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from .Tools import (
 2 |     document_qa_tool,
 3 |     document_generation_tool,
 4 |     email_tool,
 5 |     excel_inspection_tool,
 6 |     directory_inspection_tool,
 7 |     finish_placeholder,
 8 |     image_open
 9 |     
10 | )


--------------------------------------------------------------------------------
/Tools/__pycache__/EmailTool.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/EmailTool.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/ExcelTool.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/ExcelTool.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/FileQATool.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/FileQATool.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/FileTool.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/FileTool.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/PythonTool.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/PythonTool.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/Tools.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/Tools.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/WriterTool.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/WriterTool.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/__pycache__/openimage.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Tools/__pycache__/openimage.cpython-39.pyc


--------------------------------------------------------------------------------
/Tools/openimage.py:
--------------------------------------------------------------------------------
 1 | from langchain.prompts import ChatPromptTemplate
 2 | from langchain.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
 3 | from langchain_core.output_parsers import StrOutputParser
 4 | from langchain_core.runnables import RunnablePassthrough
 5 | from langchain_openai import ChatOpenAI
 6 | import base64
 7 | import io
 8 | import re
 9 | from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
10 | 
11 | 
12 |     
def image_to_base64_tool(image_path: str):
    """Read the file at *image_path* and return it base64-encoded.

    Returns:
        A one-element list holding the base64 text on success; the string
        "File not found." when the path does not exist; otherwise a string
        describing the error that occurred.
    """
    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
        encoded = base64.b64encode(raw_bytes).decode('utf-8')
    except FileNotFoundError:
        return "File not found."
    except Exception as e:
        return f"An error occurred: {str(e)}"
    return [encoded]


--------------------------------------------------------------------------------
/Utils/CallbackHandlers.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, Union, Any
 2 | from uuid import UUID
 3 | 
 4 | from langchain_core.callbacks import BaseCallbackHandler
 5 | from langchain_core.outputs import GenerationChunk, ChatGenerationChunk, LLMResult
 6 | 
 7 | from Utils.PrintUtils import *
 8 | 
 9 | 
class ColoredPrintHandler(BaseCallbackHandler):
    """Callback handler that echoes streamed LLM tokens in a fixed colour."""

    def __init__(self, color: str):
        """Remember the colour prefix used for every printed token."""
        super().__init__()
        self._color = color

    def on_llm_new_token(
            self,
            token: str,
            *,
            chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
            run_id: UUID,
            parent_run_id: Optional[UUID] = None,
            **kwargs: Any,
    ) -> Any:
        """Print *token* without a trailing newline and return it."""
        color_print(token, self._color, end="")
        return token

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:
        """Print a closing newline once generation ends and return *response*."""
        color_print("\n", self._color, end="")
        return response


--------------------------------------------------------------------------------
/Utils/PrintUtils.py:
--------------------------------------------------------------------------------
 1 | from colorama import init, Fore, Back, Style
 2 | import sys
 3 | 
# Colour constants (colorama foreground codes), named after the kind of
# output each one is meant for.
THOUGHT_COLOR = Fore.GREEN
OBSERVATION_COLOR = Fore.YELLOW
ROUND_COLOR = Fore.BLUE
CODE_COLOR = Fore.WHITE
 8 | 
 9 | 
def color_print(text, color=None, end="\n"):
    """Write *text* to stdout, optionally wrapped in a colour, then flush.

    When *color* is given, the text is prefixed with it and followed by the
    style-reset code; *end* is appended after that in either case.
    """
    rendered = text if color is None else color + text + Style.RESET_ALL
    sys.stdout.write(rendered + end)
    sys.stdout.flush()
17 | 


--------------------------------------------------------------------------------
/Utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Utils/__init__.py


--------------------------------------------------------------------------------
/Utils/__pycache__/CallbackHandlers.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Utils/__pycache__/CallbackHandlers.cpython-39.pyc


--------------------------------------------------------------------------------
/Utils/__pycache__/PrintUtils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Utils/__pycache__/PrintUtils.cpython-39.pyc


--------------------------------------------------------------------------------
/Utils/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/Utils/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/base64_images.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/base64_images.txt


--------------------------------------------------------------------------------
/data/2023年8月-9月销售记录.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/data/2023年8月-9月销售记录.xlsx


--------------------------------------------------------------------------------
/data/test_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/data/test_image.png


--------------------------------------------------------------------------------
/data/供应商名录.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/data/供应商名录.xlsx


--------------------------------------------------------------------------------
/data/供应商资格要求.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/data/供应商资格要求.pdf


--------------------------------------------------------------------------------
/examples.txt:
--------------------------------------------------------------------------------
1 | 9月份的销售额是多少
2 | 销售总额最大的产品是什么
3 | 帮我找出最近一个月销售额不达标的供应商
4 | 给对方发一封邮件通知此事
5 | 对比8月和9月销售情况，写一份报告
6 | 总结一下仓库图像的内容


--------------------------------------------------------------------------------
/full_messages.json:
--------------------------------------------------------------------------------
1 | [
2 |     {
3 |         "type": "text",
4 |         "text": "你是强大的AI助手，可以使用工具与指令自动化解决问题。\n\n你的任务是:\n帮我找出最近一个月销售额不达标的供应商\n如果此任务表达“没有了”、“已完成”或类似意思，你直接输出下述工具中的FINISH即可。\n\n你需要的所有文件资料都在以下目录:\ndir_path=./data\n访问文件时请确保文件路径完整。\n\n你可以使用以下工具或指令，它们又称为动作或actions:\nAskDocument: AskDocument(filename: str, query: str) -> str - 根据一个Word或PDF文档的内容，回答一个问题。考虑上下文信息，确保问题对相关概念的定义表述完整。\nGenerateDocument: GenerateDocument(query: str, verbose=False) - 根据需求描述生成一篇正式文档\nSendEmail: SendEmail(to: str, subject: str, body: str, cc: str = None, bcc: str = None) -> str - 给指定的邮箱发送邮件。确保邮箱地址是xxx@xxx.xxx的格式。多个邮箱地址以';'分割。\nInspectExcel: InspectExcel(filename: str, n: int = 3) -> str - 探查表格文件的内容和结构，只展示它的列名和前n行，n默认为3\nopenimage: openimage(image_path: str) - 输入图像地址链接返回base64编码图像，即打开图像文件\nListDirectory: ListDirectory(path: str) -> str - 探查文件夹的内容和结构，展示它的文件名和文件夹名\nFINISH: FINISH() - 用于表示任务完成的占位符工具\nAnalyseExcel: AnalyseExcel(query, filename) - 通过程序脚本分析一个结构化文件（例如excel文件）的内容。    输人中必须包含文件的完整路径和具体分析方式和分析依据，阈值常量等。\n\n你必须遵循以下约束来完成任务。\n1. 每次你的决策只使用一种工具，你可以使用任意多次。\n2. 确保你调用的指令或使用的工具在上述给定的工具列表中。\n3. 确保你的回答不会包含违法或有侵犯性的信息。\n4. 如果你已经完成所有任务，确保以\"FINISH\"指令结束，不过要注意如果刚调用完工具，在当前的任务执行记录中看见工具有output返回才算完成任务。\n5. 用中文思考和输出。\n6. 如果执行某个指令或工具失败，尝试改变参数或参数格式再次调用。\n7. 你生成的回复必须遵循上文中给定的事实信息。不可以编造信息。DO NOT MAKE UP INFORMATION.\n8. 如果得到的结果不正确，尝试更换表达方式。\n9. 已经得到的信息，不要反复查询。\n10. 确保你生成的动作是可以精确执行的。动作做中可以包括具体方法和目标输出。\n11. 看到一个概念时尝试获取它的准确定义，并分析从哪些输入可以得到它的具体取值。\n12. 生成一个自然语言查询时，请在查询中包含全部的已知信息。\n13. 在执行分析或计算动作前，确保该分析或计算中涉及的所有子概念都已经得到了定义。\n14. 禁止打印一个文件的全部内容，这样的操作代价太大，且会造成不可预期的后果，是被严格禁止的。\n15. 不要向用户提问。\n16. 如果任务描述中有文件目录地址，尝试用工具去打开或者解读它。\n\n相关的历史记录:\n\n\n当前的任务执行记录（图像数据不会在这里）:\n\n开始\n\n输出形式：\n（1）首先，根据以下格式说明，输出你的思考过程:\n关键概念: 任务中涉及的组合型概念或实体。已经明确获得取值的关键概念，将其取值完整备注在概念后。\n概念拆解: 将任务中的关键概念拆解为一系列待查询的子要素。每个关键概念一行，后接这个概念的子要素，每个子要素一行，行前以' -'开始。已经明确获得取值的子概念，将其取值完整备注在子概念后。\n反思:\n    自我反思，观察以前的执行记录，思考概念拆解是否完整、准确。\n    一步步思考是否每一个的关键概念或要素的查询都得到了准确的结果。\n    反思你已经得到哪个要素/概念。你的到的要素/概念取值是否正确。从当前的信息中还不能得到哪些要素/概念。\n    每个反思一行，行前以' -'开始。\n思考: 观察执行记录和你的自我反思，并一步步思考\n  A. 分析要素间的依赖关系，例如，如果需要获得要素X和Y的值：\n    i. 
是否需要先获得X的值/定义，才能通过X来获得Y？\n    ii. 如果先获得X，是否可以通过X筛选Y，减少穷举每个Y的代价？\n    iii. X和Y是否存在在同一数据源中，能否在获取X的同时获取Y？\n    iv. 是否还有更高效或更聪明的办法来查询一个概念或要素？\n    v. 如果上一次尝试查询一个概念或要素时失败了，是否可以尝试从另一个资源中再次查询？\n    vi. 诸如此类，你可以扩展更多的思考 ...\n  B. 根据以上分析，排列子要素间的查询优先级\n  C. 找出当前需要获得取值的子要素\n  注意，不要对要素的取值/定义做任何假设，确保你的信息来自给定的数据源！\n推理: 根据你的反思与思考，一步步推理被选择的子要素取值的获取方式。如果前一次的计划失败了，请检查输入中是否包含每个概念/要素的明确定义，并尝试细化你的查询描述。\n计划: 严格遵守以下规则，计划你的当前动作。\n  A. 详细列出当前动作的执行计划。只计划一步的动作。PLAN ONE STEP ONLY!\n  B. 一步步分析，包括数据源，对数据源的操作方式，对数据的分析方法。有哪些已知常量可以直接代入此次分析。\n  C. 不要尝试计算文件的每一个元素，这种计算代价太高，是严格禁止的。你可以通过分析找到更有效的方法，比如条件筛选。\n  D. 上述分析是否依赖某个要素的取值/定义，且该要素的取值/定义尚未获得。若果是，重新规划当前动作，确保所有依赖的要素的取值/定义都已经获得。\n  E. 不要对要素的取值/定义做任何假设，确保你的信息来自给定的数据源。不要编造信息。DO NOT MAKE UP ANY INFORMATION!!!\n  F. 确保你执行的动作涉及的所有要素都已获得确切的取值/定义。\n  G. 如果全部子任务已完成，请用FINISH动作结束任务，确认是否已经完成，工具调用完要在当前的任务执行记录中看到工具的output才算完成才能输出FINISH，不过强调FINISH的输出格式在工具或actions中的一样。\n  H. 如果出现：“结果:Error: 找不到工具或指令....  ”诸如以上等等说明可能是action的输出格式不对，工具或指令的输出格式不对，finish的输出格式也和其他工具的一样。\n\n（2）然后，根据以下格式说明，输出你选择执行的动作/工具:\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{\"properties\": {\"name\": {\"title\": \"Name\", \"description\": \"工具或指令名称\", \"type\": \"string\"}, \"args\": {\"title\": \"Args\", \"description\": \"工具或指令参数，由参数名称和参数值组成\", \"type\": \"object\"}}, \"required\": [\"name\"]}\n```\n\n"
5 |     }
6 | ]


--------------------------------------------------------------------------------
/image/chat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/image/chat.png


--------------------------------------------------------------------------------
/image/inspectExcel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/image/inspectExcel.png


--------------------------------------------------------------------------------
/image/run_code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/image/run_code.png


--------------------------------------------------------------------------------
/image/thought0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/image/thought0.png


--------------------------------------------------------------------------------
/image/tool0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/image/tool0.png


--------------------------------------------------------------------------------
/image/tool1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/image/tool1.png


--------------------------------------------------------------------------------
/image/tool_all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Metavers1/GPT-Autoagent-Multimodal-Task-Project/6672293d7b2adf8785120e8042e07cb11e166c28/image/tool_all.png


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # 加载环境变量
 2 | from dotenv import load_dotenv, find_dotenv
 3 | _ = load_dotenv(find_dotenv())
 4 | 
 5 | from Agent.AutoGPT import AutoGPT
 6 | from langchain_openai import  OpenAIEmbeddings
 7 | from langchain_community.chat_models import ChatOpenAI
 8 | from langchain_community.vectorstores import Chroma
 9 | from langchain.schema import Document
10 | from Tools import *
11 | from Tools.PythonTool import ExcelAnalyser
12 | from langchain.chat_models import ChatVertexAI
13 | 
def launch_agent(agent: AutoGPT):
    """Run an interactive console loop that forwards each input to *agent*.

    Every line typed by the user is passed to ``agent.run`` (verbose mode)
    and the reply is printed. Typing "quit" (any case, surrounding
    whitespace ignored) ends the loop.
    """
    human_icon = "\U0001F468"
    ai_icon = "\U0001F916"
    prompt_text = f"{ai_icon}：有什么可以帮您？\n{human_icon}："

    while True:
        task = input(prompt_text)
        if task.strip().lower() == "quit":
            return
        answer = agent.run(task, verbose=True)
        print(f"{ai_icon}：{answer}\n")
24 | 
25 | 
def main():
    """Build the model, memory retriever and tool set, then start the agent loop."""

    # # Language model (alternative text-only configuration, kept for reference)
    # llm = ChatOpenAI(
    #     model="gpt-4-1106-preview",
    #     temperature=0,
    #     model_kwargs={
    #         "seed": 42
    #     },
    # )
    llm = ChatOpenAI(model="gpt-4-vision-preview",max_tokens=4096)

    # Vector database that stores long-term memory; it is seeded with a single
    # empty document and the retriever returns one result per query (k=1).
    db = Chroma.from_documents([Document(page_content="")], OpenAIEmbeddings(model="text-embedding-ada-002"))
    retriever = db.as_retriever(
        search_kwargs={"k": 1}
    )

    # Custom tool set
    tools = [
        document_qa_tool,
        document_generation_tool,
        email_tool,
        excel_inspection_tool,
        image_open,
        directory_inspection_tool,
        finish_placeholder,
        ExcelAnalyser(
            prompt_file="./prompts/tools/excel_analyser.txt",
            verbose=True
        ).as_tool()
    ]

    # Define the agent
    agent = AutoGPT(
        llm=llm,
        tools=tools,
        work_dir="./data",
        main_prompt_file="./prompts/main/main.txt",
        final_prompt_file="./prompts/main/final_step.txt",
        max_thought_steps=20,
        memery_retriever=retriever
    )

    # Run the agent
    launch_agent(agent)
72 | 
73 | 
# Script entry point: start the interactive agent.
if __name__ == "__main__":
    main()
76 | 


--------------------------------------------------------------------------------
/prompts/main/final_step.txt:
--------------------------------------------------------------------------------
1 | 你的任务是:
2 | {task_description}
3 | 
4 | 以下是你的思考过程和使用工具与外部资源交互的结果。
5 | {short_term_memory}
6 | 
7 | 你已经完成任务。
8 | 现在请根据上述结果简要总结出你的最终答案。
9 | 直接给出答案。不用再解释或分析你的思考过程。如果是生成类任务则直接把与外部资源交互生成的内容复制过来。


--------------------------------------------------------------------------------
/prompts/main/main.txt:
--------------------------------------------------------------------------------
 1 | 你是强大的AI助手，可以使用工具与指令自动化解决问题。
 2 | 
 3 | 你的任务是:
 4 | {task_description}
 5 | 如果此任务表达“没有了”、“已完成”或类似意思，你直接输出下述工具中的FINISH即可。
 6 | 
 7 | 你需要的所有文件资料都在以下目录:
 8 | dir_path={work_dir}
 9 | 访问文件时请确保文件路径完整。
10 | 
11 | 你可以使用以下工具或指令，它们又称为动作或actions:
12 | {tools}
13 | 
14 | 你必须遵循以下约束来完成任务。
15 | 1. 每次你的决策只使用一种工具，你可以使用任意多次。
16 | 2. 确保你调用的指令或使用的工具在上述给定的工具列表中。
17 | 3. 确保你的回答不会包含违法或有侵犯性的信息。
18 | 4. 如果你已经完成所有任务，确保以"FINISH"指令结束，不过要注意如果刚调用完工具，在当前的任务执行记录中看见工具有output返回才算完成任务。
19 | 5. 用中文思考和输出。
20 | 6. 如果执行某个指令或工具失败，尝试改变参数或参数格式再次调用。
21 | 7. 你生成的回复必须遵循上文中给定的事实信息。不可以编造信息。DO NOT MAKE UP INFORMATION.
22 | 8. 如果得到的结果不正确，尝试更换表达方式。
23 | 9. 已经得到的信息，不要反复查询。
24 | 10. 确保你生成的动作是可以精确执行的。动作中可以包括具体方法和目标输出。
25 | 11. 看到一个概念时尝试获取它的准确定义，并分析从哪些输入可以得到它的具体取值。
26 | 12. 生成一个自然语言查询时，请在查询中包含全部的已知信息。
27 | 13. 在执行分析或计算动作前，确保该分析或计算中涉及的所有子概念都已经得到了定义。
28 | 14. 禁止打印一个文件的全部内容，这样的操作代价太大，且会造成不可预期的后果，是被严格禁止的。
29 | 15. 不要向用户提问。
30 | 16. 如果任务描述中有文件目录地址，尝试用工具去打开或者解读它。
31 | 
32 | 相关的历史记录:
33 | {long_term_memory}
34 | 
35 | 当前的任务执行记录（图像数据不会在这里）:
36 | {short_term_memory}
37 | 
38 | 输出形式：
39 | （1）首先，根据以下格式说明，输出你的思考过程:
40 | 关键概念: 任务中涉及的组合型概念或实体。已经明确获得取值的关键概念，将其取值完整备注在概念后。
41 | 概念拆解: 将任务中的关键概念拆解为一系列待查询的子要素。每个关键概念一行，后接这个概念的子要素，每个子要素一行，行前以' -'开始。已经明确获得取值的子概念，将其取值完整备注在子概念后。
42 | 反思:
43 |     自我反思，观察以前的执行记录，思考概念拆解是否完整、准确。
44 |     一步步思考是否每一个的关键概念或要素的查询都得到了准确的结果。
45 |     反思你已经得到哪个要素/概念。你得到的要素/概念取值是否正确。从当前的信息中还不能得到哪些要素/概念。
46 |     每个反思一行，行前以' -'开始。
47 | 思考: 观察执行记录和你的自我反思，并一步步思考
48 |   A. 分析要素间的依赖关系，例如，如果需要获得要素X和Y的值：
49 |     i. 是否需要先获得X的值/定义，才能通过X来获得Y？
50 |     ii. 如果先获得X，是否可以通过X筛选Y，减少穷举每个Y的代价？
51 |     iii. X和Y是否存在在同一数据源中，能否在获取X的同时获取Y？
52 |     iv. 是否还有更高效或更聪明的办法来查询一个概念或要素？
53 |     v. 如果上一次尝试查询一个概念或要素时失败了，是否可以尝试从另一个资源中再次查询？
54 |     vi. 诸如此类，你可以扩展更多的思考 ...
55 |   B. 根据以上分析，排列子要素间的查询优先级
56 |   C. 找出当前需要获得取值的子要素
57 |   注意，不要对要素的取值/定义做任何假设，确保你的信息来自给定的数据源！
58 | 推理: 根据你的反思与思考，一步步推理被选择的子要素取值的获取方式。如果前一次的计划失败了，请检查输入中是否包含每个概念/要素的明确定义，并尝试细化你的查询描述。
59 | 计划: 严格遵守以下规则，计划你的当前动作。
60 |   A. 详细列出当前动作的执行计划。只计划一步的动作。PLAN ONE STEP ONLY!
61 |   B. 一步步分析，包括数据源，对数据源的操作方式，对数据的分析方法。有哪些已知常量可以直接代入此次分析。
62 |   C. 不要尝试计算文件的每一个元素，这种计算代价太高，是严格禁止的。你可以通过分析找到更有效的方法，比如条件筛选。
63 |   D. 上述分析是否依赖某个要素的取值/定义，且该要素的取值/定义尚未获得。如果是，重新规划当前动作，确保所有依赖的要素的取值/定义都已经获得。
64 |   E. 不要对要素的取值/定义做任何假设，确保你的信息来自给定的数据源。不要编造信息。DO NOT MAKE UP ANY INFORMATION!!!
65 |   F. 确保你执行的动作涉及的所有要素都已获得确切的取值/定义。
66 |   G. 如果全部子任务已完成，请用FINISH动作结束任务。输出FINISH前请确认任务确实已经完成：调用工具后，必须在当前的任务执行记录中看到该工具的output才算完成。注意，FINISH的输出格式与其他工具或actions的输出格式一样。
67 |   H. 如果出现“结果:Error: 找不到工具或指令....”之类的提示，说明action（工具或指令）的输出格式可能不对。注意，FINISH的输出格式也和其他工具的一样。
68 | 
69 | （2）然后，根据以下格式说明，输出你选择执行的动作/工具:
70 | {format_instructions}
71 | 
72 | 


--------------------------------------------------------------------------------
/prompts/tools/excel_analyser.txt:
--------------------------------------------------------------------------------
 1 | 你的任务是先分析，再生成代码
 2 | 
 3 | 请根据用户的输入，一步步分析：
 4 | （1）用户的输入是否依赖某个条件，而这个条件没有明确赋值？
 5 | （2）我是否需要对某个变量的值做假设？
 6 | 
 7 | 如果我需要对某个变量的值做假设，请直接输出：
 8 | ```python
 9 | print("我需要知道____的值，才能生成代码。请完善你的查询。") # 请将____替换为需要假设的条件
10 | ```
11 | 
12 | 否则，生成一段Python代码，分析指定文件的内容。
13 | 
14 | 你可以使用的库只包括：Pandas, re, math, datetime, openpyxl
15 | 确保你的代码只使用上述库，否则你的代码将无法运行。
16 | 
17 | 给定文件为：
18 | {filename}
19 | 
20 | 文件内容样例：
21 | {inspections}
22 | 
23 | 你输出的Python代码前后必须有markdown标识符，如下所示：
24 | ```python
25 | # example code
26 | print('hello world')
27 | ```
28 | 
29 | 确保你的代码是可以运行的，文件名直接写死在代码里即可。
30 | 你生成代码中所有的常量都必须来自我给你的信息或来自文件本身。不要编造任何常量。
31 | 如果常量缺失，你的代码将无法运行。你可以拒绝生成代码，但是不要生成编造的代码。
32 | 确保你生成的代码最终以print的方式输出结果(回答用户的问题)。
33 | 
34 | 用户输入：
35 | {query}
36 | 


--------------------------------------------------------------------------------
/readme_CN.md:
--------------------------------------------------------------------------------
 1 | # GPT 自动化多模态任务项目
 2 | **Language:** [![English](https://img.shields.io/badge/Language-English-blue)](/README.md)
 3 | 
 4 | 本项目展示了如何利用 GPT 自动化检索仓库内的文件（如 PDF、XLS、Word 等）并完成多模态任务。例如，可将家庭摄像头的视频帧送入仓库，利用大模型对世界的理解力，自动判断家中是否发生了危险情况。
 5 | 
 6 | ## 简单的运行展示
 7 | ### 使用示例
 8 | ![命令窗口对话](image/chat.png)
 9 | 运行 `main.py` 后，在命令窗口中对话。agent 可以自行拆解关键概念（如“销售额不达标”），并思考可以执行的操作（如调用工具）。这些操作基于 chain of thought 技术，具体的思维树 main 提示词可见 `prompts\main\main.txt`。agent 输出的自然语言代表其具象的思维：人类大脑里的思维还要更复杂一些，有时可以称为抽象的符号，而 agent 的思维是纯自然语言。
10 | ![thought](image/thought0.png)
11 | ### 多轮对话任务完成示例
12 | 使用`ListDirectory`工具，如图可正确输出json格式调用工具列出仓库所有文件，并以此展开下一轮思考。
13 | ![正确输出 JSON 格式调用工具](image/tool0.png)
14 | 如执行工具“AskDocument”提问以调取另一个大模型来获取想要知道的文档内容。
15 | ![执行工具 AskDocument](image/tool1.png)
16 | 
17 | ### 当前工具列表
18 | ![所有工具](image/tool_all.png)
19 | agent 作为主要的调度者，可以调用多个 AI 来完成写邮件、发送邮件、问询文档内容等操作。其中，向另一个 AI 问询文档中的内容或结论，这种方式比 RAG 更适合 auto agent 场景。
20 | - **openimage**: 用于打开仓库的图像以加入对话，赋予agent查询图像的能力。
21 | - **ExcelTool**: 可以获取Excel表格的所有列以及前三行，送入agent让agent判断表格内容是否是想要的。
22 | ![检查 Excel 表格](image/inspectExcel.png)
23 | - **PythonTool**: 定义了一个AI接受agent的query(包含需要分析的Excel文件路径)，AI可以编写code并运行来计算加总表格内的一些变量来分析表格。
24 | ![运行代码](image/run_code.png)
25 | 
26 | agent 只作为调度者，具体的子任务交给其他 AI 完成，不占用 main_prompt 的 token；这样既节省 token，也避免 agent 因中间夹杂多个子任务而忘记最终目的是完成用户的 query。
27 | 
28 | 
29 | ### 未来计划
30 | - 增加 UI，以便在聊天窗口中输入图像。
31 | - 增加对 Word 或网页图文格式的解析。
32 | 
33 | ## 环境配置
34 | 
35 | ### 第一步：设置 .env
36 | 将项目中的 `.env` 文件中的 `OPENAI_API_KEY` 配置为自己的 API 密钥。
37 | ```
38 | OPENAI_API_KEY=sk-xxxx
39 | ```
40 | 需要在 [此处](https://devcto.com/) 购买 API 密钥。
41 | 
42 | ### 第二步：设置本地库
43 | 执行下面命令：
44 | ```
45 | export HNSWLIB_NO_NATIVE=1
46 | ```
47 | ### 第三步：安装依赖包
48 | 
49 | 执行下面命令：
50 | ```
51 | pip install -r requirements.txt
52 | ```
53 | 
54 | ### 第四步：运行
55 | 
56 | ##### 运行 main.py 文件
57 | 
58 | ##### 然后在界面里面输入问题：
59 | 🤖：有什么可以帮您？
60 | 👨：9月份的销售额是多少（需要自己输入）
61 | >>>>Round: 0<<<<
62 | 
63 | ##### 问题参考：
64 | * 9月份的销售额是多少
65 | * 销售总额最大的产品是什么
66 | * 帮我找出销售额不达标的供应商
67 | * 给这两家供应商发一封邮件通知此事
68 | * 对比8月和9月销售情况，写一份报告
69 | * 总结仓库图像内容
70 | 
71 | 此为多模态的基础版本；由于 langchain 版本更新，后续还需继续适配更新。
72 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | colorama==0.4.6
 2 | langchain==0.1.12
 3 | langchain_experimental==0.0.54
 4 | langchain_openai==0.0.8
 5 | pandas==2.2.1
 6 | pydantic==1.10.12
 7 | python-dotenv==1.0.1
 8 | chroma-hnswlib==0.7.2
 9 | chromadb==0.4.5
10 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | # 可能需要导入其他一些必要的模块
 3 | from Tools.PythonTool import ExcelAnalyser
 4 | # 加载环境变量
 5 | from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
 6 | from langchain.schema.output_parser import StrOutputParser
 7 | from dotenv import load_dotenv, find_dotenv
 8 | _ = load_dotenv(find_dotenv())
 9 | from langchain_core.prompts import PromptTemplate
10 | from Agent.AutoGPT import AutoGPT
11 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings
12 | from langchain_community.vectorstores import Chroma
13 | from langchain.schema import Document
14 | from Tools import *
15 | from Tools.PythonTool import ExcelAnalyser
16 | from Agent.Action import Action
17 | from langchain.tools.render import render_text_description
18 | # 导入其他可能需要的模块或类
19 | from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
20 | import re
# Tools whose rendered text descriptions are written to tools_description.txt
# further down in this script.
tools = [
    document_qa_tool,
    document_generation_tool,
    email_tool,
    excel_inspection_tool,
    directory_inspection_tool,
    finish_placeholder,
    # ExcelAnalyser is built from its prompt file and exposed through as_tool().
    ExcelAnalyser(
        prompt_file="./prompts/tools/excel_analyser.txt",
        verbose=True
    ).as_tool()
]

# Text rendering of the tool list; used both as the `tools` prompt variable
# and as the content dumped to disk below.
rendered_tools_description = render_text_description(tools)
# Path of the main prompt template and the working directory substituted into it.
main_prompt_file: str = "./prompts/main/main.txt"
work_dir: str = "./data"
# Parser built around the Action model; its format instructions are inserted
# into the main prompt below.
output_parser = PydanticOutputParser(pydantic_object=Action)
def __chinese_friendly(string) -> str:
    """Re-serialise single-line JSON objects so non-ASCII text stays readable.

    Each line of ``string`` that starts with ``{`` and ends with ``}`` is
    parsed as JSON and dumped again with ``ensure_ascii=False``, which turns
    ``\\uXXXX`` escapes back into the characters they encode. Every other
    line, and any brace-delimited line that is not valid JSON, is returned
    unchanged.

    Args:
        string: Text to process; it is split on ``'\\n'``.

    Returns:
        The text with the same line structure, with JSON lines re-serialised.
    """
    lines = string.split('\n')
    for i, line in enumerate(lines):
        if line.startswith('{') and line.endswith('}'):
            try:
                lines[i] = json.dumps(json.loads(line), ensure_ascii=False)
            except ValueError:
                # json.JSONDecodeError is a ValueError: the line only looked
                # like JSON, so keep it as it was. Anything else (for example
                # KeyboardInterrupt) is deliberately allowed to propagate.
                pass
    return '\n'.join(lines)
47 | 
# Load the main prompt template and pre-fill the fields known up front; the
# template's other placeholders are not supplied here.
main_prompt = PromptTemplate.from_file(main_prompt_file).partial(
    work_dir=work_dir,
    tools=rendered_tools_description,
    format_instructions=__chinese_friendly(output_parser.get_format_instructions()),
)

# Dump the rendered tool descriptions, preceded by a header line, to a text file.
output_file = 'tools_description.txt'
with open(output_file, 'w', encoding='utf-8') as file:
    file.writelines(("Rendered Tools Description:\n", rendered_tools_description))
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/tools_description.txt:
--------------------------------------------------------------------------------
1 | Rendered Tools Description:
2 | AskDocument: AskDocument(filename: str, query: str) -> str - 根据一个Word或PDF文档的内容，回答一个问题。考虑上下文信息，确保问题对相关概念的定义表述完整。
3 | GenerateDocument: GenerateDocument(query: str, verbose=False) - 根据需求描述生成一篇正式文档
4 | SendEmail: SendEmail(to: str, subject: str, body: str, cc: str = None, bcc: str = None) -> str - 给指定的邮箱发送邮件。确保邮箱地址是xxx@xxx.xxx的格式。多个邮箱地址以';'分割。
5 | InspectExcel: InspectExcel(filename: str, n: int = 3) -> str - 探查表格文件的内容和结构，只展示它的列名和前n行，n默认为3
6 | ListDirectory: ListDirectory(path: str) -> str - 探查文件夹的内容和结构，展示它的文件名和文件夹名
7 | FINISH: FINISH() - 用于表示任务完成的占位符工具
8 | AnalyseExcel: AnalyseExcel(query, filename) - 通过程序脚本分析一个结构化文件（例如excel文件）的内容。    输人中必须包含文件的完整路径和具体分析方式和分析依据，阈值常量等。


--------------------------------------------------------------------------------