├── .gitignore
├── imgs
├── logo.webp
└── solve.png
├── main.py
├── readme.md
├── readme_en.md
├── requirements.txt
└── src
└── action
├── Calculate.py
└── Programmer.py
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | .DS_Store
3 | tmp/
4 | data/
5 | pdf/
6 | .idea/
7 | logs/
8 |
9 | *.jsonl
10 | *.json
11 | *.txt
12 | localFile/
13 | # ./generate_data/*.jsonl
14 | # ./generate_data/*/*/*.jsonl
15 |
16 | # Byte-compiled / optimized / DLL files
17 | __pycache__/
18 | *.py[cod]
19 | *$py.class
20 |
21 | # C extensions
22 | *.so
23 |
24 | # merged_weights
25 | hf_merge/
26 |
27 | # Distribution / packaging
28 | .Python
29 | build/
30 | develop-eggs/
31 | dist/
32 | downloads/
33 | eggs/
34 | .eggs/
35 | lib/
36 | lib64/
37 | parts/
38 | sdist/
39 | var/
40 | wheels/
41 | share/python-wheels/
42 | *.egg-info/
43 | .installed.cfg
44 | *.egg
45 | MANIFEST
46 |
47 | # PyInstaller
48 | # Usually these files are written by a python script from a template
49 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
50 | *.manifest
51 | *.spec
52 |
53 | # Installer logs
54 | pip-log.txt
55 | pip-delete-this-directory.txt
56 |
57 | # Unit test / coverage reports
58 | htmlcov/
59 | .tox/
60 | .nox/
61 | .coverage
62 | .coverage.*
63 | .cache
64 | nosetests.xml
65 | coverage.xml
66 | *.cover
67 | *.py,cover
68 | .hypothesis/
69 | .pytest_cache/
70 | cover/
71 |
72 | # Translations
73 | *.mo
74 | *.pot
75 |
76 | # Django stuff:
77 | *.log
78 | local_settings.py
79 | db.sqlite3
80 | db.sqlite3-journal
81 |
82 | # Flask stuff:
83 | instance/
84 | .webassets-cache
85 |
86 | # Scrapy stuff:
87 | .scrapy
88 |
89 | # Sphinx documentation
90 | docs/_build/
91 |
92 | # PyBuilder
93 | .pybuilder/
94 | target/
95 |
96 | # Jupyter Notebook
97 | .ipynb_checkpoints
98 |
99 | # IPython
100 | profile_default/
101 | ipython_config.py
102 |
103 | # pyenv
104 | # For a library or package, you might want to ignore these files since the code is
105 | # intended to run in multiple environments; otherwise, check them in:
106 | # .python-version
107 |
108 | # pipenv
109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
112 | # install all needed dependencies.
113 | #Pipfile.lock
114 |
115 | # poetry
116 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
117 | # This is especially recommended for binary packages to ensure reproducibility, and is more
118 | # commonly ignored for libraries.
119 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
120 | #poetry.lock
121 |
122 | # pdm
123 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
124 | #pdm.lock
125 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
126 | # in version control.
127 | # https://pdm.fming.dev/#use-with-ide
128 | .pdm.toml
129 |
130 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
131 | __pypackages__/
132 |
133 | # Celery stuff
134 | celerybeat-schedule
135 | celerybeat.pid
136 |
137 | # SageMath parsed files
138 | *.sage.py
139 |
140 | # Environments
141 | .env
142 | .venv
143 | env/
144 | venv/
145 | ENV/
146 | env.bak/
147 | venv.bak/
148 |
149 | # Spyder project settings
150 | .spyderproject
151 | .spyproject
152 |
153 | # Rope project settings
154 | .ropeproject
155 |
156 | # mkdocs documentation
157 | /site
158 |
159 | # mypy
160 | .mypy_cache/
161 | .dmypy.json
162 | dmypy.json
163 |
164 | # Pyre type checker
165 | .pyre/
166 |
167 | # pytype static type analyzer
168 | .pytype/
169 |
170 | # Cython debug symbols
171 | cython_debug/
172 |
173 | # PyCharm
174 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
175 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
176 | # and can be added to the global gitignore or merged into this file. For a more nuclear
177 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
178 | #.idea/
179 |
180 |
--------------------------------------------------------------------------------
/imgs/logo.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XiangJinyu/Math-Multi-Agent/07c44240f490b3d7414d2087d809aa0998523d12/imgs/logo.webp
--------------------------------------------------------------------------------
/imgs/solve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XiangJinyu/Math-Multi-Agent/07c44240f490b3d7414d2087d809aa0998523d12/imgs/solve.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | from openai import OpenAI
3 | from src.action.Calculate import WolframAlphaQuery, simple_calculate
4 | from src.action import Programmer
5 | print('ok')
6 | import openai
7 | import os
8 | import re
9 | import ast
10 | from datetime import datetime
11 | import time
12 |
# Key and endpoint for the GPT service. Both values are placeholder
# strings and have to be replaced with real settings before running.
api_key_1 = "输入GPT密钥"
base_url_1 = "输入转接地址"

# Key and endpoint for the Claude service (also placeholders).
api_key_2 = "输入Claude密钥"
base_url_2 = "输入转接地址"

# Client used by askLLM, and by actLLM when the "GPT" model is requested.
GPT_client = openai.Client(api_key=api_key_1, base_url=base_url_1)

# Client used by actLLM when the "Claude" model is requested; it is built
# with the same OpenAI-style client class, pointed at base_url_2.
Claude_client = OpenAI(api_key=api_key_2, base_url=base_url_2)
22 |
# Problem statement (multiple-choice, written in Chinese) that main() sends
# to the models as the task to solve. Replace this text to solve another problem.
question = """
3、A 与 B 二人进行 “ 抽鬼牌 ”游戏 。游戏开始时, A 手中有n张两两不同的牌 。 B 手上有n+1张牌,其中n张牌与 A 手中的牌相同,另一张为“鬼牌 ”,与其他所有牌都不同。游戏规则为:

i) 双方交替从对方手中抽取一张牌, A 先从 B 手中抽取。

ii) 若某位玩家抽到对方的牌与自己手中的某张牌一致,则将两张牌丢弃。

iii) 最后剩一张牌(鬼牌)时,持有鬼牌的玩家为输家。

假设每一次抽牌从对方手上抽到任一张牌的概率都相同,请问下列n中哪个n使 A 的胜率最大?(单选题)


A.n = 31


B.n = 32


C.n = 999


D.n = 1000

E.对所有的n,A 的胜率都一样
"""
48 |
49 |
def askLLM(messages, max_retries=10, delay=2):
    """Send a chat request through ``GPT_client``, retrying on failure.

    Args:
        messages: Chat messages forwarded to the completion endpoint.
        max_retries: Maximum number of attempts before giving up.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        The text content of the first choice of the response, or ``None``
        when every attempt raised an exception (or ``max_retries`` is 0).
    """
    MODEL = "gpt-4-turbo-2024-04-09"

    for attempt in range(max_retries):
        try:
            response = GPT_client.chat.completions.create(
                model=MODEL, messages=messages, temperature=0.7, max_tokens=3000
            )
            return response.choices[0].message.content
        except Exception as e:
            # Any failure is reported and retried; this is a deliberate
            # best-effort wrapper around the remote call.
            print(f"尝试 {attempt + 1}/{max_retries} 失败: {e}")
            # Only wait when another attempt will follow; sleeping after the
            # final failure would just delay the caller for nothing.
            if attempt + 1 < max_retries:
                time.sleep(delay)

    print("重试次数已耗尽,未能成功获取响应。")
    return None
76 |
77 |
def actLLM(messages, the_model, max_retries=10, delay=2):
    """Send a chat request to the selected backend, retrying on failure.

    Args:
        messages: Chat messages forwarded to the completion endpoint.
        the_model: Backend selector, either ``"GPT"`` (served by
            ``GPT_client``) or ``"Claude"`` (served by ``Claude_client``).
        max_retries: Maximum number of attempts before giving up.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        The text content of the first choice of the response, or ``None``
        when every attempt raised an exception (or ``max_retries`` is 0).

    Raises:
        ValueError: If ``the_model`` is not one of the supported selectors.
    """
    model_names = {
        "GPT": "gpt-4-turbo-2024-04-09",
        "Claude": "claude-3-opus-20240229",
    }
    # Reject unknown selectors up front: with no matching branch the retry
    # loop would neither return, raise, nor advance its counter, so it
    # would spin forever.
    if the_model not in model_names:
        raise ValueError(
            f"Unsupported model {the_model!r}; expected 'GPT' or 'Claude'"
        )

    for attempt in range(max_retries):
        try:
            # The client is looked up inside the try block so that any
            # failure while reaching the backend goes through the retry path.
            client = GPT_client if the_model == "GPT" else Claude_client
            response = client.chat.completions.create(
                model=model_names[the_model],
                messages=messages,
                temperature=0.7,
                max_tokens=3000,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"尝试 {attempt + 1}/{max_retries} 失败: {e}")
            # No point in waiting once the last attempt has failed.
            if attempt + 1 < max_retries:
                time.sleep(delay)

    print("重试次数已耗尽,未能成功获取响应。")
    return None
105 |
106 |
def extract_code_block(code_block):
    """Return the body of the first ```python fenced block in the text.

    Args:
        code_block: Text that may contain a fenced Python code block.

    Returns:
        Everything between the opening ```python marker and the next ```
        marker (surrounding newlines included), or the string 'No code'
        when no such block is present.
    """
    match = re.search(r'```python(.*?)```', code_block, re.DOTALL)
    if match is None:
        return 'No code'
    # Round-tripping through UTF-8 with errors ignored silently drops
    # characters that cannot be encoded (such as lone surrogates). With
    # errors='ignore' the encode step cannot raise UnicodeEncodeError, so no
    # exception handler is needed here.
    return match.group(1).encode('utf-8', 'ignore').decode('utf-8')
118 |
119 |
def choose_action(list, message, question, the_model):
    """Run the action named by an ``[action, query]`` pair and return its result.

    Args:
        list: Sequence whose first item is the action name and whose second
            item is the text handed to that action.
        message: Conversation history; its ``str()`` form is included in the
            prompt of the LLM-backed actions.
        question: Original problem statement, forwarded to the programmer
            action.
        the_model: Model selector passed through to ``actLLM``.

    Returns:
        The output of the selected action, ``True`` for "Resolve", or
        "调用出错" when the action name is not recognised.
    """
    selected = list[0]
    payload = list[1]

    # "Resolve" does no work here; it only signals that the caller is done.
    if selected == "Resolve":
        return True

    if selected == "wolfram_alpha":
        engine = WolframAlphaQuery('输入你的wolfram_alpha key')
        raw_reply = engine.send_query(payload)
        if not raw_reply:
            return "高级计算出错"
        result = engine.process_response(raw_reply)
        print(result)
        return result

    if selected == "simple_calculate":
        result = simple_calculate(payload)
        print("Result:", result)
        return result

    if selected == "programmer":
        coder = Programmer.Python_Programmer(f"问题:{question}\n目的:{payload}")
        outputs = coder.solve_problem()
        for item in outputs:
            print(item)
        return str(outputs)

    # The two remaining actions differ only in their system prompt; both send
    # the conversation history plus the current query to actLLM.
    if selected == "deep_thinking":
        system_prompt = """你是一名数学指导,think step by step,
根据目前的对话信息及问题,进行深度分析,提出建设性的建议和问题。分析当前亟待解决的问题以及可能解决问题的一些路径,并尝试解决它们。不需要提出接下来需要调用的方法。尽可能多地使用LateX公式进行推导。
"""
    elif selected == "deduction":
        system_prompt = """你是一名数学推导大师,think step by step,
根据目前的对话信息及问题,进行深度分析,严谨地使用LaTeX格式进行推导。
请先列出已知信息,然后再开始严谨的推导。
尽可能多地使用LateX公式进行推导。
不需要提出接下来需要调用的方法。
"""
    else:
        return "调用出错"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "历史对话内容如下:" + str(message) + "\n当前疑问:" + payload},
    ]
    return actLLM(conversation, the_model)
172 |
173 |
def main():
    """Solve the module-level ``question`` with several LLM roles and a reviewer.

    The run proceeds in three stages:

    1. ``askLLM`` is asked for ``m`` different solution directions, returned
       as a Python list literal and parsed with ``ast.literal_eval``.
    2. For each direction a "role" runs up to ``n`` rounds. In every round the
       main model reasons about the current state, a second prompt turns that
       reasoning into an ``[action, query]`` pair, and ``choose_action``
       executes it. The role stops early when the action is "Resolve"; on the
       last round it is forced to state its best answer. Each role's
       transcript is summarised with ``actLLM`` and written to
       ``answer_article_<k>.txt``.
    3. A reviewer prompt receives all summaries and its verdict is written to
       ``Final_answer.txt``.

    Progress, intermediate answers and timing are printed to stdout.
    """
    # Record the start time so the total run time can be reported.
    start_time = datetime.now()
    print("开始时间:", start_time)
    # m: number of roles that attempt the problem.
    m = 5
    # n: maximum number of rounds each role may take.
    n = 15
    # Ask for m solution directions so every role follows a different path.
    process_message = [{"role": "system",
                        "content": rf"""
你是一名数学家,思考{m}个不同的解题方向和思路,使用List的格式输出。(输出格式为:[str,str,...])
除了输出Python格式的List之外,不要输出任何其它的内容,你的回复将直接用于python程序的解析。
不要给出答案,而是给出不同解题的方向和思路。使用中文。
"""},
                       {"role": "user", "content": question}]
    way = askLLM(process_message)

    # Parse the reply into the list of directions.
    way_list = ast.literal_eval(way)
    print(way)

    # Prompt that turns the latest reasoning into an [action, query] pair.
    # It does not change between rounds, so it is built once and copied for
    # every request instead of being mutated.
    choose_template = [{"role": "system",
                        "content": r"""你将对对话解析,认为接下来需要调用的方法及输入的内容是什么?使用List的格式输出。(输出格式为:str,str)
可以调用的方法如下:
```
1.wolfram_alpha : 该方法可以对复杂的方程或函数进行求解。Output Format:["wolfram_alpha","LaTeX格式表示的公式(只有公式,没有任何其它解释)"]. Example:["wolfram_alpha","\int_{0}^{5} \frac{x^{3}}{5+3x} \cdot \frac{x+5}{x+2}"]
2.simple_calculate : 该方法可以对简单的方程或函数进行求解。Output Format:["simple_calculate","LaTeX格式表示的公式(只有公式,没有任何其它解释)"]. Example:["simple_calculate","\int_{0}^{5} \frac{x^{3}}{5+3x} dx"]
3.deep_thinking : 该方法会对当前已知信息进行深度分析,并给出接下来可能的探索方向。Output Format:["deep_thinking","目前的困惑或疑问"]. Example:["deep_thinking","如何找到一条路径以最大化右转数量,并最小化总等待时间?"]
4.deduction :该方法启动链式思考,分析需要得到的结论,进行逐步推导.Output Format:["deduction","LaTeX格式表示需要推导的公式或用自然语言描述需要推导的问题"]. Example:["deduction","对已知的方程进行推导"]
5.programmer :调用程序员,为你撰写Python代码求解出所需要的问题的数值解。调用该方法时,请完整精确说出需求(包括优化的方法等),因为程序员只能看到题目,无法看到你之前的推导过程。Output Format:["programmer","LaTeX格式表示需要推导的公式或用自然语言描述需要推导的问题,涉及的公式或问题及参数必须详细准确,不能有信息缺失"]. Example:["programmer","用蒙特卡洛算法,写一个python代码,求解一个a=3,b=4,高为5的椭球的体积"]
6.Resolve : 该方法代表你已经得出了最终答案,将输出最终答案并结束这个问题的解答。Output Format:["Resolve","最终答案/结论或求解结果"]. Example:["Resolve","通过上述推导步骤,我给出该题最终结果为18"]
```
只能调用一个方法,不要输出其它任何解释,只输出List.
LaTeX格式表示的公式禁止使用\[,\],\(以及\)。请统一使用$$。
Latex表达时,也请使用\来调用特殊格式,而不是双斜杠。因为你的公式直接被读取,不用担心被解析为转义字符。
"""}]

    total_summary = ""
    # Outer loop: one iteration per role. Slicing keeps at most m roles and
    # avoids an IndexError when the model returned fewer directions than asked.
    for process, direction in enumerate(way_list[:m]):
        # Conversation of the main reasoning model for this role.
        message = [{"role": "system",
                    "content": r"""你是一名数学学家,请先拆解问题,然后think step by step,
可以使用Latex表达式,进行公式推导,分析当前已知信息,然后根据目前的推导,决策接下来需要进行的步骤。
注意,LaTeX格式表示的公式禁止使用\[,\],\(以及\)。请统一使用$$。
你的推导尽可能严谨,因为这个题目是一个全球数学竞赛题(允许进行编程求解)将你的每个推导结论转为疑问,来进行进一步思考和自问。
在最后的结尾,必须加上你认为下一步可以做的行动,可以调用以下行为:
```
1.调用wolfram_alpha : 该方法可以对复杂的方程或函数进行求解,在说明使用该方法时,请一并使用LaTeX格式给出方程/公式
2.调用simple_calculate : 该方法可以对简单的方程或函数进行求解,如初等函数或简单积分微分,在说明使用该方法时,请一并使用LaTeX格式给出方程/公式
3.调用deep_thinking : 该方法会对当前已知信息进行深度分析,并给出接下来可能的探索方向
4.使用deduction : 该方法启动链式思考,分析需要得到的结论,进行逐步推导
5.使用programmer : 该方法调用程序员,是一个较通用的方法,为你撰写Python代码求解出所需要的问题的数值解,该方法应该作为优先级较高的方法。可以进行大规模计算以及解决复杂的数值解问题,也可在推导/证明等过程中方便试错和推测最终答案。你几乎可以在任何情况下使用这个方法。但如果较复杂的程序会导致超时(运行大于10分钟),这时可以考虑测试特殊情况或将范围缩小进行代码运行后参考,或者更换其它解决方式。
6.Resolve : 该方法代表你已经得出了最终答案,将输出最终答案并结束这个问题的解答。注意,这个方法不会帮助你解决问题,仅仅是在你确定最终答案后进行汇报的方式。
```
每次只能选择一个行动,并使用着重符号标明需要调用的行为。
始终使用中文输出回答。
"""}, {"role": "user", "content": question}, {"role": "user", "content": direction}]

        answer_article = ""

        # Roles alternate between Claude and GPT as the action model to
        # diversify the candidate answers.
        the_model = "Claude" if process % 2 == 0 else "GPT"

        # Inner loop: one iteration is one round of "think, pick action, act".
        for i in range(n):
            # Each round starts with the main model assessing the situation.
            answer = askLLM(message)
            answer_article += answer + "\n"
            print(answer)
            message.append({"role": "assistant", "content": answer})

            # Pick the next action from the latest reasoning only. A new list
            # is built each round; appending to the template itself would
            # make every earlier round leak into later requests.
            choose_message = choose_template + [
                {"role": "user", "content": str(message[-1])}]
            choice = askLLM(choose_message)
            answer_article += choice + "\n"
            print(choice)

            # Parse the chosen action from its list-literal text form.
            choice_list = ast.literal_eval(choice)

            # Execute the chosen action and collect its result.
            answer = choose_action(choice_list, message, question, the_model)
            answer_article += str(answer) + "\n"
            print(answer)

            # The role believes it has the final answer: stop its loop.
            if answer is True:
                final_answer = choice_list[1]
                answer_article += "本循环最终答案:" + final_answer + "\n"
                print(final_answer)
                break

            # Feed the action result back into the main conversation. Chat
            # content must be text, while tools may return None or another
            # non-string value, hence the explicit str().
            message.append({"role": "user", "content": str(answer)})

            # On the last round, force the role to commit to its best answer.
            if i == n - 1:
                message.append({"role": "user",
                                "content": "现在,根据之前所有的推导和证明,给出你认为最可能的答案或结果,哪怕这个答案你还不够确定,并且不需要给出下一步行动。"})
                answer = askLLM(message)
                answer_article += answer + "\n"
                print(answer)

        # Summarise the whole transcript of the role that just finished.
        summary_message = [{"role": "system",
                            "content": rf"""
根据题目及完整解题流程,给出一个精简的解题流程总结,step by step,给出每一步的解题的说明。你的总结应该尽可能专业,并包含涉及到的公式。也需要明确给出解题过程中最终得出的结论(如果严谨得出,信心程度为强,如果只是推测或其它方式,则为一般或弱)。
output format:
```
1.
2.
3.
...
最终结论:xxx
信心程度:强/一般/弱
```
"""},
                           {"role": "user", "content": f"\n原问题:{question}\n\n解题过程:{answer_article}"}]

        model = "Claude"
        summary = actLLM(summary_message, model)

        summary = f"关键解题步骤摘要及结论:\n\n{summary}\n\n"

        # Collect every role's summary for the final review.
        total_summary += f"# 第{process + 1}名角色解题关键过程及结果为:\n\n{summary}"

        # Put the summary in front of the full transcript for easier reading.
        answer_article = summary + answer_article

        # Persist this role's summary and transcript.
        with open(f"answer_article_{process + 1}.txt", "w", encoding="utf-8") as file:
            file.write(answer_article)

    # The reviewer compares all roles' results and picks the most likely answer.
    review_message = [{"role": "system",
                       "content": rf"""
根据问题及多个角色的推理过程及得到的答案,先给出你的总结和分析,step by step,最终给出你认为最可能的正确答案。
数值解,理论推导,解析解等方法都可以作为最终答案,主要关注给出每个答案的人数及过程是否正确。
"""},
                      {"role": "user", "content": f"\n问题:{question}\n\n解题结果:{total_summary}"}]
    print(total_summary)
    review = askLLM(review_message)

    print(review)

    # Report start and end times.
    end_time = datetime.now()
    print("开始时间:", start_time)
    print("结束时间:", end_time)

    # Elapsed time, converted from seconds to minutes.
    time_difference = end_time - start_time
    total_seconds = time_difference.total_seconds()
    total_minutes = total_seconds / 60
    print("耗时分钟:", total_minutes)

    # Persist the reviewer's final verdict.
    with open("Final_answer.txt", "w", encoding="utf-8") as file:
        file.write(review)
343 |
# Run the solver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Math-Multi-Agent
4 |
5 | > 2024年阿里全球数学竞赛AI赛道全球第2名项目(特工宇宙)解决方案。
6 |
7 |
8 |
9 | [![Forks][forks-shield]][forks-url]
10 | [![Stargazers][stars-shield]][stars-url]
11 | [![Issues][issues-shield]][issues-url]
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
25 | 简体中文 | English
26 |
27 | 报告Bug
28 | ·
29 | 提出新特性
30 |
87 |
88 |
18 |
19 |
20 |
21 |
22 |
24 | English | 简体中文
25 |
26 | Report Bug
27 | ·
28 | Request Feature
29 |
84 |
85 |