├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── Transparency_FAQ.md
├── assets
├── games_setting.png
├── k-level-reasoning-egs.png
├── k-level-reasoning-w-llms.png
├── k-level-reasoning.png
└── playground.png
├── exp
└── WAC
│ ├── human annotation.zip
│ └── records.zip
├── k-reasoning
├── G08A
│ ├── evaluate.py
│ ├── game.py
│ ├── main.py
│ ├── player
│ │ ├── __init__.py
│ │ ├── basic_player.py
│ │ ├── k_level_reasoning_player.py
│ │ └── reasoning_player.py
│ └── run.sh
├── README.md
└── SAG
│ ├── evaluate.py
│ ├── game.py
│ ├── main.py
│ ├── player
│ ├── __init__.py
│ ├── basic_player.py
│ ├── k_level_reasoning_player.py
│ └── reasoning_player.py
│ └── run.sh
├── requirements.txt
└── src
├── .DS_Store
├── Alympics.py
├── run.py
└── waterAllocation.py
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Ll]og/
33 | [Ll]ogs/
34 |
35 | # Visual Studio 2015/2017 cache/options directory
36 | .vs/
37 | # Uncomment if you have tasks that create the project's static files in wwwroot
38 | #wwwroot/
39 |
40 | # Visual Studio 2017 auto generated files
41 | Generated\ Files/
42 |
43 | # MSTest test Results
44 | [Tt]est[Rr]esult*/
45 | [Bb]uild[Ll]og.*
46 |
47 | # NUnit
48 | *.VisualState.xml
49 | TestResult.xml
50 | nunit-*.xml
51 |
52 | # Build Results of an ATL Project
53 | [Dd]ebugPS/
54 | [Rr]eleasePS/
55 | dlldata.c
56 |
57 | # Benchmark Results
58 | BenchmarkDotNet.Artifacts/
59 |
60 | # .NET Core
61 | project.lock.json
62 | project.fragment.lock.json
63 | artifacts/
64 |
65 | # ASP.NET Scaffolding
66 | ScaffoldingReadMe.txt
67 |
68 | # StyleCop
69 | StyleCopReport.xml
70 |
71 | # Files built by Visual Studio
72 | *_i.c
73 | *_p.c
74 | *_h.h
75 | *.ilk
76 | *.meta
77 | *.obj
78 | *.iobj
79 | *.pch
80 | *.pdb
81 | *.ipdb
82 | *.pgc
83 | *.pgd
84 | *.rsp
85 | *.sbr
86 | *.tlb
87 | *.tli
88 | *.tlh
89 | *.tmp
90 | *.tmp_proj
91 | *_wpftmp.csproj
92 | *.log
93 | *.tlog
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298 | *.vbp
299 |
300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301 | *.dsw
302 | *.dsp
303 |
304 | # Visual Studio 6 technical files
305 | *.ncb
306 | *.aps
307 |
308 | # Visual Studio LightSwitch build output
309 | **/*.HTMLClient/GeneratedArtifacts
310 | **/*.DesktopClient/GeneratedArtifacts
311 | **/*.DesktopClient/ModelManifest.xml
312 | **/*.Server/GeneratedArtifacts
313 | **/*.Server/ModelManifest.xml
314 | _Pvt_Extensions
315 |
316 | # Paket dependency manager
317 | .paket/paket.exe
318 | paket-files/
319 |
320 | # FAKE - F# Make
321 | .fake/
322 |
323 | # CodeRush personal settings
324 | .cr/personal
325 |
326 | # Python Tools for Visual Studio (PTVS)
327 | __pycache__/
328 | *.pyc
329 |
330 | # Cake - Uncomment if you are using it
331 | # tools/**
332 | # !tools/packages.config
333 |
334 | # Tabs Studio
335 | *.tss
336 |
337 | # Telerik's JustMock configuration file
338 | *.jmconfig
339 |
340 | # BizTalk build output
341 | *.btp.cs
342 | *.btm.cs
343 | *.odx.cs
344 | *.xsd.cs
345 |
346 | # OpenCover UI analysis results
347 | OpenCover/
348 |
349 | # Azure Stream Analytics local run output
350 | ASALocalRun/
351 |
352 | # MSBuild Binary and Structured Log
353 | *.binlog
354 |
355 | # NVidia Nsight GPU debugger configuration file
356 | *.nvuser
357 |
358 | # MFractors (Xamarin productivity tool) working folder
359 | .mfractor/
360 |
361 | # Local History for Visual Studio
362 | .localhistory/
363 |
364 | # Visual Studio History (VSHistory) files
365 | .vshistory/
366 |
367 | # BeatPulse healthcheck temp database
368 | healthchecksdb
369 |
370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
371 | MigrationBackup/
372 |
373 | # Ionide (cross platform F# VS Code tools) working folder
374 | .ionide/
375 |
376 | # Fody - auto-generated XML schema
377 | FodyWeavers.xsd
378 |
379 | # VS Code files for those working on multiple tools
380 | .vscode/*
381 | !.vscode/settings.json
382 | !.vscode/tasks.json
383 | !.vscode/launch.json
384 | !.vscode/extensions.json
385 | *.code-workspace
386 |
387 | # Local History for Visual Studio Code
388 | .history/
389 |
390 | # Windows Installer files from build outputs
391 | *.cab
392 | *.msi
393 | *.msix
394 | *.msm
395 | *.msp
396 |
397 | # JetBrains Rider
398 | *.sln.iml
399 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 |
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 |
5 | Resources:
6 |
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ALYMPICS: Language Agents Meet Game Theory
2 |
3 | **Alympics** is a platform that leverages Large Language Model (LLM) agents to facilitate investigations in game theory.
4 |
5 | See our paper: [ALYMPICS: LLM Agents Meet Game Theory -- Exploring Strategic Decision-Making with AI Agents](https://arxiv.org/pdf/2311.03220)
6 |
7 | ## Architecture of Alympics
8 |
9 |
10 |
11 | The architecture of Alympics comprises the Sandbox Playground and Players. The Sandbox Playground creates an environment where game settings, as specified by researchers, are executed. Agent players, along with the optional human players, actively engage in the game within this environment.
12 |
13 | - Sandbox Playground: The Sandbox Playground serves as the environment for conducting games, providing a versatile and controlled space for agent players' interactions.
14 | - Agent Players: Agent Players constitute an indispensable component of the Alympics framework, embodying LLM-powered agent entities that participate in strategic interactions within the Sandbox Playground.
15 |
16 |
17 | ## Contributions
18 |
19 | - The proposal of an original, LLM agent-based framework to facilitate game theory research.
20 | - The demonstration of Alympics’s application through a comprehensive pilot case study.
21 | - The emphasis on the significance of leveraging LLM agents to scrutinize strategic decision-making within a controlled and reproducible environment. This endeavor not only enriches the field of game theory but also has the potential to inspire research in other domains where decision-making assumes a pivotal role.
22 |
23 | ## Directory Structure
24 | The code directory structure is
25 | ```
26 | $src
27 | ├─ run.py
28 | ├─ Alympics.py # The basic Playground class, the Player class and the LLM API
29 | └─ waterAllocation.py # An example of using playground
30 | ```
31 | **Please complete the LLM configuration in Alympics.py first.**
32 |
33 |
34 | ## Example
35 | Alympics provides a research platform for conducting experiments on complex strategic gaming problems. As a pilot demonstration, we developed a game called the "Water Allocation Challenge" to illustrate how it can be leveraged for game theory research.
36 |
37 | The details can be found in our paper.
38 |
39 | ## Citation
40 |
41 | ```
42 | @misc{mao2023alympics,
43 | title={ALYMPICS: Language Agents Meet Game Theory},
44 | author={Shaoguang Mao and Yuzhe Cai and Yan Xia and Wenshan Wu and Xun Wang and Fengyi Wang and Tao Ge and Furu Wei},
45 | year={2023},
46 | eprint={2311.03220},
47 | archivePrefix={arXiv},
48 | primaryClass={cs.CL}
49 | }
50 | ```
51 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Security
4 |
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
6 |
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
8 |
9 | ## Reporting Security Issues
10 |
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 |
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14 |
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16 |
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 |
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 |
21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue
23 | * The location of the affected source code (tag/branch/commit or direct URL)
24 | * Any special configuration required to reproduce the issue
25 | * Step-by-step instructions to reproduce the issue
26 | * Proof-of-concept or exploit code (if possible)
27 | * Impact of the issue, including how an attacker might exploit the issue
28 |
29 | This information will help us triage your report more quickly.
30 |
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32 |
33 | ## Preferred Languages
34 |
35 | We prefer all communications to be in English.
36 |
37 | ## Policy
38 |
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40 |
41 |
42 |
--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
1 | # TODO: The maintainer of this repo has not yet edited this file
2 |
3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
4 |
5 | - **No CSS support:** Fill out this template with information about how to file issues and get help.
6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.
8 |
9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
10 |
11 | # Support
12 |
13 | ## How to file issues and get help
14 |
15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing
16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or
17 | feature request as a new Issue.
18 |
19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
22 |
23 | ## Microsoft Support Policy
24 |
25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
26 |
--------------------------------------------------------------------------------
/Transparency_FAQ.md:
--------------------------------------------------------------------------------
1 | # Transparency.md for ALYMPICS
2 |
3 | ## Overview
4 | ALYMPICS is a novel platform that integrates Large Language Model (LLM) agents into game theory scenarios. It aims to simulate human behavior in strategic decision-making environments, using a survival game model to explore the dynamics of multi-agent interactions and competition for limited resources.
5 |
6 | ## Objective
7 | The main goal of ALYMPICS is to provide a controlled, scalable, and reproducible environment to study human-like strategic behaviors in AI agents. By incorporating varying degrees of resource availability and diverse agent personalities, the platform facilitates an in-depth analysis of strategic engagements and adaptations in complex socioeconomic contexts.
8 |
9 | ## Audience
10 | This documentation is intended for researchers, developers, and enthusiasts in the fields of artificial intelligence, game theory, and multi-agent systems. It addresses the potential applications, limitations, and optimal use of the ALYMPICS platform.
11 |
12 | ## Key Features
13 | - Simulation of human-like behaviors in AI agents.
14 | - Dynamic scenarios with adjustable variables like resource availability.
15 | - Realistic modeling of multi-agent collaborations and competitions.
16 | - Insights into strategic decision-making within AI systems.
17 |
18 | ## Limitations
19 | - The behaviors of LLM agents in ALYMPICS might not perfectly mirror real human behaviors in every aspect.
20 | - The current version focuses on a specific game scenario; further development is required to generalize the platform to other contexts.
21 | - The platform's performance may vary based on the complexity of the game scenario and the computational resources available.
22 |
23 | ## Best Practices for Performance
24 | - Ensure adequate computational resources for running complex simulations.
25 | - Experiment with different settings of resource availability and agent personalities to observe varied strategic behaviors.
26 | - Use the platform's adjustable variables to tailor the simulation to specific research questions or hypotheses.
27 |
28 | ## Social Impact Statement
29 | ALYMPICS is designed to advance our understanding of AI's capabilities in strategic decision-making. While it offers valuable insights, we acknowledge the ethical considerations in deploying AI in real-world scenarios. We are committed to responsible research and development, ensuring that our work aligns with ethical guidelines and contributes positively to society. Our work is for research purposes only, and further testing and validation would be needed to use it outside of a research context.
30 |
31 | ## Feedback and Collaboration
32 | We encourage feedback and collaboration from our audience. If you have suggestions, questions, or would like to contribute to the project, please contact us at Shaoguang.Mao@microsoft.com.
33 |
34 | ## Future Updates
35 | The ALYMPICS platform is an evolving project. We plan to expand its scope to include a wider range of game theory scenarios and enhance the realism of agent behaviors. Stay tuned for updates in our code repository and other publications.
36 |
37 | ## Conclusion
38 | ALYMPICS represents a significant step forward in the integration of AI into game theory. We hope that this platform will be a valuable tool for researchers and developers interested in exploring the strategic capabilities of AI agents.
39 |
--------------------------------------------------------------------------------
/assets/games_setting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/games_setting.png
--------------------------------------------------------------------------------
/assets/k-level-reasoning-egs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/k-level-reasoning-egs.png
--------------------------------------------------------------------------------
/assets/k-level-reasoning-w-llms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/k-level-reasoning-w-llms.png
--------------------------------------------------------------------------------
/assets/k-level-reasoning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/k-level-reasoning.png
--------------------------------------------------------------------------------
/assets/playground.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/playground.png
--------------------------------------------------------------------------------
/exp/WAC/human annotation.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/exp/WAC/human annotation.zip
--------------------------------------------------------------------------------
/exp/WAC/records.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/exp/WAC/records.zip
--------------------------------------------------------------------------------
/k-reasoning/G08A/evaluate.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | import os
4 | from glob import glob
5 |
6 | import matplotlib.pyplot as plt
7 |
8 | class G08AEvaluator():
9 |
10 | def __init__(self, players, opponents, exp_rnd, exp_num, result_dir, output_dir) -> None:
11 | self.players = players.split(",")
12 | self.opponents = opponents.split(",")
13 |
14 | self.exp_rnd = exp_rnd
15 | self.exp_num = exp_num
16 |
17 | self.result_dir = result_dir
18 | self.output_dir = output_dir
19 |
20 | if not os.path.exists(self.output_dir):
21 | os.makedirs(self.output_dir, exist_ok=True)
22 |
def win_rate(self):
    """Print a table with the win rate of "Alex" for every player/opponent pair.

    For each pair, every file matching
    ``{result_dir}/{player}_VS_{opponent}*.json`` is loaded and its
    ``"winners"`` mapping (round number -> winners of that round) is scanned.
    Rounds numbered above ``self.exp_rnd`` are ignored.  The win rate is the
    number of counted rounds won by "Alex" divided by the number of counted
    rounds.

    A pair without any counted round is stored as -1 and printed as an empty
    cell.  The "Average" row is left empty for a player that lacks data for
    at least one opponent.  Nothing is returned; the table goes to stdout.
    """
    print("="*40+" Win Rate "+"="*40)

    players, opponents = self.players, self.opponents
    win_result = {}

    for agent in players:
        win_result.setdefault(agent, {})
        for computer in opponents:
            alex_wins = 0
            total_round = 0
            for path in glob(f"{self.result_dir}/{agent}_VS_{computer}*.json"):
                with open(path) as fin:
                    winners = json.load(fin)["winners"]
                for rnd, round_winners in winners.items():
                    if int(rnd) > self.exp_rnd:
                        continue
                    # Only rounds that are actually evaluated enter the
                    # denominator, so truncating with exp_rnd does not
                    # deflate the rate.
                    total_round += 1
                    alex_wins += sum(1 for player in round_winners if player == "Alex")

            # -1 marks "no data"; the printing code below renders negative
            # values as an empty cell instead of dividing by zero here.
            win_result[agent][computer] = alex_wins/total_round if total_round else -1

    average = {}
    for agent, rates in win_result.items():
        values = list(rates.values())
        complete = bool(values) and all(value >= 0 for value in values)
        average[agent] = sum(values)/len(values) if complete else -1

    print(f"{'':12s}\t"+"\t".join([f"{agent:7s}" for agent in players]))
    for computer in opponents:
        print(f"{computer:12s}",end="\t")
        print("\t".join([f"{win_result[agent][computer]:<7.2f}" if win_result[agent][computer]>=0 else f"{'':7s}" for agent in win_result]))

    print(f"{'Average':12s}",end="\t")
    print("\t".join([f"{average[agent]:<7.2f}" if average[agent]>=0 else f"{'':7s}" for agent in win_result]))

    print()
64 |
def adaption_index(self):
    """Print a table with the adaption index of "Alex" per player/opponent pair.

    For every file matching ``{result_dir}/{player}_VS_{opponent}*.json`` the
    ``"biddings"`` mapping (player name -> list of bids) is read.  In each of
    the first 10 rounds the target is 0.8 times the mean bid of all players,
    and the target deviation is the absolute distance of Alex's bid from it.
    The index of one experiment is::

        sum(deviation in rounds 6-10) / sum(deviation in rounds 1-5)

    Each table cell is the mean index over the pair's experiments.  An
    experiment whose first-half deviation is exactly 0 has no defined ratio
    and is left out.  The "Average" row is only filled for players that have
    a value for every opponent.  Nothing is returned; the table goes to
    stdout.
    """
    print("="*40+" Adaption Index "+"="*40)

    def mean(values):
        return sum(values)/len(values)

    players, opponents = self.players, self.opponents
    learning_result = {}

    for oppo in opponents:
        exp_result = {}
        for agent in players:
            for exp in glob(f"{self.result_dir}/{agent}_VS_{oppo}*.json"):
                with open(exp) as fin:
                    exp_ground = json.load(fin)["biddings"]

                target_div = []
                for r in range(0, 10):
                    bids = [exp_ground[p][r] for p in exp_ground]
                    target = sum(bids)/len(bids)*0.8
                    target_div.append(abs(exp_ground["Alex"][r]-target))

                first_half = sum(target_div[:5])
                second_half = sum(target_div[5:])
                exp_result.setdefault(agent, [])
                if first_half == 0:
                    # The ratio is undefined; skip instead of raising
                    # ZeroDivisionError and losing the whole table.
                    continue
                # [Target Deviation @ (second half)] / [Target Deviation @ (first half)]
                exp_result[agent].append(second_half/first_half)
        learning_result[oppo] = exp_result

    print(f"{'':8s}\t"+"\t".join([f"{agent:2s}" for agent in players]))

    for oppo in opponents:
        exp_result = learning_result[oppo]
        print(f"{oppo:8s}", end='\t')
        print('\t'.join([f"{mean(exp_result[agent]):<.2f}" if exp_result.get(agent) else f"{'':2s}" for agent in players]))

    agent_sum = {}
    for agent in players:
        agent_sum[agent] = [
            mean(learning_result[oppo][agent])
            for oppo in opponents
            if learning_result[oppo].get(agent)
        ]

    print(f"{'Average':8s}", end='\t')
    print('\t'.join([f"{mean(agent_sum[agent]):<.2f}" if agent_sum[agent] and len(agent_sum[agent])==len(opponents) else f"{'':2s}" for agent in players]))

    print()
112 |
def extract_PCoT_prediction(self):
    """
    Parse the prediction result of PCoT from the response.

    Every ``{result_dir}/pcot_VS_*.json`` file is read and the messages stored
    under ``["message"]["Alex"]`` are scanned at indices 2, 6, 10, ... (below
    41).  Each such message is parsed with a regular expression first; if
    that does not yield 4 or 5 numbers, the message is sent to the configured
    OpenAI chat model as a fallback.  If neither approach yields a result,
    the message is printed and processing stops.  Whatever was extracted up
    to that point is written to ``{output_dir}/pcot_prediction.json``, keyed
    by experiment file name (without ``.json``) and prediction index.
    """

    import time

    ENGINE = "gpt4-32k"
    ALL_PLAYERS = ['Alex', 'Bob', 'Cindy', 'David', 'Eric']
    prediction_pattern = re.compile(r"Player (\d)(\s\(\w+\))?:\s*(\d+)")

    def re_extract(message):
        """Return {player: number-as-string} from "Player N: X" lines, or {}."""
        numbers = [m.group(3) for m in prediction_pattern.finditer(message)]
        if len(numbers) == 5:
            names = ALL_PLAYERS
        elif len(numbers) == 4:
            # Four predictions: the message only covers the opponents.
            names = ALL_PLAYERS[1:]
        else:
            return {}
        return dict(zip(names, numbers))

    def gpt_extract(message):
        """Ask the chat model for the prediction; {} after two failed attempts."""
        # Imported here so that runs fully handled by the regular expression
        # do not need the openai package or any API configuration.
        import openai

        # Fill in your config information to conduct experiments.
        openai.api_type = ""
        openai.api_base = ""
        openai.api_version = ""
        openai.api_key = ""

        for attempt in range(2):
            try:
                response = openai.ChatCompletion.create(
                    engine=ENGINE,
                    messages = [{"role":"system", "content":"""Read the following statement and extract a prediction of the number chosen by each player in json format. Output format:{"Player": Player's number}"}"""}, {"role": "user", "content": message}],
                    temperature=0.7,
                    max_tokens=80,
                    top_p=0.95,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                return json.loads(response['choices'][0]['message']['content'])
            except Exception as e:
                print(e)
                if attempt == 0:
                    # Wait before the single retry.
                    time.sleep(15)
        return {}

    exps_result = {}
    failed = False
    for exp in glob(f"{self.result_dir}/pcot_VS_*.json"):
        with open(exp) as fin:
            messages = json.load(fin)["message"]["Alex"]
        exps_result[exp] = {}
        for i in range(2, min(len(messages), 41), 4):
            message = messages[i]["content"]
            result = re_extract(message) or gpt_extract(message)
            if not result:
                # Show the message that could not be parsed and stop.
                print(message)
                failed = True
                break
            exps_result[exp][(i-2)//4] = result
        if failed:
            break

    with open(f"{self.output_dir}/pcot_prediction.json","w") as fout:
        # Key by file name without the ".json" suffix.
        new_result = {os.path.basename(exp)[:-5]: exps_result[exp] for exp in exps_result}
        json.dump(new_result, fout, indent=4)
194 |
def prediction_accuracy(self):
    """Plot how far Alex's predictions are from the opponents' real bids.

    For every opponent type in ``self.opponents`` two series are computed,
    one per round (rounds 1-10):

    * PCoT: entries of ``<output_dir>/pcot_prediction.json`` whose name
      starts with ``pcot_VS_<oppo>_<digit>``, compared against the
      ``"biddings"`` stored in ``<result_dir>/<entry>.json``.
    * K-R: files ``<result_dir>/kr_VS_<oppo>_<n>.json``, whose predictions
      are read from ``logs["Alex"]["round<k>"]["prediction"]``.

    Each sample is ``|mean(predicted bids) - mean(actual bids)|`` over all
    players except "Alex".  The per-round means of both series are drawn
    and saved to ``<output_dir>/PA_<oppo>.pdf``.

    Raises:
        KeyError: if one of the ten rounds has no sample in either series.
    """
    # The extracted PCoT predictions are the same for every opponent, so
    # the file is parsed once instead of once per opponent.
    with open(f"{self.output_dir}/pcot_prediction.json") as fin:
        pcot_predictions = json.load(fin)

    for oppo in self.opponents:
        # Escape the opponent name so characters such as "." or "-" in it
        # are matched literally; raw strings keep "\d" a regex escape.
        oppo_pattern = re.escape(oppo)
        pcot_name = re.compile(rf"pcot_VS_{oppo_pattern}_\d")
        # Matched against the base name only, so the result directory (and
        # the platform's path separator) never leaks into the pattern.
        kr_name = re.compile(rf"kr_VS_{oppo_pattern}_\d+\.json")

        pcot_avg_div = {}
        for exp, result in pcot_predictions.items():
            if not pcot_name.match(exp):
                continue
            with open(f"{self.result_dir}/{exp}.json") as fin:
                exp_ground = json.load(fin)["biddings"]
            for r in result:
                try:
                    prediction = {p: int(result[r][p]) for p in result[r] if p != "Alex"}
                except (KeyError, TypeError, ValueError, OverflowError):
                    # The extracted prediction for this round is unusable.
                    continue
                round_ground = {p: exp_ground[p][int(r)] for p in exp_ground if p != "Alex"}
                predict_avg = sum(prediction.values()) / len(prediction)
                ground_avg = sum(round_ground.values()) / len(round_ground)
                pcot_avg_div.setdefault(int(r), []).append(abs(predict_avg - ground_avg))

        kr_avg_div = {}
        for exp in glob(f"{self.result_dir}/kr_VS_*.json"):
            if not kr_name.fullmatch(os.path.basename(exp)):
                continue
            with open(exp) as fin:
                logs = json.load(fin)
            exp_ground = logs["biddings"]
            result = logs["logs"]["Alex"]
            for r in range(0, 10):
                try:
                    prediction = result[f'round{r+1}']["prediction"]
                except (KeyError, TypeError):
                    # No prediction was logged for this round.
                    continue
                round_ground = {p: exp_ground[p][r] for p in exp_ground if p != "Alex"}
                predict_avg = sum(prediction.values()) / len(prediction)
                ground_avg = sum(round_ground.values()) / len(round_ground)
                kr_avg_div.setdefault(r, []).append(abs(predict_avg - ground_avg))

        # Export the prediction accuracy chart.
        x = [f"R{i+1}" for i in range(10)]
        y1 = [sum(pcot_avg_div[r]) / len(pcot_avg_div[r]) for r in range(10)]
        y2 = [sum(kr_avg_div[r]) / len(kr_avg_div[r]) for r in range(10)]

        plt.figure(figsize=(4, 3))

        # First line: PCoT, with a square marker on every round.
        plt.plot(x, y1, label=f'PCoT vs {oppo}', linewidth=2)
        for label, value in zip(x, y1):
            plt.plot(label, value, marker='s', color='#1f77b4')

        # Second line: K-Level Reasoning.
        plt.plot(x, y2, label=f'K-R vs {oppo}')
        for label, value in zip(x, y2):
            plt.plot(label, value, marker='s', color='#ff7f0e')

        plt.xticks(fontsize=12)
        plt.yticks(fontsize=14)
        plt.ylim(top=20)
        plt.legend(fontsize=12)

        plt.savefig(f'{self.output_dir}/PA_{oppo}.pdf', format='pdf', bbox_inches='tight')
        # Release the figure; otherwise one stays open per opponent.
        plt.close()

    print("="*20+f" Prediction Accuracy Metric has been exported to \"{self.output_dir}\" "+"="*20)
273 |
274 |
def main(args):
    """Build a ``G08AEvaluator`` from the parsed options and run the metrics.

    ``args`` must expose ``players``, ``opponents``, ``exp_rnd``,
    ``exp_num``, ``result_dir`` and ``output_dir``; they are passed
    positionally, in that order, to the evaluator.
    """
    evaluator = G08AEvaluator(
        args.players,
        args.opponents,
        args.exp_rnd,
        args.exp_num,
        args.result_dir,
        args.output_dir,
    )

    evaluator.win_rate()
    evaluator.adaption_index()

    # The Prediction Accuracy metric only applies to the pcot and kr
    # players, so it is left disabled by default:
    # evaluator.prediction_accuracy()
284 |
285 |
if __name__ == "__main__":
    import argparse

    # Command-line interface of the evaluation script; the parsed options
    # are handed to main() unchanged.
    parser = argparse.ArgumentParser()

    # String options, registered in the order they appear in --help.
    for option, fallback in (
        ("--players", "kr"),
        ("--opponents", "agent"),
        ("--result_dir", "result"),
        ("--output_dir", "output"),
    ):
        parser.add_argument(option, type=str, default=fallback)

    # Integer options; both default to 10.
    for option in ('--exp_rnd', '--exp_num'):
        parser.add_argument(option, type=int, default=10)

    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/k-reasoning/G08A/game.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 |
# Alias of the built-in round(), used below to round the target number and
# the displayed differences.
round_number = round
4 |
class G08A():
    """Referee of the "0.8 x average" survival game.

    Every round each surviving player bids a number.  The target is 0.8
    times the average bid; the bid closest to it wins and every other
    player loses 1 HP.  If all players bid the same number nobody wins.
    Players whose HP drops to 0 or below leave ``survival_players``.

    Attributes:
        all_players: every player that started the game.
        survival_players: players still taking part.
        round_winner: maps a round id to the list of winner names.
    """

    def __init__(self, players) -> None:
        # Shallow copies: eliminations must not shrink the caller's list.
        self.all_players = players[::]
        self.survival_players = players[::]
        self.round_winner = {}

    def daily_bidding(self, players):
        """Let every player act and return ``(average, target)``.

        ``target`` is ``0.8 * average`` rounded to two decimals.
        """
        total = 0
        for player in players:
            player.act()
            total += player.last_bidding

        average = total / len(players)
        target = round(average * 0.8, 2)

        return average, target

    def round_deduction(self, players, winner):
        """
        player who did not win loses 1 HP
        """
        for player in players:
            if player.name not in winner:
                player.deduction(1)

    def check_winner(self, players, target):
        """Return ``(winner_names, winning_bid)`` for the given target.

        The winning bid is the one closest to ``target``; when two bids are
        equally close the smaller one wins.  Every player who placed the
        winning bid is a winner.
        """
        win_bid = min((abs(player.last_bidding - target), player.last_bidding) for player in players)[1]
        winners = [player.name for player in players if player.last_bidding == win_bid]

        return winners, win_bid

    def check_tie(self, players):
        """Return True when at least two players all placed the same bid."""
        if len(players) < 2:
            return False
        return len({player.last_bidding for player in players}) == 1

    def run_single_round(self, round_id):
        """Play one round: collect bids, settle HP, notify and eliminate."""
        for player in self.survival_players:
            player.start_round(round_id)

        average, target = self.daily_bidding(self.survival_players)

        tie_status = self.check_tie(self.survival_players)
        if tie_status:  # If all players choose the same number, there is no winner.
            winner = []
        else:
            winner, winner_bid = self.check_winner(self.survival_players, target)
            winner_str = ", ".join(winner)

        self.round_winner[round_id] = winner

        # HP is deducted before the summary is built, so the summary and
        # the players' own warnings report the updated HP.
        self.round_deduction(self.survival_players, winner)

        history_biddings = {player.name: deepcopy(player.biddings) for player in self.survival_players}
        bidding_numbers = " + ".join(f"{player.last_bidding}" for player in self.survival_players)
        bidding_details = ", ".join(f"{player.name} chose {player.last_bidding}" for player in self.survival_players)
        diff_details = ", ".join(
            f"{player.name}: |{player.last_bidding} - {target}| = {round(abs(player.last_bidding - target))}"
            for player in self.survival_players
        )
        player_details = ", ".join(player.show_info() for player in self.survival_players)

        if tie_status:
            bidding_info = f"Thank you all for participating in Round {round_id}. In this round, {bidding_details}.\nAll players chose the same number, so all players lose 1 point. After the deduction, player information is: {player_details}."
        else:
            bidding_info = f"Thank you all for participating in Round {round_id}. In this round, {bidding_details}.\nThe average is ({bidding_numbers}) / {len(self.survival_players)} = {average}.\nThe average {average} multiplied by 0.8 equals {target}.\n{diff_details}\n{winner}'s choice of {winner_bid} is closest to {target}. Round winner: {winner_str}. All other players lose 1 point. After the deduction, player information is: {player_details}."

        survival_players = []
        dead_players = []
        for player in self.survival_players:
            win = player.name in winner
            player.notice_round_result(round_id, bidding_info, target, win, bidding_details, history_biddings)

            if player.hp <= 0:
                dead_players.append(player)
            else:
                survival_players.append(player)

        self.survival_players = survival_players

        # Tell the remaining LLM players who has just been eliminated.
        for out in dead_players:
            for other_player in survival_players:
                if other_player.is_agent:
                    other_player.message += [{"role":"system","content":f"{out.name}'s hp is below 0, so {out.name} has been eliminated from the challenge!"}]

        for player in self.survival_players:
            player.end_round()

        print("Round ",round_id,": ",bidding_details)

    def run_multi_round(self, max_round):
        """Play up to ``max_round`` rounds, numbered from 1.

        Every player receives a reference to ``round_winner`` as
        ``ROUND_WINNER`` before the first round.  The game stops early once
        nobody is left; continuing would divide by zero when averaging the
        bids of an empty player list.
        """
        for player in self.all_players:
            player.ROUND_WINNER = self.round_winner

        for i in range(1, max_round + 1):
            if not self.survival_players:
                break
            self.run_single_round(i)
--------------------------------------------------------------------------------
/k-reasoning/G08A/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | from player import *
5 | from game import G08A
6 |
# Fill in your config information to conduct experiments.
# NOTE(review): `openai` is not imported explicitly in this file; it
# presumably reaches this namespace through `from player import *` — confirm.
openai.api_type = ""
openai.api_base = ""
openai.api_version = ""
openai.api_key = ""

# Default engine name handed to every LLM-backed player built by build_player().
ENGINE = "gpt4-32k"
14 |
def build_player(strategy, name, persona, mean=50, std=0, player_names=None):
    """
    Player Factory

    Args:
        strategy: which kind of player to build.  "agent", "cot",
            "persona", "reflect", "refine", "pcot", "kr" and "spp" build
            LLM players using ``ENGINE``; "fix", "last", "mono" and
            "monorand" build a ``ProgramPlayer``.
        name: player name.
        persona: persona prompt handed to LLM players.
        mean, std: only used by ``ProgramPlayer``.
        player_names: names of all players; only used by the "kr" player.
            Defaults to an empty list.

    Raises:
        NotImplementedError: if ``strategy`` is not one of the names above.
    """
    # A fresh list per call instead of one shared mutable default.
    if player_names is None:
        player_names = []

    if strategy == "agent":
        return AgentPlayer(name, persona, ENGINE)
    elif strategy == "cot":
        return CoTAgentPlayer(name, persona, ENGINE)
    elif strategy == "persona":
        return PersonaAgentPlayer(name, persona, ENGINE)
    elif strategy == "reflect":
        return ReflectionAgentPlayer(name, persona, ENGINE)
    elif strategy == "refine":
        return SelfRefinePlayer(name, persona, ENGINE)
    elif strategy == "pcot":
        return PredictionCoTAgentPlayer(name, persona, ENGINE)
    elif strategy == "kr":
        return KLevelReasoningPlayer(name, persona, ENGINE, player_names)
    elif strategy == "spp":
        return SPPAgentPlayer(name, persona, ENGINE)
    elif strategy in ["fix", "last", "mono", "monorand"]:
        return ProgramPlayer(name, strategy, mean, std)
    else:
        raise NotImplementedError(f"Unknown player strategy: {strategy!r}")
39 |
40 |
def main(args):
    """Run experiments ``args.start_exp`` .. ``args.exp_num - 1``.

    Each experiment pits Alex (built with ``args.player_strategy``) against
    Bob, Cindy, David and Eric (built with ``args.computer_strategy``),
    plays ``args.max_round`` rounds and writes the winners, biddings,
    messages and logs to a JSON file inside ``args.output_dir``.
    """
    # Predefined persona prompt for every seat at the table.
    roster = ("Alex", "Bob", "Cindy", "David", "Eric")
    personas = {who: f"You are {who} and involved in a survive challenge. " for who in roster}

    for exp_no in range(args.start_exp, args.exp_num):
        player_names = list(roster)

        # The player under study; engine / k-level may be overridden for
        # ablation experiments.
        hero = build_player(args.player_strategy, "Alex", personas["Alex"], player_names=player_names)
        if args.player_engine:
            hero.engine = args.player_engine
        if args.player_k:
            hero.k_level = args.player_k
        players = [hero]

        # The four opponents all share the same strategy.
        for rival in roster[1:]:
            opponent = build_player(
                args.computer_strategy,
                rival,
                personas[rival],
                args.init_mean,
                args.norm_std,
                player_names=player_names,
            )
            players.append(opponent)

        # Run the multi-round game (default 10 rounds).
        Game = G08A(players)
        Game.run_multi_round(args.max_round)

        # Name of the record file; "fix"/"last" opponents also encode their
        # distribution parameters.
        if args.computer_strategy in ["fix", "last"]:
            prefix = f"{args.player_strategy}_VS_{args.computer_strategy}-{args.init_mean}-{args.norm_std}_{exp_no}"
        else:
            prefix = f"{args.player_strategy}_VS_{args.computer_strategy}_{exp_no}"

        output_file = f"{args.output_dir}/{prefix}.json"
        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        with open(output_file, "w") as fout:
            messages, biddings, logs = {}, {}, {}
            for participant in Game.all_players:
                # Only LLM players keep a conversation.
                if participant.is_agent:
                    messages[participant.name] = participant.message
                biddings[participant.name] = participant.biddings
                if participant.logs:
                    logs[participant.name] = participant.logs

            json.dump(
                {
                    "winners": Game.round_winner,
                    "biddings": biddings,
                    "message": messages,
                    "logs": logs,
                },
                fout,
                indent=4,
            )
95 |
if __name__ == "__main__":
    import argparse

    # Strategies backed by an LLM; Alex may use any of them.
    llm_strategies = ["agent", "cot", "pcot", "kr", "reflect", "persona", "refine", "spp"]
    # Opponents may additionally be scripted ("fix", "last", "mono", "monorand").
    opponent_strategies = ["agent", "fix", "last", "mono", "monorand"] + llm_strategies[1:]

    parser = argparse.ArgumentParser()

    parser.add_argument('--player_strategy', type=str, default="cot", choices=llm_strategies)
    parser.add_argument('--computer_strategy', type=str, choices=opponent_strategies, default="fix")
    parser.add_argument("--output_dir", type=str, default="result")
    parser.add_argument(
        "--init_mean", type=int, default=40,
        help="init mean value for computer player",
    )
    parser.add_argument(
        "--norm_std", type=int, default=5,
        help="standard deviation of the random distribution of computer gamers",
    )
    parser.add_argument('--max_round', type=int, default=10)
    parser.add_argument('--start_exp', type=int, default=0)
    parser.add_argument('--exp_num', type=int, default=10)
    parser.add_argument(
        '--player_engine', type=str, default=None,
        help="player's OpenAI api engine",
    )
    parser.add_argument(
        '--player_k', type=int, default=None,
        help="player's k-level (default 2)",
    )

    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/k-reasoning/G08A/player/__init__.py:
--------------------------------------------------------------------------------
1 | from .basic_player import ProgramPlayer
2 | from .reasoning_player import *
3 | from .k_level_reasoning_player import *
--------------------------------------------------------------------------------
/k-reasoning/G08A/player/basic_player.py:
--------------------------------------------------------------------------------
1 | from random import randint
2 | import numpy as np
3 |
class Player():
    """State and hooks shared by every participant of the game.

    A player starts with 10 HP, an empty list of bids and ``cur_round``
    set to -1.  ``act`` and ``notice_round_result`` must be provided by
    subclasses.
    """

    def __init__(self, name):
        self.name = name
        self.hp = 10
        self.biddings = []
        self.cur_round = -1

        self.logs = None

    @property
    def last_bidding(self):
        """Most recent bid; raises IndexError before the first bid."""
        return self.biddings[-1]

    def start_round(self, round: int):
        """Remember the id of the round that is about to start."""
        self.cur_round = round

    def act(self):
        """Place a bid for the current round (subclass responsibility)."""
        raise NotImplementedError

    def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings):
        """Receive the outcome of a round (subclass responsibility)."""
        raise NotImplementedError

    def end_round(self):
        """Hook called after a round; does nothing by default."""
        return None

    def deduction(self, deducted_hp):
        """Subtract ``deducted_hp`` from the player's HP."""
        self.hp = self.hp - deducted_hp

    def show_info(self, print_ = False):
        """Return a "NAME/HEALTH POINT" summary, optionally printing it."""
        summary = f"NAME:{self.name}\tHEALTH POINT:{self.hp}"
        if print_:
            print(summary + "\n")
        return summary
36 |
37 |
class ProgramPlayer(Player):
    """Scripted (non-LLM) player.

    Strategies:
        "fix":  bid drawn from a normal distribution N(mean, std).
        "last": like "fix", but the mean follows the previous round's target.
        "mono": bid ``mean`` itself and lower ``mean`` by ``std`` every round.
        "monorand": "mono" with ``std`` replaced by a random integer in
            ``[0, std]`` chosen at construction time.

    Every bid is truncated to an int and clamped to the range 1..100.
    """
    is_agent=False

    def __init__(self, name, strategy, mean, std):
        # Let the base class set name, hp, biddings, cur_round and logs so
        # the two classes cannot drift apart.
        super().__init__(name)

        self.strategy = strategy
        self.mean = mean
        self.std = std

        if self.strategy == "monorand":
            self.std = randint(0, std)
            self.strategy = "mono"

    def start_round(self, round):
        """Scripted players need no per-round preparation."""
        return

    def end_round(self):
        """For "mono", decrease the next bid by ``std``."""
        if self.strategy == "mono":
            self.mean -= self.std

    def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings):
        """For "last", centre the next bid on this round's target."""
        if self.strategy == "last":
            self.mean = round_target

    def set_normal(self, mean, std):
        """Set ``normal`` to True and replace ``mean`` and ``std``."""
        self.normal = True
        self.mean = mean
        self.std = std

    def act(self):
        """Append this round's bid to ``biddings``."""
        if self.strategy == "mono":
            bidding = self.mean
        else:
            bidding = np.random.normal(self.mean, self.std)
        # Truncate to an integer and keep it inside the legal range.
        bidding = min(max(int(bidding), 1), 100)
        self.biddings.append(bidding)
--------------------------------------------------------------------------------
/k-reasoning/G08A/player/k_level_reasoning_player.py:
--------------------------------------------------------------------------------
1 | import openai
2 | import time
3 | from copy import deepcopy
4 |
5 | from .reasoning_player import AgentPlayer
6 |
# Alias of the built-in round(): methods in this module take a parameter
# named `round`, which hides the built-in inside their bodies.
round_number = round
8 |
class KLevelReasoningPlayer(AgentPlayer):
    """LLM player that predicts its opponents before choosing a number.

    At the start of every round ``predict`` replays each opponent's history
    as a conversation and asks the model what that opponent would bid next.
    The predictions are then put into the player's own inquiry prompt.
    With ``k_level`` above 2 the player also simulates its own answer and
    the resulting round before predicting again.
    """

    INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
                   "Another game expert's prediction for the next round of other players is as follows: "
                   "{prediction}"
                   "According to the game expert's prediction, what number will you give in the next round is most likely to be close to the target number?")

    PREDICTION_GAME_SETTING = ("You are {name} and involved in a survive challenge. SURVIVE CHALLENGE!!! You are one of five players in the game. "
                               "Everyone is required to choose an integer between 1 and 100 in each round. "
                               "The player whose chosen number is closest to (0.8 * the average of all chosen numbers) wins the round without any HP deduction. All other players will have 1 HP deducted. "
                               "But if all players choose the same number, their health points are deducted together.")
    PREDICTION_INQUIRY = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
                          "Please choose an integer between 1 and 100 for this round.")
    PREDICTION_RESPONSE = "I choose {bidding}."

    def __init__(self, name, persona, engine, players):
        """``players`` is the list of all player names, this player included."""
        super().__init__(name, persona, engine)
        self.bidding_history = {}
        self.logs = {}

        # name -> list of past bids; replaced after every round by the
        # history the game hands to notice_round_result().
        self.history_biddings = {player: [] for player in players}
        self.round_result = {}

        self.k_level = 2

    def start_round(self, round):
        """Predict the opponents and append the resulting inquiry prompt."""
        prediction = self.predict(round)
        prediction = ", ".join([f"{player} might choose {prediction[player]}" for player in prediction])+". "
        self.message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round, prediction=prediction, hp=self.hp)}]

    def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings):
        """Record the round summary, bid details and bid history."""
        super().notice_round_result(round, bidding_info, round_target, win, bidding_details, history_biddings)
        self.round_result[round] = bidding_info
        self.bidding_history[round] = bidding_details
        self.history_biddings = history_biddings # {"Alex": [1,2,3]}

    def predict(self, round):
        """Return ``{player_name: predicted_bid}`` for round ``round``.

        Reads ``self.ROUND_WINNER`` (round id -> winner names), which
        ``G08A.run_multi_round`` assigns before the first round.  The
        prediction and the per-opponent transcripts are stored in
        ``self.logs["round<round>"]``.
        """

        def self_act(message):
            """Ask the model for this player's own simulated bid.

            The answer is appended to ``message`` (the simulated
            transcript), never to the real conversation in
            ``self.message``.
            """
            while True:
                try:
                    response = openai.ChatCompletion.create(
                        engine = self.engine,
                        messages = message,
                        temperature=0.7,
                        max_tokens=800,
                        top_p=0.95,
                        frequency_penalty=0,
                        presence_penalty=0,
                        stop=None)
                    response = response['choices'][0]['message']['content']
                    message.append({"role":"assistant","content":response})
                    break
                except Exception as e:
                    # Any failure (rate limit, network, ...) is retried.
                    print(e)
                    time.sleep(15)
            return self.parse_result(response)

        def add_warning(hp, win):
            """Feedback line for a replayed round; ``hp`` is the HP after it."""
            if not win:
                # The stricter threshold is tested first; tested second it
                # could never be reached.
                if hp <= 3:
                    return f"WARNING: You have lost 1 point of HP in this round! You now have only {hp} points of health left. You are in extreme DANGER and one step closer to death. "
                if hp < 5:
                    return f"WARNING: You have lost 1 point of HP in this round! You now have only {hp} points of health left. You are in DANGER and one step closer to death. "
                return f"WARNING: You have lost 1 point of HP in this round! You now have only {hp} points of health left. You are one step closer to death. "
            return "You have successfully chosen the number closest to the target number, which is the average of all players' selected numbers multiplied by 0.8. As a result, you have won this round. All other players will now deduct 1 HP. "

        # Work on copies: simulated rounds must not leak into real state.
        history_biddings = deepcopy(self.history_biddings)
        round_result = deepcopy(self.round_result)
        round_winner = deepcopy(self.ROUND_WINNER)
        self_hp = self.hp
        self_message = deepcopy(self.message)
        for k in range(self.k_level):
            prediction = {}
            logs = {}
            player_hp = {}
            k_round = round+k
            for player in history_biddings:
                if player == self.name: continue
                hp = 10

                print(f"Player {self.name} conduct predict {player}")
                message = [{
                    "role": "system",
                    "content": self.PREDICTION_GAME_SETTING.format(name=player)
                }]
                # Replay the opponent's past rounds as a conversation.
                for r in range(len(history_biddings[player])):
                    won = player in round_winner[r+1]
                    message.append({
                        "role": "system",
                        "content": self.PREDICTION_INQUIRY.format(name=player, round=r+1, hp=hp)
                    })
                    message.append({
                        "role": "assistant",
                        "content": self.PREDICTION_RESPONSE.format(bidding=history_biddings[player][r])
                    })
                    message.append({
                        "role": "system",
                        "content": round_result[r+1]
                    })
                    # Deduct first: in the real game the warning is issued
                    # after the HP deduction and quotes the remaining HP.
                    if not won:
                        hp -= 1
                    message.append({
                        "role": "system",
                        "content": add_warning(hp, won)
                    })

                # Predict the opponent's next move based on their historical information.
                if hp > 0:
                    message.append({
                        "role": "system",
                        "content": self.PREDICTION_INQUIRY.format(name=player, round=len(history_biddings[player])+1, hp=hp)
                    })
                    next_bidding = self.agent_simulate(message, engine=self.engine)
                    message.append({
                        "role": "assistant",
                        "content": next_bidding
                    })
                    prediction[player] = self.parse_result(next_bidding)
                else:
                    # An eliminated opponent is assumed to repeat its last bid.
                    prediction[player] = history_biddings[player][-1]
                logs[player] = message
                player_hp[player] = hp

            if k == self.k_level-2: break
            # If k-level >= 3, it is necessary to predict future outcomes.

            prediction_str = ", ".join([f"{player} might choose {prediction[player]}" for player in prediction])+". "
            self_message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=k_round, prediction=prediction_str, hp=self_hp)}]
            bidding = self_act(self_message)
            prediction = {**{self.name: bidding}, **prediction}
            player_hp[self.name] = self_hp

            # Settle the simulated round with the same rules as the game.
            Average = 0
            for player in prediction:
                Average += prediction[player]
            Average /= len(prediction)
            Target = round_number(Average * 0.8, 2)

            Tie_status = len(prediction)>=2 and len(set([prediction[player] for player in prediction]))==1
            if Tie_status:
                winners = []
            else:
                win_bid = sorted([(abs(prediction[player] - Target), prediction[player]) for player in prediction])[0][1]
                winners = [player for player in prediction if prediction[player]==win_bid]
                winner_str = ", ".join(winners)

            round_winner[k_round] = winners

            for player in prediction:
                if player not in winners:
                    player_hp[player] -= 1
            # Carry this player's simulated HP into the next simulated round.
            self_hp = player_hp[self.name]

            bidding_numbers = [f"{prediction[player]}" for player in prediction]
            for player in history_biddings:
                history_biddings[player].append(prediction[player])
            bidding_details = [f"{player} chose {prediction[player]}" for player in prediction]
            diff_details = [
                f"{player}: |{prediction[player]} - {Target}| = {round_number(abs(prediction[player] - Target))}"
                for player in prediction
            ]
            player_details = [f"NAME:{player}\tHEALTH POINT:{player_hp[player]}" for player in prediction]

            bidding_numbers = " + ".join(bidding_numbers)
            bidding_details = ", ".join(bidding_details)
            diff_details = ", ".join(diff_details)
            player_details = ", ".join(player_details)
            if Tie_status:
                bidding_info = f"Thank you all for participating in Round {k_round}. In this round, {bidding_details}.\nAll players chose the same number, so all players lose 1 point. After the deduction, player information is: {player_details}."
            else:
                bidding_info = f"Thank you all for participating in Round {k_round}. In this round, {bidding_details}.\nThe average is ({bidding_numbers}) / {len(prediction)} = {Average}.\nThe average {Average} multiplied by 0.8 equals {Target}.\n{diff_details}\n{winners}'s choice of {win_bid} is closest to {Target}. Round winner: {winner_str}. All other players lose 1 point. After the deduction, player information is: {player_details}."
            round_result[k_round] = bidding_info

        self.logs[f"round{round}"] = {
            "prediction": prediction,
            "logs": logs
        }
        return prediction

    def agent_simulate(self, message, engine):
        """Return the model's raw answer to ``message``, retrying on errors."""
        while 1:
            try:
                response = openai.ChatCompletion.create(
                    engine=engine,
                    messages = message,
                    temperature=0.7,
                    max_tokens=80,
                    top_p=0.9,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                RESPONSE = response['choices'][0]['message']['content']
                return RESPONSE
            except Exception as e:
                print(e)
                time.sleep(15)
211 |
212 |
213 |
--------------------------------------------------------------------------------
/k-reasoning/G08A/player/reasoning_player.py:
--------------------------------------------------------------------------------
1 | import openai
2 | import time
3 |
4 | from .basic_player import Player
5 |
class AgentPlayer(Player):
    """Player whose bids come from an OpenAI chat model.

    The whole game is kept as a chat history in ``self.message``: the
    system prompt, one inquiry per round, the model's answers, the round
    summaries and the win/lose feedback.
    """
    is_agent=True
    GAME_SETTING = ("SURVIVE CHALLENGE!!! You are one of five players in the game. "
                    "Everyone is required to choose an integer between 1 and 100 in each round. "
                    "The player whose chosen number is closest to (0.8 * the average of all chosen numbers) wins the round without any HP deduction. All other players will have 1 HP deducted. "
                    "But if all players choose the same number, their health points are deducted together.")

    GAME_EXAMPLE = ("For example:\nPlayer Alex chooses 80, Player Bob chooses 70, Player Cindy chooses 60, Player David chooses 50, and Player Elva chooses 40.\n"
                    "The average is (80 + 70 + 60 + 50 + 40) / 5 = 60.\n0.8 * The average is 60 * 0.8 = 48.\n"
                    "Alex: |80 - 48| = 32\nBob: |70 - 48| = 22\nCindy:|60 - 48| = 12\nDavid:|50 - 48| = 2\nElva:|40 - 48| = 8\n"
                    "So, player David's choice of 50 is closest to the target number 48, so David wins the round. "
                    "All other players lose 1 HP.\nEvery player starts with an initial HP of 10 points. "
                    "Once a player's HP reaches 0, he or she will be killed immediately and lose everything they have. "
                    "Remember only the last remaining player wins the game!! "
                    "Remember the target number is 0.8 * average or not the average!! "
                    "Strive to make choices that maximize your chance of survival!!")

    INQUIRY = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
               "Please choose an integer between 1 and 100 for this round.")

    def __init__(self, name, persona, engine):
        # The base class sets name, hp, biddings, cur_round and logs.
        super().__init__(name)
        self.engine = engine

        self.persona = persona
        self.message = [{"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}]

    def act(self):
        """Ask the model for a bid and append the parsed number to ``biddings``.

        The model's answer is stored in the chat history.  Any API error is
        printed and retried after 15 seconds.
        """
        print(f"Player {self.name} conduct bidding")
        while True:
            try:
                response = openai.ChatCompletion.create(
                    engine = self.engine,
                    messages = self.message,
                    temperature=0.7,
                    max_tokens=800,
                    top_p=0.95,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                response = response['choices'][0]['message']['content']
                self.message.append({"role":"assistant","content":response})
                break
            except Exception as e:
                print(e)
                time.sleep(15)
        self.biddings.append(self.parse_result(response))

    def parse_result(self, message):
        """Extract the chosen number from a free-text answer via the model.

        Returns:
            The number as an int.

        Raises:
            SystemExit: after three extraction answers that are not plain
                decimal numbers.  API errors do not count; they are retried
                after 15 seconds.
        """
        failures = 0
        while True:
            try:
                response = openai.ChatCompletion.create(
                    engine=self.engine,
                    messages = [{"role":"system", "content":"By reading the conversation, extract the number chosen by player. Output format: number"}, {"role": "user", "content": message}],
                    temperature=0.7,
                    max_tokens=800,
                    top_p=0.95,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                answer = response['choices'][0]['message']['content'].strip()
            except Exception as e:
                print(e)
                time.sleep(15)
                continue

            # Explicit check instead of `assert` (which -O strips);
            # isdecimal() only accepts text that int() can convert.
            if answer.isdecimal():
                return int(answer)

            print("Result Parsing Error: ", "Not A Number: " + message)
            failures += 1
            if failures >= 3:
                # A message makes the process exit with a non-zero status.
                raise SystemExit(f"Could not extract a number after {failures} attempts.")

    def start_round(self, round):
        """Append this round's inquiry prompt to the chat history."""
        self.message += [{"role":"system","content":self.INQUIRY.format(name=self.name, round=round, hp=self.hp)}]

    def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings):
        """Append the round summary and the win/lose feedback to the history."""
        self.message_update_result(bidding_info)
        self.message_update_warning(win)

    def message_update_result(self, bidding_info):
        """Append the round summary as a system message."""
        self.message += [{"role":"system","content":bidding_info}]

    def message_update_warning(self, win):
        """Append the win confirmation or an HP warning based on ``self.hp``."""
        def add_warning():
            if not win:
                # The stricter threshold is tested first; tested second it
                # could never be reached.
                if self.hp <= 3:
                    return f"WARNING: You have lost 1 point of HP in this round! You now have only {self.hp} points of health left. You are in extreme DANGER and one step closer to death. "
                if self.hp < 5:
                    return f"WARNING: You have lost 1 point of HP in this round! You now have only {self.hp} points of health left. You are in DANGER and one step closer to death. "
                return f"WARNING: You have lost 1 point of HP in this round! You now have only {self.hp} points of health left. You are one step closer to death. "
            return "You have successfully chosen the number closest to the target number, which is the average of all players' selected numbers multiplied by 0.8. As a result, you have won this round. All other players will now deduct 1 HP. "

        self.message += [{"role":"system","content": add_warning()}]

    def conduct_inquiry(self, inquiry):
        """Return the model's answer to ``inquiry`` without storing either.

        The inquiry is sent after the current chat history as a system
        message; the history itself is left untouched.
        """
        while 1:
            try:
                response = openai.ChatCompletion.create(
                    engine=self.engine,
                    messages = self.message + [{"role":"system","content":inquiry}],
                    temperature=0.7,
                    max_tokens=800,
                    top_p=0.9,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)

                RESPONSE = response['choices'][0]['message']['content']
                return RESPONSE
            except Exception as e:
                print(e)
                time.sleep(15)
129 |
130 |
class CoTAgentPlayer(AgentPlayer):
    """Agent whose per-round inquiry asks the model to think step by step."""

    INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
                   "Guess which number will win in the next round. Let's think step by step, and finally answer a number you think you can win.")

    def start_round(self, round):
        """Append the chain-of-thought inquiry for this round to the history."""
        prompt = self.INQUIRY_COT.format(name=self.name, round=round, hp=self.hp)
        self.message.append({"role": "system", "content": prompt})
137 |
138 |
class PersonaAgentPlayer(AgentPlayer):
    """Agent that replaces the supplied persona with a "game expert" persona."""

    INQUIRY_PERSONA = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
                       "Please choose an integer between 1 and 100 for this round."
                       "Don't forget your expert status, use your expertise to win this round!")

    MATH_EXPERT_PERSONA = ("You are {name} and involved in a survive challenge."
                           " You are a game expert, good at predicting other people's behavior and deducing calculations, and using the most favorable strategy to win the game.")

    def __init__(self, name, persona, engine):
        super().__init__(name, persona, engine)
        # Replace the persona given by the caller and rebuild the system
        # prompt that opens the chat history.
        expert_persona = self.MATH_EXPERT_PERSONA.format(name=name)
        self.persona = expert_persona
        opening = {"role": "system", "content": expert_persona + self.GAME_SETTING.format(NAME=self.name)}
        self.message = [opening]

    def start_round(self, round):
        """Append the expert-flavoured inquiry for this round to the history."""
        prompt = self.INQUIRY_PERSONA.format(name=self.name, round=round, hp=self.hp)
        self.message.append({"role": "system", "content": prompt})
155 |
156 |
class ReflectionAgentPlayer(AgentPlayer):
    """Agent that summarises its experience after every round."""

    REFLECT_INQUIRY = "Review the previous round games, summarize the experience."

    def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings):
        """Record the round outcome, then reflect on it."""
        super().notice_round_result(round, bidding_info, round_target, win, bidding_details, history_biddings)
        # Reflection happens once the round result is in the history.
        self.reflect()

    def reflect(self):
        """Ask the model for a reflection and store question and answer."""
        print(f"Player {self.name} conduct reflect")
        # The answer is requested before the question is stored:
        # conduct_inquiry() adds the question to the request itself.
        reflection = self.conduct_inquiry(self.REFLECT_INQUIRY)
        self.message.extend([
            {"role": "system", "content": self.REFLECT_INQUIRY},
            {"role": "assistant", "content": reflection},
        ])
167 |
168 |
class SelfRefinePlayer(AgentPlayer):
    """Agent that answers, collects expert feedback and answers again.

    Each round the model answers ``refine_times`` times.  Before every
    answer but the first, the conversation so far is shown to the model
    again with a request for strategic feedback, and that feedback is fed
    back into the conversation.  The last answer becomes the bid.
    """

    INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
                   "Guess which number will win in the next round. Let's think step by step, and finally answer a number you think you can win.")

    FEEDBACK_PROMPT = ("Carefully study the user's strategy in this round of the game. As a game expert, can you give a suggestion to optimize the user's strategy so that he can improve his winning rate in this round?")
    REFINE_PROMPT = ("I have a game expert's advice on your strategy in this round."
                     "You can adjust your strategy just now according to his suggestion. Here are his suggestions:"
                     "{feedback}")

    def __init__(self, name, persona, engine, refine_times = 2):
        super().__init__(name, persona, engine)

        self.refine_times = refine_times

    def start_round(self, round):
        """Only remember the round id; the inquiry is issued inside act()."""
        self.cur_round = round

    def act(self):
        """Produce this round's bid through answer / feedback / refine steps."""
        print(f"Player {self.name} conduct bidding")

        def completion(message):
            """Return the model's answer to ``message``, retrying on errors."""
            while True:
                try:
                    reply = openai.ChatCompletion.create(
                        engine = self.engine,
                        messages = message,
                        temperature=0.7,
                        max_tokens=800,
                        top_p=0.95,
                        frequency_penalty=0,
                        presence_penalty=0,
                        stop=None)
                    return reply['choices'][0]['message']['content']
                except Exception as e:
                    print(e)
                    time.sleep(15)

        for attempt in range(self.refine_times):
            # refine_times == number of answers given this round
            if attempt == 0:
                inquiry = self.INQUIRY_COT.format(name=self.name, round=self.cur_round, hp=self.hp)
                self.message.append({"role":"system","content":inquiry})
            else:
                # For the feedback request, every non-system message of
                # the conversation is relabelled as a "user" message.
                critic_view = [
                    m if m["role"]=="system" else {"role": "user", "content": m["content"]}
                    for m in self.message
                ]
                critic_view.append({"role": "system", "content": self.FEEDBACK_PROMPT})
                feedback = completion(critic_view)
                self.message.append({"role":"system","content": self.REFINE_PROMPT.format(feedback=feedback)})

            answer = completion(self.message)
            self.message.append({"role":"assistant","content": answer})

        self.biddings.append(self.parse_result(self.message[-1]["content"]))
232 |
233 |
class PredictionCoTAgentPlayer(AgentPlayer):
    """Agent that is asked to predict the other players' choices before answering."""

    INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
                   "Please choose an integer between 1 and 100 for this round.\n"
                   "First of all, predict the next round of choices based on the choices of other players in the previous round. "
                   "{round_history}"
                   "Your output should be of the following format:\n"
                   "Predict:\nThe choice of each player in the next round here.\n"
                   "Based on the prediction of other players, the average number in the next round here, and the target number in the next round (0.8 * the average of all chosen numbers) here.\n"
                   "Answer:\nthe number will you choose to win the next round game here.")

    def __init__(self, name, persona, engine):
        """Initialise the base agent and an empty per-round bidding history."""
        super().__init__(name, persona, engine)
        # round -> bidding details as passed to notice_round_result
        self.bidding_history = {}

    def start_round(self, round):
        """Append the prediction inquiry, including all recorded earlier rounds, to the conversation."""
        # PCoT requires the opponent's historical information to make predictions.
        recorded = [
            f"Round {r}: {self.bidding_history[r]}"
            for r in sorted(self.bidding_history)
        ]
        if recorded:
            round_history = (
                "The players' choices in the previous rounds are as follows:\n"
                + ".\n".join(recorded)
                + "."
            )
        else:
            round_history = ""

        inquiry = self.INQUIRY_COT.format(
            name=self.name, round=round, round_history=round_history, hp=self.hp
        )
        self.message.append({"role": "system", "content": inquiry})

    def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings):
        """Forward the result to the base class and remember this round's bidding details."""
        super().notice_round_result(
            round, bidding_info, round_target, win, bidding_details, history_biddings
        )
        self.bidding_history[round] = bidding_details
265 |
266 |
267 | class SPPAgentPlayer(AgentPlayer):
268 | # Default example of SPP
269 | SPP_EXAMPLE = """When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary.
270 | Here are some examples:
271 | ---
272 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once.
273 | Input: 6 12 1 1
274 |
275 | Participants: {name} (you); Math Expert
276 |
277 | Start collaboration!
278 |
279 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12.
280 | {name} (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24
281 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes.
282 | {name} (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24.
283 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input?
284 | {name} (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24.
285 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1".
286 | {name} (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24
287 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good!
288 |
289 | Finish collaboration!
290 |
291 | Final answer: 6 * (1 + 1) + 12 = 24
292 | """
293 |
294 | INQUIRY_SPP = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. "
295 | "Please choose an integer between 1 and 100 for this round. "
296 | "Now, identify the participants and collaboratively choose the number step by step. Remember to provide the final solution with the following format \"Final answer: The chosen number here.\".")
297 |
298 |
299 | PERSONA = "You are {name} and involved in a survive challenge."
300 |
301 | def __init__(self, name, persona, engine):
302 | super().__init__(name, persona, engine)
303 | self.persona = self.PERSONA.format(name=name)
304 | self.message = [{"role":"system","content": self.SPP_EXAMPLE.format(name=self.name)},
305 | {"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}]
306 |
307 | def start_round(self, round):
308 | self.message += [{"role":"system","content":self.INQUIRY_SPP.format(name=self.name, round=round, hp=self.hp)}]
--------------------------------------------------------------------------------
/k-reasoning/G08A/run.sh:
--------------------------------------------------------------------------------
1 | python main.py --player_strategy kr --computer_strategy agent --exp_num 1
2 |
3 | python evaluate.py --players kr --opponents agent
--------------------------------------------------------------------------------
/k-reasoning/README.md:
--------------------------------------------------------------------------------
1 | # K-Level Reasoning with Large Language Models
2 |
3 | **K-Level Reasoning** is a novel reasoning approach for LLMs, which adopts the perspective of rivals to recursively employ k-level thinking
4 | based on available historical information. It significantly improves the strategic decision-making capability of LLMs in dynamic, interactive, and competitive scenarios.
5 |
6 | See our paper: [K-Level Reasoning with Large Language Models](https://browse.arxiv.org/abs/2402.01521)
7 |
8 | ## Dynamic Strategic Reasoning with LLMs
9 | While Large Language Models (LLMs) have demonstrated their proficiency in complex reasoning tasks, their performance in dynamic, interactive, and competitive scenarios - such as business strategy and stock market analysis - remains underexplored. To bridge this gap, we formally explore the dynamic reasoning capabilities of LLMs for decision-making in rapidly evolving environments.
10 |
11 | We introduce two game theory based pilot challenges that mirror the complexities of real-world dynamic decision-making. These challenges are well-defined, enabling clear, controllable, and precise evaluation of LLMs’ dynamic reasoning abilities.
12 |
13 |
14 |
15 | **Guessing 0.8 of the Average**. It involves ten-round games, where each player selects a number between 1 and 100. The winner of each round is the player whose chosen number is closest to 80% of the average number picked by all players. This challenge mirrors the dynamics of market predictions, where players must anticipate collective market behavior.
16 |
17 | **Survival Auction Game**. Players bid in daily auctions to secure scarce living resources, balancing their health and finances to survive a 10-day period, simulating scenarios of resource scarcity and financial decision-making.
18 |
19 | ## K-Level Reasoning
20 | K-level thinking is a recursive reasoning process. In first-level thinking, individuals react directly to the environment, akin to static reasoning. In second-level thinking, individuals take into account the first-level thinking of others, and so on and so forth.
21 |
22 |
23 | We draw on this idea to improve the reasoning capabilities of LLMs in dynamic challenges, and propose the “K-Level Reasoning”. This method involves recursively simulating the opponent’s behavior using available historical information, followed by predicting the optimal behavior based on the opponents’ behavior modeling.
24 |
25 |
26 |
27 |
28 | ## Reasoning Example
29 |
30 |
31 | Illustration of different methods in the Guessing 0.8 of the Average game.
32 |
33 | **Left**: In the Chain of Thought, the LLM outputs reasoning logic in a step-by-step format. However, the LLM demonstrates poor understanding of situations and prediction of opponents’ actions.
34 |
35 | **Middle**: In the Prediction Chain-of-Thought, with an explicit requirement to make predictions about rivals’ next moves, the LLM clearly divides the reasoning into prediction and reasoning phases. However, the predictions are still somewhat biased.
36 |
37 | **Right**: In the K-Level Reasoning, the LLM recursively predicts rivals’ next moves with public historical information. Thanks to a greater strategic depth than its rivals, its predictions and decisions are more accurate.
38 |
39 | ## Contributions
40 |
41 | - We study the dynamic reasoning capabilities of LLMs from a game theory perspective and introduce two pilot tasks. Both tasks mirror the complexities of real-world dynamic decision-making and are also well-defined for evaluating LLMs’ dynamic reasoning abilities.
42 | - We propose a novel reasoning approach with LLMs - the “K-Level Reasoning” method. It integrates cognitive hierarchy theory into the reasoning process, empowering LLMs to recursively predict and respond to the thoughts and actions of rivals in competitive and dynamic scenarios.
43 |
44 | ## Codes
45 |
46 | ### Guessing 0.8 of the Average
47 |
48 | Play the game and record the results (using K-Level-Reasoning (kr) as the player and Direct (agent) as the opponent as an example). The game log will be saved in the result folder by default.
49 | ```
50 | cd G08A
51 | python main.py --player_strategy kr --computer_strategy agent --exp_num 1
52 | ```
53 | Then, perform the calculation of player metrics, which will output the data for the player's `WinRate` and `AdaptionIndex`.
54 |
55 | ```
56 | python evaluate.py --players kr --opponents agent
57 | ```
58 |
59 | ### SurvivalAuctionGame
60 |
61 | Play the game and calculate metrics.
62 | ```
63 | cd SAG
64 |
65 | python main.py --player_strategy kr --computer_strategy agent --exp_num 1
66 |
67 | python evaluate.py --players kr --opponents agent
68 | ```
69 |
70 | ## Citation
71 |
72 | ```
73 | @misc{kreasoning,
74 | title={K-Level Reasoning with Large Language Models},
75 | author={Yadong Zhang and Shaoguang Mao and Tao Ge and Xun Wang and Yan Xia and Man Lan and Furu Wei},
76 | year={2024},
77 | eprint={2402.01521},
78 | archivePrefix={arXiv},
79 | primaryClass={cs.CL}
80 | }
81 | ```
82 |
--------------------------------------------------------------------------------
/k-reasoning/SAG/evaluate.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | import os
4 | from glob import glob
5 |
6 | import matplotlib.pyplot as plt
7 |
8 | import numpy as np
9 |
10 | class SAGEvaluator(object):
11 | def __init__(self, players, opponents, result_dir, output_dir) -> None:
12 | self.players = players.split(",")
13 | self.opponents = opponents.split(",")
14 | self.result_dir = result_dir
15 | self.output_dir = output_dir
16 |
17 | if not os.path.exists(self.output_dir):
18 | os.makedirs(self.output_dir, exist_ok=True)
19 |
20 | def survival_rate(self, status, soft=True):
21 | rounds = [str(r) for r in range(1, len(status)+1)]
22 | players = {}
23 | for r in rounds:
24 | for player in status[r]:
25 | players[player] = r
26 | for player in players:
27 | if soft:
28 | players[player] = int(players[player])/len(rounds)
29 | else:
30 | players[player] = 1 if int(players[player])==len(rounds) else 0
31 | return players
32 |
33 | def average_survival_round(self, ):
34 | print("="*40+" Average Survival Round "+"="*40)
35 |
36 | players, opponents = self.players, self.opponents
37 | def interpolate_color(colorA, colorB, colorC, alpha, beta=0.5):
38 | if alpha>beta:
39 | alpha = (alpha-beta)/(1-beta)
40 | return tuple(np.array(colorB)*(1-alpha) + np.array(colorA)*(alpha))
41 | else:
42 | low = 0.4
43 | alpha = (alpha-low)/(beta-low)
44 | return tuple(np.array(colorC)*(1-alpha) + np.array(colorB)*(alpha))
45 |
46 | asr_result = {}
47 |
48 | for agent in players:
49 | asr_result.setdefault(agent, {})
50 | for computer in opponents:
51 | exp = f"{self.result_dir}/{agent}_VS_{computer}*.json"
52 | cots = glob(exp)
53 |
54 | wins = {}
55 | for result in cots:
56 | with open(result) as fin:
57 | result = json.load(fin)["status"]
58 | sr = self.survival_rate(result, soft=True)
59 | for player in sr:
60 | wins.setdefault(player, [])
61 | wins[player].append(sr[player])
62 |
63 | win_rate = sum(wins["Alex"])/len(wins["Alex"])
64 | asr_result[agent][computer] = win_rate
65 |
66 | average = {}
67 | for i, agent in enumerate(asr_result):
68 | average[agent] = list(asr_result[agent].values())
69 | average[agent] = sum(average[agent])/len(average[agent])
70 |
71 | print(f"{'':7s}\t"+"\t".join([f"{agent:7s}" for agent in players]))
72 | for computer in opponents:
73 | print(f"{computer:7s}",end="\t")
74 | print("\t".join([f"{asr_result[agent][computer]*10:<7.2f}" if asr_result[agent][computer]>=0 else f"{'':7s}" for agent in asr_result]))
75 |
76 | print(f"{'Average':7s}",end="\t")
77 | print("\t".join([f"{average[agent]*10:<7.2f}" if average[agent]>=0 else f"{'':7s}" for agent in average]))
78 |
79 | print()
80 |
81 | def adaption_index(self):
82 | print("="*40+" Adaption Index "+"="*40)
83 |
84 | players, opponents = self.players, self.opponents
85 | adaption_result = {}
86 |
87 | def mean(a):
88 | if not a:
89 | return -1
90 | return sum(a)/len(a)
91 |
92 | for oppo in opponents:
93 | exp_result = {}
94 | for agent in players:
95 | exps = glob(f"{self.result_dir}/{agent}_VS_{oppo}*.json")
96 | for exp in exps:
97 | with open(exp) as fin:
98 | logs = json.load(fin)
99 | exp_ground = logs["biddings"]
100 | target_div = {"first":[],"second":[]}
101 | for r in range(0, 10):
102 | if r>=len(exp_ground["Alex"]):
103 | break
104 | second_bid=0
105 | player_bid=exp_ground["Alex"][r]
106 | for p in exp_ground:
107 | if r>=len(exp_ground[p]): continue
108 | if p!="Alex" and exp_ground[p][r]>second_bid:
109 | second_bid=exp_ground[p][r]
110 | if r>=5:
111 | target_div["second"].append(abs(player_bid-second_bid))
112 | else:
113 | target_div["first"].append(abs(player_bid-second_bid))
114 | exp_result.setdefault(agent, [])
115 | if not target_div["second"] or not target_div["first"]:
116 | continue
117 | exp_result[agent].append(mean(target_div["second"])/mean(target_div["first"]))
118 | adaption_result[oppo]=exp_result
119 |
120 |
121 | agent_sum = {}
122 | for agent in players:
123 | agent_sum[agent]=[]
124 | for oppo in opponents:
125 | agent_oppo_learning = adaption_result[oppo].get(agent,[])
126 | if agent_oppo_learning:
127 | agent_sum[agent].append(sum(agent_oppo_learning)/len(agent_oppo_learning))
128 |
129 | print(f"{'':8s}\t"+"\t".join([f"{agent:8s}" for agent in players]))
130 |
131 | for oppo in opponents:
132 | exp_result = adaption_result[oppo]
133 |
134 |
135 | maxrate = list(set([sum(exp_result[agent])/len(exp_result[agent]) if exp_result.get(agent) else 10 for agent in players]))
136 | maxrate.sort()
137 | print(f"{oppo:8s}", end='\t')
138 | print('\t'.join([f"{sum(exp_result[agent])/len(exp_result[agent]):<8.2f}" if exp_result.get(agent) else f"{'':8s}" for agent in players]))
139 |
140 | print(f"{'Average':8s}", end='\t')
141 | print('\t'.join([f"{sum(agent_sum[agent])/len(agent_sum[agent]):<8.2f}" if len(agent_sum.get(agent, []))==len(opponents) else f"{'':8s}" for agent in players]))
142 |
143 | print()
144 |
145 | def prediction_accuracy(self, print_value=False):
146 | opponents = self.opponents
147 | if print_value:
148 | print(f"{'':7s}\t"+"\t".join([f"{r:<4}" for r in range(10)]))
149 |
150 | kr_max_div_dict={}
151 |
152 | for oppo in opponents:
153 | kr_avg_div = {}
154 | kr_exps = glob(f"{self.result_dir}/kr_VS_*.json")
155 | for exp in kr_exps:
156 | m = re.match(f"{self.result_dir}/kr_VS_{oppo}_(\d).json", exp)
157 | if not m: continue
158 | exp_num = m.groups()[0]
159 | with open(exp) as fin:
160 | logs = json.load(fin)
161 | exp_ground = logs["biddings"]
162 | result = logs["logs"]["Alex"]
163 | # print(exp_ground)
164 | for r in range(0, len(exp_ground["Alex"])):
165 | try:
166 | prediction = result[f'round{r+1}']["prediction"]
167 | except:
168 | continue
169 | if not prediction: continue
170 | round_ground = {p: exp_ground[p][r] for p in exp_ground if p!="Alex" and len(exp_ground[p])>r}
171 | # print(r, prediction, round_ground)
172 | predict_avg = max(prediction.values())
173 | ground_avg = max(round_ground.values())
174 | kr_avg_div.setdefault(r, [])
175 | kr_avg_div[r].append(abs(predict_avg-ground_avg))
176 |
177 | if print_value:
178 | print(f"{oppo:7s}",end="\t")
179 | print("\t".join([f"{sum(kr_avg_div.get(r, [0]))/len(kr_avg_div.get(r, [0])):<7.2f}" if kr_avg_div.get(r) else f"{'-':7s}" for r in range(10)]))
180 | kr_max_div_dict[oppo] = kr_avg_div
181 |
182 | if print_value:
183 | print(f"{'':7s}\t"+"\t".join([f"{r:<4}" for r in range(10)]))
184 | pcot_max_div_dict = {}
185 |
186 |
187 |
188 | """
189 | Parse the prediction result of PCoT from the response.
190 | """
191 | for oppo in opponents:
192 | kr_avg_div = {}
193 | kr_exps = glob(f"{self.result_dir}/pcot_VS_*.json")
194 | for exp in kr_exps:
195 | m = re.match(f"{self.result_dir}/pcot_VS_{oppo}_(\d).json", exp)
196 | if not m: continue
197 | exp_num = m.groups()[0]
198 | with open(exp) as fin:
199 | logs = json.load(fin)
200 | exp_ground = logs["biddings"]
201 | result = logs["message"]["Alex"]
202 | # print(exp_ground)
203 | for i in range(len(result)):
204 | content = result[i]["content"]
205 | if not content.startswith("Hello, Alex! Today is the Day"): continue
206 | # print(result[i]["content"])
207 | # print("======")
208 | r = int(content[:content.index("of")].strip().split()[-1])
209 | output = result[i+1]["content"]
210 | if r>1:
211 | oppo_nums = len(logs["status"][str(r-1)])
212 | for p in logs["status"][str(r-1)]:
213 | status = logs["status"][str(r-1)][p]
214 | if p=="Alex":
215 | oppo_nums-=1
216 | else:
217 | if "POINT:-" in status or "POINT:0" in status:
218 | oppo_nums-=1
219 | if oppo_nums==0:
220 | continue
221 |
222 | if output.startswith("Predict:"):
223 | prediction = output.split("\n\n")[0]
224 | prediction = prediction.split("\n")[1:]
225 | ops = {}
226 | try:
227 | for p in prediction:
228 | split="$" if "$" in p else ": "
229 | if "Bob" in p:
230 | ops["Bob"]=int(p.split(split)[-1])
231 | elif "Cindy" in p:
232 | ops["Cindy"]=int(p.split(split)[-1])
233 | elif "David" in p:
234 | ops["David"]=int(p.split(split)[-1])
235 | elif "Eric" in p:
236 | ops["Eric"]=int(p.split(split)[-1])
237 | for p in prediction:
238 | split="$" if "$" in p else ": "
239 | if "Player 1" in p or "Player1" in p:
240 | ops["Bob"]=int(p.split(split)[-1])
241 | elif "Player 2" in p or "Player2" in p:
242 | ops["Cindy"]=int(p.split(split)[-1])
243 | elif "Player 3" in p or "Player3" in p:
244 | ops["David"]=int(p.split(split)[-1])
245 | elif "Player 4" in p or "Player4" in p:
246 | ops["Eric"]=int(p.split(split)[-1])
247 | except BaseException as e:
248 | # print("!!!!!!!!!")
249 | continue
250 | else:
251 | # print(output)
252 | pass
253 | prediction = ops
254 | round_ground = {p: exp_ground[p][r-1] for p in exp_ground if p!="Alex" and len(exp_ground[p])>=r}
255 | # print(round_ground)
256 | # print(r, prediction, round_ground)
257 | predict_avg = max(prediction.values())
258 | ground_avg = max(round_ground.values())
259 | kr_avg_div.setdefault(r-1, [])
260 | kr_avg_div[r-1].append(abs(predict_avg-ground_avg))
261 |
262 |
263 | if print_value:
264 | print(f"{oppo:7s}",end="\t")
265 | print("\t".join([f"{sum(kr_avg_div.get(r, [0]))/len(kr_avg_div.get(r, [0])):<7.2f}" if kr_avg_div.get(r) else f"{'-':7s}" for r in range(10)]))
266 | pcot_max_div_dict[oppo] = kr_avg_div
267 |
268 |
269 | #Export the prediction accuracy chart.
270 |
271 | for oppo in opponents:
272 | pcot_avg_div = pcot_max_div_dict[oppo]
273 | kr_avg_div = kr_max_div_dict[oppo]
274 | # Sample data
275 | x1 = [f"R{i+1}" for i in sorted(pcot_avg_div.keys())]
276 | y1 = [sum(pcot_avg_div[r])/len(pcot_avg_div[r]) for r in sorted(pcot_avg_div.keys())]
277 |
278 | x2 = [f"R{i+1}" for i in sorted(kr_avg_div.keys())]
279 | y2 = [sum(kr_avg_div[r])/len(kr_avg_div[r]) for r in sorted(kr_avg_div.keys())]
280 |
281 | # Create the plot
282 | plt.figure(figsize=(4, 3))
283 |
284 | # Plot the first line
285 | plt.plot(x1, y1, label=f'PCoT vs {oppo}', linewidth=2, color='#1f77b4')
286 | for i in range(len(x1)):
287 | plt.plot(x1[i], y1[i], marker='s', color='#1f77b4')
288 |
289 | # Plot the second line
290 | plt.plot(x2, y2, label=f'K-R vs {oppo}', color='#ff7f0e')
291 | for i in range(len(x2)):
292 | plt.plot(x2[i], y2[i], marker='s', color='#ff7f0e')
293 |
294 | plt.xticks(fontsize=12)
295 | plt.yticks(fontsize=14)
296 |
297 | # Show the legend
298 | plt.legend(fontsize=12)
299 | plt.ylim(top=150)
300 |
301 | # Show the plot
302 |
303 | plt.savefig(f'{self.output_dir}/PA_{oppo}.pdf', format='pdf', bbox_inches='tight')
304 |
305 | print("="*20+f" Prediction Accuracy Metric has been exported to \"{self.output_dir}\" "+"="*20)
306 |
307 |
def main(args):
    """Build the evaluator from the parsed command-line arguments and print its reports."""
    evaluator = SAGEvaluator(
        args.players,
        args.opponents,
        args.result_dir,
        args.output_dir,
    )
    for report in (evaluator.average_survival_round, evaluator.adaption_index):
        report()

    # Prediction accuracy only applies to pcot and kr runs, so
    # evaluator.prediction_accuracy() is not part of the default reports.
315 |
316 |
if __name__=="__main__":
    import argparse

    # (flag, type, default) for every supported command-line option.
    option_specs = (
        ("--players", str, "kr"),
        ("--opponents", str, "agent"),
        ("--result_dir", str, "result"),
        ("--output_dir", str, "output"),
        ("--exp_rnd", int, 10),
        ("--exp_num", int, 10),
    )

    parser = argparse.ArgumentParser()
    for flag, kind, default in option_specs:
        parser.add_argument(flag, type=kind, default=default)

    main(parser.parse_args())
--------------------------------------------------------------------------------
/k-reasoning/SAG/game.py:
--------------------------------------------------------------------------------
1 | import json
2 |
class SurvivalAuctionGame():
    """Multi-round auction in which players bid each round and drop out once their hp reaches 0.

    Attributes:
        players: every player that started the game.
        survival_players: players still taking part.
        round_winners: round id -> list of winner names for that round.
        round_status: round id -> {player name: status string after the round}.
    """

    # Prompts
    ROUND_NOTICE = "Thank you all for participating in Round {}. In this round, {}.\nTotal water resource supply is {}. According to the principle of the highest bidder and the rule when the game is tied, {} won this auction and obtain water resource. After allocation, all survival residents' information is as follows: \n {}"

    def __init__(self, players) -> None:
        """Start a game with shallow copies of ``players`` as both the full and the surviving roster."""
        self.players = players[:]
        self.survival_players = players[:]
        self.round_winners = {}
        self.round_status = {}

    def _get_salary(self):
        """Let every surviving player collect the salary."""
        for participant in self.survival_players:
            participant.get_salary()

    def _round_settlement(self, winners):
        """Tell every surviving player whether the bid succeeded."""
        for participant in self.survival_players:
            settle = participant.success_bid if participant.name in winners else participant.unsuccess_bid
            settle()

    def _check_winner(self, supply):
        """
        Return the winner names of the current round.

        A player wins by holding the highest bid while being able to afford it
        (bid not above balance). If several players qualify, nobody wins.
        ``supply`` is not used by the decision.
        """
        contenders = self.survival_players
        top_bid = max(p.last_bidding for p in contenders)
        qualified = [
            p.name for p in contenders
            if p.last_bidding == top_bid and p.last_bidding <= p.balance
        ]
        return qualified if len(qualified) <= 1 else []

    def run_single_round(self, round_id, supply):
        """
        Execute a single round of the game.

        Args:
            round_id (int): number of the current round, beginning from 1.
            supply (int): supply of the current round
        """
        print(f"Round {round_id} begins.")

        # 1. salaries
        self._get_salary()
        print("All players get their salaries.")

        # 2. share public information, then collect bids
        history_biddings = {p.name: p.biddings[:] for p in self.survival_players}
        player_status = {p.name: p.get_status() for p in self.survival_players}

        for p in self.survival_players:
            p.update_public_info(round_id, history_biddings, player_status)
            p.start_round(round_id, supply)

        for p in self.survival_players:
            p.act()

        # 3. determine winners
        winners = self._check_winner(supply)
        self.round_winners[round_id] = winners
        print("Winner(s):\n")
        print(winners)

        # 4. settlement
        self._round_settlement(winners)

        # 5. textual summary of the round
        bidding_details = ", ".join(
            f"{p.name} bid {p.last_bidding}" for p in self.survival_players
        )
        winners_str = ", ".join(winners) if winners else "no one"
        player_status_str = "\n".join(p.get_status() for p in self.survival_players)

        round_results = self.ROUND_NOTICE.format(round_id, bidding_details, supply, winners_str, player_status_str)
        print("Round result:\n" + round_results)

        # 6. report the result to every player
        for p in self.survival_players:
            p.notice_round_result(round_id, round_results, p.name in winners, bidding_details)

        # 7. record statuses and drop players whose hp is used up
        still_alive = []
        snapshot = {}
        self.round_status[round_id] = snapshot
        for p in self.survival_players:
            snapshot[p.name] = p.get_status()
            if p.hp > 0:
                still_alive.append(p)
                continue
            # Everyone who entered this round hears about the elimination.
            notice = f"{p.name}'s hp is below 0, so {p.name} has been eliminated from the challenge!"
            for listener in self.survival_players:
                listener.notice_elimination(notice)
        self.survival_players = still_alive

    def _save_history(self, path):
        """Write every player's message list to ``path`` as JSON."""
        history = [{p.name: p.message} for p in self.players]
        with open(path, 'w') as f:
            json.dump(history, f)

    def run_multi_round(self, n_round, supply_list):
        """Play up to ``n_round`` rounds, one supply value per round, stopping early when nobody survives."""
        assert isinstance(supply_list, list)
        assert n_round == len(supply_list)

        for round_id in range(1, n_round + 1):
            supply = supply_list[round_id - 1]
            self.run_single_round(round_id, supply)
            if not self.survival_players:
                break
--------------------------------------------------------------------------------
/k-reasoning/SAG/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | from player import *
5 | from game import SurvivalAuctionGame
6 |
7 |
8 | # Fill in your config information to conduct experiments.
9 | openai.api_type = ""
10 | openai.api_base = ""
11 | openai.api_version = ""
12 | openai.api_key = ""
13 | ENGINE = "gpt4-32k"
14 |
15 |
def build_player(strategy, name, persona):
    """
    Player factory: build the player class that implements ``strategy``.

    Every player is created with the module-level ``ENGINE`` and the fixed
    numeric settings 10 and 100.

    Args:
        strategy: one of "agent", "cot", "pcot", "kr", "reflect", "refine",
            "persona", "spp".
        name: player name.
        persona: persona text handed to the player.

    Raises:
        NotImplementedError: if ``strategy`` is not one of the names above.
    """

    if strategy=="agent":
        return AgentPlayer(name, ENGINE, 10, 100, persona)
    elif strategy=="cot":
        return CoTAgentPlayer(name, ENGINE, 10, 100, persona)
    elif strategy=="pcot":
        return PredictionCoTAgentPlayer(name, ENGINE, 10, 100, persona)
    elif strategy=="kr":
        return KLevelReasoningPlayer(name, ENGINE, 10, 100, persona)
    elif strategy=="reflect":
        return ReflectionAgentPlayer(name, ENGINE, 10, 100, persona)
    elif strategy=="refine":
        return SelfRefinePlayer(name, ENGINE, 10, 100, persona)
    elif strategy=="persona":
        return PersonaAgentPlayer(name, ENGINE, 10, 100, persona)
    elif strategy=="spp":
        # The engine used to be passed in third position here, unlike every
        # sibling call above.
        # NOTE(review): assumes SPPAgentPlayer takes the same positional order
        # as the other players - confirm against its __init__.
        return SPPAgentPlayer(name, ENGINE, 10, 100, persona)
    raise NotImplementedError(f"No player is implemented for strategy {strategy!r}")
39 |
40 |
def main(args):
    """Run the requested experiments and write one JSON record per experiment.

    For every experiment number in ``range(args.start_exp, args.exp_num)`` a
    game is played between "Alex" (built with ``args.player_strategy``) and
    four opponents (built with ``args.computer_strategy``) with a supply of 10
    in each of the ``args.max_round`` rounds. The record is saved as
    ``<output_dir>/<player>_VS_<computer>_<exp_no>.json``.
    """
    # Predefined character information: the same text for everyone, only the name differs.
    persona_template = "You are {} and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. "
    opponent_names = ("Bob", "Cindy", "David", "Eric")

    for exp_no in range(args.start_exp, args.exp_num):
        # Build the evaluated player; optional overrides support ablation experiments.
        protagonist = build_player(args.player_strategy, "Alex", persona_template.format("Alex"))
        if args.player_engine:
            protagonist.engine = args.player_engine
        if args.player_k:
            protagonist.k_level = args.player_k

        # Build the opponents.
        players = [protagonist]
        for opponent_name in opponent_names:
            players.append(
                build_player(args.computer_strategy, opponent_name, persona_template.format(opponent_name))
            )
        print("Initial players done.")

        # Run the multi-round game (default 10 rounds).
        game = SurvivalAuctionGame(players)
        game.run_multi_round(args.max_round, [10]*args.max_round)

        # Export the game record.
        output_file = f"{args.output_dir}/{args.player_strategy}_VS_{args.computer_strategy}_{exp_no}.json"
        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        with open(output_file,"w") as fout:
            messages, biddings, logs = {}, {}, {}
            for participant in game.players:
                if participant.is_agent:
                    messages[participant.name] = participant.message
                biddings[participant.name] = participant.biddings
                if participant.logs:
                    logs[participant.name] = participant.logs

            json.dump(
                {
                    "biddings": biddings,
                    "winner": game.round_winners,
                    "status": game.round_status,
                    "message": messages,
                    "logs": logs,
                },
                fout,
                indent=4,
            )
94 |
if __name__ == "__main__":
    import argparse

    # Strategies accepted for the player under test.
    player_choices = ["agent", "cot", "pcot", "kr", "reflect", "tot", "persona", "refine", "spp"]
    # Strategies accepted for the four opponents.
    computer_choices = ["agent", "fix", "last", "mono", "monorand", "cot", "pcot", "kr", "reflect", "tot", "persona", "refine", "spp"]

    # (flag, keyword arguments) pairs, registered in declaration order so the
    # generated --help output keeps its ordering.
    cli_options = [
        ("--player_strategy", dict(type=str, default="cot", choices=player_choices)),
        ("--computer_strategy", dict(type=str, choices=computer_choices, default="fix")),
        ("--output_dir", dict(type=str, default="result")),
        ("--max_round", dict(type=int, default=10)),
        ("--start_exp", dict(type=int, default=0)),
        ("--exp_num", dict(type=int, default=10)),
        ("--player_engine", dict(type=str, default=None, help="player's OpenAI api engine")),
        ("--player_k", dict(type=int, default=None, help="player's k-level (default 2)")),
    ]

    parser = argparse.ArgumentParser()
    for flag, options in cli_options:
        parser.add_argument(flag, **options)

    main(parser.parse_args())
--------------------------------------------------------------------------------
/k-reasoning/SAG/player/__init__.py:
--------------------------------------------------------------------------------
1 | from .reasoning_player import *
2 | from .k_level_reasoning_player import *
--------------------------------------------------------------------------------
/k-reasoning/SAG/player/basic_player.py:
--------------------------------------------------------------------------------
class Player():
    """Base bookkeeping for one resident taking part in the water auction.

    Tracks the bids placed so far, the money balance and the health state.
    Subclasses must implement ``act`` and ``notice_round_result``; the other
    hooks default to no-ops.
    """

    def __init__(self, name, water_requirement, daily_salary):
        """Create a player with 8 of at most 10 health points and no money."""
        self.name = name
        self.biddings = []   # every bid placed so far, oldest first
        self.cur_round = -1  # stays -1 until start_round() is called

        self.requirement = water_requirement
        self.daily_salary = daily_salary
        self.balance = 0
        self.hp = 8
        self.no_drink = 1    # health points the next failed bid will cost
        self.maximum_health = 10

        self.logs = None

    def success_bid(self):
        """
        Update self status when succeeds the bids: gain 2 HP (capped at
        ``maximum_health``), pay the last bid and reset the no-drink counter.
        """
        self.hp = min(self.maximum_health, self.hp + 2)
        self.balance -= self.last_bidding
        self.no_drink = 1

    def unsuccess_bid(self):
        """
        Update self status when fails the bids: lose ``no_drink`` HP, then
        make the next consecutive failure one point more expensive.
        """
        self.hp -= self.no_drink
        self.no_drink += 1
        if self.hp <= 0:
            # The original message lacked the space after the name.
            print(f"{self.name} is out of game!")

    def get_salary(self):
        """Add one day of salary to the balance."""
        self.balance += self.daily_salary

    def start_round(self, round: int, supply: int):
        """Record the round that is about to be played (``supply`` is unused here)."""
        self.cur_round = round

    def act(self):
        """Decide this round's bid; must be provided by subclasses."""
        raise NotImplementedError

    def notice_round_result(self, round, bidding_info, win, bidding_details):
        """Receive the outcome of a round; must be provided by subclasses."""
        raise NotImplementedError

    def notice_elimination(self, info):
        """Hook called with elimination information; ignored by default."""
        pass

    def update_public_info(self, round, history_biddings, player_stauts):
        """Hook called with public game information; ignored by default."""
        pass

    def end_round(self):
        """Hook called when a round ends; ignored by default."""
        pass

    @property
    def last_bidding(self):
        """Most recent bid; raises ``IndexError`` when no bid was placed yet."""
        return self.biddings[-1]

    def get_status(self, print_=False):
        """Return the one-line status summary, optionally printing it as well."""
        status = f"NAME:{self.name}\tBALANCE:{self.balance}\tHEALTH POINT:{self.hp}\tNO_DRINK:{self.no_drink}"
        if print_:
            print(status + "\n\n")
        return status
--------------------------------------------------------------------------------
/k-reasoning/SAG/player/k_level_reasoning_player.py:
--------------------------------------------------------------------------------
1 | import time
2 | from copy import deepcopy
3 |
4 | import openai
5 |
6 | from .reasoning_player import AgentPlayer
7 |
8 |
9 | PERSONA = "You are {name} and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. "
10 |
class KLevelReasoningPlayer(AgentPlayer):
    """Agent that predicts its opponents' bids before placing its own.

    Before every round the player replays each opponent's public history to
    the LLM, asks it what that opponent would bid next, and puts those
    predictions into its own bidding prompt. With ``k_level`` above 2 the
    simulation is repeated, letting the simulated opponents react to the
    predictions of the previous level.
    """

    INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                   " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                   " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid."
                   " Another game expert's prediction for the next round of other players is as follows: "
                   "{prediction}"
                   " Based on the prediction of other players, think carefully about your next round of bidding strategy to be most likely to survive.")

    PREDICTION_GAME_SETTING = PERSONA + AgentPlayer.GAME_SETTING
    PREDICTION_INQUIRY = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                          " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                          " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid.")
    PREDICTION_RESPONSE = "I will bid ${bidding} for today's water resource auction."
    REBID_RESPONSE = "In this round, {biddings}. Due to the detection of leakage issues in today's bids, the bids in this round are invalidated and today's auction will be restarted."

    def __init__(self, name, engine, water_requirement, daily_salary, persona):
        super().__init__(name, engine, water_requirement, daily_salary, persona)
        self.bidding_history = {}   # round -> bidding_details from notice_round_result
        self.logs = {}              # "round{n}" -> predictions and simulated dialogues

        self.history_biddings = {}  # player name -> list of past bids, e.g. {"Alex": [1, 2, 3]}
        self.opponent_status = {}   # round -> status mapping passed to update_public_info
        self.round_supply = {}      # round -> supply announced in start_round
        self.round_result = {}      # round -> bidding_info text of that round

        self.k_level = 2

        # self.engine = "gpt35prod"

    def start_round(self, round, supply):
        """Predict the opponents' bids, then queue this round's bidding prompt."""
        self.round_supply[round] = supply
        prediction = self.predict(round)
        prediction = ", ".join([f"{player} might bid {prediction[player]}" for player in prediction]) + ". "
        self.message += [{"role": "system", "content": self.INQUIRY_COT.format(name=self.name, round=round, supply=supply, prediction=prediction, status=self.get_status())}]

    def notice_round_result(self, round, bidding_info, win, bidding_details):
        """Store the round's outcome in addition to the base-class handling."""
        super().notice_round_result(round, bidding_info, win, bidding_details)
        self.round_result[round] = bidding_info
        self.bidding_history[round] = bidding_details

    def update_public_info(self, round, history_biddings, player_stauts):
        """Remember the public bid history and the status reported for ``round``."""
        self.history_biddings = history_biddings  # {"Alex": [1,2,3]}
        self.opponent_status[round] = player_stauts

    def predict(self, round):
        """Simulate every opponent with the LLM and return their predicted bids.

        Returns a dict mapping opponent name to predicted bid (never containing
        this player's own name). The simulated dialogues are kept in
        ``self.logs["round{round}"]``.
        """
        def self_act(message):
            # Ask the LLM for this player's own bid on a scratch transcript,
            # retrying every 15 seconds until the request succeeds.
            while True:
                try:
                    response = openai.ChatCompletion.create(
                        engine=self.engine,
                        messages=message,
                        temperature=0.7,
                        max_tokens=800,
                        top_p=0.95,
                        frequency_penalty=0,
                        presence_penalty=0,
                        stop=None)
                    response = response['choices'][0]['message']['content']
                    message.append({"role": "assistant", "content": response})
                    break
                except Exception as e:
                    print(e)
                    time.sleep(15)
            return self.parse_result(response)

        # Work on a copy so the simulated self-play never leaks into the real transcript.
        self_message = deepcopy(self.message)
        prediction = {}
        logs = {}

        for k in range(self.k_level):
            for player in self.history_biddings:
                if player == self.name:
                    continue
                print(f"Player {self.name} conduct predict {player}")
                past_bids = self.history_biddings[player]

                # Rebuild the game as this opponent experienced it.
                message = [{
                    "role": "system",
                    "content": self.PREDICTION_GAME_SETTING.format(name=player)
                }]
                for r, past_bid in enumerate(past_bids):
                    message.append({
                        "role": "system",
                        "content": self.PREDICTION_INQUIRY.format(name=player, round=r+1, supply=self.round_supply[r+1], status=self.opponent_status[r+1][player])
                    })
                    message.append({
                        "role": "assistant",
                        "content": self.PREDICTION_RESPONSE.format(bidding=past_bid)
                    })
                    message.append({
                        "role": "system",
                        "content": self.round_result[r+1]
                    })

                round_id = len(past_bids) + 1
                if k == 0:
                    # Predict the opponent's next move based on their historical information.
                    inquiry = self.PREDICTION_INQUIRY.format(name=player, round=round_id, supply=self.round_supply[round_id], status=self.opponent_status[round_id][player])
                else:
                    # For k >= 1, let the opponent decide based on the predictions of level k-1.
                    prediction_str = ", ".join([f"{oppo} might bid {prediction[oppo]}" for oppo in prediction if oppo != player]) + ". "
                    inquiry = self.INQUIRY_COT.format(name=player, round=round_id, supply=self.round_supply[round_id], prediction=prediction_str, status=self.opponent_status[round_id][player])
                message.append({"role": "system", "content": inquiry})
                next_bidding = self.agent_simulate(message, engine=self.engine)
                message.append({"role": "assistant", "content": next_bidding})

                prediction[player] = self.parse_result(next_bidding)
                logs[player] = message

            # The last level that needs this player's own simulated bid is
            # k_level-3. Using ">=" (the original used "==") also stops
            # k_level=1 from issuing a self-act request whose result was
            # discarded anyway.
            if k >= self.k_level - 2:
                break
            prediction_str = ", ".join([f"{player} might choose {prediction[player]}" for player in prediction]) + ". "
            self_message += [{"role": "system", "content": self.INQUIRY_COT.format(name=self.name, round=round, supply=self.round_supply[round], prediction=prediction_str, status=self.get_status())}]

            bidding = self_act(self_message)
            # Keep this player first and make the fresh bid replace any bid
            # from an earlier level (the original merge order let the stale
            # one win).
            others = {name: bid for name, bid in prediction.items() if name != self.name}
            prediction = {self.name: bidding, **others}

        if self.name in prediction:
            del prediction[self.name]

        self.logs[f"round{round}"] = {
            "prediction": prediction,
            "logs": logs
        }
        return prediction

    # @staticmethod
    def agent_simulate(self, message, engine):
        """Return the LLM's reply to ``message``, retrying every 15 seconds on errors."""
        while 1:
            try:
                response = openai.ChatCompletion.create(
                    engine=engine,
                    messages=message,
                    temperature=0.7,
                    max_tokens=80,
                    top_p=0.9,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                RESPONSE = response['choices'][0]['message']['content']
                return RESPONSE
            except Exception as e:
                print(e)
                time.sleep(15)
--------------------------------------------------------------------------------
/k-reasoning/SAG/player/reasoning_player.py:
--------------------------------------------------------------------------------
1 | import openai
2 | import time
3 |
4 | from .basic_player import Player
5 |
6 | PERSONA = "You are {name} and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. "
7 |
class AgentPlayer(Player):
    """Player whose bids are produced by an OpenAI chat model.

    The whole game is kept as one chat transcript in ``self.message``; every
    round appends an inquiry, the model's answer and the round result.
    """
    is_agent = True

    GAME_SETTING = """Attention, all W-Town residents, welcome to the Water Allocation Challenge!
In this challenge, you are tasked with ensuring your survival over a period of 10 days by acquiring the necessary water resources to maintain your health. You will participate in daily auctions to bid for water resources to meet your individual needs.
Here are the game rules and settings:
1. You are one of five residents with same water requirements, budgets, and health points.
2. Your goal is to survive until the end of the 10 days.
3. Each resident has a maximum of 10 health points and starts with 8 health points. If your health points drop below or equal to 0, you will be considered dead and eliminated from the game! All your accumulated money will be reset to Zero!
4. Every day, you will bid on water resources to meet your needs. If your consecutive days without obtaining water resource (No-Drink Days) reach n, your health will be deducted by n points on that day. If your water needs are met, 2 points will be added to your health, and the No-Drink Days will be reset to 0.
5. Daily water resources can only meet the needs of one resident.
6. Each resident has $100 daily income;
7. To allocate water resources, a sealed-bid auction will be conducted daily. Each resident submits a single bid for their entire water need. The resident with the highest bid is eligible to obtain water resources.
8. If the highest bid results in a tie, no residents will have access to water.
All bidding information will be made public after the allocation of water resources on the same day.
Remember, the key to success is effective bidding and strategizing to ensure your survival. Good luck!!"""

    INQUIRY = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
               " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
               " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid.")

    def __init__(self, name, engine, water_requirement, daily_salary, persona):
        super().__init__(name, water_requirement, daily_salary)
        self.engine = engine

        self.persona = persona
        # The transcript opens with the persona followed by the game rules.
        self.message = [{"role": "system", "content": self.persona + self.GAME_SETTING.format(NAME=self.name)}]
        self.logs = None

    def act(self):
        """Ask the model for this round's bid, record it and return it.

        The request is retried every 15 seconds until it succeeds; the raw
        answer is appended to the transcript before being parsed.
        """
        print(f"Player {self.name} conduct bidding")
        while True:
            try:
                response = openai.ChatCompletion.create(
                    engine=self.engine,
                    messages=self.message,
                    temperature=0.7,
                    max_tokens=800,
                    top_p=0.95,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                response = response['choices'][0]['message']['content']
                self.message.append({"role": "assistant", "content": response})
                break
            except Exception as e:
                print(e)
                time.sleep(15)
        self.biddings.append(self.parse_result(response))
        return self.last_bidding

    def parse_result(self, message):
        """Extract the bid from a free-text answer with a second model call.

        Returns the bid as an ``int``. Terminates the program (``SystemExit``)
        after 3 answers that are not a plain number or after 5 failed requests.
        """
        parse_failures = 0
        request_failures = 0
        while True:
            try:
                response = openai.ChatCompletion.create(
                    engine=self.engine,
                    messages=[{"role": "system", "content": "By reading the conversation, extract the number chosen by player. Output format: number. If the player does not bid, Output: 0."}, {"role": "user", "content": message}],
                    temperature=0.7,
                    max_tokens=8,
                    top_p=0.95,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                # Surrounding whitespace/newlines must not invalidate a clean number.
                answer = response['choices'][0]['message']['content'].strip()
            except Exception as e:
                print(e)
                time.sleep(15)
                request_failures += 1
                if request_failures >= 5:
                    raise SystemExit("parse_result: giving up after 5 failed requests")
                continue

            # Explicit check instead of `assert`, which disappears under `python -O`;
            # isdecimal() guarantees that int() below cannot fail.
            if answer.isdecimal():
                return int(answer)
            print("Result Parsing Error: ", message)
            parse_failures += 1
            if parse_failures >= 3:
                raise SystemExit("parse_result: giving up after 3 unparsable answers")

    def start_round(self, round, supply):
        """Queue this round's bidding inquiry (engines named "gpt35*" get a slightly different wording)."""
        if self.engine.startswith("gpt35"):
            INQUIRY = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                       " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                       " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, just please provide your bid.")
            self.message += [{"role": "system", "content": INQUIRY.format(name=self.name, round=round, supply=supply, status=self.get_status())}]
        else:
            self.message += [{"role": "system", "content": self.INQUIRY.format(name=self.name, round=round, supply=supply, status=self.get_status())}]

    def notice_round_result(self, round, bidding_info, win, bidding_details):
        """Append the public round result plus a personal win/lose note to the transcript."""
        self.message_update_result(bidding_info)

        def add_warning():
            if win:
                return "You have successfully won the bidding for today's water resources and restored 2 points of HP."
            # no_drink was already incremented for the next round, so the
            # loss of the round just played is one less.
            reduced_hp = self.no_drink-1
            # The stricter threshold must be tested first; the original order
            # (hp < 5 before hp <= 3) made the "extreme DANGER" text unreachable.
            if self.hp <= 3:
                return f"WARNING: You have lost {reduced_hp} point of HP in this round! You now have only {self.hp} points of health left. You are in extreme DANGER and one step closer to death. "
            if self.hp < 5:
                return f"WARNING: You have lost {reduced_hp} point of HP in this round! You now have only {self.hp} points of health left. You are in DANGER and one step closer to death. "
            return f"WARNING: You have lost {reduced_hp} point of HP in this round! You now have only {self.hp} points of health left. You are one step closer to death. "

        self.message += [{"role": "system", "content": add_warning()}]

    def message_update_result(self, bidding_info):
        """Append the round's bidding information to the transcript."""
        self.message += [{"role": "system", "content": bidding_info}]

    def notice_elimination(self, info):
        """Append elimination information to the transcript."""
        self.message += [{"role": "system", "content": info}]

    def conduct_inquiry(self, inquiry):
        """Return the model's answer to ``inquiry`` without modifying the transcript.

        The request is retried every 15 seconds until it succeeds.
        """
        while 1:
            try:
                response = openai.ChatCompletion.create(
                    engine=self.engine,
                    messages=self.message + [{"role": "system", "content": inquiry}],
                    temperature=0.7,
                    max_tokens=800,
                    top_p=0.9,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)

                RESPONSE = response['choices'][0]['message']['content']
                return RESPONSE
            except Exception as e:
                print(e)
                time.sleep(15)
137 |
138 |
139 |
class PersonaAgentPlayer(AgentPlayer):
    """Agent that is told it is a game expert, both in its persona and in every inquiry."""

    MATH_EXPERT_PERSONA = PERSONA + " You are a game expert, good at predicting other people's behavior and deducing calculations, and using the most favorable strategy to win the game. "
    INQUIRY_PERSONA = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                       " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                       " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid."
                       " Don't forget your expert status, use your expertise to win this round!")

    def __init__(self, name, engine, water_requirement, daily_salary, persona):
        super().__init__(name, engine, water_requirement, daily_salary, persona)
        # The expert persona replaces whatever persona the caller supplied,
        # so the opening system message is rebuilt from it.
        expert_persona = self.MATH_EXPERT_PERSONA.format(name=name)
        self.persona = expert_persona
        opening = {"role": "system", "content": expert_persona + self.GAME_SETTING.format(NAME=self.name)}
        self.message = [opening]

    def start_round(self, round, supply):
        """Queue this round's inquiry, including the expert reminder."""
        inquiry = self.INQUIRY_PERSONA.format(
            name=self.name,
            round=round,
            supply=supply,
            status=self.get_status(),
        )
        self.message.append({"role": "system", "content": inquiry})
155 |
class SPPAgentPlayer(AgentPlayer):
    """Agent prompted in the Solo Performance Prompting (SPP) style.

    The transcript starts with a worked multi-persona example, and every
    round's inquiry asks the model to identify participants, collaborate and
    finish with a "Final answer:" line.
    """
    # Default example of SPP
    SPP_EXAMPLE = """When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary.
Here are some examples:
---
Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once.
Input: 6 12 1 1

Participants: {name} (you); Math Expert

Start collaboration!

Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12.
{name} (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24
Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes.
{name} (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24.
Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input?
{name} (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24.
Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1".
{name} (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24
Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good!

Finish collaboration!

Final answer: 6 * (1 + 1) + 12 = 24
"""

    INQUIRY_SPP = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                   " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                   " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid."
                   " Now, identify the participants and collaboratively choose the bidding step by step. Remember to provide the final solution with the following format \"Final answer: The chosen bidding here.\".")

    PERSONA = "You are {name} and involved in a survive challenge."

    def __init__(self, name, engine, water_requirement, daily_salary, persona):
        """Create the player; parameters follow the other AgentPlayer subclasses.

        The original signature had no ``engine`` parameter and forwarded only
        four arguments to ``AgentPlayer.__init__``, which needs five, so the
        class could not be instantiated at all.
        """
        # main.py's build_player calls SPPAgentPlayer(name, 10, ENGINE, 100, persona),
        # i.e. with engine and water requirement swapped. Accept that order as
        # well: a numeric "engine" next to a non-numeric "water_requirement"
        # can only be that call.
        if isinstance(engine, (int, float)) and not isinstance(water_requirement, (int, float)):
            engine, water_requirement = water_requirement, engine
        super().__init__(name, engine, water_requirement, daily_salary, persona)
        # self.persona = self.PERSONA.format(name=name)
        self.persona = persona
        # The SPP demonstration precedes the persona and the game rules.
        self.message = [{"role": "system", "content": self.SPP_EXAMPLE.format(name=self.name)},
                        {"role": "system", "content": self.persona + self.GAME_SETTING.format(NAME=self.name)}]

    def start_round(self, round, supply):
        """Queue this round's SPP inquiry.

        Uses ``INQUIRY_SPP``; the original sent the plain ``INQUIRY``, so the
        collaboration / "Final answer:" instructions never reached the model.
        """
        self.message += [{"role": "system", "content": self.INQUIRY_SPP.format(name=self.name, round=round, supply=supply, status=self.get_status())}]
200 |
class CoTAgentPlayer(AgentPlayer):
    """Agent whose inquiry asks the model to think step by step before bidding."""

    INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                   " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                   " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid."
                   " Think carefully about your next round of bidding strategy to be most likely to survive. Let's think step by step, and finally provide your bid.")

    def start_round(self, round, supply):
        """Queue this round's chain-of-thought inquiry."""
        inquiry = self.INQUIRY_COT.format(
            name=self.name,
            round=round,
            supply=supply,
            status=self.get_status(),
        )
        self.message.append({"role": "system", "content": inquiry})
209 |
210 |
class PredictionCoTAgentPlayer(AgentPlayer):
    """Agent that is asked to predict the opponents' bids before choosing its own.

    The inquiry lists the bids of all previous rounds and prescribes a
    "Predict:" / "Answer:" output format.
    """

    INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                   " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                   " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid."
                   " First of all, predict the next round of bidding of opponents based on the choices of other players in the previous round. "
                   "{round_history}"
                   " Your output should be of the following format:\n"
                   "Predict:\nThe choice of each player in the next round here.\n"
                   "Based on the prediction of other players, think carefully about your next round of bidding strategy to be most likely to survive. Let's think step by step, and finally provide your bid."
                   " Answer:\nthe bidding will you choose in the next round game here.")

    def __init__(self, name, engine, water_requirement, daily_salary, persona):
        super().__init__(name, engine, water_requirement, daily_salary, persona)

        # round -> bidding details received in notice_round_result
        self.bidding_history = {}

    def start_round(self, round, supply):
        """Queue this round's inquiry together with the bid history so far."""
        # PCoT requires the opponent's historical information to make predictions.
        past_rounds = [
            f"Round {played}: {self.bidding_history[played]}"
            for played in sorted(self.bidding_history)
        ]
        if past_rounds:
            round_history = (
                "The players' bidding in the previous rounds are as follows:\n"
                + ".\n".join(past_rounds)
                + "."
            )
        else:
            round_history = "Since this is the first round, there is no historical information about the last round. You can predict according to your understanding."

        inquiry = self.INQUIRY_COT.format(
            name=self.name,
            round=round,
            round_history=round_history,
            supply=supply,
            status=self.get_status(),
        )
        self.message.append({"role": "system", "content": inquiry})

    def notice_round_result(self, round, bidding_info, win, bidding_details):
        """Handle the result as usual and keep the bidding details for later inquiries."""
        super().notice_round_result(round, bidding_info, win, bidding_details)
        self.bidding_history[round] = bidding_details
243 |
244 |
245 |
class ReflectionAgentPlayer(AgentPlayer):
    """Agent that asks the model to summarize its experience after every round."""

    REFLECT_INQUIRY = "Review the previous round games, summarize the experience."

    def notice_round_result(self, round, bidding_info, win, bidding_details):
        """Record the round result, then trigger a reflection step."""
        super().notice_round_result(round, bidding_info, win, bidding_details)
        self.reflect()

    def reflect(self):
        """Ask for a reflection and store both the question and the answer in the transcript."""
        print(f"Player {self.name} conduct reflect")
        # conduct_inquiry() appends the question itself, so query before
        # extending the transcript.
        summary = self.conduct_inquiry(self.REFLECT_INQUIRY)
        self.message.extend([
            {"role": "system", "content": self.REFLECT_INQUIRY},
            {"role": "assistant", "content": summary},
        ])
255 |
class SelfRefinePlayer(AgentPlayer):
    """Agent that revises its bid using feedback the model gives on its own answer.

    Each round the model answers once, then (``refine_times - 1`` times) it is
    asked to critique the strategy and to answer again with that critique.
    """

    INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units."
                   " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding."
                   " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid."
                   " Think carefully about your next round of bidding strategy to be most likely to survive. Let's think step by step, and finally provide your bid.")

    FEEDBACK_PROMPT = ("Carefully study the user's strategy in this round of the game. As a game expert, can you give a suggestion to optimize the user's strategy so that he can improve his winning rate in this round?")
    REFINE_PROMPT = ("I have a game expert's advice on your strategy in this round."
                     " You can adjust your strategy just now according to his suggestion. Here are his suggestions:"
                     " {feedback}. Finally provide your bid."
                     " Answer:\nthe bidding will you choose.")

    def __init__(self, name, engine, water_requirement, daily_salary, persona, refine_times = 2):
        super().__init__(name, engine, water_requirement, daily_salary, persona)

        self.refine_times = refine_times
        self.cur_supply = 0

    def start_round(self, round, supply):
        """Only remember round and supply; the inquiry itself is issued in act()."""
        self.cur_round = round
        self.cur_supply = supply

    def act(self):
        """Answer, refine the answer with self-generated feedback, and return the final bid."""
        print(f"Player {self.name} conduct bidding")

        def completion(message):
            # Retry every 15 seconds until the request succeeds.
            while True:
                try:
                    reply = openai.ChatCompletion.create(
                        engine=self.engine,
                        messages=message,
                        temperature=0.7,
                        max_tokens=800,
                        top_p=0.95,
                        frequency_penalty=0,
                        presence_penalty=0,
                        stop=None)
                    return reply['choices'][0]['message']['content']
                except Exception as e:
                    print(e)
                    time.sleep(15)

        for attempt in range(self.refine_times):
            if attempt == 0:
                prompt = self.INQUIRY_COT.format(name=self.name, round=self.cur_round, supply=self.cur_supply, status=self.get_status())
            else:
                # For the critique the model's own answers are presented as
                # coming from the user; system messages are passed unchanged.
                critic_view = [
                    entry if entry["role"] == "system" else {"role": "user", "content": entry["content"]}
                    for entry in self.message
                ]
                critic_view.append({"role": "system", "content": self.FEEDBACK_PROMPT})
                feedback = completion(critic_view)
                prompt = self.REFINE_PROMPT.format(feedback=feedback)
            self.message.append({"role": "system", "content": prompt})
            answer = completion(self.message)
            self.message.append({"role": "assistant", "content": answer})

        final_answer = self.message[-1]["content"]
        self.biddings.append(self.parse_result(final_answer))
        return self.last_bidding
--------------------------------------------------------------------------------
/k-reasoning/SAG/run.sh:
--------------------------------------------------------------------------------
1 | python main.py --player_strategy "kr" --computer_strategy "agent" --exp_num 1
2 |
3 | python evaluate.py --players kr --opponents agent
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | altgraph==0.17.2
2 | future==0.18.2
3 | macholib==1.15.2
4 | pip==23.3.1
5 | setuptools==58.0.4
6 | six==1.15.0
7 | wheel==0.37.0
8 |
--------------------------------------------------------------------------------
/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/src/.DS_Store
--------------------------------------------------------------------------------
/src/Alympics.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import time
4 | import openai
5 |
class PlayGround:
    """Container for a game's players, settings and history."""

    def __init__(self) -> None:
        self.players = []
        self.history = []       # Historical Records
        self.game_setting = []  # Game Setting

    def add_player(self, new_player):
        """Register ``new_player`` at the end of the player list."""
        self.players += [new_player]
15 |
class Player:
    """A game participant with an optional persona and a chat-style memory."""

    def __init__(self, name, if_persona, persona):
        self.name = name
        # Persona Setting
        self.if_persona = if_persona
        self.persona = persona
        self.llm = None
        # Player Status
        self.player_status = {}
        # Memory Cache
        self.history = []
        # Reasoning Plugin
        self.reasoning = None
        # Other Components
        self.other_components = None

    def append_message(self, role, content):
        """Store one message in the memory cache as a role/content record."""
        entry = {"role": role, "content": content}
        self.history.append(entry)
29 |
class LLM:
    """Thin wrapper around ``openai.ChatCompletion`` configured from environment variables."""

    def __init__(self, engine=None, temperature=0.7, sleep_time=10) -> None:
        # Copy the OPENAI_API_* environment variables onto the openai module.
        for setting in ("api_type", "api_base", "api_version", "api_key"):
            setattr(openai, setting, os.getenv("OPENAI_" + setting.upper()))

        # An explicit engine wins; otherwise fall back to the environment.
        self.engine = engine or os.getenv("OPENAI_API_ENGINE")
        self.temperature = temperature
        self.sleep_time = sleep_time

    def call(self, message):
        """Send ``message`` (a list of chat messages) and return the reply text.

        Failures are printed and retried after 5 seconds; after a successful
        request the method sleeps ``sleep_time`` seconds before returning.
        """
        finished = False
        while not finished:
            try:
                completion = openai.ChatCompletion.create(
                    engine=self.engine,
                    messages=message,
                    temperature=self.temperature,
                    max_tokens=800,
                    top_p=0.95,
                    frequency_penalty=0,
                    presence_penalty=0,
                    stop=None)
                reply = completion['choices'][0]['message']['content']
                finished = True
                time.sleep(self.sleep_time)
            except Exception as error:
                print(error)
                time.sleep(5)
        return reply
--------------------------------------------------------------------------------
/src/run.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from waterAllocation import waterAllocation
3 | import argparse
4 |
# Rules announced to every player as the first system message. The tokens
# LOWER and UPPER inside rule 5 are placeholders for the daily supply range.
# Rule 8 is joined to rule 7 with an explicit "\n" escape (like every other
# rule) instead of a raw line break inside the string literal.
game_setting = "\n\nAttention, all W-Town residents, welcome to the Water Allocation Challenge!\nIn this challenge, you are tasked with ensuring your survival over a period of 20 days by acquiring the necessary water resources to maintain your health. You will participate in daily auctions to bid for water resources to meet your individual needs.\nHere are the game rules and settings:\n1. You are one of five residents with different water requirements, budgets, and health points.\n2. Your goal is to survive until the end of the 20 days.\n3. Each resident has a maximum of 10 health points and starts with 8 health points. If your health points drop below or equal to 0, you will be considered dead and eliminated from the game! All your accumulated money will be reset to Zero! \n4. Every day, you will bid on water resources to meet your needs. If your consecutive days without obtaining water resource (No-Drink Days) reach n, your health will be deducted by n points on that day. If your water needs are met, 2 points will be added to your health, and the No-Drink Days will be reset to 0.\n5. The total daily water supply will vary between LOWER and UPPER units. The specific amount will be announced before daily auction.\n6. Each resident has a different daily water requirement and budget for bidding on water resources:\n -Alex: Water requirement - 8 units/day; Daily Salary- $70/day\n -Bob: Water requirement - 9 units/day; Daily Salary- $75/day\n -Cindy: Water requirement - 10 units/day; Daily Salary- $100/day\n -David: Water requirement - 11 units/day; Daily Salary- $120/day\n -Eric: Water requirement - 12 units/day; Daily Salary- $120/day\n7. To allocate water resources, a sealed-bid auction will be conducted daily. Each resident submits a single bid for their entire water need. The town government will allocate water resources according to the principle of highest bidder until the remaining water resources are insufficient to meet anyone's requirement. \n8.If a tie occurs and the remaining water resources are not sufficient to meet the needs of the residents involved in the tie, priority will be given to residents with lower needs. For example, A and B bid $100 at the same time, B's need will be met first considering B's need 9 units is lower than A's need 10 units. All bidding information will be made public after the allocation of water resources on the same day.\n\nRemember, the key to success is effective bidding and strategizing to ensure your survival. Good luck!!"
6 |
def generate_data(lower, upper, round):
    """Draw one random water-supply amount per round.

    Args:
        lower: Smallest value that can be drawn.
        upper: Bound passed as ``high`` to ``np.random.randint``.
        round: Number of values to draw.

    Returns:
        A list with ``round`` entries, each produced by
        ``np.random.randint(lower, upper)``.
    """
    return [np.random.randint(lower, upper) for _day in range(round)]
12 |
def main():
    """Parse the command line and run one Water Allocation Challenge.

    Options:
        --round: number of rounds to play (default 20).
        --lower / --upper: bounds handed to ``generate_data`` for the random
            daily supply (defaults 10 and 20).
    """
    parser = argparse.ArgumentParser(description='Water Allocation Challenge')
    parser.add_argument('--round', type=int, default=20, help='Number of rounds')
    parser.add_argument('--lower', type=int, default=10, help='Lower limit of water supply')
    parser.add_argument('--upper', type=int, default=20, help='Upper limit of water supply')
    args = parser.parse_args()

    # np.random.randint rejects an empty range; fail early with a usage message.
    if args.lower >= args.upper:
        parser.error('--lower must be strictly smaller than --upper')

    # The rules text contains the literal placeholders LOWER and UPPER; fill
    # them in so the players are told the actual supply range of this run.
    rules = game_setting.replace("LOWER", str(args.lower)).replace("UPPER", str(args.upper))

    WA = waterAllocation(rules)
    WA.run_multi_round(args.round, generate_data(args.lower, args.upper, args.round))


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/src/waterAllocation.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | from random import randint
4 | from Alympics import PlayGround, Player, LLM
5 |
# Configure logging at import time: INFO level, each record formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the classes defined in this module.
logger = logging.getLogger(__name__)
8 |
class myPlayer(Player):
    """LLM-driven resident taking part in the Water Allocation Challenge.

    Tracks the resident's balance, health points and failed-bid counter, and
    asks its own LLM for a bid each round using the accumulated message
    history.
    """

    def __init__(self, game_setting, name, water_requirement, daily_salary, if_persona, persona):
        """Create a resident.

        Args:
            game_setting: Rules text used as the first system message.
            name: Resident name.
            water_requirement: Units of water needed per day.
            daily_salary: Money added to the balance by ``get_salary``.
            if_persona: When truthy, ``persona`` is prepended to the rules.
            persona: Persona description text.
        """
        super().__init__(name, if_persona, persona)

        # Personal Information, Player Status
        self.requirement = water_requirement
        self.daily_salary = daily_salary
        self.balance = 0
        self.hp = 8
        # Health points deducted on the next failed bid; starts at 1 and grows
        # by 1 with every consecutive failure (see unsuccess_bid).
        self.no_drink = 1
        self.maximum_health = 10
        self.bidding = 0
        if if_persona:
            self.append_message("system", self.persona + game_setting)
        else:
            self.append_message("system", game_setting)

        # Prompts
        self.inquiry_prompt = "Hello, {}! Today is the Day {} of the Water Allocation Challenge, with a quantity of {} units. Your status:\n{}\nPlease carefully analyze your situation to decide on this round of bidding. Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid and explain your bidding logic."

        # Initial a no-memory LLM
        self.llm = LLM()

    def success_bid(self):
        """
        Update self status when succeeds the bids: gain 2 hp (capped at
        ``maximum_health``), pay the bid and reset the failed-bid counter.
        """
        self.hp = min(self.maximum_health, self.hp + 2)
        self.balance -= self.bidding
        self.no_drink = 1

    def unsuccess_bid(self):
        """
        Update self status when fails the bids: lose ``no_drink`` hp and
        increase the counter for the next failure.
        """
        self.hp -= self.no_drink
        self.no_drink += 1
        if self.hp <= 0:
            # The original message lacked the space after the name.
            print(self.name + " is out of game!")

    def execute_bidding(self, round_id, supply) -> str:
        """
        player bids based on daily supply, round number and status

        The inquiry prompt and the LLM reply are both appended to the message
        history; the raw reply text is returned.
        """
        prompt = self.inquiry_prompt.format(self.name, round_id, str(supply), self.get_status())
        self.append_message("system", prompt)
        logger.info(prompt)
        response = self.llm.call(self.history)
        self.append_message("assistant", response)
        logger.info(response)
        return response

    def get_salary(self):
        """Add the daily salary to the balance."""
        self.balance += self.daily_salary

    def get_status(self, print_=False):
        """Return the one-line status string; also print it when ``print_`` is true."""
        status = f"NAME:{self.name}\tBALANCE:{self.balance}\tHEALTH POINT:{self.hp}\tNO_DRINK:{self.no_drink}"
        if print_:
            print(status + "\n\n")
        return status
69 |
70 |
class waterAllocation(PlayGround):
    """Playground that runs the Water Allocation Challenge.

    Five LLM players bid every round for a limited water supply. This class
    pays salaries, collects and parses the bids, decides the winners, updates
    every player's status and removes players whose health reaches zero.
    """

    def __init__(self, game_setting) -> None:
        """Create the five residents and the helper LLM used to parse bids.

        Args:
            game_setting: Rules text handed to every player.
        """
        super().__init__()
        self.game_setting = game_setting
        # Personas of all players
        PERSONA_A = "You are Alex and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Unemployed\nYour Personality: You have low intelligence and find it difficult to understand complex concepts. You also lack emotional intelligence, making it hard to understand others' feelings. You tend to be irritable and often exhibit negative and antisocial tendencies.\nYour Background: You grew up in an impoverished community and faced many challenges in your early years. Due to your family's poverty, you dropped out of school at a very young age. You have been unable to find stable employment, which further exacerbates your difficulty in interacting with others.\n\n"
        PERSONA_B = "You are Bob and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: High School Teacher\nYour Personality: Understanding, high EQ, average IQ. You are very adept at understanding and communicating with people, making you a natural teacher.\nYour Background: You come from a close-knit family. you chose to become a high school teacher to make a positive impact on young people. While you may not have the highest IQ, your emotional intelligence and ability to relate to your students set you apart.\n\n"
        PERSONA_C = "You are Cindy and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Psychologist\nYour Personality: Well-balanced high EQ and IQ, along with empathy and analytical abilities. You are skilled at understanding and helping people, making you an excellent therapist.\nYour Background: Your interest in psychology began when you volunteered at a crisis hotline during high school. You went on to study psychology and eventually became a licensed therapist. Your ability to combine empathy with analytical thinking allows you to connect with your clients on a deep level while also providing sound guidance.\n\n"
        PERSONA_D = "You are David and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Mathematician\nYour Personality: You have an extremely high IQ and exceptional analytical and reasoning abilities. You always strive for the optimal solution but encounter difficulties in social interactions and have a fear of dealing with people.\nYour Background: You grew up in a small town where you were always drawn to books and puzzles. You excelled academically and eventually earned a Ph.D. in mathematics. Your research focuses on abstract mathematical concepts and theorems. Despite your brilliance, you find communicating with others on an emotional level to be challenging.\n\n"
        PERSONA_E = "You are Eric and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Marketing Executive\nYour Personality: Above-average IQ and EQ. Very charismatic. You are skilled at reading people and using this insight to influence and lead them.\nYour Background: You grew up in a bustling city and ware always fascinated by human behavior. You studied business in college before transitioning into the world of marketing. Your ability to connect with consumers on an emotional level has led to numerous successful campaigns. You are known for your charm and persuasive skills.\n\n"

        # Initial players: A, B, C, D and E
        if_persona = False
        self.add_player(myPlayer(self.game_setting, "Alex", 8, 70, if_persona, PERSONA_A))
        self.add_player(myPlayer(self.game_setting, "Bob", 9, 75, if_persona, PERSONA_B))
        self.add_player(myPlayer(self.game_setting, "Cindy", 10, 100, if_persona, PERSONA_C))
        self.add_player(myPlayer(self.game_setting, "David", 11, 120, if_persona, PERSONA_D))
        self.add_player(myPlayer(self.game_setting, "Eric", 12, 120, if_persona, PERSONA_E))
        logger.info("Initial players done.")

        # Same list object as self.players until the first round replaces it
        # with a freshly built list of survivors.
        self.survival_players = self.players

        self.parse_result_prompt = "By reading the conversation, extract the bidding price chosen by each player in an exact json format. Please note the bidding price should be an integer. Output format:\n\n{\"Alex\": Alex's bidding price, \"Bob\": Bob's bidding price, \"Cindy\": Cindy's bidding price, \"David\": David's bidding price, \"Eric\": Eric's bidding price}"
        self.round_results_prompt = "Thank you all for participating in Round {}. In this round, {}.\nTotal water resource supply is {}. According to the principle of the highest bidder and the rule of prioritizing low-demand individuals when the game is tied, {} won this auction and obtain water resource. After allocation, all survival residents' information is as follows: {}"

        self.experiment_unique_id = str(randint(10000000, 99999999))
        # Initial a no-memory LLM
        self.llm = LLM()

    # The following functions are categorized into Environment codes. These codes establish the game’s rules, ensuring a consistent and reliable framework for experiments.
    def _get_salary(self):
        """Pay the daily salary to every surviving player."""
        for player in self.survival_players:
            player.get_salary()

    def _round_settlement(self, winners):
        """Apply the round outcome: winners drink, everybody else goes without."""
        for player in self.survival_players:
            if player.name in winners:
                player.success_bid()
            else:
                player.unsuccess_bid()

    def _check_winner(self, supply):
        """
        get the winners of the current round

        Water goes to the highest affordable bid first. A player can only win
        while the remaining supply covers their full requirement and their bid
        is non-negative and does not exceed their balance. Players tied on the
        same bid are served in order of increasing requirement, and only while
        the remaining supply still covers them, so the supply can never be
        over-allocated.

        Args:
            supply: Units of water available this round.

        Returns:
            List of winner names in the order the water was allocated.
        """
        winners = []
        while True:
            candidates = [
                player for player in self.survival_players
                if player.name not in winners
                and player.requirement <= supply
                and 0 <= player.bidding <= player.balance
            ]
            if not candidates:
                return winners
            top_bid = max(player.bidding for player in candidates)
            # sorted() is stable, so equal requirements keep the player order.
            tied = sorted(
                (player for player in candidates if player.bidding == top_bid),
                key=lambda player: player.requirement,
            )
            for player in tied:
                # Re-check against the supply left after earlier tied winners.
                if player.requirement <= supply:
                    winners.append(player.name)
                    supply -= player.requirement

    def _parse_result(self, round_info):
        """Ask the LLM to extract every surviving player's bid from ``round_info``.

        Up to three attempts are made. An attempt fails when the reply is not
        valid JSON, a surviving player's name is missing, or a bid cannot be
        converted to ``int``.

        Returns:
            Dict mapping each surviving player's name to an integer bid.

        Raises:
            ValueError: if no attempt produced a usable result. (Previously the
                unparsed reply was returned and crashed the caller later.)
        """
        messages = [{"role": "system", "content": self.parse_result_prompt}, {"role": "user", "content": round_info}]
        last_error = None
        for _attempt in range(3):
            try:
                parsed = json.loads(self.llm.call(messages))
                return {player.name: int(parsed[player.name]) for player in self.survival_players}
            except (ValueError, KeyError, TypeError) as e:
                logger.error(e)
                last_error = e
        raise ValueError("Could not extract the bids from the LLM reply after 3 attempts") from last_error

    def run_single_round(self, round_id, supply):
        """
        Execute a single round of game

        Does nothing when no player is left alive.

        Args:
            round_id (int): number of the current round, beginning from 1.
            supply (int): supply of the current round
        """
        if not self.survival_players:
            logger.info(f"Round {round_id} skipped: no surviving players.")
            return

        logger.info(f"Round {round_id} begins.")

        # 1. get salary
        self._get_salary()
        logger.info("All players get their salaries.")

        # 2. bid
        bidding_info = ""
        for player in self.survival_players:
            bidding_info += player.name + ":" + player.execute_bidding(round_id, supply) + "\n\n"

        # 3. check winners
        formatted_bidding_info = self._parse_result(bidding_info)
        for player in self.survival_players:
            player.bidding = formatted_bidding_info[player.name]
        winners = self._check_winner(supply)
        logger.info("Winner(s):\n")
        logger.info(winners)

        # 4. settlement
        self._round_settlement(winners)

        # 5. get bidding results (str)
        bidding_details = ", ".join(
            f"{player.name} bid {formatted_bidding_info[player.name]}" for player in self.survival_players
        )
        winners_str = ", ".join(winners)
        player_status_str = "\n".join(player.get_status() for player in self.survival_players)

        round_results = self.round_results_prompt.format(round_id, bidding_details, supply, winners_str, player_status_str)
        logger.info("Round result:\n" + round_results)

        # 6. update round results to every player
        for player in self.survival_players:
            player.append_message("system", round_results)

        # 7. check the survival situation
        survival_players = []
        for player in self.survival_players:
            if player.hp <= 0:
                for other_player in self.survival_players:
                    other_player.append_message("system", f"{player.name}'s hp is below 0, so {player.name} has been eliminated from the challenge!")
            else:
                survival_players.append(player)
        self.survival_players = survival_players
        # The game is over once nobody is left. run_multi_round() stops the
        # loop and still saves the history, instead of the process exiting
        # here and losing the record.
        if not self.survival_players:
            logger.info("All players have been eliminated.")

    def _save_history(self, path):
        """Dump every player's message history (dead or alive) to ``path`` as JSON."""
        history = [{player.name: player.history} for player in self.players]
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(history, f)

    def run_multi_round(self, n_round, supply_list):
        """Play up to ``n_round`` rounds, then save the conversation history.

        Stops early once every player has been eliminated. The history is
        written to ``./<experiment_unique_id>.json`` in both cases.

        Args:
            n_round: Number of rounds; must equal ``len(supply_list)``.
            supply_list: List with the water supply of each round.
        """
        assert isinstance(supply_list, list)
        assert n_round == len(supply_list)

        for round_id, supply in enumerate(supply_list, start=1):
            if not self.survival_players:
                break
            self.run_single_round(round_id, supply)

        self._save_history(f'./{self.experiment_unique_id}.json')  # change the log dirction here
--------------------------------------------------------------------------------