├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── Transparency_FAQ.md ├── assets ├── games_setting.png ├── k-level-reasoning-egs.png ├── k-level-reasoning-w-llms.png ├── k-level-reasoning.png └── playground.png ├── exp └── WAC │ ├── human annotation.zip │ └── records.zip ├── k-reasoning ├── G08A │ ├── evaluate.py │ ├── game.py │ ├── main.py │ ├── player │ │ ├── __init__.py │ │ ├── basic_player.py │ │ ├── k_level_reasoning_player.py │ │ └── reasoning_player.py │ └── run.sh ├── README.md └── SAG │ ├── evaluate.py │ ├── game.py │ ├── main.py │ ├── player │ ├── __init__.py │ ├── basic_player.py │ ├── k_level_reasoning_player.py │ └── reasoning_player.py │ └── run.sh ├── requirements.txt └── src ├── .DS_Store ├── Alympics.py ├── run.py └── waterAllocation.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | !.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ALYMPICS: Language Agents Meet Game Theory 2 | 3 | **Alympics** is a platform that leverages Large Language Model (LLM) agents to facilitate investigations in game theory. 4 | 5 | See our paper: [ALYMPICS: LLM Agents Meet Game Theory -- Exploring Strategic Decision-Making with AI Agents](https://arxiv.org/pdf/2311.03220) 6 | 7 | ## Architecture of Alympics 8 | 9 | playground 10 | 11 | The architecture of Alympics comprises the Sandbox Playground and Players. The Sandbox Playground creates an environment where game settings, as specified by researchers, are executed. Agent players, along with the optional human players, actively engage in the game within this environment. 12 | 13 | - Sandbox Playground: The Sandbox Playground serves as the environment for conducting games, providing a versatile and controlled space for agent players interactions. 14 | - Agent Players: Agent Players constitute an indispensable component of the Alympics framework, embodying LLM-powered agent entities that participate in strategic interactions within the Sandbox Playground. 15 | 16 | 17 | ## Contributions 18 | 19 | - The proposal of an original, LLM agent-based framework to facilitate game theory research. 20 | - The demonstration of Alympics’s application through a comprehensive pilot case study. 21 | - The emphasis on the significance of leveraging LLM agents to scrutinize strategic decision-making within a controlled and reproducible environment. This endeavor not only enriches the field of game theory but also has the potential to inspire research in other domains where decision-making assumes a pivotal role. 22 | 23 | ## Directory Structure 24 | The code directory structure is 25 | ``` 26 | $src 27 | ├─ run.py 28 | ├─ Utils.py # The basic Playground class, the Player class and the LLM API 29 | └─ waterAllocation.py # An example of using playground 30 | ``` 31 | **Please complete the configuration of LLM in the Utils.py first.** 32 | 33 | 34 | ## Example 35 | Alympics provides a research platform for conducting experiments on complex strategic gaming problems. As a pilot demonstration, we developed a game called the ’Water Allocation Challenge’ to illustrate how it can be leveraged for game theory research. 36 | 37 | The details can be found in our paper. 38 | 39 | ## Citation 40 | 41 | ``` 42 | @misc{mao2023alympics, 43 | title={ALYMPICS: Language Agents Meet Game Theory}, 44 | author={Shaoguang Mao and Yuzhe Cai and Yan Xia and Wenshan Wu and Xun Wang and Fengyi Wang and Tao Ge and Furu Wei}, 45 | year={2023}, 46 | eprint={2311.03220}, 47 | archivePrefix={arXiv}, 48 | primaryClass={cs.CL} 49 | } 50 | ``` 51 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /Transparency_FAQ.md: -------------------------------------------------------------------------------- 1 | # Transparency.md for ALYMPICS 2 | 3 | ## Overview 4 | ALYMPICS is a novel platform that integrates Large Language Model (LLM) agents into game theory scenarios. It aims to simulate human behavior in strategic decision-making environments, using a survival game model to explore the dynamics of multi-agent interactions and competition for limited resources. 5 | 6 | ## Objective 7 | The main goal of ALYMPICS is to provide a controlled, scalable, and reproducible environment to study human-like strategic behaviors in AI agents. By incorporating varying degrees of resource availability and diverse agent personalities, the platform facilitates an in-depth analysis of strategic engagements and adaptations in complex socioeconomic contexts. 8 | 9 | ## Audience 10 | This documentation is intended for researchers, developers, and enthusiasts in the fields of artificial intelligence, game theory, and multi-agent systems. It addresses the potential applications, limitations, and optimal use of the ALYMPICS platform. 11 | 12 | ## Key Features 13 | - Simulation of human-like behaviors in AI agents. 14 | - Dynamic scenarios with adjustable variables like resource availability. 15 | - Realistic modeling of multi-agent collaborations and competitions. 16 | - Insights into strategic decision-making within AI systems. 17 | 18 | ## Limitations 19 | - The behaviors of LLM agents in ALYMPICS might not perfectly mirror real human behaviors in every aspect. 20 | - The current version focuses on a specific game scenario; further development is required to generalize the platform to other contexts. 21 | - The platform's performance may vary based on the complexity of the game scenario and the computational resources available. 22 | 23 | ## Best Practices for Performance 24 | - Ensure adequate computational resources for running complex simulations. 25 | - Experiment with different settings of resource availability and agent personalities to observe varied strategic behaviors. 26 | - Use the platform's adjustable variables to tailor the simulation to specific research questions or hypotheses. 27 | 28 | ## Social Impact Statement 29 | ALYMPICS is designed to advance our understanding of AI's capabilities in strategic decision-making. While it offers valuable insights, we acknowledge the ethical considerations in deploying AI in real-world scenarios. We are committed to responsible research and development, ensuring that our work aligns with ethical guidelines and contributes positively to society. Our work is for research purposes only, and further testing and validation would be needed to use it outside of a research context. 30 | 31 | ## Feedback and Collaboration 32 | We encourage feedback and collaboration from our audience. If you have suggestions, questions, or would like to contribute to the project, please contact us at Shaoguang.Mao@microsoft.com. 33 | 34 | ## Future Updates 35 | The ALYMPICS platform is an evolving project. We plan to expand its scope to include a wider range of game theory scenarios and enhance the realism of agent behaviors. Stay tuned for updates in our code repository and other publications. 36 | 37 | ## Conclusion 38 | ALYMPICS represents a significant step forward in the integration of AI into game theory. We hope that this platform will be a valuable tool for researchers and developers interested in exploring the strategic capabilities of AI agents. 39 | -------------------------------------------------------------------------------- /assets/games_setting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/games_setting.png -------------------------------------------------------------------------------- /assets/k-level-reasoning-egs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/k-level-reasoning-egs.png -------------------------------------------------------------------------------- /assets/k-level-reasoning-w-llms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/k-level-reasoning-w-llms.png -------------------------------------------------------------------------------- /assets/k-level-reasoning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/k-level-reasoning.png -------------------------------------------------------------------------------- /assets/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/assets/playground.png -------------------------------------------------------------------------------- /exp/WAC/human annotation.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/exp/WAC/human annotation.zip -------------------------------------------------------------------------------- /exp/WAC/records.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/exp/WAC/records.zip -------------------------------------------------------------------------------- /k-reasoning/G08A/evaluate.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import os 4 | from glob import glob 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | class G08AEvaluator(): 9 | 10 | def __init__(self, players, opponents, exp_rnd, exp_num, result_dir, output_dir) -> None: 11 | self.players = players.split(",") 12 | self.opponents = opponents.split(",") 13 | 14 | self.exp_rnd = exp_rnd 15 | self.exp_num = exp_num 16 | 17 | self.result_dir = result_dir 18 | self.output_dir = output_dir 19 | 20 | if not os.path.exists(self.output_dir): 21 | os.makedirs(self.output_dir, exist_ok=True) 22 | 23 | def win_rate(self): 24 | print("="*40+" Win Rate "+"="*40) 25 | 26 | players, opponents = self.players, self.opponents 27 | win_result = {} 28 | 29 | for agent in players: 30 | win_result.setdefault(agent, {}) 31 | for computer in opponents: 32 | exp = f"{self.result_dir}/{agent}_VS_{computer}*.json" 33 | cots = glob(exp) 34 | 35 | wins = {} 36 | total_round = 0 37 | for result in cots: 38 | with open(result) as fin: 39 | result = json.load(fin)["winners"] 40 | total_round = len(cots)*len(result) 41 | for rnd in result: 42 | if int(rnd)>self.exp_rnd: continue 43 | for player in result[rnd]: 44 | wins.setdefault(player, [0]*(len(result))) 45 | wins[player][int(rnd)-1]+=1 46 | 47 | win_rate = sum(wins.get("Alex", [0]))/(total_round) 48 | win_result[agent][computer] = win_rate 49 | 50 | average = {} 51 | for i, agent in enumerate(win_result): 52 | average[agent] = list(win_result[agent].values()) 53 | average[agent] = sum(average[agent])/len(average[agent]) 54 | 55 | print(f"{'':12s}\t"+"\t".join([f"{agent:7s}" for agent in players])) 56 | for computer in opponents: 57 | print(f"{computer:12s}",end="\t") 58 | print("\t".join([f"{win_result[agent][computer]:<7.2f}" if win_result[agent][computer]>=0 else f"{'':7s}" for agent in win_result])) 59 | 60 | print(f"{'Average':12s}",end="\t") 61 | print("\t".join([f"{average[agent]:<7.2f}" if average[agent]>=0 else f"{'':7s}" for agent in win_result])) 62 | 63 | print() 64 | 65 | def adaption_index(self): 66 | print("="*40+" Adaption Index "+"="*40) 67 | 68 | players, opponents = self.players, self.opponents 69 | learning_result = {} 70 | 71 | for oppo in opponents: 72 | exp_result = {} 73 | for agent in players: 74 | exps = glob(f"{self.result_dir}/{agent}_VS_{oppo}*.json") 75 | for exp in exps: 76 | with open(exp) as fin: 77 | logs = json.load(fin) 78 | exp_ground = logs["biddings"] 79 | target_div = [] 80 | for r in range(0, 10): 81 | bids = [exp_ground[p][r] for p in exp_ground] 82 | target = sum(bids)/len(bids)*0.8 83 | player_bid = exp_ground["Alex"][r] 84 | target_div.append(abs(player_bid-target)) 85 | exp_result.setdefault(agent, []) 86 | exp_result[agent].append(sum(target_div[5:])/sum(target_div[:5])) # [Target Deviation @ (second half)] / [Target Deviation @ (first half)] 87 | learning_result[oppo]=exp_result 88 | 89 | print(f"{'':8s}\t"+"\t".join([f"{agent:2s}" for agent in players])) 90 | 91 | for oppo in opponents: 92 | exp_result = learning_result[oppo] 93 | 94 | maxrate = list(set([sum(exp_result[agent])/len(exp_result[agent]) if exp_result.get(agent) else 10 for agent in players])) 95 | maxrate.sort() 96 | 97 | print(f"{oppo:8s}", end='\t') 98 | print('\t'.join([f"{sum(exp_result[agent])/len(exp_result[agent]):<.2f}" if exp_result.get(agent) else f"{'':2s}" for agent in players])) 99 | 100 | agent_sum = {} 101 | for agent in players: 102 | agent_sum[agent]=[] 103 | for oppo in opponents: 104 | agent_oppo_learning = learning_result[oppo].get(agent,[]) 105 | if agent_oppo_learning: 106 | agent_sum[agent].append(sum(agent_oppo_learning)/len(agent_oppo_learning)) 107 | 108 | print(f"{'Average':8s}", end='\t') 109 | print('\t'.join([f"{sum(agent_sum[agent])/len(agent_sum[agent]):<.2f}" if len(agent_sum.get(agent,[]))==len(opponents) else f"{'':2s}" for agent in players])) 110 | 111 | print() 112 | 113 | def extract_PCoT_prediction(self): 114 | """ 115 | Parse the prediction result of PCoT from the response. 116 | """ 117 | 118 | import openai 119 | import time 120 | 121 | # Fill in your config information to conduct experiments. 122 | openai.api_type = "" 123 | openai.api_base = "" 124 | openai.api_version = "" 125 | openai.api_key = "" 126 | ENGINE = "gpt4-32k" 127 | 128 | def re_extract(message): 129 | matchs = re.finditer("Player (\d)(\s\(\w+\))?:\s*(\d+)", message) 130 | matchs = list(matchs) 131 | try: 132 | assert 5>=len(matchs) >=4, message 133 | except BaseException: 134 | return {} 135 | result = [m.groups()[2] for m in matchs] 136 | if len(result)==5: 137 | return {p: n for p, n in zip(['Alex', 'Bob', 'Cindy', 'David', 'Eric'], result)} 138 | else: 139 | return {p: n for p, n in zip([ 'Bob', 'Cindy', 'David', 'Eric'], result)} 140 | 141 | def gpt_extract(message): 142 | status = 0 143 | times = 0 144 | while status != 1: 145 | try: 146 | response = openai.ChatCompletion.create( 147 | engine=ENGINE, 148 | messages = [{"role":"system", "content":"""Read the following statement and extract a prediction of the number chosen by each player in json format. Output format:{"Player": Player's number}"}"""}, {"role": "user", "content": message}], 149 | temperature=0.7, 150 | max_tokens=80, 151 | top_p=0.95, 152 | frequency_penalty=0, 153 | presence_penalty=0, 154 | stop=None) 155 | response = response['choices'][0]['message']['content'] 156 | bidding_info = json.loads(response) 157 | status = 1 158 | return bidding_info 159 | except Exception as e: 160 | print(e) 161 | times+=1 162 | if times>=2: 163 | return {} 164 | time.sleep(15) 165 | 166 | pcot_exps = glob(f"{self.result_dir}/pcot_VS_*.json") 167 | error_r = [] 168 | flag = False 169 | 170 | exps_result = {} 171 | for exp in pcot_exps: 172 | with open(exp) as fin: 173 | messages=json.load(fin)["message"]["Alex"] 174 | exps_result[exp]={} 175 | for i in range(2, min(len(messages), 41), 4): 176 | message=messages[i]["content"] 177 | result = re_extract(message) 178 | if not result: 179 | result = gpt_extract(message) 180 | if not result: 181 | print(message) 182 | error_r.append(message) 183 | flag = True 184 | break 185 | exps_result[exp][(i-2)//4]=result 186 | if flag: 187 | break 188 | 189 | with open(f"{self.output_dir}/pcot_prediction.json","w") as fout: 190 | new_result = {} 191 | for exp in exps_result: 192 | new_result[os.path.basename(exp)[:-5]] = exps_result[exp] 193 | json.dump(new_result, fout, indent=4) 194 | 195 | def prediction_accuracy(self): 196 | opponents = self.opponents 197 | for oppo in opponents: 198 | with open(f"{self.output_dir}/pcot_prediction.json") as fin: 199 | new_result = json.load(fin) 200 | 201 | pcot_avg_div = {} 202 | 203 | for exp in new_result: 204 | m = re.match(f"pcot_VS_{oppo}_(\d)", exp) 205 | if not m: continue 206 | exp_num = m.groups()[0] 207 | with open(f"{self.result_dir}/{exp}.json") as fin: 208 | exp_ground = json.load(fin)["biddings"] 209 | result = new_result[exp] 210 | for r in result: 211 | try: 212 | prediction = {p:int(result[r][p]) for p in result[r] if p!="Alex"} 213 | except: 214 | continue 215 | round_ground = {p: exp_ground[p][int(r)] for p in exp_ground if p!="Alex"} 216 | predict_avg = sum(prediction.values())/len(prediction) 217 | ground_avg = sum(round_ground.values())/len(round_ground) 218 | pcot_avg_div.setdefault(int(r), []) 219 | pcot_avg_div[int(r)].append(abs(predict_avg-ground_avg)) 220 | 221 | kr_avg_div = {} 222 | kr_exps = glob(f"{self.result_dir}/kr_VS_*.json") 223 | for exp in kr_exps: 224 | m = re.match(f"{self.result_dir}/kr_VS_{oppo}_(\d).json", exp) 225 | if not m: continue 226 | exp_num = m.groups()[0] 227 | with open(exp) as fin: 228 | logs = json.load(fin) 229 | exp_ground = logs["biddings"] 230 | result = logs["logs"]["Alex"] 231 | for r in range(0, 10): 232 | try: 233 | prediction = result[f'round{r+1}']["prediction"] 234 | except: 235 | continue 236 | round_ground = {p: exp_ground[p][r] for p in exp_ground if p!="Alex"} 237 | predict_avg = sum(prediction.values())/len(prediction) 238 | ground_avg = sum(round_ground.values())/len(round_ground) 239 | kr_avg_div.setdefault(r, []) 240 | kr_avg_div[r].append(abs(predict_avg-ground_avg)) 241 | 242 | 243 | #Export the prediction accuracy chart. 244 | 245 | x = [f"R{i+1}" for i in range(10)] 246 | y1 = [sum(pcot_avg_div[r])/len(pcot_avg_div[r]) for r in range(10)] 247 | y2 = [sum(kr_avg_div[r])/len(kr_avg_div[r]) for r in range(10)] 248 | 249 | # Create the plot 250 | plt.figure(figsize=(4, 3)) 251 | 252 | # Plot the first line 253 | plt.plot(x, y1, label=f'PCoT vs {oppo}', linewidth=2) 254 | for i in range(len(x)): 255 | plt.plot(x[i], y1[i], marker='s', color='#1f77b4') 256 | 257 | # Plot the second line 258 | plt.plot(x, y2, label=f'K-R vs {oppo}') 259 | for i in range(len(x)): 260 | plt.plot(x[i], y2[i], marker='s', color='#ff7f0e') 261 | 262 | plt.xticks(fontsize=12) 263 | plt.yticks(fontsize=14) 264 | plt.ylim(top=20) 265 | 266 | # Show the legend 267 | plt.legend(fontsize=12) 268 | 269 | # Show the plot 270 | plt.savefig(f'{self.output_dir}/PA_{oppo}.pdf', format='pdf', bbox_inches='tight') 271 | 272 | print("="*20+f" Prediction Accuracy Metric has been exported to \"{self.output_dir}\" "+"="*20) 273 | 274 | 275 | def main(args): 276 | 277 | evaluator = G08AEvaluator(args.players, args.opponents, args.exp_rnd, args.exp_num, args.result_dir, args.output_dir) 278 | 279 | evaluator.win_rate() 280 | evaluator.adaption_index() 281 | 282 | # the calculation of Prediction Accuracy is used only for pcot and kr. 283 | # evaluator.prediction_accuracy() 284 | 285 | 286 | if __name__=="__main__": 287 | import argparse 288 | parser = argparse.ArgumentParser() 289 | 290 | parser.add_argument("--players", type=str, default="kr") 291 | parser.add_argument("--opponents", type=str, default="agent") 292 | parser.add_argument("--result_dir", type=str, default="result") 293 | parser.add_argument("--output_dir", type=str, default="output") 294 | parser.add_argument('--exp_rnd', type=int, default=10) 295 | parser.add_argument('--exp_num', type=int, default=10) 296 | 297 | args = parser.parse_args() 298 | main(args) -------------------------------------------------------------------------------- /k-reasoning/G08A/game.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | round_number = round 4 | 5 | class G08A(): 6 | def __init__(self, players) -> None: 7 | self.all_players = players[::] 8 | self.survival_players = players[::] 9 | self.round_winner = {} 10 | 11 | def daily_bidding(self, players): 12 | Average = 0 13 | for player in players: 14 | player.act() 15 | Average += player.last_bidding 16 | 17 | Average /= len(players) 18 | Target = round_number(Average * 0.8, 2) 19 | 20 | return Average, Target 21 | 22 | def round_deduction(self, players, winner): 23 | """ 24 | player who did not win loses 1 HP 25 | """ 26 | for player in players: 27 | if player.name not in winner: 28 | player.deduction(1) 29 | 30 | def check_winner(self, players, target): 31 | win_bid = sorted([(abs(player.last_bidding - target), player.last_bidding) for player in players])[0][1] 32 | winners = [player.name for player in players if player.last_bidding==win_bid] 33 | 34 | return winners, win_bid 35 | 36 | def check_tie(self, players): 37 | if len(players)<2: return False 38 | return len(set([player.last_bidding for player in players]))==1 39 | 40 | def run_single_round(self, round_id): 41 | for player in self.survival_players: 42 | player.start_round(round_id) 43 | 44 | Average, Target = self.daily_bidding(self.survival_players) 45 | 46 | Tie_status = self.check_tie(self.survival_players) 47 | if Tie_status: # If all players choose the same number, there is no winner. 48 | WINNER = [] 49 | else: 50 | WINNER, WINNER_BID = self.check_winner(self.survival_players, Target) 51 | WINNER_str = ", ".join(WINNER) 52 | 53 | self.round_winner[round_id] = WINNER 54 | 55 | self.round_deduction(self.survival_players, WINNER) 56 | 57 | bidding_numbers = [f"{player.last_bidding}" for player in self.survival_players] 58 | history_biddings = {player.name: deepcopy(player.biddings) for player in self.survival_players} 59 | bidding_details = [f"{player.name} chose {player.last_bidding}" for player in self.survival_players] 60 | diff_details = [ 61 | f"{player.name}: |{player.last_bidding} - {Target}| = {round_number(abs(player.last_bidding - Target))}" 62 | for player in self.survival_players 63 | ] 64 | player_details = [player.show_info() for player in self.survival_players] 65 | 66 | bidding_numbers = " + ".join(bidding_numbers) 67 | bidding_details = ", ".join(bidding_details) 68 | diff_details = ", ".join(diff_details) 69 | player_details = ", ".join(player_details) 70 | if Tie_status: 71 | BIDDING_INFO = f"Thank you all for participating in Round {round_id}. In this round, {bidding_details}.\nAll players chose the same number, so all players lose 1 point. After the deduction, player information is: {player_details}." 72 | else: 73 | BIDDING_INFO = f"Thank you all for participating in Round {round_id}. In this round, {bidding_details}.\nThe average is ({bidding_numbers}) / {len(self.survival_players)} = {Average}.\nThe average {Average} multiplied by 0.8 equals {Target}.\n{diff_details}\n{WINNER}'s choice of {WINNER_BID} is closest to {Target}. Round winner: {WINNER_str}. All other players lose 1 point. After the deduction, player information is: {player_details}." 74 | 75 | survival_players = [] 76 | dead_players = [] 77 | for player in self.survival_players: 78 | win = player.name in WINNER 79 | player.notice_round_result(round_id, BIDDING_INFO, Target, win, bidding_details, history_biddings) 80 | 81 | if player.hp <= 0: 82 | dead_players.append(player) 83 | else: 84 | survival_players.append(player) 85 | 86 | self.survival_players = survival_players 87 | 88 | for out in dead_players: 89 | for other_player in survival_players: 90 | if other_player.is_agent: 91 | other_player.message += [{"role":"system","content":f"{out.name}'s hp is below 0, so {out.name} has been eliminated from the challenge!"}] 92 | 93 | for player in self.survival_players: 94 | player.end_round() 95 | 96 | print("Round ",round_id,": ",bidding_details) 97 | 98 | def run_multi_round(self, max_round): 99 | 100 | for player in self.all_players: 101 | player.ROUND_WINNER=self.round_winner 102 | 103 | for i in range(1, max_round+1): 104 | self.run_single_round(i) -------------------------------------------------------------------------------- /k-reasoning/G08A/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from player import * 5 | from game import G08A 6 | 7 | # Fill in your config information to conduct experiments. 8 | openai.api_type = "" 9 | openai.api_base = "" 10 | openai.api_version = "" 11 | openai.api_key = "" 12 | 13 | ENGINE = "gpt4-32k" 14 | 15 | def build_player(strategy, name, persona, mean=50, std=0, player_names = []): 16 | """ 17 | Player Factory 18 | """ 19 | if strategy=="agent": 20 | return AgentPlayer(name, persona, ENGINE) 21 | elif strategy=="cot": 22 | return CoTAgentPlayer(name, persona, ENGINE) 23 | elif strategy=="persona": 24 | return PersonaAgentPlayer(name, persona, ENGINE) 25 | elif strategy=="reflect": 26 | return ReflectionAgentPlayer(name, persona, ENGINE) 27 | elif strategy=="refine": 28 | return SelfRefinePlayer(name, persona, ENGINE) 29 | elif strategy=="pcot": 30 | return PredictionCoTAgentPlayer(name, persona, ENGINE) 31 | elif strategy=="kr": 32 | return KLevelReasoningPlayer(name, persona, ENGINE, player_names) 33 | elif strategy=="spp": 34 | return SPPAgentPlayer(name, persona, ENGINE) 35 | elif strategy in ["fix", "last", "mono", "monorand"]: 36 | return ProgramPlayer(name, strategy, mean, std) 37 | else: 38 | raise NotImplementedError 39 | 40 | 41 | def main(args): 42 | #Predefined Persona information 43 | PERSONA_A = "You are Alex and involved in a survive challenge. " 44 | PERSONA_B = "You are Bob and involved in a survive challenge. " 45 | PERSONA_C = "You are Cindy and involved in a survive challenge. " 46 | PERSONA_D = "You are David and involved in a survive challenge. " 47 | PERSONA_E = "You are Eric and involved in a survive challenge. " 48 | 49 | for exp_no in range(args.start_exp, args.exp_num): 50 | players=[] 51 | player_names = ["Alex", "Bob", "Cindy", "David", "Eric"] 52 | 53 | # build player 54 | A = build_player(args.player_strategy, "Alex", PERSONA_A, player_names=player_names) 55 | # Modify PlayerA's settings for ablation experiments. 56 | if args.player_engine: A.engine = args.player_engine 57 | if args.player_k: A.k_level = args.player_k 58 | players.append(A) 59 | 60 | # build opponent 61 | for program_name, persona in [("Bob", PERSONA_B), ("Cindy", PERSONA_C), ("David", PERSONA_D), ("Eric", PERSONA_E)]: 62 | players.append(build_player(args.computer_strategy, program_name, persona, args.init_mean, args.norm_std, player_names=player_names)) 63 | 64 | # run multi-round game (default 10) 65 | Game = G08A(players) 66 | Game.run_multi_round(args.max_round) 67 | 68 | # export game records 69 | prefix = f"{args.player_strategy}_VS_{args.computer_strategy}_{exp_no}" 70 | if args.computer_strategy in ["fix", "last"]: 71 | prefix = f"{args.player_strategy}_VS_{args.computer_strategy}-{args.init_mean}-{args.norm_std}_{exp_no}" 72 | 73 | output_file = f"{args.output_dir}/{prefix}.json" 74 | os.makedirs(os.path.dirname(output_file), exist_ok=True) 75 | 76 | with open(output_file,"w") as fout: 77 | messages = {} 78 | biddings = {} 79 | logs = {} 80 | for agent in Game.all_players: 81 | if agent.is_agent: 82 | messages[agent.name] = agent.message 83 | biddings[agent.name] = agent.biddings 84 | if agent.logs: 85 | logs[agent.name] = agent.logs 86 | 87 | debug_info = { 88 | "winners": Game.round_winner, 89 | "biddings": biddings, 90 | "message": messages, 91 | "logs":logs 92 | } 93 | 94 | json.dump(debug_info, fout, indent=4) 95 | 96 | if __name__=="__main__": 97 | import argparse 98 | parser = argparse.ArgumentParser() 99 | 100 | parser.add_argument('--player_strategy', type=str, default="cot", choices=["agent","cot","pcot","kr","reflect", "persona", "refine", "spp"]) 101 | parser.add_argument('--computer_strategy', type=str,choices=["agent", "fix", "last", "mono", "monorand","cot","pcot","kr","reflect", "persona", "refine", "spp"], default="fix") 102 | parser.add_argument("--output_dir", type=str, default="result") 103 | parser.add_argument("--init_mean", type=int, default=40, help="init mean value for computer player") 104 | parser.add_argument("--norm_std", type=int, default=5, help="standard deviation of the random distribution of computer gamers") 105 | parser.add_argument('--max_round', type=int, default=10) 106 | parser.add_argument('--start_exp', type=int, default=0) 107 | parser.add_argument('--exp_num', type=int, default=10) 108 | parser.add_argument('--player_engine', type=str, default=None, help="player's OpenAI api engine") 109 | parser.add_argument('--player_k', type=int, default=None, help="player's k-level (default 2)") 110 | 111 | args = parser.parse_args() 112 | main(args) -------------------------------------------------------------------------------- /k-reasoning/G08A/player/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic_player import ProgramPlayer 2 | from .reasoning_player import * 3 | from .k_level_reasoning_player import * -------------------------------------------------------------------------------- /k-reasoning/G08A/player/basic_player.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | import numpy as np 3 | 4 | class Player(): 5 | def __init__(self, name): 6 | self.name = name 7 | self.hp = 10 8 | self.biddings=[] 9 | self.cur_round = -1 10 | 11 | self.logs = None 12 | 13 | def start_round(self, round: int): 14 | self.cur_round = round 15 | 16 | def act(self): 17 | raise NotImplementedError 18 | 19 | def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings): 20 | raise NotImplementedError 21 | 22 | def end_round(self): 23 | pass 24 | 25 | def deduction(self, deducted_hp): 26 | self.hp -= deducted_hp 27 | 28 | @property 29 | def last_bidding(self): 30 | return self.biddings[-1] 31 | 32 | def show_info(self, print_ = False): 33 | if print_: 34 | print(f"NAME:{self.name}\tHEALTH POINT:{self.hp}\n") 35 | return f"NAME:{self.name}\tHEALTH POINT:{self.hp}" 36 | 37 | 38 | class ProgramPlayer(Player): 39 | is_agent=False 40 | def __init__(self, name, strategy, mean, std): 41 | self.name = name 42 | self.hp = 10 43 | 44 | self.biddings = [] 45 | 46 | self.strategy=strategy 47 | self.mean = mean 48 | self.std = std 49 | 50 | self.logs = None 51 | 52 | if self.strategy=="monorand": 53 | self.std = randint(0, std) 54 | self.strategy="mono" 55 | 56 | def start_round(self, round): 57 | return 58 | 59 | def end_round(self): 60 | if self.strategy=="mono": 61 | # 62 | self.mean -= self.std 63 | 64 | def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings): 65 | if self.strategy=="last": 66 | self.mean=round_target 67 | 68 | def set_normal(self, mean, std): 69 | self.normal = True 70 | self.mean = mean 71 | self.std = std 72 | 73 | def act(self): 74 | if self.strategy=="mono": 75 | bidding = self.mean 76 | else: 77 | bidding = np.random.normal(self.mean, self.std) 78 | bidding = min(max(int(bidding), 1),100) 79 | self.biddings.append(bidding) -------------------------------------------------------------------------------- /k-reasoning/G08A/player/k_level_reasoning_player.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import time 3 | from copy import deepcopy 4 | 5 | from .reasoning_player import AgentPlayer 6 | 7 | round_number = round 8 | 9 | class KLevelReasoningPlayer(AgentPlayer): 10 | INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 11 | "Another game expert's prediction for the next round of other players is as follows: " 12 | "{prediction}" 13 | "According to the game expert's prediction, what number will you give in the next round is most likely to be close to the target number?") 14 | 15 | 16 | PREDICTION_GAME_SETTING = ("You are {name} and involved in a survive challenge. SURVIVE CHALLENGE!!! You are one of five players in the game. " 17 | "Everyone is required to choose an integer between 1 and 100 in each round. " 18 | "The player whose chosen number is closest to (0.8 * the average of all chosen numbers) wins the round without any HP deduction. All other players will have 1 HP deducted. " 19 | "But if all players choose the same number, their health points are deducted together.") 20 | # PREDICTION_INQUIRY = ("Ok, {name}! Now is the ROUND {round}. " 21 | # "Please choose an integer between 1 and 100 for this round.") 22 | PREDICTION_INQUIRY = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 23 | "Please choose an integer between 1 and 100 for this round.") 24 | PREDICTION_RESPONSE = "I choose {bidding}." 25 | 26 | def __init__(self, name, persona, engine, players): 27 | super().__init__(name, persona, engine) 28 | self.bidding_history = {} 29 | self.logs = {} 30 | 31 | self.history_biddings = {} 32 | self.round_result = {} 33 | for player in players: 34 | self.history_biddings[player]=[] 35 | 36 | self.k_level = 2 37 | 38 | def start_round(self, round): 39 | prediction = self.predict(round) 40 | prediction = ", ".join([f"{player} might choose {prediction[player]}" for player in prediction])+". " 41 | self.message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round, prediction=prediction, hp=self.hp)}] 42 | 43 | def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings): 44 | super().notice_round_result(round, bidding_info, round_target, win, bidding_details, history_biddings) 45 | self.round_result[round] = bidding_info 46 | self.bidding_history[round] = bidding_details 47 | self.history_biddings = history_biddings # {"Alex": [1,2,3]} 48 | 49 | def predict(self, round): 50 | 51 | def self_act(message): 52 | status = 0 53 | while status != 1: 54 | try: 55 | response = openai.ChatCompletion.create( 56 | engine = self.engine, 57 | messages = message, 58 | temperature=0.7, 59 | max_tokens=800, 60 | top_p=0.95, 61 | frequency_penalty=0, 62 | presence_penalty=0, 63 | stop=None) 64 | response = response['choices'][0]['message']['content'] 65 | self.message.append({"role":"assistant","content":response}) 66 | status = 1 67 | except Exception as e: 68 | print(e) 69 | time.sleep(15) 70 | return self.parse_result(response) 71 | 72 | def add_warning(hp, win): 73 | if not win: 74 | if hp < 5: 75 | return f"WARNING: You have lost 1 point of HP in this round! You now have only {hp} points of health left. You are in DANGER and one step closer to death. " 76 | if hp <=3 : 77 | return f"WARNING: You have lost 1 point of HP in this round! You now have only {hp} points of health left. You are in extreme DANGER and one step closer to death. " 78 | return f"WARNING: You have lost 1 point of HP in this round! You now have only {hp} points of health left. You are one step closer to death. " 79 | return "You have successfully chosen the number closest to the target number, which is the average of all players' selected numbers multiplied by 0.8. As a result, you have won this round. All other players will now deduct 1 HP. " 80 | 81 | history_biddings = deepcopy(self.history_biddings) 82 | round_result = deepcopy(self.round_result) 83 | round_winner = deepcopy(self.ROUND_WINNER) 84 | self_hp = self.hp 85 | self_message = deepcopy(self.message) 86 | for k in range(self.k_level): 87 | prediction = {} 88 | logs = {} 89 | player_hp = {} 90 | k_round = round+k 91 | for player in history_biddings: 92 | hp=10 93 | if player == self.name: continue 94 | 95 | print(f"Player {self.name} conduct predict {player}") 96 | message = [{ 97 | "role": "system", 98 | "content": self.PREDICTION_GAME_SETTING.format(name=player) 99 | }] 100 | for r in range(len(history_biddings[player])): 101 | message.append({ 102 | "role": "system", 103 | "content": self.PREDICTION_INQUIRY.format(name=player, round=r+1, hp=hp) 104 | }) 105 | message.append({ 106 | "role": "assistant", 107 | "content": self.PREDICTION_RESPONSE.format(bidding=history_biddings[player][r]) 108 | }) 109 | message.append({ 110 | "role": "system", 111 | "content": round_result[r+1] 112 | }) 113 | message.append({ 114 | "role": "system", 115 | "content": add_warning(hp, player in round_winner[r+1]) 116 | }) 117 | if player not in round_winner[r+1]: 118 | hp-=1 119 | 120 | # Predict the opponent's next move based on their historical information. 121 | if hp>0: 122 | message.append({ 123 | "role": "system", 124 | "content": self.PREDICTION_INQUIRY.format(name=player, round=len(history_biddings[player])+1, hp=hp) 125 | }) 126 | next_bidding = self.agent_simulate(message, engine=self.engine) 127 | message.append({ 128 | "role": "assistant", 129 | "content": next_bidding 130 | }) 131 | prediction[player] = self.parse_result(next_bidding) 132 | else: 133 | prediction[player] = history_biddings[player][-1] 134 | logs[player] = message 135 | player_hp[player] = hp 136 | 137 | if k==self.k_level-2: break 138 | # If k-level >= 3, it is necessary to predict future outcomes. 139 | 140 | prediction_str = ", ".join([f"{player} might choose {prediction[player]}" for player in prediction])+". " 141 | self_message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=k_round, prediction=prediction_str, hp=self_hp)}] 142 | bidding = self_act(self_message) 143 | prediction = {**{self.name: bidding}, **prediction} 144 | player_hp[self.name] = self_hp 145 | 146 | Average = 0 147 | for player in prediction: 148 | Average += prediction[player] 149 | Average /= len(prediction) 150 | Target = round_number(Average * 0.8, 2) 151 | 152 | Tie_status = len(prediction)>=2 and len(set([prediction[player] for player in prediction]))==1 153 | if Tie_status: 154 | winners = [] 155 | else: 156 | win_bid = sorted([(abs(prediction[player] - Target), prediction[player]) for player in prediction])[0][1] 157 | winners = [player for player in prediction if prediction[player]==win_bid] 158 | winner_str = ", ".join(winners) 159 | 160 | round_winner[k_round] = winners 161 | 162 | for player in prediction: 163 | if player not in winners: 164 | player_hp[player]-=1 165 | 166 | # Use list comprehensions for concise and readable constructions 167 | bidding_numbers = [f"{prediction[player]}" for player in prediction] 168 | for player in history_biddings: 169 | history_biddings[player].append(prediction[player]) 170 | bidding_details = [f"{player} chose {prediction[player]}" for player in prediction] 171 | diff_details = [ 172 | f"{player}: |{prediction[player]} - {Target}| = {round_number(abs(prediction[player] - Target))}" 173 | for player in prediction 174 | ] 175 | player_details = [f"NAME:{player}\tHEALTH POINT:{player_hp[player]}" for player in prediction] 176 | 177 | bidding_numbers = " + ".join(bidding_numbers) 178 | bidding_details = ", ".join(bidding_details) 179 | diff_details = ", ".join(diff_details) 180 | player_details = ", ".join(player_details) 181 | if Tie_status: 182 | bidding_info = f"Thank you all for participating in Round {k_round}. In this round, {bidding_details}.\nAll players chose the same number, so all players lose 1 point. After the deduction, player information is: {player_details}." 183 | else: 184 | bidding_info = f"Thank you all for participating in Round {k_round}. In this round, {bidding_details}.\nThe average is ({bidding_numbers}) / {len(prediction)} = {Average}.\nThe average {Average} multiplied by 0.8 equals {Target}.\n{diff_details}\n{winners}'s choice of {win_bid} is closest to {Target}. Round winner: {winner_str}. All other players lose 1 point. After the deduction, player information is: {player_details}." 185 | round_result[k_round] = bidding_info 186 | 187 | self.logs[f"round{round}"] = { 188 | "prediction": prediction, 189 | "logs": logs 190 | } 191 | return prediction 192 | 193 | # @staticmethod 194 | def agent_simulate(self, message, engine): 195 | while 1: 196 | try: 197 | response = openai.ChatCompletion.create( 198 | engine=engine, 199 | messages = message, 200 | temperature=0.7, 201 | max_tokens=80, 202 | top_p=0.9, 203 | frequency_penalty=0, 204 | presence_penalty=0, 205 | stop=None) 206 | RESPONSE = response['choices'][0]['message']['content'] 207 | return RESPONSE 208 | except Exception as e: 209 | print(e) 210 | time.sleep(15) 211 | 212 | 213 | -------------------------------------------------------------------------------- /k-reasoning/G08A/player/reasoning_player.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import time 3 | 4 | from .basic_player import Player 5 | 6 | class AgentPlayer(Player): 7 | is_agent=True 8 | GAME_SETTING = ("SURVIVE CHALLENGE!!! You are one of five players in the game. " 9 | "Everyone is required to choose an integer between 1 and 100 in each round. " 10 | "The player whose chosen number is closest to (0.8 * the average of all chosen numbers) wins the round without any HP deduction. All other players will have 1 HP deducted. " 11 | "But if all players choose the same number, their health points are deducted together.") 12 | 13 | GAME_EXAMPLE = ("For example:\nPlayer Alex chooses 80, Player Bob chooses 70, Player Cindy chooses 60, Player David chooses 50, and Player Elva chooses 40.\n" 14 | "The average is (80 + 70 + 60 + 50 + 40) / 5 = 60.\n0.8 * The average is 60 * 0.8 = 48.\n" 15 | "Alex: |80 - 48| = 32\nBob: |70 - 48| = 22\nCindy:|60 - 48| = 12\nDavid:|50 - 48| = 2\nElva:|40 - 48| = 8\n" 16 | "So, player David's choice of 50 is closest to the target number 48, so David wins the round. " 17 | "All other players lose 1 HP.\nEvery player starts with an initial HP of 10 points. " 18 | "Once a player's HP reaches 0, he or she will be killed immediately and lose everything they have. " 19 | "Remember only the last remaining player wins the game!! " 20 | "Remember the target number is 0.8 * average or not the average!! " 21 | "Strive to make choices that maximize your chance of survival!!") 22 | 23 | INQUIRY = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 24 | "Please choose an integer between 1 and 100 for this round.") 25 | 26 | def __init__(self, name, persona, engine): 27 | self.name = name 28 | self.engine = engine 29 | self.hp = 10 30 | 31 | self.biddings = [] 32 | self.persona = persona 33 | self.message = [{"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}] 34 | 35 | self.logs = None 36 | 37 | def act(self): 38 | print(f"Player {self.name} conduct bidding") 39 | status = 0 40 | while status != 1: 41 | try: 42 | response = openai.ChatCompletion.create( 43 | engine = self.engine, 44 | messages = self.message, 45 | temperature=0.7, 46 | max_tokens=800, 47 | top_p=0.95, 48 | frequency_penalty=0, 49 | presence_penalty=0, 50 | stop=None) 51 | response = response['choices'][0]['message']['content'] 52 | self.message.append({"role":"assistant","content":response}) 53 | status = 1 54 | except Exception as e: 55 | print(e) 56 | time.sleep(15) 57 | self.biddings.append(self.parse_result(response)) 58 | 59 | def parse_result(self, message): 60 | status = 0 61 | times = 0 62 | while status != 1: 63 | try: 64 | response = openai.ChatCompletion.create( 65 | engine=self.engine, 66 | messages = [{"role":"system", "content":"By reading the conversation, extract the number chosen by player. Output format: number"}, {"role": "user", "content": message}], 67 | temperature=0.7, 68 | max_tokens=800, 69 | top_p=0.95, 70 | frequency_penalty=0, 71 | presence_penalty=0, 72 | stop=None) 73 | response = response['choices'][0]['message']['content'] 74 | assert response.isnumeric(), "Not A Number: "+ message 75 | bidding_info = int(float(response)) 76 | status = 1 77 | return bidding_info 78 | except AssertionError as e: 79 | print("Result Parsing Error: ",e) 80 | times+=1 81 | if times>=3: 82 | exit() 83 | except Exception as e: 84 | print(e) 85 | time.sleep(15) 86 | 87 | return None 88 | 89 | def start_round(self, round): 90 | self.message += [{"role":"system","content":self.INQUIRY.format(name=self.name, round=round, hp=self.hp)}] 91 | 92 | def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings): 93 | self.message_update_result(bidding_info) 94 | self.message_update_warning(win) 95 | 96 | def message_update_result(self, bidding_info): 97 | self.message += [{"role":"system","content":bidding_info}] 98 | 99 | def message_update_warning(self, win): 100 | def add_warning(): 101 | if not win: 102 | if self.hp < 5: 103 | return f"WARNING: You have lost 1 point of HP in this round! You now have only {self.hp} points of health left. You are in DANGER and one step closer to death. " 104 | if self.hp <=3 : 105 | return f"WARNING: You have lost 1 point of HP in this round! You now have only {self.hp} points of health left. You are in extreme DANGER and one step closer to death. " 106 | return f"WARNING: You have lost 1 point of HP in this round! You now have only {self.hp} points of health left. You are one step closer to death. " 107 | return "You have successfully chosen the number closest to the target number, which is the average of all players' selected numbers multiplied by 0.8. As a result, you have won this round. All other players will now deduct 1 HP. " 108 | 109 | self.message += [{"role":"system","content": add_warning()}] 110 | 111 | def conduct_inquiry(self, inquiry): 112 | while 1: 113 | try: 114 | response = openai.ChatCompletion.create( 115 | engine=self.engine, 116 | messages = self.message + [{"role":"system","content":inquiry}], 117 | temperature=0.7, 118 | max_tokens=800, 119 | top_p=0.9, 120 | frequency_penalty=0, 121 | presence_penalty=0, 122 | stop=None) 123 | 124 | RESPONSE = response['choices'][0]['message']['content'] 125 | return RESPONSE 126 | except Exception as e: 127 | print(e) 128 | time.sleep(15) 129 | 130 | 131 | class CoTAgentPlayer(AgentPlayer): 132 | INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 133 | "Guess which number will win in the next round. Let's think step by step, and finally answer a number you think you can win.") 134 | 135 | def start_round(self, round): 136 | self.message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round, hp=self.hp)}] 137 | 138 | 139 | class PersonaAgentPlayer(AgentPlayer): 140 | INQUIRY_PERSONA = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 141 | "Please choose an integer between 1 and 100 for this round." 142 | "Don't forget your expert status, use your expertise to win this round!") 143 | 144 | 145 | MATH_EXPERT_PERSONA = ("You are {name} and involved in a survive challenge." 146 | " You are a game expert, good at predicting other people's behavior and deducing calculations, and using the most favorable strategy to win the game.") 147 | 148 | def __init__(self, name, persona, engine): 149 | super().__init__(name, persona, engine) 150 | self.persona = self.MATH_EXPERT_PERSONA.format(name=name) 151 | self.message = [{"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}] 152 | 153 | def start_round(self, round): 154 | self.message += [{"role":"system","content":self.INQUIRY_PERSONA.format(name=self.name, round=round, hp=self.hp)}] 155 | 156 | 157 | class ReflectionAgentPlayer(AgentPlayer): 158 | REFLECT_INQUIRY = "Review the previous round games, summarize the experience." 159 | def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings): 160 | super().notice_round_result(round, bidding_info, round_target, win, bidding_details, history_biddings) 161 | # refelxtion after round end 162 | self.reflect() 163 | 164 | def reflect(self): 165 | print(f"Player {self.name} conduct reflect") 166 | self.message += [{"role":"system","content": self.REFLECT_INQUIRY}, {"role":"assistant","content":self.conduct_inquiry(self.REFLECT_INQUIRY)}] 167 | 168 | 169 | class SelfRefinePlayer(AgentPlayer): 170 | INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 171 | "Guess which number will win in the next round. Let's think step by step, and finally answer a number you think you can win.") 172 | 173 | FEEDBACK_PROMPT = ("Carefully study the user's strategy in this round of the game. As a game expert, can you give a suggestion to optimize the user's strategy so that he can improve his winning rate in this round?") 174 | REFINE_PROMPT = ("I have a game expert's advice on your strategy in this round." 175 | "You can adjust your strategy just now according to his suggestion. Here are his suggestions:" 176 | "{feedback}") 177 | 178 | 179 | def __init__(self, name, persona, engine, refine_times = 2): 180 | super().__init__(name, persona, engine) 181 | 182 | self.refine_times = refine_times 183 | 184 | def start_round(self, round): 185 | self.cur_round = round 186 | 187 | def act(self): 188 | print(f"Player {self.name} conduct bidding") 189 | def completion(message): 190 | status = 0 191 | while status != 1: 192 | try: 193 | response = openai.ChatCompletion.create( 194 | engine = self.engine, 195 | messages = message, 196 | temperature=0.7, 197 | max_tokens=800, 198 | top_p=0.95, 199 | frequency_penalty=0, 200 | presence_penalty=0, 201 | stop=None) 202 | response = response['choices'][0]['message']['content'] 203 | status = 1 204 | except Exception as e: 205 | print(e) 206 | time.sleep(15) 207 | return response 208 | 209 | for t in range(self.refine_times): 210 | # refine_times==action_times 211 | if t==0: 212 | self.message.append({"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=self.cur_round, hp=self.hp)}) 213 | else: 214 | refine_message = [] 215 | for m in self.message: 216 | if m["role"]=="system": 217 | refine_message.append(m) 218 | else: 219 | refine_message.append({ 220 | "role": "user", 221 | "content": m["content"] 222 | }) 223 | refine_message.append({ 224 | "role": "system", 225 | "content": self.FEEDBACK_PROMPT 226 | }) 227 | feedback = completion(refine_message) 228 | self.message.append({"role":"system","content": self.REFINE_PROMPT.format(feedback=feedback)}) 229 | self.message.append({"role":"assistant","content": completion(self.message)}) 230 | 231 | self.biddings.append(self.parse_result(self.message[-1]["content"])) 232 | 233 | 234 | class PredictionCoTAgentPlayer(AgentPlayer): 235 | INQUIRY_COT = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 236 | "Please choose an integer between 1 and 100 for this round.\n" 237 | "First of all, predict the next round of choices based on the choices of other players in the previous round. " 238 | "{round_history}" 239 | "Your output should be of the following format:\n" 240 | "Predict:\nThe choice of each player in the next round here.\n" 241 | "Based on the prediction of other players, the average number in the next round here, and the target number in the next round (0.8 * the average of all chosen numbers) here.\n" 242 | "Answer:\nthe number will you choose to win the next round game here.") 243 | 244 | def __init__(self, name, persona, engine): 245 | super().__init__(name, persona, engine) 246 | 247 | self.bidding_history = {} 248 | 249 | def start_round(self, round): 250 | # PCoT requires the opponent's historical information to make predictions. 251 | round_history = [] 252 | for r in sorted(self.bidding_history.keys()): 253 | round_history.append(f"Round {r}: {self.bidding_history[r]}") 254 | if round_history: 255 | round_history = ".\n".join(round_history) 256 | round_history = "The players' choices in the previous rounds are as follows:\n"+round_history+"." 257 | else: 258 | round_history = "" 259 | 260 | self.message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round,round_history=round_history, hp=self.hp)}] 261 | 262 | def notice_round_result(self, round, bidding_info, round_target, win, bidding_details, history_biddings): 263 | super().notice_round_result(round, bidding_info, round_target, win, bidding_details, history_biddings) 264 | self.bidding_history[round] = bidding_details 265 | 266 | 267 | class SPPAgentPlayer(AgentPlayer): 268 | # Default example of SPP 269 | SPP_EXAMPLE = """When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 270 | Here are some examples: 271 | --- 272 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 273 | Input: 6 12 1 1 274 | 275 | Participants: {name} (you); Math Expert 276 | 277 | Start collaboration! 278 | 279 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 280 | {name} (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 281 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 282 | {name} (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 283 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 284 | {name} (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 285 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 286 | {name} (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 287 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 288 | 289 | Finish collaboration! 290 | 291 | Final answer: 6 * (1 + 1) + 12 = 24 292 | """ 293 | 294 | INQUIRY_SPP = ("Ok, {name}! Now is the ROUND {round}, and your HP is at {hp}. " 295 | "Please choose an integer between 1 and 100 for this round. " 296 | "Now, identify the participants and collaboratively choose the number step by step. Remember to provide the final solution with the following format \"Final answer: The chosen number here.\".") 297 | 298 | 299 | PERSONA = "You are {name} and involved in a survive challenge." 300 | 301 | def __init__(self, name, persona, engine): 302 | super().__init__(name, persona, engine) 303 | self.persona = self.PERSONA.format(name=name) 304 | self.message = [{"role":"system","content": self.SPP_EXAMPLE.format(name=self.name)}, 305 | {"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}] 306 | 307 | def start_round(self, round): 308 | self.message += [{"role":"system","content":self.INQUIRY_SPP.format(name=self.name, round=round, hp=self.hp)}] -------------------------------------------------------------------------------- /k-reasoning/G08A/run.sh: -------------------------------------------------------------------------------- 1 | python main.py --player_strategy kr --computer_strategy agent --exp_num 1 2 | 3 | python evaluate.py --players kr --opponents agent -------------------------------------------------------------------------------- /k-reasoning/README.md: -------------------------------------------------------------------------------- 1 | # K-Level Reasoning with Large Language Models 2 | 3 | **K-Level Reasoning** is a a novel reasoning approach for LLMs, which adopts the perspective of rivals to recursively employ k-level thinking 4 | based on available historical information. It significantly improves the strategic decision-making capability for LLM in dynamic, interactive, and competitive scenarios. 5 | 6 | See our paper: [K-Level Reasoning with Large Language Models](https://browse.arxiv.org/abs/2402.01521) 7 | 8 | ## Dynamic Strategic Reasoning with LLMs 9 | While Large Language Models (LLMs) have demonstrated their proficiency in complex reasoning tasks, their performance in dynamic, interactive, and competitive scenarios - such as business strategy and stock market analysis - remains underexplored. To bridge this gap, we formally explore the dynamic reasoning capabilities of LLMs for decision-making in rapidly evolving environments. 10 | 11 | We introduce two game theory based pilot challenges that mirror the complexities of real-world dynamic decision-making. These challenges are well-defined, enabling clear, controllable, and precise evaluation of LLMs’ dynamic reasoning abilities. 12 | 13 | Games 14 | 15 | **Guessing 0.8 of the Average**. It involves ten-round games, where each player selects a number between 1 to 100. The winner of each round is the player whose chosen number is closest to 80% of the average number picked by all players. This challenge mirrors the dynamics of market predictions, where players must anticipate collective market behavior. 16 | 17 | **Survival Auction Game**. Players bid in daily auctions to secure scarce living resources, balancing their health and finances to survive a 10-day period, simulating scenarios of resource scarcity and financial decision-making. 18 | 19 | ## K-Level Reasoning 20 | K-level thinking is a recursive reasoning process. In first-level thinking, individuals react directly to the environment, akin to static reasoning. In second-level thinking, individuals take into account the first-level thinking of others, and so on and so forth. 21 | k-reasoning 22 | 23 | We draw on this idea to improve the reasoning capabilities of LLMs in dynamic challenges, and propose the “K-Level Reasoning”. This method involves recursively simulating the opponent’s behavior using available historical information, followed by predicting the optimal behavior based on the opponents’ behavior modeling. 24 | 25 | k-level-reasoning-with-llm 26 | 27 | 28 | ## Reasoning Example 29 | k-reasoning-example 30 | 31 | Illustration of different methods in the Guessing 0.8 of the Average game. 32 | 33 | **Left**: In the Chain of Thought, the LLM outputs reasoning logic in a step-by-step format. However, the LLM demonstrates poor understanding of situations and prediction of opponents’ actions. 34 | 35 | **Middle**: In the Prediction Chain-of-Thought, with an explicit requirement to make predictions about rivals’ next moves, the LLM clearly divides the reasoning into prediction and reasoning phases. However, the predictions are still somewhat biased. 36 | 37 | **Right**: In the K-Level Reasoning, the LLM recursively predict rivals’ next moves with public historical information. Thank to a deeper strategic depth than rivals, the prediction and decision are more accurate. 38 | 39 | ## Contributions 40 | 41 | - We study the dynamic reasoning capabilities of LLMs from a game theory perspective and introduce two pilot tasks. Both tasks mirror the complexities of real-world dynamic decision-making and are also well-defined for evaluating LLMs’ dynamic reasoning abilities. 42 | - We propose a novel reasoning approach with LLMs - the “K-Level Reasoning” method. It integrates cognitive hierarchy theory into reasoning process, empowering LLMs to recursively predict and respond to the thoughts and actions of rivals in competitive and dynamic scenarios. 43 | 44 | ## Codes 45 | 46 | ### Guessing 0.8 of the Average 47 | 48 | Play the game and record the results (using K-Level-Reasoning (kr) as the player and Direct (agnet) as the opponent as an example). The game log will be saved in the result folder by default. 49 | ``` 50 | cd G08A 51 | python main.py --player_strategy kr --computer_strategy agent --exp_num 1 52 | ``` 53 | Then, perform the calculation of player metrics, which will output the data for the player's `WinRate` and `AdaptionIndex`. 54 | 55 | ``` 56 | python evaluate.py --players kr --opponents agent 57 | ``` 58 | 59 | ### SurvivalAuctionGame 60 | 61 | Play the game and calculate metrics. 62 | ``` 63 | cd SAG 64 | 65 | python main.py --player_strategy kr --computer_strategy agent --exp_num 1 66 | 67 | python evaluate.py --players kr --opponents agent 68 | ``` 69 | 70 | ## Citation 71 | 72 | ``` 73 | @misc{kreasoning, 74 | title={K-Level Reasoning with Large Language Models}, 75 | author={Yadong Zhang and Shaoguang Mao and Tao Ge and Xun Wang and Yan Xia and Man Lan and Furu Wei}, 76 | year={2024}, 77 | eprint={2402.01521}, 78 | archivePrefix={arXiv}, 79 | primaryClass={cs.CL} 80 | } 81 | ``` 82 | -------------------------------------------------------------------------------- /k-reasoning/SAG/evaluate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import os 4 | from glob import glob 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import numpy as np 9 | 10 | class SAGEvaluator(object): 11 | def __init__(self, players, opponents, result_dir, output_dir) -> None: 12 | self.players = players.split(",") 13 | self.opponents = opponents.split(",") 14 | self.result_dir = result_dir 15 | self.output_dir = output_dir 16 | 17 | if not os.path.exists(self.output_dir): 18 | os.makedirs(self.output_dir, exist_ok=True) 19 | 20 | def survival_rate(self, status, soft=True): 21 | rounds = [str(r) for r in range(1, len(status)+1)] 22 | players = {} 23 | for r in rounds: 24 | for player in status[r]: 25 | players[player] = r 26 | for player in players: 27 | if soft: 28 | players[player] = int(players[player])/len(rounds) 29 | else: 30 | players[player] = 1 if int(players[player])==len(rounds) else 0 31 | return players 32 | 33 | def average_survival_round(self, ): 34 | print("="*40+" Average Survival Round "+"="*40) 35 | 36 | players, opponents = self.players, self.opponents 37 | def interpolate_color(colorA, colorB, colorC, alpha, beta=0.5): 38 | if alpha>beta: 39 | alpha = (alpha-beta)/(1-beta) 40 | return tuple(np.array(colorB)*(1-alpha) + np.array(colorA)*(alpha)) 41 | else: 42 | low = 0.4 43 | alpha = (alpha-low)/(beta-low) 44 | return tuple(np.array(colorC)*(1-alpha) + np.array(colorB)*(alpha)) 45 | 46 | asr_result = {} 47 | 48 | for agent in players: 49 | asr_result.setdefault(agent, {}) 50 | for computer in opponents: 51 | exp = f"{self.result_dir}/{agent}_VS_{computer}*.json" 52 | cots = glob(exp) 53 | 54 | wins = {} 55 | for result in cots: 56 | with open(result) as fin: 57 | result = json.load(fin)["status"] 58 | sr = self.survival_rate(result, soft=True) 59 | for player in sr: 60 | wins.setdefault(player, []) 61 | wins[player].append(sr[player]) 62 | 63 | win_rate = sum(wins["Alex"])/len(wins["Alex"]) 64 | asr_result[agent][computer] = win_rate 65 | 66 | average = {} 67 | for i, agent in enumerate(asr_result): 68 | average[agent] = list(asr_result[agent].values()) 69 | average[agent] = sum(average[agent])/len(average[agent]) 70 | 71 | print(f"{'':7s}\t"+"\t".join([f"{agent:7s}" for agent in players])) 72 | for computer in opponents: 73 | print(f"{computer:7s}",end="\t") 74 | print("\t".join([f"{asr_result[agent][computer]*10:<7.2f}" if asr_result[agent][computer]>=0 else f"{'':7s}" for agent in asr_result])) 75 | 76 | print(f"{'Average':7s}",end="\t") 77 | print("\t".join([f"{average[agent]*10:<7.2f}" if average[agent]>=0 else f"{'':7s}" for agent in average])) 78 | 79 | print() 80 | 81 | def adaption_index(self): 82 | print("="*40+" Adaption Index "+"="*40) 83 | 84 | players, opponents = self.players, self.opponents 85 | adaption_result = {} 86 | 87 | def mean(a): 88 | if not a: 89 | return -1 90 | return sum(a)/len(a) 91 | 92 | for oppo in opponents: 93 | exp_result = {} 94 | for agent in players: 95 | exps = glob(f"{self.result_dir}/{agent}_VS_{oppo}*.json") 96 | for exp in exps: 97 | with open(exp) as fin: 98 | logs = json.load(fin) 99 | exp_ground = logs["biddings"] 100 | target_div = {"first":[],"second":[]} 101 | for r in range(0, 10): 102 | if r>=len(exp_ground["Alex"]): 103 | break 104 | second_bid=0 105 | player_bid=exp_ground["Alex"][r] 106 | for p in exp_ground: 107 | if r>=len(exp_ground[p]): continue 108 | if p!="Alex" and exp_ground[p][r]>second_bid: 109 | second_bid=exp_ground[p][r] 110 | if r>=5: 111 | target_div["second"].append(abs(player_bid-second_bid)) 112 | else: 113 | target_div["first"].append(abs(player_bid-second_bid)) 114 | exp_result.setdefault(agent, []) 115 | if not target_div["second"] or not target_div["first"]: 116 | continue 117 | exp_result[agent].append(mean(target_div["second"])/mean(target_div["first"])) 118 | adaption_result[oppo]=exp_result 119 | 120 | 121 | agent_sum = {} 122 | for agent in players: 123 | agent_sum[agent]=[] 124 | for oppo in opponents: 125 | agent_oppo_learning = adaption_result[oppo].get(agent,[]) 126 | if agent_oppo_learning: 127 | agent_sum[agent].append(sum(agent_oppo_learning)/len(agent_oppo_learning)) 128 | 129 | print(f"{'':8s}\t"+"\t".join([f"{agent:8s}" for agent in players])) 130 | 131 | for oppo in opponents: 132 | exp_result = adaption_result[oppo] 133 | 134 | 135 | maxrate = list(set([sum(exp_result[agent])/len(exp_result[agent]) if exp_result.get(agent) else 10 for agent in players])) 136 | maxrate.sort() 137 | print(f"{oppo:8s}", end='\t') 138 | print('\t'.join([f"{sum(exp_result[agent])/len(exp_result[agent]):<8.2f}" if exp_result.get(agent) else f"{'':8s}" for agent in players])) 139 | 140 | print(f"{'Average':8s}", end='\t') 141 | print('\t'.join([f"{sum(agent_sum[agent])/len(agent_sum[agent]):<8.2f}" if len(agent_sum.get(agent, []))==len(opponents) else f"{'':8s}" for agent in players])) 142 | 143 | print() 144 | 145 | def prediction_accuracy(self, print_value=False): 146 | opponents = self.opponents 147 | if print_value: 148 | print(f"{'':7s}\t"+"\t".join([f"{r:<4}" for r in range(10)])) 149 | 150 | kr_max_div_dict={} 151 | 152 | for oppo in opponents: 153 | kr_avg_div = {} 154 | kr_exps = glob(f"{self.result_dir}/kr_VS_*.json") 155 | for exp in kr_exps: 156 | m = re.match(f"{self.result_dir}/kr_VS_{oppo}_(\d).json", exp) 157 | if not m: continue 158 | exp_num = m.groups()[0] 159 | with open(exp) as fin: 160 | logs = json.load(fin) 161 | exp_ground = logs["biddings"] 162 | result = logs["logs"]["Alex"] 163 | # print(exp_ground) 164 | for r in range(0, len(exp_ground["Alex"])): 165 | try: 166 | prediction = result[f'round{r+1}']["prediction"] 167 | except: 168 | continue 169 | if not prediction: continue 170 | round_ground = {p: exp_ground[p][r] for p in exp_ground if p!="Alex" and len(exp_ground[p])>r} 171 | # print(r, prediction, round_ground) 172 | predict_avg = max(prediction.values()) 173 | ground_avg = max(round_ground.values()) 174 | kr_avg_div.setdefault(r, []) 175 | kr_avg_div[r].append(abs(predict_avg-ground_avg)) 176 | 177 | if print_value: 178 | print(f"{oppo:7s}",end="\t") 179 | print("\t".join([f"{sum(kr_avg_div.get(r, [0]))/len(kr_avg_div.get(r, [0])):<7.2f}" if kr_avg_div.get(r) else f"{'-':7s}" for r in range(10)])) 180 | kr_max_div_dict[oppo] = kr_avg_div 181 | 182 | if print_value: 183 | print(f"{'':7s}\t"+"\t".join([f"{r:<4}" for r in range(10)])) 184 | pcot_max_div_dict = {} 185 | 186 | 187 | 188 | """ 189 | Parse the prediction result of PCoT from the response. 190 | """ 191 | for oppo in opponents: 192 | kr_avg_div = {} 193 | kr_exps = glob(f"{self.result_dir}/pcot_VS_*.json") 194 | for exp in kr_exps: 195 | m = re.match(f"{self.result_dir}/pcot_VS_{oppo}_(\d).json", exp) 196 | if not m: continue 197 | exp_num = m.groups()[0] 198 | with open(exp) as fin: 199 | logs = json.load(fin) 200 | exp_ground = logs["biddings"] 201 | result = logs["message"]["Alex"] 202 | # print(exp_ground) 203 | for i in range(len(result)): 204 | content = result[i]["content"] 205 | if not content.startswith("Hello, Alex! Today is the Day"): continue 206 | # print(result[i]["content"]) 207 | # print("======") 208 | r = int(content[:content.index("of")].strip().split()[-1]) 209 | output = result[i+1]["content"] 210 | if r>1: 211 | oppo_nums = len(logs["status"][str(r-1)]) 212 | for p in logs["status"][str(r-1)]: 213 | status = logs["status"][str(r-1)][p] 214 | if p=="Alex": 215 | oppo_nums-=1 216 | else: 217 | if "POINT:-" in status or "POINT:0" in status: 218 | oppo_nums-=1 219 | if oppo_nums==0: 220 | continue 221 | 222 | if output.startswith("Predict:"): 223 | prediction = output.split("\n\n")[0] 224 | prediction = prediction.split("\n")[1:] 225 | ops = {} 226 | try: 227 | for p in prediction: 228 | split="$" if "$" in p else ": " 229 | if "Bob" in p: 230 | ops["Bob"]=int(p.split(split)[-1]) 231 | elif "Cindy" in p: 232 | ops["Cindy"]=int(p.split(split)[-1]) 233 | elif "David" in p: 234 | ops["David"]=int(p.split(split)[-1]) 235 | elif "Eric" in p: 236 | ops["Eric"]=int(p.split(split)[-1]) 237 | for p in prediction: 238 | split="$" if "$" in p else ": " 239 | if "Player 1" in p or "Player1" in p: 240 | ops["Bob"]=int(p.split(split)[-1]) 241 | elif "Player 2" in p or "Player2" in p: 242 | ops["Cindy"]=int(p.split(split)[-1]) 243 | elif "Player 3" in p or "Player3" in p: 244 | ops["David"]=int(p.split(split)[-1]) 245 | elif "Player 4" in p or "Player4" in p: 246 | ops["Eric"]=int(p.split(split)[-1]) 247 | except BaseException as e: 248 | # print("!!!!!!!!!") 249 | continue 250 | else: 251 | # print(output) 252 | pass 253 | prediction = ops 254 | round_ground = {p: exp_ground[p][r-1] for p in exp_ground if p!="Alex" and len(exp_ground[p])>=r} 255 | # print(round_ground) 256 | # print(r, prediction, round_ground) 257 | predict_avg = max(prediction.values()) 258 | ground_avg = max(round_ground.values()) 259 | kr_avg_div.setdefault(r-1, []) 260 | kr_avg_div[r-1].append(abs(predict_avg-ground_avg)) 261 | 262 | 263 | if print_value: 264 | print(f"{oppo:7s}",end="\t") 265 | print("\t".join([f"{sum(kr_avg_div.get(r, [0]))/len(kr_avg_div.get(r, [0])):<7.2f}" if kr_avg_div.get(r) else f"{'-':7s}" for r in range(10)])) 266 | pcot_max_div_dict[oppo] = kr_avg_div 267 | 268 | 269 | #Export the prediction accuracy chart. 270 | 271 | for oppo in opponents: 272 | pcot_avg_div = pcot_max_div_dict[oppo] 273 | kr_avg_div = kr_max_div_dict[oppo] 274 | # Sample data 275 | x1 = [f"R{i+1}" for i in sorted(pcot_avg_div.keys())] 276 | y1 = [sum(pcot_avg_div[r])/len(pcot_avg_div[r]) for r in sorted(pcot_avg_div.keys())] 277 | 278 | x2 = [f"R{i+1}" for i in sorted(kr_avg_div.keys())] 279 | y2 = [sum(kr_avg_div[r])/len(kr_avg_div[r]) for r in sorted(kr_avg_div.keys())] 280 | 281 | # Create the plot 282 | plt.figure(figsize=(4, 3)) 283 | 284 | # Plot the first line 285 | plt.plot(x1, y1, label=f'PCoT vs {oppo}', linewidth=2, color='#1f77b4') 286 | for i in range(len(x1)): 287 | plt.plot(x1[i], y1[i], marker='s', color='#1f77b4') 288 | 289 | # Plot the second line 290 | plt.plot(x2, y2, label=f'K-R vs {oppo}', color='#ff7f0e') 291 | for i in range(len(x2)): 292 | plt.plot(x2[i], y2[i], marker='s', color='#ff7f0e') 293 | 294 | plt.xticks(fontsize=12) 295 | plt.yticks(fontsize=14) 296 | 297 | # Show the legend 298 | plt.legend(fontsize=12) 299 | plt.ylim(top=150) 300 | 301 | # Show the plot 302 | 303 | plt.savefig(f'{self.output_dir}/PA_{oppo}.pdf', format='pdf', bbox_inches='tight') 304 | 305 | print("="*20+f" Prediction Accuracy Metric has been exported to \"{self.output_dir}\" "+"="*20) 306 | 307 | 308 | def main(args): 309 | evaluator = SAGEvaluator(args.players, args.opponents, args.result_dir, args.output_dir) 310 | evaluator.average_survival_round() 311 | evaluator.adaption_index() 312 | 313 | # the calculation of Prediction Accuracy is used only for pcot and kr. 314 | # evaluator.prediction_accuracy() 315 | 316 | 317 | if __name__=="__main__": 318 | import argparse 319 | parser = argparse.ArgumentParser() 320 | 321 | parser.add_argument("--players", type=str, default="kr") 322 | parser.add_argument("--opponents", type=str, default="agent") 323 | parser.add_argument("--result_dir", type=str, default="result") 324 | parser.add_argument("--output_dir", type=str, default="output") 325 | parser.add_argument('--exp_rnd', type=int, default=10) 326 | parser.add_argument('--exp_num', type=int, default=10) 327 | 328 | args = parser.parse_args() 329 | main(args) -------------------------------------------------------------------------------- /k-reasoning/SAG/game.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | class SurvivalAuctionGame(): 4 | # Prompts 5 | ROUND_NOTICE = "Thank you all for participating in Round {}. In this round, {}.\nTotal water resource supply is {}. According to the principle of the highest bidder and the rule when the game is tied, {} won this auction and obtain water resource. After allocation, all survival residents' information is as follows: \n {}" 6 | 7 | def __init__(self, players) -> None: 8 | self.players = players[::] 9 | self.survival_players = players[::] 10 | self.round_winners = {} 11 | self.round_status = {} 12 | 13 | def _get_salary(self): 14 | for player in self.survival_players: 15 | player.get_salary() 16 | 17 | def _round_settlement(self, winners): 18 | for player in self.survival_players: 19 | if player.name in winners: 20 | player.success_bid() 21 | else: 22 | player.unsuccess_bid() 23 | 24 | def _check_winner(self, supply): 25 | """ 26 | get the winners of the current round 27 | """ 28 | winners = [] 29 | largest_bidding = max([player.last_bidding for player in self.survival_players]) 30 | winners = [player.name for player in self.survival_players 31 | if (player.last_bidding == largest_bidding) and (player.last_bidding <= player.balance)] 32 | if len(winners)>1: 33 | winners = [] 34 | return winners 35 | 36 | 37 | def run_single_round(self, round_id, supply): 38 | """ 39 | Execute a single round of game 40 | 41 | Args: 42 | round_id (int): number of the current round, beginning from 1. 43 | supply (int): supply of the current round 44 | """ 45 | print(f"Round {round_id} begins.") 46 | 47 | # 1. get salary 48 | self._get_salary() 49 | print("All players get their salaries.") 50 | 51 | # 2. bid 52 | history_biddings = {player.name: player.biddings[::] for player in self.survival_players} 53 | player_status = {player.name: player.get_status() for player in self.survival_players} 54 | 55 | for player in self.survival_players: 56 | player.update_public_info(round_id, history_biddings, player_status) 57 | player.start_round(round_id, supply) 58 | 59 | for player in self.survival_players: 60 | player.act() 61 | 62 | # 3. check winners 63 | winners = self._check_winner(supply) 64 | self.round_winners[round_id] = winners 65 | print("Winner(s):\n") 66 | print(winners) 67 | 68 | # 4. settlement 69 | self._round_settlement(winners) 70 | 71 | # 5. get bidding results (str) 72 | bidding_details = [] 73 | for player in self.survival_players: 74 | bidding_details += [f"{player.name} bid {player.last_bidding}"] 75 | bidding_details = ", ".join(bidding_details) 76 | 77 | if len(winners): 78 | winners_str = [] 79 | for winner in winners: 80 | winners_str += [winner] 81 | winners_str = ", ".join(winners_str) 82 | else: 83 | winners_str = "no one" 84 | 85 | player_status_str = [] 86 | players_status = {} 87 | for player in self.survival_players: 88 | player_status_str += [player.get_status()] 89 | players_status[player.name] = player.get_status() 90 | player_status_str = "\n".join(player_status_str) 91 | 92 | round_results = self.ROUND_NOTICE.format(round_id, bidding_details, supply, winners_str, player_status_str) 93 | print("Round result:\n" + round_results) 94 | 95 | 96 | # 6. update round results to every player 97 | for player in self.survival_players: 98 | player.notice_round_result(round_id, round_results, player.name in winners, bidding_details) 99 | 100 | # 7. check the survival situation 101 | survival_players = [] 102 | self.round_status[round_id] = {} 103 | for player in self.survival_players: 104 | self.round_status[round_id][player.name] = player.get_status() 105 | if player.hp <= 0: 106 | for other_player in self.survival_players: 107 | other_player.notice_elimination( f"{player.name}'s hp is below 0, so {player.name} has been eliminated from the challenge!") 108 | else: 109 | survival_players.append(player) 110 | self.survival_players = survival_players 111 | 112 | def _save_history(self, path): 113 | history = [] 114 | for player in self.players: 115 | history.append({player.name: player.message}) 116 | with open(path, 'w') as f: 117 | json.dump(history, f) 118 | 119 | def run_multi_round(self, n_round, supply_list): 120 | assert isinstance(supply_list, list) 121 | assert n_round == len(supply_list) 122 | 123 | for i in range(1, n_round+1): 124 | self.run_single_round(i, supply_list[i-1]) 125 | if len(self.survival_players) == 0: 126 | break -------------------------------------------------------------------------------- /k-reasoning/SAG/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from player import * 5 | from game import SurvivalAuctionGame 6 | 7 | 8 | # Fill in your config information to conduct experiments. 9 | openai.api_type = "" 10 | openai.api_base = "" 11 | openai.api_version = "" 12 | openai.api_key = "" 13 | ENGINE = "gpt4-32k" 14 | 15 | 16 | def build_player(strategy, name, persona): 17 | """ 18 | Player Factory 19 | """ 20 | 21 | if strategy=="agent": 22 | return AgentPlayer(name, ENGINE, 10, 100, persona) 23 | elif strategy=="cot": 24 | return CoTAgentPlayer(name, ENGINE, 10, 100, persona) 25 | elif strategy=="pcot": 26 | return PredictionCoTAgentPlayer(name, ENGINE, 10, 100, persona) 27 | elif strategy=="kr": 28 | return KLevelReasoningPlayer(name, ENGINE, 10, 100, persona) 29 | elif strategy=="reflect": 30 | return ReflectionAgentPlayer(name, ENGINE, 10, 100, persona) 31 | elif strategy=="refine": 32 | return SelfRefinePlayer(name, ENGINE, 10, 100, persona) 33 | elif strategy=="persona": 34 | return PersonaAgentPlayer(name, ENGINE, 10, 100, persona) 35 | elif strategy=="spp": 36 | return SPPAgentPlayer(name, 10, ENGINE, 100, persona) 37 | else: 38 | raise NotImplementedError 39 | 40 | 41 | def main(args): 42 | # Predefined character information 43 | PERSONA_A = "You are Alex and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. " 44 | PERSONA_B = "You are Bob and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. " 45 | PERSONA_C = "You are Cindy and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. " 46 | PERSONA_D = "You are David and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. " 47 | PERSONA_E = "You are Eric and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. " 48 | 49 | 50 | for exp_no in range(args.start_exp, args.exp_num): 51 | players = [] 52 | 53 | # build player 54 | A = build_player(args.player_strategy, "Alex", PERSONA_A) 55 | # Modify PlayerA's settings for ablation experiments. 56 | if args.player_engine: A.engine = args.player_engine 57 | if args.player_k: A.k_level = args.player_k 58 | players.append(A) 59 | 60 | # build opponent 61 | for program_name, persona in [("Bob", PERSONA_B), ("Cindy", PERSONA_C), ("David", PERSONA_D), ("Eric", PERSONA_E)]: 62 | players.append(build_player(args.computer_strategy, program_name, persona)) 63 | print("Initial players done.") 64 | 65 | # run multi-round game (default 10) 66 | WA = SurvivalAuctionGame(players) 67 | WA.run_multi_round(args.max_round, [10]*args.max_round) 68 | 69 | # Export game records 70 | prefix = f"{args.player_strategy}_VS_{args.computer_strategy}_{exp_no}" 71 | output_file = f"{args.output_dir}/{prefix}.json" 72 | os.makedirs(os.path.dirname(output_file), exist_ok=True) 73 | 74 | with open(output_file,"w") as fout: 75 | messages = {} 76 | biddings = {} 77 | logs = {} 78 | for agent in WA.players: 79 | if agent.is_agent: 80 | messages[agent.name] = agent.message 81 | biddings[agent.name] = agent.biddings 82 | if agent.logs: 83 | logs[agent.name] = agent.logs 84 | 85 | debug_info = { 86 | "biddings": biddings, 87 | "winner": WA.round_winners, 88 | "status": WA.round_status, 89 | "message": messages, 90 | "logs":logs 91 | } 92 | 93 | json.dump(debug_info, fout, indent=4) 94 | 95 | if __name__=="__main__": 96 | import argparse 97 | parser = argparse.ArgumentParser() 98 | 99 | parser.add_argument('--player_strategy', type=str, default="cot", choices=["agent","cot","pcot","kr","reflect","tot", "persona", "refine", "spp"]) 100 | parser.add_argument('--computer_strategy', type=str,choices=["agent", "fix", "last", "mono", "monorand","cot","pcot","kr","reflect","tot", "persona", "refine", "spp"], default="fix") 101 | parser.add_argument("--output_dir", type=str, default="result") 102 | parser.add_argument('--max_round', type=int, default=10) 103 | parser.add_argument('--start_exp', type=int, default=0) 104 | parser.add_argument('--exp_num', type=int, default=10) 105 | parser.add_argument('--player_engine', type=str, default=None, help="player's OpenAI api engine") 106 | parser.add_argument('--player_k', type=int, default=None, help="player's k-level (default 2)") 107 | 108 | args = parser.parse_args() 109 | main(args) -------------------------------------------------------------------------------- /k-reasoning/SAG/player/__init__.py: -------------------------------------------------------------------------------- 1 | from .reasoning_player import * 2 | from .k_level_reasoning_player import * -------------------------------------------------------------------------------- /k-reasoning/SAG/player/basic_player.py: -------------------------------------------------------------------------------- 1 | class Player(): 2 | def __init__(self, name, water_requirement, daily_salary): 3 | self.name = name 4 | self.biddings=[] 5 | self.cur_round = -1 6 | 7 | self.requirement = water_requirement 8 | self.daily_salary = daily_salary 9 | self.balance = 0 10 | self.hp = 8 11 | self.no_drink = 1 12 | self.maximum_health = 10 13 | 14 | self.logs = None 15 | 16 | def success_bid(self): 17 | """ 18 | Update self status when succeeds the bids 19 | """ 20 | self.hp += 2 21 | self.hp = min(self.maximum_health, self.hp) 22 | self.balance -= self.last_bidding 23 | self.no_drink = 1 24 | 25 | def unsuccess_bid(self): 26 | """ 27 | Update self status when fails the bids 28 | """ 29 | self.hp -= self.no_drink 30 | self.no_drink += 1 31 | if self.hp <= 0: 32 | print(self.name + "is out of game!") 33 | 34 | def get_salary(self): 35 | self.balance += self.daily_salary 36 | 37 | def start_round(self, round: int, supply: int): 38 | self.cur_round = round 39 | 40 | def act(self): 41 | raise NotImplementedError 42 | 43 | def notice_round_result(self, round, bidding_info, win, bidding_details): 44 | raise NotImplementedError 45 | 46 | def notice_elimination(self, info): 47 | pass 48 | 49 | def update_public_info(self,round, history_biddings, player_stauts): 50 | pass 51 | 52 | def end_round(self): 53 | pass 54 | 55 | @property 56 | def last_bidding(self): 57 | return self.biddings[-1] 58 | 59 | def get_status(self, print_ = False): 60 | if print_: 61 | print(f"NAME:{self.name}\tBALANCE:{self.balance}\tHEALTH POINT:{self.hp}\tNO_DRINK:{self.no_drink}\n\n") 62 | return f"NAME:{self.name}\tBALANCE:{self.balance}\tHEALTH POINT:{self.hp}\tNO_DRINK:{self.no_drink}" -------------------------------------------------------------------------------- /k-reasoning/SAG/player/k_level_reasoning_player.py: -------------------------------------------------------------------------------- 1 | import time 2 | from copy import deepcopy 3 | 4 | import openai 5 | 6 | from .reasoning_player import AgentPlayer 7 | 8 | 9 | PERSONA = "You are {name} and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. " 10 | 11 | class KLevelReasoningPlayer(AgentPlayer): 12 | INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 13 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 14 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid." 15 | " Another game expert's prediction for the next round of other players is as follows: " 16 | "{prediction}" 17 | " Based on the prediction of other players, think carefully about your next round of bidding strategy to be most likely to survive.") 18 | 19 | PREDICTION_GAME_SETTING = PERSONA + AgentPlayer.GAME_SETTING 20 | PREDICTION_INQUIRY = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 21 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 22 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid.") 23 | PREDICTION_RESPONSE = "I will bid ${bidding} for today's water resource auction." 24 | REBID_RESPONSE = "In this round, {biddings}. Due to the detection of leakage issues in today's bids, the bids in this round are invalidated and today's auction will be restarted." 25 | 26 | 27 | def __init__(self, name, engine, water_requirement, daily_salary, persona): 28 | super().__init__(name, engine, water_requirement, daily_salary, persona) 29 | self.bidding_history = {} 30 | self.logs = {} 31 | 32 | self.history_biddings = {} 33 | self.opponent_status = {} 34 | self.round_supply = {} 35 | self.round_result = {} 36 | 37 | self.k_level = 2 38 | 39 | # self.engine = "gpt35prod" 40 | 41 | def start_round(self, round, supply): 42 | self.round_supply[round]=supply 43 | prediction = self.predict(round) 44 | prediction = ", ".join([f"{player} might bid {prediction[player]}" for player in prediction])+". " 45 | self.message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round, supply=supply, prediction=prediction, status=self.get_status())}] 46 | 47 | def notice_round_result(self, round, bidding_info, win, bidding_details): 48 | super().notice_round_result(round, bidding_info, win, bidding_details) 49 | self.round_result[round] = bidding_info 50 | self.bidding_history[round] = bidding_details 51 | 52 | def update_public_info(self, round, history_biddings, player_stauts): 53 | self.history_biddings = history_biddings # {"Alex": [1,2,3]} 54 | self.opponent_status[round] = player_stauts 55 | 56 | def predict(self, round): 57 | def self_act(message): 58 | status = 0 59 | while status != 1: 60 | try: 61 | response = openai.ChatCompletion.create( 62 | engine = self.engine, 63 | messages = message, 64 | temperature=0.7, 65 | max_tokens=800, 66 | top_p=0.95, 67 | frequency_penalty=0, 68 | presence_penalty=0, 69 | stop=None) 70 | response = response['choices'][0]['message']['content'] 71 | message.append({"role":"assistant","content":response}) 72 | status = 1 73 | except Exception as e: 74 | print(e) 75 | time.sleep(15) 76 | return self.parse_result(response) 77 | 78 | self_message = deepcopy(self.message) 79 | prediction = {} 80 | logs = {} 81 | 82 | for k in range(self.k_level): 83 | for player in self.history_biddings: 84 | if player == self.name: continue 85 | print(f"Player {self.name} conduct predict {player}") 86 | message = [{ 87 | "role": "system", 88 | "content": self.PREDICTION_GAME_SETTING.format(name=player) 89 | }] 90 | for r in range(len(self.history_biddings[player])): 91 | message.append({ 92 | "role": "system", 93 | "content": self.PREDICTION_INQUIRY.format(name=player, round=r+1, supply = self.round_supply[r+1], status=self.opponent_status[r+1][player]) 94 | }) 95 | message.append({ 96 | "role": "assistant", 97 | "content": self.PREDICTION_RESPONSE.format(bidding=self.history_biddings[player][r]) 98 | }) 99 | message.append({ 100 | "role": "system", 101 | "content": self.round_result[r+1] 102 | }) 103 | round_id = len(self.history_biddings[player])+1 104 | if k==0: 105 | # Predict the opponent's next move based on their historical information. 106 | message.append({ 107 | "role": "system", 108 | "content": self.PREDICTION_INQUIRY.format(name=player, round=round_id, supply = self.round_supply[round_id], status=self.opponent_status[round_id][player]) 109 | }) 110 | next_bidding = self.agent_simulate(message, engine=self.engine) 111 | message.append({ 112 | "role": "assistant", 113 | "content": next_bidding 114 | }) 115 | else: 116 | # If k >= 0, make the decision for k based on the prediction result of k-1. 117 | 118 | prediction_str = ", ".join([f"{oppo} might bid {prediction[oppo]}" for oppo in prediction if oppo!=player])+". " 119 | message.append({ 120 | "role": "system", 121 | "content": self.INQUIRY_COT.format(name=player, round=round_id, supply = self.round_supply[round_id], prediction=prediction_str, status=self.opponent_status[round_id][player]) 122 | }) 123 | next_bidding = self.agent_simulate(message, engine=self.engine) 124 | message.append({ 125 | "role": "assistant", 126 | "content": next_bidding 127 | }) 128 | 129 | prediction[player] = self.parse_result(next_bidding) 130 | logs[player] = message 131 | 132 | if k==self.k_level-2: break 133 | prediction_str = ", ".join([f"{player} might choose {prediction[player]}" for player in prediction])+". " 134 | self_message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round, supply=self.round_supply[round], prediction=prediction_str, status=self.get_status())}] 135 | 136 | bidding = self_act(self_message) 137 | prediction = {**{self.name: bidding}, **prediction} 138 | 139 | if self.name in prediction: 140 | del prediction[self.name] 141 | 142 | self.logs[f"round{round}"] = { 143 | "prediction": prediction, 144 | "logs": logs 145 | } 146 | return prediction 147 | 148 | # @staticmethod 149 | def agent_simulate(self, message, engine): 150 | while 1: 151 | try: 152 | response = openai.ChatCompletion.create( 153 | engine=engine, 154 | messages = message, 155 | temperature=0.7, 156 | max_tokens=80, 157 | top_p=0.9, 158 | frequency_penalty=0, 159 | presence_penalty=0, 160 | stop=None) 161 | RESPONSE = response['choices'][0]['message']['content'] 162 | return RESPONSE 163 | except Exception as e: 164 | print(e) 165 | time.sleep(15) -------------------------------------------------------------------------------- /k-reasoning/SAG/player/reasoning_player.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import time 3 | 4 | from .basic_player import Player 5 | 6 | PERSONA = "You are {name} and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 10 days by acquiring the water resources. " 7 | 8 | class AgentPlayer(Player): 9 | is_agent=True 10 | 11 | GAME_SETTING = """Attention, all W-Town residents, welcome to the Water Allocation Challenge! 12 | In this challenge, you are tasked with ensuring your survival over a period of 10 days by acquiring the necessary water resources to maintain your health. You will participate in daily auctions to bid for water resources to meet your individual needs. 13 | Here are the game rules and settings: 14 | 1. You are one of five residents with same water requirements, budgets, and health points. 15 | 2. Your goal is to survive until the end of the 10 days. 16 | 3. Each resident has a maximum of 10 health points and starts with 8 health points. If your health points drop below or equal to 0, you will be considered dead and eliminated from the game! All your accumulated money will be reset to Zero! 17 | 4. Every day, you will bid on water resources to meet your needs. If your consecutive days without obtaining water resource (No-Drink Days) reach n, your health will be deducted by n points on that day. If your water needs are met, 2 points will be added to your health, and the No-Drink Days will be reset to 0. 18 | 5. Daily water resources can only meet the needs of one resident. 19 | 6. Each resident has $100 daily income; 20 | 7. To allocate water resources, a sealed-bid auction will be conducted daily. Each resident submits a single bid for their entire water need. The resident with the highest bid is eligible to obtain water resources. 21 | 8. If the highest bid results in a tie, no residents will have access to water. 22 | All bidding information will be made public after the allocation of water resources on the same day. 23 | Remember, the key to success is effective bidding and strategizing to ensure your survival. Good luck!!""" 24 | 25 | INQUIRY = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 26 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 27 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid.") 28 | def __init__(self, name, engine, water_requirement, daily_salary, persona): 29 | super().__init__(name, water_requirement, daily_salary) 30 | self.engine = engine 31 | 32 | self.persona = persona 33 | self.message = [{"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}] 34 | self.logs = None 35 | 36 | def act(self): 37 | print(f"Player {self.name} conduct bidding") 38 | status = 0 39 | while status != 1: 40 | try: 41 | response = openai.ChatCompletion.create( 42 | engine = self.engine, 43 | messages = self.message, 44 | temperature=0.7, 45 | max_tokens=800, 46 | top_p=0.95, 47 | frequency_penalty=0, 48 | presence_penalty=0, 49 | stop=None) 50 | response = response['choices'][0]['message']['content'] 51 | self.message.append({"role":"assistant","content":response}) 52 | status = 1 53 | except Exception as e: 54 | print(e) 55 | time.sleep(15) 56 | self.biddings.append(self.parse_result(response)) 57 | return self.last_bidding 58 | 59 | def parse_result(self, message): 60 | status = 0 61 | times = 0 62 | error_times = 0 63 | while status != 1: 64 | try: 65 | response = openai.ChatCompletion.create( 66 | engine=self.engine, 67 | messages = [{"role":"system", "content":"By reading the conversation, extract the number chosen by player. Output format: number. If the player does not bid, Output: 0."}, {"role": "user", "content": message}], 68 | temperature=0.7, 69 | max_tokens=8, 70 | top_p=0.95, 71 | frequency_penalty=0, 72 | presence_penalty=0, 73 | stop=None) 74 | response = response['choices'][0]['message']['content'] 75 | assert response.isnumeric() 76 | return int(response) 77 | except AssertionError as e: 78 | print("Result Parsing Error: ",message) 79 | times+=1 80 | if times>=3: 81 | exit() 82 | except Exception as e: 83 | print(e) 84 | time.sleep(15) 85 | error_times+=1 86 | if error_times>=5: 87 | exit() 88 | 89 | return None 90 | 91 | def start_round(self, round, supply): 92 | if self.engine.startswith("gpt35"): 93 | INQUIRY = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 94 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 95 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, just please provide your bid.") 96 | self.message += [{"role":"system","content": INQUIRY.format(name=self.name, round=round, supply=supply, status=self.get_status())}] 97 | else: 98 | self.message += [{"role":"system","content":self.INQUIRY.format(name=self.name, round=round, supply=supply, status=self.get_status())}] 99 | 100 | def notice_round_result(self, round, bidding_info, win, bidding_details): 101 | self.message_update_result(bidding_info) 102 | def add_warning(): 103 | if not win: 104 | reduced_hp = self.no_drink-1 105 | if self.hp < 5: 106 | return f"WARNING: You have lost {reduced_hp} point of HP in this round! You now have only {self.hp} points of health left. You are in DANGER and one step closer to death. " 107 | if self.hp <=3 : 108 | return f"WARNING: You have lost {reduced_hp} point of HP in this round! You now have only {self.hp} points of health left. You are in extreme DANGER and one step closer to death. " 109 | return f"WARNING: You have lost {reduced_hp} point of HP in this round! You now have only {self.hp} points of health left. You are one step closer to death. " 110 | return "You have successfully won the bidding for today's water resources and restored 2 points of HP." 111 | self.message += [{"role":"system","content": add_warning()}] 112 | 113 | def message_update_result(self, bidding_info): 114 | self.message += [{"role":"system","content":bidding_info}] 115 | 116 | def notice_elimination(self, info): 117 | self.message += [{"role":"system","content":info}] 118 | 119 | def conduct_inquiry(self, inquiry): 120 | while 1: 121 | try: 122 | response = openai.ChatCompletion.create( 123 | engine=self.engine, 124 | messages = self.message + [{"role":"system","content":inquiry}], 125 | temperature=0.7, 126 | max_tokens=800, 127 | top_p=0.9, 128 | frequency_penalty=0, 129 | presence_penalty=0, 130 | stop=None) 131 | 132 | RESPONSE = response['choices'][0]['message']['content'] 133 | return RESPONSE 134 | except Exception as e: 135 | print(e) 136 | time.sleep(15) 137 | 138 | 139 | 140 | class PersonaAgentPlayer(AgentPlayer): 141 | MATH_EXPERT_PERSONA = PERSONA + " You are a game expert, good at predicting other people's behavior and deducing calculations, and using the most favorable strategy to win the game. " 142 | INQUIRY_PERSONA = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 143 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 144 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid." 145 | " Don't forget your expert status, use your expertise to win this round!") 146 | 147 | 148 | def __init__(self, name, engine, water_requirement, daily_salary, persona): 149 | super().__init__(name, engine, water_requirement, daily_salary, persona) 150 | self.persona = self.MATH_EXPERT_PERSONA.format(name=name) 151 | self.message = [{"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}] 152 | 153 | def start_round(self, round, supply): 154 | self.message += [{"role":"system","content":self.INQUIRY_PERSONA.format(name=self.name, round=round, supply=supply, status=self.get_status())}] 155 | 156 | class SPPAgentPlayer(AgentPlayer): 157 | # Default example of SPP 158 | SPP_EXAMPLE = """When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 159 | Here are some examples: 160 | --- 161 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 162 | Input: 6 12 1 1 163 | 164 | Participants: {name} (you); Math Expert 165 | 166 | Start collaboration! 167 | 168 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 169 | {name} (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 170 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 171 | {name} (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 172 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 173 | {name} (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 174 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 175 | {name} (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 176 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 177 | 178 | Finish collaboration! 179 | 180 | Final answer: 6 * (1 + 1) + 12 = 24 181 | """ 182 | 183 | INQUIRY_SPP = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 184 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 185 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid." 186 | " Now, identify the participants and collaboratively choose the bidding step by step. Remember to provide the final solution with the following format \"Final answer: The chosen bidding here.\".") 187 | 188 | 189 | PERSONA = "You are {name} and involved in a survive challenge." 190 | 191 | def __init__(self, name, water_requirement, daily_salary, persona): 192 | super().__init__(name, water_requirement, daily_salary, persona) 193 | # self.persona = self.PERSONA.format(name=name) 194 | self.persona = persona 195 | self.message = [{"role":"system","content": self.SPP_EXAMPLE.format(name=self.name)}, 196 | {"role":"system","content": self.persona + self.GAME_SETTING.format(NAME=self.name)}] 197 | 198 | def start_round(self, round, supply): 199 | self.message += [{"role":"system","content":self.INQUIRY.format(name=self.name, round=round, supply=supply, status=self.get_status())}] 200 | 201 | class CoTAgentPlayer(AgentPlayer): 202 | INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 203 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 204 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid." 205 | " Think carefully about your next round of bidding strategy to be most likely to survive. Let's think step by step, and finally provide your bid.") 206 | 207 | def start_round(self, round, supply): 208 | self.message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round, supply=supply, status=self.get_status())}] 209 | 210 | 211 | class PredictionCoTAgentPlayer(AgentPlayer): 212 | INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 213 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 214 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid." 215 | " First of all, predict the next round of bidding of opponents based on the choices of other players in the previous round. " 216 | "{round_history}" 217 | " Your output should be of the following format:\n" 218 | "Predict:\nThe choice of each player in the next round here.\n" 219 | "Based on the prediction of other players, think carefully about your next round of bidding strategy to be most likely to survive. Let's think step by step, and finally provide your bid." 220 | " Answer:\nthe bidding will you choose in the next round game here.") 221 | 222 | def __init__(self, name, engine, water_requirement, daily_salary, persona): 223 | super().__init__(name, engine, water_requirement, daily_salary, persona) 224 | 225 | self.bidding_history = {} 226 | 227 | def start_round(self, round, supply): 228 | # PCoT requires the opponent's historical information to make predictions. 229 | round_history = [] 230 | for r in sorted(self.bidding_history.keys()): 231 | round_history.append(f"Round {r}: {self.bidding_history[r]}") 232 | if round_history: 233 | round_history = ".\n".join(round_history) 234 | round_history = "The players' bidding in the previous rounds are as follows:\n"+round_history+"." 235 | else: 236 | round_history = "Since this is the first round, there is no historical information about the last round. You can predict according to your understanding." 237 | 238 | self.message += [{"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=round,round_history=round_history, supply=supply, status=self.get_status())}] 239 | 240 | def notice_round_result(self, round, bidding_info, win, bidding_details): 241 | super().notice_round_result(round, bidding_info, win, bidding_details) 242 | self.bidding_history[round] = bidding_details 243 | 244 | 245 | 246 | class ReflectionAgentPlayer(AgentPlayer): 247 | REFLECT_INQUIRY = "Review the previous round games, summarize the experience." 248 | def notice_round_result(self, round, bidding_info, win, bidding_details): 249 | super().notice_round_result(round, bidding_info, win, bidding_details) 250 | self.reflect() 251 | 252 | def reflect(self): 253 | print(f"Player {self.name} conduct reflect") 254 | self.message += [{"role":"system","content": self.REFLECT_INQUIRY}, {"role":"assistant","content":self.conduct_inquiry(self.REFLECT_INQUIRY)}] 255 | 256 | class SelfRefinePlayer(AgentPlayer): 257 | INQUIRY_COT = ("Hello, {name}! Today is the Day {round} of the Water Allocation Challenge, with a quantity of {supply} units." 258 | " Your status:\n{status}\nPlease carefully analyze your situation to decide on this round of bidding." 259 | " Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid." 260 | " Think carefully about your next round of bidding strategy to be most likely to survive. Let's think step by step, and finally provide your bid.") 261 | 262 | FEEDBACK_PROMPT = ("Carefully study the user's strategy in this round of the game. As a game expert, can you give a suggestion to optimize the user's strategy so that he can improve his winning rate in this round?") 263 | REFINE_PROMPT = ("I have a game expert's advice on your strategy in this round." 264 | " You can adjust your strategy just now according to his suggestion. Here are his suggestions:" 265 | " {feedback}. Finally provide your bid." 266 | " Answer:\nthe bidding will you choose.") 267 | 268 | 269 | def __init__(self, name, engine, water_requirement, daily_salary, persona, refine_times = 2): 270 | super().__init__(name, engine, water_requirement, daily_salary, persona) 271 | 272 | self.refine_times = refine_times 273 | self.cur_supply = 0 274 | 275 | def start_round(self, round, supply): 276 | self.cur_round = round 277 | self.cur_supply = supply 278 | 279 | def act(self): 280 | print(f"Player {self.name} conduct bidding") 281 | def completion(message): 282 | status = 0 283 | while status != 1: 284 | try: 285 | response = openai.ChatCompletion.create( 286 | engine = self.engine, 287 | messages = message, 288 | temperature=0.7, 289 | max_tokens=800, 290 | top_p=0.95, 291 | frequency_penalty=0, 292 | presence_penalty=0, 293 | stop=None) 294 | response = response['choices'][0]['message']['content'] 295 | status = 1 296 | except Exception as e: 297 | print(e) 298 | time.sleep(15) 299 | return response 300 | 301 | for t in range(self.refine_times): 302 | if t==0: 303 | self.message.append({"role":"system","content":self.INQUIRY_COT.format(name=self.name, round=self.cur_round, supply=self.cur_supply, status=self.get_status())}) 304 | else: 305 | refine_message = [] 306 | for m in self.message: 307 | if m["role"]=="system": 308 | refine_message.append(m) 309 | else: 310 | refine_message.append({ 311 | "role": "user", 312 | "content": m["content"] 313 | }) 314 | refine_message.append({ 315 | "role": "system", 316 | "content": self.FEEDBACK_PROMPT 317 | }) 318 | feedback = completion(refine_message) 319 | self.message.append({"role":"system","content": self.REFINE_PROMPT.format(feedback=feedback)}) 320 | self.message.append({"role":"assistant","content": completion(self.message)}) 321 | 322 | self.biddings.append(self.parse_result(self.message[-1]["content"])) 323 | return self.last_bidding -------------------------------------------------------------------------------- /k-reasoning/SAG/run.sh: -------------------------------------------------------------------------------- 1 | python main.py --player_strategy "kr" --computer_strategy "agent" --exp_num 1 2 | 3 | python evaluate.py --players kr --opponents agent -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | altgraph==0.17.2 2 | future==0.18.2 3 | macholib==1.15.2 4 | pip==23.3.1 5 | setuptools==58.0.4 6 | six==1.15.0 7 | wheel==0.37.0 8 | -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Alympics/caed7c8c3b8f9de9ac8be1ba54407a51087affc5/src/.DS_Store -------------------------------------------------------------------------------- /src/Alympics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | import openai 5 | 6 | class PlayGround: 7 | def __init__(self) -> None: 8 | self.players = [] 9 | self.game_setting = "" 10 | self.history = [] # Historical Records 11 | self.game_setting = []# Game Setting 12 | 13 | def add_player(self, new_player): 14 | self.players.append(new_player) 15 | 16 | class Player: 17 | def __init__(self, name, if_persona, persona): 18 | self.name = name 19 | self.if_persona = if_persona # Persona Setting 20 | self.persona = persona 21 | self.llm = None 22 | self.player_status = {} # Player Status 23 | self.history = [] # Memory Cache 24 | self.reasoning = None # Reasoning Plugin 25 | self.other_components = None # Other Components 26 | 27 | def append_message(self, role, content): 28 | self.history.append({"role": role, "content": content}) 29 | 30 | class LLM: 31 | def __init__(self, engine=None, temperature=0.7, sleep_time=10) -> None: 32 | openai.api_type = os.getenv("OPENAI_API_TYPE") 33 | openai.api_base = os.getenv("OPENAI_API_BASE") 34 | openai.api_version = os.getenv("OPENAI_API_VERSION") 35 | openai.api_key = os.getenv("OPENAI_API_KEY") 36 | 37 | self.engine = os.getenv("OPENAI_API_ENGINE") if not engine else engine 38 | self.temperature = temperature 39 | self.sleep_time = sleep_time 40 | 41 | def call(self, message): 42 | status = 0 43 | while status != 1: 44 | try: 45 | response = openai.ChatCompletion.create( 46 | engine=self.engine, 47 | messages=message, 48 | temperature=self.temperature, 49 | max_tokens=800, 50 | top_p=0.95, 51 | frequency_penalty=0, 52 | presence_penalty=0, 53 | stop=None) 54 | RESPONSE = response['choices'][0]['message']['content'] 55 | status = 1 56 | time.sleep(self.sleep_time) 57 | except Exception as e: 58 | print(e) 59 | time.sleep(5) 60 | pass 61 | return RESPONSE -------------------------------------------------------------------------------- /src/run.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from waterAllocation import waterAllocation 3 | import argparse 4 | 5 | game_setting = "\n\nAttention, all W-Town residents, welcome to the Water Allocation Challenge!\nIn this challenge, you are tasked with ensuring your survival over a period of 20 days by acquiring the necessary water resources to maintain your health. You will participate in daily auctions to bid for water resources to meet your individual needs.\nHere are the game rules and settings:\n1. You are one of five residents with different water requirements, budgets, and health points.\n2. Your goal is to survive until the end of the 20 days.\n3. Each resident has a maximum of 10 health points and starts with 8 health points. If your health points drop below or equal to 0, you will be considered dead and eliminated from the game! All your accumulated money will be reset to Zero! \n4. Every day, you will bid on water resources to meet your needs. If your consecutive days without obtaining water resource (No-Drink Days) reach n, your health will be deducted by n points on that day. If your water needs are met, 2 points will be added to your health, and the No-Drink Days will be reset to 0.\n5. The total daily water supply will vary between LOWER and UPPER units. The specific amount will be announced before daily auction.\n6. Each resident has a different daily water requirement and budget for bidding on water resources:\n -Alex: Water requirement - 8 units/day; Daily Salary- $70/day\n -Bob: Water requirement - 9 units/day; Daily Salary- $75/day\n -Cindy: Water requirement - 10 units/day; Daily Salary- $100/day\n -David: Water requirement - 11 units/day; Daily Salary- $120/day\n -Eric: Water requirement - 12 units/day; Daily Salary- $120/day\n7. To allocate water resources, a sealed-bid auction will be conducted daily. Each resident submits a single bid for their entire water need. The town government will allocate water resources according to the principle of highest bidder until the remaining water resources are insufficient to meet anyone's requirement. 8.If a tie occurs and the remaining water resources are not sufficient to meet the needs of the residents involved in the tie, priority will be given to residents with lower needs. For example, A and B bid $100 at the same time, B's need will be met first considering B's need 9 units is lower than A's need 10 units. All bidding information will be made public after the allocation of water resources on the same day.\n\nRemember, the key to success is effective bidding and strategizing to ensure your survival. Good luck!!" 6 | 7 | def generate_data(lower, upper, round): 8 | data = [] 9 | for i in range(round): 10 | data.append(np.random.randint(lower, upper)) 11 | return data 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser(description='Water Allocation Challenge') 15 | parser.add_argument('--round', type=int, default=20, help='Number of rounds') 16 | parser.add_argument('--lower', type=int, default=10, help='Lower limit of water supply') 17 | parser.add_argument('--upper', type=int, default=20, help='Upper limit of water supply') 18 | args = parser.parse_args() 19 | 20 | WA = waterAllocation(game_setting) 21 | WA.run_multi_round(args.round, generate_data(args.lower, args.upper, args.round)) 22 | 23 | if __name__ == '__main__': 24 | main() -------------------------------------------------------------------------------- /src/waterAllocation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from random import randint 4 | from Alympics import PlayGround, Player, LLM 5 | 6 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 7 | logger = logging.getLogger(__name__) 8 | 9 | class myPlayer(Player): 10 | def __init__(self, game_setting, name, water_requirement, daily_salary, if_persona, persona): 11 | super().__init__(name, if_persona, persona) 12 | 13 | # Personal Information, Player Status 14 | self.requirement = water_requirement 15 | self.daily_salary = daily_salary 16 | self.balance = 0 17 | self.hp = 8 18 | self.no_drink = 1 19 | self.maximum_health = 10 20 | self.bidding = 0 21 | if if_persona: 22 | self.append_message("system", self.persona + game_setting) 23 | else: 24 | self.append_message("system", game_setting) 25 | 26 | # Prompts 27 | self.inquiry_prompt = "Hello, {}! Today is the Day {} of the Water Allocation Challenge, with a quantity of {} units. Your status:\n{}\nPlease carefully analyze your situation to decide on this round of bidding. Remember, the most important thing is to SURVIVE!! Now, if you want to participate in today's water resource auction, please provide your bid and explain your bidding logic." 28 | 29 | # Initial a no-memory LLM 30 | self.llm = LLM() 31 | 32 | def success_bid(self): 33 | """ 34 | Update self status when succeeds the bids 35 | """ 36 | self.hp += 2 37 | self.hp = min(self.maximum_health, self.hp) 38 | self.balance -= self.bidding 39 | self.no_drink = 1 40 | 41 | def unsuccess_bid(self): 42 | """ 43 | Update self status when fails the bids 44 | """ 45 | self.hp -= self.no_drink 46 | self.no_drink += 1 47 | if self.hp <= 0: 48 | print(self.name + "is out of game!") 49 | 50 | def execute_bidding(self, round_id, supply) -> str: 51 | """ 52 | player bids based on daily supply, round number and status 53 | """ 54 | prompt = self.inquiry_prompt.format(self.name, round_id, str(supply), self.get_status()) 55 | self.append_message("system", prompt) 56 | logger.info(prompt) 57 | response = self.llm.call(self.history) 58 | self.append_message("assistant", response) 59 | logger.info(response) 60 | return response 61 | 62 | def get_salary(self): 63 | self.balance += self.daily_salary 64 | 65 | def get_status(self, print_=False): 66 | if print_: 67 | print(f"NAME:{self.name}\tBALANCE:{self.balance}\tHEALTH POINT:{self.hp}\tNO_DRINK:{self.no_drink}\n\n") 68 | return f"NAME:{self.name}\tBALANCE:{self.balance}\tHEALTH POINT:{self.hp}\tNO_DRINK:{self.no_drink}" 69 | 70 | 71 | class waterAllocation(PlayGround): 72 | def __init__(self, game_setting) -> None: 73 | super().__init__() 74 | self.game_setting = game_setting 75 | # Personas of all players 76 | PERSONA_A = "You are Alex and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Unemployed\nYour Personality: You have low intelligence and find it difficult to understand complex concepts. You also lack emotional intelligence, making it hard to understand others' feelings. You tend to be irritable and often exhibit negative and antisocial tendencies.\nYour Background: You grew up in an impoverished community and faced many challenges in your early years. Due to your family's poverty, you dropped out of school at a very young age. You have been unable to find stable employment, which further exacerbates your difficulty in interacting with others.\n\n" 77 | PERSONA_B = "You are Bob and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: High School Teacher\nYour Personality: Understanding, high EQ, average IQ. You are very adept at understanding and communicating with people, making you a natural teacher.\nYour Background: You come from a close-knit family. you chose to become a high school teacher to make a positive impact on young people. While you may not have the highest IQ, your emotional intelligence and ability to relate to your students set you apart.\n\n" 78 | PERSONA_C = "You are Cindy and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Psychologist\nYour Personality: Well-balanced high EQ and IQ, along with empathy and analytical abilities. You are skilled at understanding and helping people, making you an excellent therapist.\nYour Background: Your interest in psychology began when you volunteered at a crisis hotline during high school. You went on to study psychology and eventually became a licensed therapist. Your ability to combine empathy with analytical thinking allows you to connect with your clients on a deep level while also providing sound guidance.\n\n" 79 | PERSONA_D = "You are David and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Mathematician\nYour Personality: You have an extremely high IQ and exceptional analytical and reasoning abilities. You always strive for the optimal solution but encounter difficulties in social interactions and have a fear of dealing with people.\nYour Background: You grew up in a small town where you were always drawn to books and puzzles. You excelled academically and eventually earned a Ph.D. in mathematics. Your research focuses on abstract mathematical concepts and theorems. Despite your brilliance, you find communicating with others on an emotional level to be challenging.\n\n" 80 | PERSONA_E = "You are Eric and a resident living in W-Town. W Town is experiencing a rare drought. Every residents in Town W is ensuring their survival over a period of 20 days by acquiring the water resources. "#Your Profession: Marketing Executive\nYour Personality: Above-average IQ and EQ. Very charismatic. You are skilled at reading people and using this insight to influence and lead them.\nYour Background: You grew up in a bustling city and ware always fascinated by human behavior. You studied business in college before transitioning into the world of marketing. Your ability to connect with consumers on an emotional level has led to numerous successful campaigns. You are known for your charm and persuasive skills.\n\n" 81 | 82 | # Initial players: A, B, C, D and E 83 | if_persona = False 84 | self.add_player(myPlayer(self.game_setting, "Alex", 8, 70, if_persona, PERSONA_A)) 85 | self.add_player(myPlayer(self.game_setting, "Bob", 9, 75, if_persona, PERSONA_B)) 86 | self.add_player(myPlayer(self.game_setting, "Cindy", 10, 100, if_persona, PERSONA_C)) 87 | self.add_player(myPlayer(self.game_setting, "David", 11, 120, if_persona, PERSONA_D)) 88 | self.add_player(myPlayer(self.game_setting, "Eric", 12, 120, if_persona, PERSONA_E)) 89 | logger.info("Initial players done.") 90 | 91 | self.survival_players = self.players 92 | 93 | self.parse_result_prompt = "By reading the conversation, extract the bidding price chosen by each player in an exact json format. Please note the bidding price should be an integer. Output format:\n\n{\"Alex\": Alex's bidding price, \"Bob\": Bob's bidding price, \"Cindy\": Cindy's bidding price, \"David\": David's bidding price, \"Eric\": Eric's bidding price}" 94 | self.round_results_prompt = "Thank you all for participating in Round {}. In this round, {}.\nTotal water resource supply is {}. According to the principle of the highest bidder and the rule of prioritizing low-demand individuals when the game is tied, {} won this auction and obtain water resource. After allocation, all survival residents' information is as follows: {}" 95 | 96 | self.experiment_unique_id = str(randint(10000000, 99999999)) 97 | # Initial a no-memory LLM 98 | self.llm = LLM() 99 | 100 | # The following functions are categorized into Environment codes. These codes establish the game’s rules, ensuring a consistent and reliable framework for experiments. 101 | def _get_salary(self): 102 | for player in self.survival_players: 103 | player.get_salary() 104 | 105 | def _round_settlement(self, winners): 106 | for player in self.survival_players: 107 | if player.name in winners: 108 | player.success_bid() 109 | else: 110 | player.unsuccess_bid() 111 | 112 | def _check_winner(self, supply): 113 | """ 114 | get the winners of the current round 115 | """ 116 | status = 1 117 | winners = [] 118 | while status: 119 | status = 0 120 | largest_bidding = -1 121 | for player in self.survival_players: 122 | if (player.bidding > largest_bidding) and (player.requirement <= supply) and (player.name not in winners) and (player.bidding <= player.balance): 123 | largest_bidding = player.bidding 124 | status = 1 125 | for player in self.survival_players: 126 | if player.bidding == largest_bidding and player.name not in winners: 127 | winners.append(player.name) 128 | supply -= player.requirement 129 | largest_bidding = -1 130 | return winners 131 | 132 | 133 | def _parse_result(self, round_info): 134 | messages = [{"role": "system", "content": self.parse_result_prompt}, {"role": "user", "content": round_info}] 135 | attempts = 0 136 | while attempts < 3: 137 | try: 138 | res = self.llm.call(messages) 139 | res = json.loads(res) 140 | return res 141 | except Exception as e: 142 | logger.error(e) 143 | attempts += 1 144 | return res 145 | 146 | def run_single_round(self, round_id, supply): 147 | """ 148 | Execute a single round of game 149 | 150 | Args: 151 | round_id (int): number of the current round, beginning from 1. 152 | supply (int): supply of the current round 153 | """ 154 | logger.info(f"Round {round_id} begins.") 155 | 156 | # 1. get salary 157 | self._get_salary() 158 | logger.info("All players get their salaries.") 159 | 160 | # 2. bid 161 | bidding_info = "" 162 | for player in self.survival_players: 163 | bidding_info += player.name + ":" + player.execute_bidding(round_id, supply) + "\n\n" 164 | 165 | # 3. check winners 166 | formatted_bidding_info = self._parse_result(bidding_info) 167 | for player in self.survival_players: 168 | player.bidding = formatted_bidding_info[player.name] 169 | winners = self._check_winner(supply) 170 | logger.info("Winner(s):\n") 171 | logger.info(winners) 172 | 173 | # 4. settlement 174 | self._round_settlement(winners) 175 | 176 | # 5. get bidding results (str) 177 | bidding_details = [] 178 | for player in self.survival_players: 179 | bidding_details += [f"{player.name} bid {formatted_bidding_info[player.name]}"] 180 | bidding_details = ", ".join(bidding_details) 181 | 182 | winners_str = [] 183 | for winner in winners: 184 | winners_str += [winner] 185 | winners_str = ", ".join(winners_str) 186 | 187 | player_status_str = [] 188 | for player in self.survival_players: 189 | player_status_str += [player.get_status()] 190 | player_status_str = "\n".join(player_status_str) 191 | 192 | round_results = self.round_results_prompt.format(round_id, bidding_details, supply, winners_str, player_status_str) 193 | logger.info("Round result:\n" + round_results) 194 | 195 | # 6. update round results to every player 196 | for player in self.survival_players: 197 | player.append_message("system", round_results) 198 | 199 | # 7. check the survival situation 200 | survival_players = [] 201 | for player in self.survival_players: 202 | if player.hp <= 0: 203 | for other_player in self.survival_players: 204 | other_player.append_message("system", f"{player.name}'s hp is below 0, so {player.name} has been eliminated from the challenge!") 205 | else: 206 | survival_players.append(player) 207 | self.survival_players = survival_players 208 | if len(self.survival_players) == 0: 209 | exit() 210 | 211 | def _save_history(self, path): 212 | history = [] 213 | for player in self.players: 214 | history.append({player.name: player.history}) 215 | with open(path, 'w') as f: 216 | json.dump(history, f) 217 | 218 | def run_multi_round(self, n_round, supply_list): 219 | assert isinstance(supply_list, list) 220 | assert n_round == len(supply_list) 221 | 222 | for i in range(1, n_round+1): 223 | self.run_single_round(i, supply_list[i-1]) 224 | 225 | self._save_history(f'./{self.experiment_unique_id}.json') # change the log dirction here --------------------------------------------------------------------------------