├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── VERSION ├── conf.py ├── docs ├── CLI.md ├── README.md ├── assets │ ├── ion_client.gif │ ├── ion_client_cli.png │ ├── ion_client_py.png │ ├── ion_server.gif │ ├── ion_server_cli.png │ ├── ion_server_py.png │ ├── logo.png │ ├── pom_battle_1.png │ ├── pom_battle_2.png │ └── pom_env_output.png ├── competitions.md ├── environment.md ├── game_rules.md ├── getting_started.md ├── index.md ├── multiplayer.md ├── pommerman.bib └── research.md ├── env.yml ├── examples ├── docker-agent │ ├── Dockerfile │ └── run.py └── simple_ffa_run.py ├── manager ├── app.py ├── app.wsgi ├── celery_.py ├── requirements.txt └── run_celery.sh ├── mkdocs.yml ├── notebooks └── Playground.ipynb ├── pommerman ├── README.md ├── __init__.py ├── agents │ ├── __init__.py │ ├── base_agent.py │ ├── docker_agent.py │ ├── http_agent.py │ ├── player_agent.py │ ├── player_agent_blocking.py │ ├── random_agent.py │ ├── simple_agent.py │ └── tensorforce_agent.py ├── characters.py ├── cli │ ├── __init__.py │ ├── run_battle.py │ └── train_with_tensorforce.py ├── configs.py ├── constants.py ├── envs │ ├── __init__.py │ ├── v0.py │ ├── v1.py │ └── v2.py ├── forward_model.py ├── graphics.py ├── helpers │ └── __init__.py ├── network │ ├── README.md │ ├── __init__.py │ ├── client │ │ ├── __init__.py │ │ ├── constants.py │ │ └── network.py │ └── server │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── match.py │ │ └── network.py ├── resources │ ├── Agent0-No-Background.png │ ├── Agent0-Team-No-Background.png │ ├── Agent0-Team.png │ ├── Agent0.png │ ├── Agent1-No-Background.png │ ├── Agent1-Team-No-Background.png │ ├── Agent1-Team.png │ ├── Agent1.png │ ├── Agent2-No-Background.png │ ├── Agent2-Team-No-Background.png │ ├── Agent2-Team.png │ ├── Agent2.png │ ├── Agent3-No-Background.png │ ├── Agent3-Team-No-Background.png │ ├── Agent3-Team.png │ ├── Agent3.png │ ├── AgentDummy-No-Background.png │ ├── AgentDummy.png │ ├── Bomb-1.png │ ├── Bomb-10.png │ ├── Bomb-2.png │ ├── Bomb-3.png │ ├── Bomb-4.png │ ├── Bomb-5.png │ ├── Bomb-6.png │ ├── Bomb-7.png │ ├── Bomb-8.png │ ├── Bomb-9.png │ ├── Bomb.png │ ├── Cousine-Regular.ttf │ ├── ExtraBomb.png │ ├── Flames.png │ ├── Fog.png │ ├── IncrRange.png │ ├── Kick.png │ ├── Passage.png │ ├── Rigid.png │ ├── Skull.png │ ├── Wood.png │ └── X-No-Background.png ├── runner │ ├── __init__.py │ └── docker_agent_runner.py └── utility.py ├── pylintrc ├── requirements.txt ├── requirements_extra.txt ├── scripts ├── build_sample_docker_agent └── run_sample_docker_agent └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | .pytest_cache/ 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule.* 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # Environments 83 | .env 84 | .venv 85 | env/ 86 | venv*/ 87 | ENV/ 88 | env.bak/ 89 | venv.bak/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | 105 | # End of https://www.gitignore.io/api/python 106 | 107 | #IDEA 108 | .idea 109 | .vscode/ 110 | 111 | # Compiled source # 112 | ################### 113 | *.com 114 | *.class 115 | *.dll 116 | *.exe 117 | *.o 118 | *.so 119 | *.pyc 120 | 121 | # Packages # 122 | ############ 123 | # it's better to unpack these files and commit the raw source 124 | # git has its own built in compression methods 125 | *.7z 126 | *.dmg 127 | *.gz 128 | *.iso 129 | *.jar 130 | *.rar 131 | *.tar 132 | *.zip 133 | 134 | # Logs and databases # 135 | ###################### 136 | *.log 137 | *.sql 138 | *.sqlite 139 | 140 | # OS generated files # 141 | ###################### 142 | .DS_Store 143 | .DS_Store? 144 | ._* 145 | .Spotlight-V100 146 | .Trashes 147 | Icon? 148 | ehthumbs.db 149 | Thumbs.db 150 | 151 | # NPM and SASS compilation # 152 | ############################# 153 | .sass-cache 154 | node_modules 155 | 156 | # My own # 157 | ###################### 158 | *~ 159 | *#* 160 | venv 161 | documents 162 | *.p 163 | audio 164 | dump.rdb 165 | local_config.py 166 | *sublime-workspace 167 | *sublime-project 168 | *xcdatamodeld* 169 | *xcodeproject* 170 | concierge.xcodeproj 171 | tmpimg/ 172 | local 173 | .eslintrc 174 | 175 | # Web specific # 176 | #################### 177 | a/static/js/build/* 178 | a/static/css/* 179 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | 5 | script: 6 | - # @TODO run tests here 7 | - pip install . 8 | - "nosetests && pylint -j 0 pommerman" 9 | - "nosetests && pylint -j 0 mananger" 10 | - "nosetests && pylint -j 0 examples" 11 | 12 | deploy: 13 | provider: pypi 14 | user: "cinjon" 15 | password: 16 | secure: "WigaDIWWgBGVQ2H2rx7dhLGivPDxLOOjvI+6OPoVgT1Q3/ydloeu9zZWcjEzgP1q9zbajKMFOOdOmO2d/agZpgBY2vDxDfSiPYsasxnxY1ADNP3zpcHO4FMRa6+GZWPKFGjibNTcUgoTsNRxIfPBPGQZpKH33sAnekwlkQIEtOYRFf6gU+4dtn+X0FaoKmwimChnVbiEhPw9YU65/9+3hVsJAtm6m8jxVSr5grN6kQPIK4ItZ2TgRPbTYXAi/t623Gx762emok6UofgdNoobYSmbozFtgG5PC1WcDOnA8nBEyJQOlkXRQvrTQ9THDGa0qhtb6kfNz0zbHe4uQ08QhHZ2EGp1MLnB/pmOhDLS/XKtoPI1mehYApz30ZAsTd+fIwYLUf5eGmiBN5lcJ+unSaY8HwNUD0ZR6YsXLTHJY3LuC9wAUbjlsj/3CkVV7hi/5kzUpg4mkYNw80cffHavWXC1eAw1qC/L6ryjFr1jq2irzIxWKbVEnkpMqGlVom0FrfupsUoSKwPlxEx12QwIzM6qLOKr4CRZzml1sMjsKWF4KpayLCRkWHeMOQh7Tu8P7VhJ7OD/HByrSGb01Sc/KQq89u1xCOb8TEvDvRFXgQZ+i4MIW+uGoW+d66wPwK91m1QwFwmKVNXl4xx9o4K1mf0gOFw9P9RPxgTehAqQPq8=" 17 | on: 18 | tags: true 19 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Pommerman is dedicated to providing a harassment-free experience for everyone, regardless of gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, age, race, or religion. We do not tolerate harassment of participants in any form. 4 | 5 | This code of conduct applies to all Pommerman spaces (including Gist comments) both online and off. Anyone who violates this code of conduct may be sanctioned or expelled from these spaces at the discretion of the Pommerman team. 6 | 7 | We may add additional rules over time, which will be made clearly available to participants. Participants are responsible for knowing and abiding by these rules. -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributor Guide 2 | 3 | We are excited to have your help building Pommerman. There are many ways to contribute to Pommerman. You can contribute code, write documentation and tutorials, or answer questions in our Discord community. Below is a general overview of contributing. We really look forward to your help! 4 | 5 | ## Code of Conduct 6 | 7 | We strive to foster an open community. Please read our [Code of Conduct](https://github.com/MultiAgentLearning/playground/blob/master/CODE_OF_CONDUCT.md). 8 | 9 | ## How to contribute 10 | 11 | Below is a list of different ways for you to contribute. 12 | 13 | * Bugfixes 14 | * New features 15 | * Documentation 16 | * Design 17 | * Tutorials 18 | 19 | This list isn't complete. We very much welcome other ideas. Please come talk to us in our [Discord](https://discord.gg/wjVJEDc) chat. 20 | 21 | Here's a list that we see as priorities from the community: 22 | 23 | 1. Better graphics: We want Pommerman to have a more welcoming feel. Right now, it's just pixels. Even replacing the squares with sprites would be really nice. [Issue 5](https://github.com/MultiAgentLearning/playground/issues/7) 24 | 2. Better (and more) baselines: We released the SimpleAgent as a first baseline to beat before submitting agents to compete. We would like to see more there, each with a degree of difficulty and geared towards the different competitions. 25 | 3. Make tutorials: We plan to make a tutorial for each of the learned Agents that we enter. However, it would be awesome if others did as well. This extends from well-documented algorithms like DQN all the way to less considered ones like Evolutionary Learning. 26 | 27 | 28 | ## Contributing Code 29 | 30 | The general workflow for commiting code. 31 | 32 | * Fork the repository 33 | * Create a local branch for your fix 34 | * Commit your changes and push your created branch to your fork 35 | * Open a new pull request into our master branch 36 | 37 | ## Formating 38 | 39 | **Spacing** - In between methods in classes use one line space. Functions, Classes, and groups of variables outside of a Class use two line spaces. 40 | 41 | **Naming** - Classes use caps camelcase whereas functions, methods, and variables use snake case. Constants are all caps and use snake case. Names should not exceed 80 characters. 42 | 43 | **Commenting** - Doc string are required for all files, modules, classes, and functions. Comment complicated code or code that isn't easily understood. 44 | 45 | 46 | ## Linting 47 | 48 | This project uses pylint to ensure code is formatted correctly. You can lint a module space or a single file by using one of the following terminal commands. 49 | 50 | ``` 51 | # A directory or module 52 | pylint pommerman/ 53 | 54 | # A single file 55 | pylint pommerman/utility. 56 | ``` 57 | 58 | If your code doesn't pass linting please make the updates to ensure your code passes. PR's will not be accepted if your code doesn't pass the linter. 59 | 60 | You can dig into the how we lint by taking a look at the `pylintrc` file in the root of this repo. 61 | 62 | **Linting** - Please lint according to the google style. An easy way to do this is to use the yapf pip package: `yapf --style google `. Include the flag `-i` to edit the file in place. 63 | 64 | **Linting** - Please lint according to the google style. An easy way to do this is to use the yapf pip package: `yapf --style google `. Include the flag `-i` to edit the file in place. 65 | 66 | ## Discord 67 | 68 | Discussions, correspondence, and announcements often happen in Discord. You can get access through our [Discord invite.](https://discord.gg/wjVJEDc) 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 Cinjon Resnick 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Playground 2 | 3 | > First time? check out our [website](https://www.pommerman.com) for more information, 4 | > our [Discord](https://discordapp.com/invite/wjVJEDc) to join the community, 5 | > or read the [documentation](./docs) to get started. 6 | 7 | Playground hosts Pommerman, a clone of Bomberman built for AI research. People from around the world submit agents that they've trained to play. We run regular competitions on our servers and report the results and replays. 8 | 9 | There are three variants for which you can enter your agents to compete: 10 | 11 | * FFA: Free For All where four agents enter and one leaves. It tests planning, tactics, and cunning. The board is fully observable. 12 | * Team (The NIPS '18 Competition environment): 2v2 where two teams of agents enter and one team wins. It tests planning, and tactics, and cooperation. The board is partially observable. 13 | * Team Radio: Like team in that a it's a 2v2 game. Differences are that the agents each have a radio that they can use to convey 2 words from a dictionary of size 8 each step. 14 | 15 | #### Why should I participate? 16 | 17 | * You are a machine learning researcher and similarly recognize the lack of approachable benchmarks for this subfield. Help us rectify this and prove that your algorithm is better than others. 18 | * You want to contribute to multi agent or communication research. This is first and foremost a platform for doing research and everything that we do here will eventually get published with generous (or primary) support from us. 19 | * You really like(d) Bomberman and are fascinated by AI. This is a great opportunity to learn how to build intelligent agents. 20 | * You want the glory of winning an AI competition. We are going to publicize the results widely. 21 | * You think AI is dumb and can make a deterministic system that beats any learned agent. 22 | 23 | #### How do I train agents? 24 | 25 | Most open-source research tools in this domain have been designed with single agents in mind. We will be developing resources towards standardizing multi-agent learning. In the meantime, we have provided an example training script in train_with_tensorforce.py. It demonstrates how to wrap the Pommerman environments such that they can be trained with popular libraries like TensorForce. 26 | 27 | #### How do I submit agents that I have trained? 28 | 29 | The setup for submitting agents will be live shortly. It involves making a [Docker](https://docs.docker.com/get-started/) container that runs your agent. We then read and upload your docker file via Github Deploy Keys. You retain the ownership and license of the agents. We will only look at your code to ensure that it is safe to run, doesn't execute anything malicious, and does not cheat. We are just going to run your agent in competitions on our servers. We have an example agent that already works and further instructions are in the games/a/docker directory. 30 | 31 | #### Who is running this? 32 | 33 | [Cinjon Resnick](http://twitter.com/cinjoncin), [Denny Britz](https://twitter.com/dennybritz), [David Ha](https://twitter.com/hardmaru), [Jakob Foerster](https://www.linkedin.com/in/jakobfoerster/), and [Wes Eldridge](https://twitter.com/weseldridge) are the folks behind this. We are generously supported by a host of other people, including [Kyunghyun Cho](https://twitter.com/kchonyc), [Joan Bruna](https://twitter.com/joanbruna), [Julian Togelius](http://julian.togelius.com/) and [Jason Weston](https://research.fb.com/people/weston-jason/). You can find us in the [Discord](https://discordapp.com/invite/wjVJEDc). 34 | 35 | Pommerman is immensely appreciate of the generous assistance it has received from Jane Street Capital, NVidia, Facebook AI Research, and Google Cloud. 36 | 37 | #### How can I help? 38 | 39 | To see the ways you can get invovled with the project head over to our [Contributing Guide](https://github.com/MultiAgentLearning/playground/blob/master/CONTRIBUTING.md) and checkout our current [issues](https://github.com/MultiAgentLearning/playground/issues). 40 | 41 | # Contributing 42 | 43 | We welcome contributions through pull request. See [CONTRIBUTING](../master/CONTRIBUTING.md) for more details. 44 | 45 | # Code of Conduct 46 | 47 | We strive for an open community. Please read over our [CODE OF CONDUCT](../master/CODE_OF_CONDUCT.md) 48 | 49 | # Citation 50 | 51 | If you use the Pommerman environment in your research, please cite us using the [bibtex file](../master/docs/pommerman.bib) in docs. 52 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.2.0 2 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | from recommonmark.parser import CommonMarkParser 2 | 3 | source_parsers = { 4 | '.md': CommonMarkParser, 5 | } 6 | 7 | source_suffix = ['.rst', '.md'] -------------------------------------------------------------------------------- /docs/CLI.md: -------------------------------------------------------------------------------- 1 | # Command-Line Interface 2 | Pommerman comes with a CLI tool that allows you to quickly launch a game. This can be used to test how well a trained agent plays against other agents. 3 | 4 | Call this with a config, a game, and a list of agents. The script will start separate threads to operate the agents and then report back the result. 5 | 6 | An example with all four test agents running FFA: 7 | ```bash 8 | pom_battle --agents=test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0 9 | ``` 10 | An example with one player, two random agents, and one test agent: 11 | ```bash 12 | pom_battle --agents=player::arrows,test::agents.SimpleAgent,random::null,random::null --config=PommeFFACompetition-v0 13 | ``` 14 | An example with a docker agent: 15 | ```bash 16 | pom_battle --agents=player::arrows,docker::pommerman/test-agent,random::null,random::null --config=PommeFFACompetition-v0 17 | ``` 18 | ![pom_battle Output](./assets/pom_battle_1.png)*Output of the 1st example* 19 | ## Configurations and Options 20 | To get a list of active options you can run `pom_battle --help`. The current list of parameters is: 21 | * `--game`: Allows you to change the game your agent plays. The default is `pommerman`. Currently only supports `pommerman` 22 | 23 | * `--config`: Changes the type of game the agents will play. The default is `PommeFFACompetition-v0`. Other options are `PommeFFACompetition-v0`, `PommeFFAFast-v0`, `PommeFFA-v1`, `PommeRadio-v2`, `PommeTeam-v0`, `PommeTeamFast-v0` and `OneVsOne-v0`. 24 | 25 | * `--agents`: Defines the agents participating in the game. The default is 4 simple agents. To changes the agents in the game use a comma delineated list of agent. 26 | 27 | * `--agent_env_vars`: Sends enviroment variables to to Docker agents and only Docker agents. The default is "". An example is '0:foo=bar:baz=lar,3:foo=lam', which would send two arguments to Docker Agent 0 and one to Docker Agent 3. 28 | 29 | * `--record_pngs_dir`: Defines the directory to record PNGs of the game board for each step. The default is `None`. If the directory doesn't exist, it will be created. The PNGs are saved with the format `%m-%d-%y_%-H-%M-%S_(STEP).png` (`04-17-18_15-54-39_3.png`). 30 | 31 | * `--record_json_dir`: Defines the directory to record the JSON representations of the game. The default is `None`. If the directory doesn't exist, it will be created. 32 | 33 | * `--render`: Allows you to turn of rendering of the game. The default is `False`. 34 | 35 | * `--render_mode`: Changes the render mode of the game. The default is `human`. Available options are `human`, `rgb_pixel`, and `rgb_array`. 36 | 37 | * `--game_state_file`: Changes the initial state of the game. The file is expected to be in JSON format. The format of the file is defined below. 38 | * `agents`: List of agents serialized (agent_id, is_alive, position, ammo, blast_strength, can_kick) 39 | * `board`: Board matrix topology (board_size2) 40 | * `board_size`: Board size 41 | * `bombs`: List of bombs serialized (position, bomber_id, life, blast_strength, moving_direction) 42 | * `flames`: List of flames serialized (position, life) 43 | * `items`: List of item by position 44 | * `step_count`: Step count 45 | 46 | ![pom_battle Help](./assets/pom_battle_2.png)*Output of help from pom_battle* 47 | ## Training an agent using Tensorforce 48 | Pommerman comes with a trainable agent out of the box. The agent uses a Proximal Policy Optimization (PPO) algorithm. This agent is a good place to start if you want to train your own agent. All of the options that are available in the CLI tool are available in the Tensorforce CLI. 49 | An example with all three simple agents running FFA: 50 | ```bash 51 | pom_tf_battle --agents=tensorforce::ppo,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0 52 | ``` -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | # Pre-requisites 4 | 5 | * [Python 3.6.0](https://www.python.org/downloads/release/python-360/)+ (including `pip`) 6 | * [Docker](https://www.docker.com/) (only needed for `DockerAgent`) 7 | * [virtualenv](https://virtualenv.pypa.io/en/stable/) (optional, for isolated Python environment) 8 | 9 | # Installation 10 | 11 | * Clone the repository 12 | ``` 13 | $ git clone https://github.com/MultiAgentLearning/playground ~/playground 14 | ``` 15 | 16 | ## Pip 17 | 18 | * **OPTIONAL**: Setup an isolated virtual Python environment by running the following commands 19 | ``` 20 | $ virtualenv ~/venv 21 | ``` 22 | This environment needs to be activated for usage. Any package installations will now persist 23 | in this virtual environment folder only. 24 | ``` 25 | source ~/venv/bin/activate 26 | ``` 27 | 28 | * Install the `pommerman` package. This needs to be done every time the code is updated to get the 29 | latest modules 30 | ``` 31 | $ cd ~/playground 32 | $ pip install -U . 33 | ``` 34 | 35 | ## Conda 36 | 37 | * Install the `pommerman` environment. 38 | ``` 39 | $ cd ~/playground 40 | $ conda env create -f env.yml 41 | $ conda activate pommerman 42 | ``` 43 | 44 | * To update the environment 45 | ``` 46 | $ conda env update -f env.yml --prune 47 | ``` 48 | 49 | # Examples 50 | 51 | ## A Simple Example 52 | 53 | The [simple_ffa_run.py](../examples/simple_ffa_run.py) runs a sample Free-For-All game with two 54 | [SimpleAgent](../pommerman/agents/simple_agent.py)s and two [RandomAgent](../pommerman/agents/random_agent.py)s 55 | on the board. 56 | 57 | ## Using A Docker Agent 58 | 59 | The above example can be extended to use [DockerAgent](../pommerman/agents/docker_agent.py) instead of a 60 | [RandomAgent](../pommerman/agents/random_agent.py). [examples/docker-agent](../examples/docker-agent) contains 61 | the code to wrap a [SimpleAgent](../pommerman/agents/simple_agent.py) inside Docker. 62 | 63 | 64 | * We will build a docker image with the name "pommerman/simple-agent" using the `Dockerfile` provided. 65 | ``` 66 | $ cd ~/playground 67 | $ docker build -t pommerman/simple-agent -f examples/docker-agent/Dockerfile . 68 | ``` 69 | 70 | * The agent list seen in the previous example can now be updated. Note that a `port` argument (of an unoccupied port) is 71 | needed to expose the HTTP server. 72 | ```python 73 | agent_list = [ 74 | agents.SimpleAgent(), 75 | agents.RandomAgent(), 76 | agents.SimpleAgent(), 77 | agents.DockerAgent("pommerman/simple-agent", port=12345) 78 | ] 79 | ``` 80 | 81 | ## Playing an interactive game 82 | 83 | You can also play the game! See below for an example where one [PlayerAgent](../pommerman/agents/player_agent.py) 84 | controls with the `arrow` keys and the other with the `wasd` keys. 85 | 86 | 87 | ```python 88 | agent_list = [ 89 | agents.SimpleAgent(), 90 | agents.PlayerAgent(agent_control="arrows"), # arrows to move, space to lay bomb 91 | agents.SimpleAgent(), 92 | agents.PlayerAgent(agent_control="wasd"), # W,A,S,D to move, E to lay bomb 93 | ] 94 | ``` 95 | 96 | ## Submitting an Agent. 97 | 98 | In order to submit an agent, you need to create an account at 99 | [pommerman.com](https://pommerman.com). You can do this by registering with your 100 | email address or logging in with your Github account. 101 | 102 | Once you have created an account, login and navigate to your profile - 103 | [Pommerman profile](https://pommerman.com/me). To submit an agent, fill in the 104 | form with your agent's name, an ssh git url, and the path to your agent's Docker 105 | file from the github repository's top level directory. Please make sure that 106 | your docker file builds properly beforehand. 107 | 108 | Next, you will need to add an ssh deploy key to your account so we can access 109 | your agent's repo. This is provided to you along with instructions after 110 | registering the agent. 111 | 112 | Before doing all of this, note that we use Docker to run the agents. The best example for making a Docker agent is in the repo in the examples/docker-agent directory. This *must* work in order to properly enter an agent, and we suggest using the accompanying pom_battle cli command (or equivalently run_battle.py) to test out your Docker implementation. If you are having trouble still, feel free to ask questions on our Discord channel. 113 | 114 | ## NIPS Competition Information: 115 | 116 | Each competitor will submit two agents that will be teamed together. These agents can be the same one and can be in the same repository even, but we expect there to be two submissions for each entrant. We additionally expect there to be notable differences among the submissions. Similarly to the June 3rd competition, we will examine the code before running it on our servers and collusion will not be tolerated. 117 | 118 | The competition will be held live at NIPS 2018 in Montreal. We would prefer it if serious entrants were there, but that is not a requirement. 119 | 120 | ## Actually Getting Started 121 | 122 | Here is some information that may help you more quickly develop successful agents: 123 | 124 | 1. Two agents cannot move to the same cell. They will bounce back to their prior places if they try. The same applies to bombs. If an agent and a bomb both try to move to the same space, then the agent will succeed but the bomb will bounce back. 125 | 2. If an agent with the can_kick ability moves to a cell with a bomb, then the bomb is kicked in the direction from which the agent came. The ensuing motion will persist until the bomb hits a wall, another agent, or the edge of the grid. 126 | 3. When a bomb explodes, it immediately reaches its full blast radius. If there is an agent or a wall in the way, then it prematurely ends and destroys that agent or wall. 127 | 4. If a bomb is in the vicinity of an explosion, then it will also go off. In this way, bombs can chain together. 128 | 5. The SimpleAgent is very useful as a barometer for your own efforts. Four SimpleAgents playing against each other have a win rate of ~18% each with the remaining ~28% of the time being a tie. Keep in mind that it _can_ destroy itself. That can skew your own results if not properly understood. 129 | -------------------------------------------------------------------------------- /docs/assets/ion_client.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_client.gif -------------------------------------------------------------------------------- /docs/assets/ion_client_cli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_client_cli.png -------------------------------------------------------------------------------- /docs/assets/ion_client_py.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_client_py.png -------------------------------------------------------------------------------- /docs/assets/ion_server.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_server.gif -------------------------------------------------------------------------------- /docs/assets/ion_server_cli.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_server_cli.png -------------------------------------------------------------------------------- /docs/assets/ion_server_py.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/ion_server_py.png -------------------------------------------------------------------------------- /docs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/logo.png -------------------------------------------------------------------------------- /docs/assets/pom_battle_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/pom_battle_1.png -------------------------------------------------------------------------------- /docs/assets/pom_battle_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/pom_battle_2.png -------------------------------------------------------------------------------- /docs/assets/pom_env_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/docs/assets/pom_env_output.png -------------------------------------------------------------------------------- /docs/competitions.md: -------------------------------------------------------------------------------- 1 | # Competitions 2 | ## Submitting an Agent 3 | In order to submit an agent, you need to create an account at [Pommerman's website](https://pommerman.com). You can do this by registering with your email address or logging in with your GitHub account. 4 | Once you have created an account, login and navigate to your profile - [Pommerman profile](https://pommerman.com/me). To submit an agent, fill in the form with your agent's name, an SSH GIT url, and the path to your agent's Docker file from the GitHub repository's top level directory. Please make sure that your Docker file builds properly beforehand. 5 | Next, you will need to add an SSH deploy key to your account so we can access your agent's repository. This is provided to you along with instructions after registering the agent. 6 | Before doing all of this, note that we use Docker to run the agents. The best example for making a Docker agent is in the repo in the `examples/docker-agent` directory. This *must* work in order to properly enter an agent, and we suggest using the accompanying `pom_battle_cli` command to test out your Docker implementation. If you are having trouble still, feel free to ask questions on our [Discord channel](index.md#How%20do%20I%20get%20help%20%3F/). 7 | ## Rules and Submission 8 | 1. Each submission should have a Docker file per agent. For FFA and Team Random, there is one agent; For Team Radio, there will be two agents. Instructions and an example for building Docker containers from trained agents can be found in our repository. 9 | 2. The positions for each agent will be randomized modulo that each agent's position will be opposite from its teammate's position. 10 | 3. The agents should follow the prescribed convention specified in our example code and expose an "act" endpoint that accepts a dictionary of observations. Because we are using Docker containers and http requests, we do not have any requirements for programming language or framework. There will be ample opportunity to test this on our servers beforehand. 11 | 4. If an agent has a bug in its software that causes its container to crash, that will count as a loss for that agent's team. 12 | 5. The expected response from the agent will be a single integer in [0, 5] representing which of the six actions that agent would like to take, as well as two more integers in [1, 8. representing the message if applicable. 13 | 6. If an agent does not respond in an appropriate time limit (100ms), then we will automatically issue them the Stop action and have them send out the message (0, 0) if applicable. 14 | 7. The game setup as described does not allow for the agents to share a centralized controller. If, however, some clever participant figured out a way to force this, they will be subsequently disqualified. 15 | 8. Agents submitted by organizers can participate in the competitions but are not eligible for prizes. They will be excluded from consideration in the final standings. 16 | 9. Competitions will run according to a double elimination style with two brackets. Each battle will be best of three, with the winner moving on and the loser suffering a defeat. Any draws will be replayed. At the end, we will have a clear top four. 17 | ## NIPS 2018 18 | Each competitor will submit two agents that will be teamed together. These agents can be the same one and can be in the same repository even, but we expect there to be two submissions for each entrant. We additionally expect there to be notable differences among the submissions. Similarly to the June 3rd competition, we will examine the code before running it on our servers and collusion will not be tolerated. 19 | The competition will be held live at NIPS 2018 in Montreal. We would prefer it if serious entrants were there, but that is not a requirement. -------------------------------------------------------------------------------- /docs/environment.md: -------------------------------------------------------------------------------- 1 | # Environment Reference 2 | ## Parameters 3 | These are parameters for `pommerman.make`: 4 | ### Configurations 5 | Configurations that are included in Pommerman. This is the `config_id` parameter of `pommerman.make` as a String: 6 | 7 | * `PommeFFACompetition-v0` - In this all agents are against each other 8 | * `PommeTeamCompetition-v0` - In this teams of two agents each are set against each other. The teams are [Agent0, Agent2] and [Agent1, Agent3] 9 | * `OneVsOne-v0` - In this two agents are against each other 10 | * `PommeFFA-v1` - In this all agents are against each other while the board collapses uniformly by replacing the outermost square with walls 11 | * `PommeTeamCompetition-v1` - This is similar to PommeFFA-v1 but with Teams instead of being Free-For-All 12 | * `PommeRadio-v2` - This is similar to `PommeTeamCompetition-v0` but the agents can send a list of two integers in the range [1, 8] to their teammates on every turn. 13 | 14 | ### Agents 15 | Agent classes that are included in Pommerman. This is the `agent_list` parameter of `pommerman.make` as a list of 4 agent classes. All of these can be found under `pommerman.agents`: 16 | 17 | * `base_agent` - This is the class that all agents inherit from 18 | * `random_agent` - This randomly selects an action and plays it out 19 | * `simple_agent` - This is an agent based on a non-ML approach (This agent is prone to killing itself) 20 | * `tensorforce_agent` - This agent calls [TensorForce](https://github.com/reinforceio/tensorforce) to return an action 21 | * `player_agent` - This is an agent controlled by an keyboard. You can change the control scheme by feeding the `agent_control` parameter as either: `"arrows"` for Arrows = Move and Space = Bomb or `"wasd"` for W,A,S,D = Move, E = Bomb 22 | * `http_agent` - This agent outputs to accepts input in the form of a REST requests to it 23 | * `docker_agent` - This agent outputs and accepts inputs to an agent wrapped inside a Docker container 24 | ## Output 25 | ![Pommerman-enviroment Output](./assets/pom_env_output.png) *This is the output from env.step()* 26 | 27 | It has the following format: 28 | 29 | * Board: The 11x11 board is a numpy array where each value corresponds to one of the representations below. The first element in this 2D array corresponds to the configuration of topmost row of the board and so on. 30 | * Passage = 0 31 | * Wooden Wall = 1 32 | * Rigid Wall = 2 33 | * Bomb = 3 34 | * Flames = 4 35 | * Fog = 5: This is only applicable in the partially observed (2v2 Team Radio) setting. 36 | * Extra Bomb Power-Up = 6: adds ammo. 37 | * Increase Range Power-Up = 7: increases the blast_strength 38 | * Can Kick Power-Up = 8: can kick bombs by touching them. 39 | * AgentDummy = 9 40 | * Agent0 = 10 41 | * Agent1 = 11 42 | * Agent2 = 12 43 | * Agent3 = 13 44 | * Position: A tuple of Ints of (X position, Y position) 45 | * Ammo: An Int representing the amount of ammo this agent has. 46 | * Blast Strength: An Int representing the blast strength of this agent's bombs. 47 | * Can Kick: Whether this agent can kick bombs. This ability is gained by stepping on the can kick power-up. 48 | * Teammate: One Int in [9, 13]. Which agent is this agent's teammate. In the FFA game, this is the AgentDummy. 49 | * Enemies: A list of three Ints, each in [9, 13]. Which agents are this agent's enemies. There are three here to be amenable to all variants of the game. When there are only two enemies like in the team competitions, the last Int will be the AgentDummy to reflect the fact that there are only two enemies. 50 | * Bomb Blast Strength: An 11x11 numpy int array representing the bombs' blast strengths in the agent's view. Everything outside of its view will be fogged out. 51 | * Bomb Life: An 11x11 numpy int array representing the bombs' life in the agent's view. Everything outside of its view will be fogged out. 52 | * Bomb Movement Direction: An 11x11 numpy int array representing the bombs' movement direction (in terms of an agent's action space: 1 -> up, 2 -> down etc...) in the agent's view. Everything outside of its view will be fogged out. 53 | * Flame Life: An 11x11 numpy int array representing the flames' life in the agent's view. Everything outside of its view will be fogged out. 54 | * Message: (Team Radio only) A list of two Ints, each in [0, 8]. The message being relayed from the teammate. Both ints are zero when a teammate is dead or it's the first step. Otherwise they are in [1, 8]. 55 | -------------------------------------------------------------------------------- /docs/game_rules.md: -------------------------------------------------------------------------------- 1 | # Rules and Submission 2 | 3 | 1) Each submission should have a Docker file per agent. For FFA and Team Random, there is one agent; For Team Radio, there will be two agents. Instructions and an example for building Docker containers from trained agents can be found in our repository. 4 | 5 | 2) The positions for each agent will be randomized modulo that each agent's position will be opposite from its teammate's position. 6 | 7 | 3) The agents should follow the prescribed convention specified in our example code and expose an "act" endpoint that accepts a dictionary of observations. Because we are using Docker containers and http requests, we do not have any requirements for programming language or framework. There will be ample opportunity to test this on our servers beforehand. 8 | 9 | 4) If an agent has a bug in its software that causes its container to crash, that will count as a loss for that agent's team. 10 | 11 | 5) The expected response from the agent will be a single integer in [0, 5] representing which of the six actions that agent would like to take, as well as two more integers in [1, 8] representing the message if applicable. 12 | 13 | 6) If an agent does not respond in an appropriate time limit (100ms), then we will automatically issue them the Stop action and have them send out the message (0, 0) if applicable. 14 | 15 | 7) The game setup as described does not allow for the agents to share a centralized controller. If, however, some clever participant figured out a way to force this, they will be subsequently disqualified. 16 | 17 | 8) Agents submitted by organizers can participate in the competitions but are not eligible for prizes. They will be excluded from consideration in the final standings. 18 | 19 | 9) Competitions will run according to a double elimination style with two brackets. Each battle will be best of three, with the winner moving on and the loser suffering a defeat. Any draws will be replayed. At the end, we will have a clear top four. 20 | -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | ## Pre-requisites 3 | * [Python 3.6.0](https://www.python.org/downloads/release/python-360/)+ (including `pip`) 4 | * [Docker](https://www.docker.com/) (only needed for `DockerAgent`) 5 | * [virtualenv](https://virtualenv.pypa.io/en/stable/) (optional, for isolated Python environment) 6 | ## Installation 7 | * Clone the repository 8 | ``` 9 | $ git clone https://github.com/MultiAgentLearning/playground ~/playground 10 | ``` 11 | ## Pip 12 | * **OPTIONAL**: Setup an isolated virtual Python environment by running the following commands 13 | ``` 14 | $ virtualenv ~/venv 15 | ``` 16 | This environment needs to be activated for usage. Any package installations will now persist 17 | in this virtual environment folder only. 18 | ``` 19 | source ~/venv/bin/activate 20 | ``` 21 | * Install the `pommerman` package. This needs to be done every time the code is updated to get the 22 | latest modules 23 | ``` 24 | $ cd ~/playground 25 | $ pip install -U . 26 | ``` 27 | ## Conda 28 | * Install the `pommerman` environment. 29 | ``` 30 | $ cd ~/playground 31 | $ conda env create -f env.yml 32 | $ conda activate pommerman 33 | ``` 34 | * To update the environment 35 | ``` 36 | $ conda env update -f env.yml --prune 37 | ``` 38 | ## Examples 39 | ### Free-For-All 40 | The code below runs a sample Free-For-All game with two **SimpleAgent**'s and two **RandomAgent**'s on the board. 41 | ```python 42 | #!/usr/bin/python 43 | """A simple Free-For-All game with Pommerman.""" 44 | import pommerman 45 | from pommerman import agents 46 | 47 | 48 | def main(): 49 | """Simple function to bootstrap a game""" 50 | # Print all possible environments in the Pommerman registry 51 | print(pommerman.REGISTRY) 52 | 53 | # Create a set of agents (exactly four) 54 | agent_list = [ 55 | agents.SimpleAgent(), 56 | agents.RandomAgent(), 57 | agents.SimpleAgent(), 58 | agents.RandomAgent(), 59 | # agents.DockerAgent("pommerman/simple-agent", port=12345), 60 | ] 61 | # Make the "Free-For-All" environment using the agent list 62 | env = pommerman.make('PommeFFACompetition-v0', agent_list) 63 | 64 | # Run the episodes just like OpenAI Gym 65 | for i_episode in range(1): 66 | state = env.reset() 67 | done = False 68 | while not done: 69 | env.render() 70 | actions = env.act(state) 71 | state, reward, done, info = env.step(actions) 72 | print('Episode {} finished'.format(i_episode)) 73 | env.close() 74 | 75 | 76 | if __name__ == '__main__': 77 | main() 78 | ``` 79 | ### Docker Agent 80 | The above example can be extended to use **DockerAgent** instead of a **RandomAgent**. The code below wraps a **SimpleAgent** inside Docker. 81 | ```python 82 | #!/usr/bin/python 83 | """Implementation of a simple deterministic agent using Docker.""" 84 | 85 | from pommerman import agents 86 | from pommerman.runner import DockerAgentRunner 87 | 88 | 89 | class MyAgent(DockerAgentRunner): 90 | """An example Docker agent class""" 91 | 92 | def __init__(self): 93 | self._agent = agents.SimpleAgent() 94 | 95 | def act(self, observation, action_space): 96 | return self._agent.act(observation, action_space) 97 | 98 | 99 | def main(): 100 | """Inits and runs a Docker Agent""" 101 | agent = MyAgent() 102 | agent.run() 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | ``` 108 | * We will build a docker image with the name `pommerman/simple-agent` using the `Dockerfile` provided. 109 | ```shell 110 | $ cd ~/playground 111 | $ docker build -t pommerman/simple-agent -f examples/docker-agent/Dockerfile . 112 | ``` 113 | 114 | * The agent list seen in the previous example can now be updated. Note that a `port` argument (of an unoccupied port) is 115 | needed to expose the HTTP server. 116 | ```python 117 | #!/usr/bin/python 118 | agent_list = [ 119 | agents.SimpleAgent(), 120 | agents.RandomAgent(), 121 | agents.SimpleAgent(), 122 | agents.DockerAgent("pommerman/simple-agent", port=12345) 123 | ] 124 | ``` 125 | ## Playing an interactive game 126 | You can also play the game! See below for an example where one **PlayerAgent** controls with the `Arrow` keys and the other with the `WASD` keys. 127 | ```python 128 | #!/usr/bin/python 129 | agent_list = [ 130 | agents.SimpleAgent(), 131 | agents.PlayerAgent(agent_control="arrows"), # Arrows = Move, Space = Bomb 132 | agents.SimpleAgent(), 133 | agents.PlayerAgent(agent_control="wasd"), # W,A,S,D = Move, E = Bomb 134 | ] 135 | ``` 136 | 137 | ## NeurIPS 2018 Docker Agents 138 | 139 | To test your agent against 2018 NeurIPS competition agents you can download an agent using `docker pull`... 140 | 141 | ``` 142 | docker pull multiagentlearning/hakozakijunctions 143 | ``` 144 | 145 | The following agents are available: `multiagentlearning/hakozakijunctions`, `multiagentlearning/dypm.1`, `multiagentlearning/dypm.2`, `multiagentlearning/navocado`, `multiagentlearning/skynet955`, `multiagentlearning/eisenach` 146 | 147 | To use an agent once you have pulled it from docker hub use a command like the following. 148 | 149 | ``` 150 | pom_battle --agents=MyAgent,docker::multiagentlearning/navocado,player::arrows,docker::multiagentlearning/eisenach --config=PommeRadioCompetition-v2 151 | ``` 152 | 153 | ## Useful information 154 | 1. Two agents cannot move to the same cell. They will bounce back to their prior places if they try. The same applies to bombs. If an agent and a bomb both try to move to the same space, then the agent will succeed but the bomb will bounce back. 155 | 2. If an agent with the can_kick ability moves to a cell with a bomb, then the bomb is kicked in the direction from which the agent came. The ensuing motion will persist until the bomb hits a wall, another agent, or the edge of the grid. 156 | 3. When a bomb explodes, it immediately reaches its full blast radius. If there is an agent or a wall in the way, then it prematurely ends and destroys that agent or wall. 157 | 4. If a bomb is in the vicinity of an explosion, then it will also go off. In this way, bombs can chain together. 158 | 5. The SimpleAgent is very useful as a barometer for your own efforts. Four SimpleAgents playing against each other have a win rate of ~18% each with the remaining ~28% of the time being a tie. Keep in mind that it **can** destroy itself. That can skew your own results if not properly understood. 159 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Pommerman 2 | ![Pommerman](./assets/logo.png) 3 | ## Introduction 4 | We are machine learning researchers exploring how to train agents that can operate in environments with other learning agents, both cooperatively and adversarially. Whether you are a student or a well-oiled machine, we want you to help us advance the state of the art by building agents. 5 | 6 | ## How do I get help ? 7 | Join our Discord chat and check out the Github, which has all the instructions and details for training an agent in the environment and then submitting it to compete on [competitions](competitions.md). 8 | 9 | * [GitHub](https://github.com/MultiAgentLearning/playground) 10 | * [Discord](https://discord.gg/wjVJEDc) 11 | 12 | ## What's in the docs ? 13 | * [Getting Started](getting_started.md) 14 | * [Command Line Interface](CLI.md) 15 | * [Multiplayer](multiplayer.md) 16 | * [Competitions](competitions.md) 17 | * [Environment Reference](environment.md) 18 | * [Research Topics](research.md) -------------------------------------------------------------------------------- /docs/multiplayer.md: -------------------------------------------------------------------------------- 1 | # Multiplayer 2 | The multiplayer component of Pommerman has two components a Client and a Server: 3 | ## Client 4 | ### CLI 5 | This is primarily for a single or a few matches. It can be launched using `ion_client`. 6 | #### Output 7 | ![Client CLI](./assets/ion_client_cli.png) 8 | ### API 9 | This is for running a sizable amount of matches. It can be accessed by `pommerman.network.ion_client`. 10 | 11 | #### Code Example 12 | ```python 13 | #!/usr/bin/python 14 | import pommerman 15 | 16 | matches=10 # Amount of matches to play 17 | 18 | net = pommerman.network.client.Network("play.pommerman.com:5050") # This is essentially a single player to the server. If you want to have multiple players use one for each player. 19 | result = {"reward":[], "match_id":[]} # Note: We save match ID so we can view replays later 20 | for i in range(matches): 21 | reward, match_id = pommerman.network.client.match(network=net, room=False, agent=pommerman.agents.SimpleAgent) 22 | # or pommerman.network.client.match(network=net, room="someroom", agent=pommerman.agents.SimpleAgent) 23 | result["reward"].append(reward) 24 | result["match_id"].append(match_id) 25 | print(f"Average reward: {sum(result['reward'])/len(result['reward'])}") 26 | print("The match IDs of played matches:") 27 | for i in range(matches): 28 | print(f"{i+1}. {result['match_id'][i]}") 29 | ``` 30 | #### Output 31 | ![Client API](./assets/ion_client_py.png) 32 | ## Server 33 | ### CLI 34 | This launches a Server. It can be launched using `ion_server`. 35 | #### Output 36 | ![Server CLI](./assets/ion_server_cli.png) 37 | ### API 38 | This programatically launches a Server. It can be accessed by `pommerman.network.server`. 39 | #### Code Example 40 | ```python 41 | #!/usr/bin/python 42 | import pommerman 43 | 44 | if __name__ == '__main__': 45 | while True: # Re-run the server incase it crashes (This is not actually recommended because you should investigate the error) 46 | try: 47 | pommerman.network.server.run(port=5050, max_players=8, timeout=2, mode="PommeTeamCompetition-v0", ui_en=True) 48 | except Exception as e: 49 | print(f"The server has crashed. The exception was {e}") 50 | ``` 51 | #### Output 52 | ![Server API](./assets/ion_server_py.png) -------------------------------------------------------------------------------- /docs/pommerman.bib: -------------------------------------------------------------------------------- 1 | @article{DBLP:journals/corr/abs-1809-07124, 2 | author = {Cinjon Resnick and 3 | Wes Eldridge and 4 | David Ha and 5 | Denny Britz and 6 | Jakob Foerster and 7 | Julian Togelius and 8 | Kyunghyun Cho and 9 | Joan Bruna}, 10 | title = {Pommerman: {A} Multi-Agent Playground}, 11 | journal = {CoRR}, 12 | volume = {abs/1809.07124}, 13 | year = {2018}, 14 | url = {http://arxiv.org/abs/1809.07124}, 15 | archivePrefix = {arXiv}, 16 | eprint = {1809.07124}, 17 | timestamp = {Fri, 05 Oct 2018 11:34:52 +0200}, 18 | biburl = {https://dblp.org/rec/bib/journals/corr/abs-1809-07124}, 19 | bibsource = {dblp computer science bibliography, https://dblp.org} 20 | } 21 | 22 | -------------------------------------------------------------------------------- /docs/research.md: -------------------------------------------------------------------------------- 1 | # Research 2 | 1. Proximal Policy Optimization (PPO) [https://arxiv.org/abs/1707.06347](https://arxiv.org/abs/1707.06347) 3 | 2. Multi-Agent DDPG [https://github.com/openai/maddpg](https://github.com/openai/maddpg) 4 | 3. Monte Carlo Tree Search [https://gnunet.org/sites/default/files/Browne%20et%20al%20-%20A%20survey%20of%20MCTS%20methods.pdf](https://gnunet.org/sites/default/files/Browne%20et%20al%20-%20A%20survey%20of%20MCTS%20methods.pdf) 5 | 4. Monte Carlo Tree Search and Reinforcement Learning [https://www.jair.org/media/5507/live-5507-10333-jair.pdf](https://www.jair.org/media/5507/live-5507-10333-jair.pdf) 6 | 5. Cooperative Multi-Agent Learning [https://link.springer.com/article/10.1007/s10458-005-2631-2](https://link.springer.com/article/10.1007/s10458-005-2631-2) 7 | 6. Opponent Modeling in Deep Reinforcement Learning [http://www.umiacs.umd.edu/~hal/docs/daume16opponent.pdf](http://www.umiacs.umd.edu/~hal/docs/daume16opponent.pdf) 8 | 7. Machine Theory of Mind [https://arxiv.org/pdf/1802.07740.pdf](https://arxiv.org/pdf/1802.07740.pdf) 9 | 8. Coordinated Multi-Agent Imitation Learning [https://arxiv.org/pdf/1703.03121.pdf](https://arxiv.org/pdf/1703.03121.pdf) 10 | 9. Deep Reinforcement Learning from Self-Play in Imperfect-Information Games [https://arxiv.org/pdf/1603.01121.pdf](https://arxiv.org/pdf/1603.01121.pdf) and[http://proceedings.mlr.press/v37/heinrich15.pdf](http://proceedings.mlr.press/v37/heinrich15.pdf) 11 | 10. Autonomous Agents Modelling Other Agents [http://www.cs.utexas.edu/~pstone/Papers/bib2html-links/AIJ18-Albrecht.pdf](http://www.cs.utexas.edu/~pstone/Papers/bib2html-links/AIJ18-Albrecht.pdf) -------------------------------------------------------------------------------- /env.yml: -------------------------------------------------------------------------------- 1 | name: pommerman 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.7.* 7 | - pip 8 | - docker-py=3.* 9 | - scipy=1.* 10 | - pillow=5.* 11 | - ruamel.yaml=0.* 12 | - flask=0.* 13 | - requests=2.* 14 | - astroid>=2 15 | - isort=4.3.* 16 | - pylint>=2 17 | - websockets=6.* 18 | - websocket-client=0.53.* 19 | - python-rapidjson=0.6.* 20 | - click=7.0 21 | - pyglet>=1.2.0 22 | - jsonschema<3.0.0 23 | - tabulate 24 | - unidecode 25 | - colorama 26 | - future 27 | - typed-ast>=1.3.0 28 | - pip: 29 | - gym~=0.10.5 30 | - jsonmerge~=1.5.1 31 | - python-cli-ui~=0.7.1 32 | - ./ 33 | -------------------------------------------------------------------------------- /examples/docker-agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | ADD ./examples/docker-agent /agent 4 | 5 | # @TODO to be replaced with `pip install pommerman` 6 | ADD . /pommerman 7 | RUN cd /pommerman && pip install . 8 | # end @TODO 9 | 10 | EXPOSE 10080 11 | 12 | ENV NAME Agent 13 | 14 | # Run app.py when the container launches 15 | WORKDIR /agent 16 | ENTRYPOINT ["python"] 17 | CMD ["run.py"] 18 | -------------------------------------------------------------------------------- /examples/docker-agent/run.py: -------------------------------------------------------------------------------- 1 | """Implementation of a simple deterministic agent using Docker.""" 2 | 3 | from pommerman import agents 4 | from pommerman.runner import DockerAgentRunner 5 | 6 | 7 | class MyAgent(DockerAgentRunner): 8 | '''An example Docker agent class''' 9 | 10 | def __init__(self): 11 | self._agent = agents.SimpleAgent() 12 | 13 | def init_agent(self, id, game_type): 14 | return self._agent.init_agent(id, game_type) 15 | 16 | def act(self, observation, action_space): 17 | return self._agent.act(observation, action_space) 18 | 19 | def episode_end(self, reward): 20 | return self._agent.episode_end(reward) 21 | 22 | def shutdown(self): 23 | return self._agent.shutdown() 24 | 25 | 26 | def main(): 27 | '''Inits and runs a Docker Agent''' 28 | agent = MyAgent() 29 | agent.run() 30 | 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /examples/simple_ffa_run.py: -------------------------------------------------------------------------------- 1 | '''An example to show how to set up an pommerman game programmatically''' 2 | import pommerman 3 | from pommerman import agents 4 | 5 | 6 | def main(): 7 | '''Simple function to bootstrap a game. 8 | 9 | Use this as an example to set up your training env. 10 | ''' 11 | # Print all possible environments in the Pommerman registry 12 | print(pommerman.REGISTRY) 13 | 14 | # Create a set of agents (exactly four) 15 | agent_list = [ 16 | agents.SimpleAgent(), 17 | agents.RandomAgent(), 18 | agents.SimpleAgent(), 19 | agents.RandomAgent(), 20 | # agents.DockerAgent("pommerman/simple-agent", port=12345), 21 | ] 22 | # Make the "Free-For-All" environment using the agent list 23 | env = pommerman.make('PommeFFACompetition-v0', agent_list) 24 | 25 | # Run the episodes just like OpenAI Gym 26 | for i_episode in range(1): 27 | state = env.reset() 28 | done = False 29 | while not done: 30 | env.render() 31 | actions = env.act(state) 32 | state, reward, done, info = env.step(actions) 33 | print('Episode {} finished'.format(i_episode)) 34 | env.close() 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /manager/app.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import celery_ as celery 5 | import docker 6 | from flask import Flask, jsonify, request 7 | import requests 8 | 9 | app = Flask(__name__) 10 | 11 | import pommerman 12 | 13 | 14 | # Game Manager and Servers 15 | @a.app.before_request 16 | def check_for_access(): 17 | incoming = request.get_json() 18 | if request.path != '/ping': 19 | incoming = request.get_json() 20 | access = incoming.get('access') 21 | game_manager_access = os.getenv('PLAYGROUND_GAME_MANAGER_ACCESS') 22 | if not access or access != game_manager_access: 23 | return jsonify(received=False, error="Access Denied"), 400 24 | 25 | 26 | # To Game Manager and Servers, from Web. 27 | @app.route('/ping', methods=['GET']) 28 | def ping(): 29 | return jsonify(success=True) 30 | 31 | 32 | # To Game Manager, from Web. 33 | @app.route('/test', methods=['POST']) 34 | def test(): 35 | """Build and run this docker agent locally.""" 36 | try: 37 | incoming = request.get_json() 38 | docker_build_path = incoming["docker_build_path"] 39 | github_repo = incoming["github_repo"] 40 | private_key = incoming["private_key"] 41 | name = incoming["name"] 42 | agent_id = incoming["agent_id"] 43 | user = incoming["user"] 44 | config = incoming["config"] 45 | celery.run_test(docker_build_path, github_repo, private_key, name, 46 | agent_id, user, config) 47 | return jsonify(received=True, error="") 48 | except Exception as e: 49 | return jsonify(received=False, error=e) 50 | 51 | 52 | # To Game Manager, from Web. 53 | @app.route('/request_battle', methods=['POST']) 54 | def request_battle(): 55 | """Process a request to do a battle among four agents. 56 | 57 | This is on the game manager server. The request includes the docker images 58 | for each agent, along with their agent id (aid) and the config. 59 | 60 | The execution order is: 61 | 1. Tell each of the four servers to pull their given agent's container. 62 | 2. They'll then send us back container_is_ready notifications. 63 | 3. When we receive all of those notifications, we'll fire run_battle here. 64 | 4. The run_battle script will then manage speaking to each of the servers. 65 | 5. After the game is over, a result will be sent back to the web server. 66 | """ 67 | try: 68 | incoming = request.get_json() 69 | agents = [{ 70 | 'docker_image': 71 | incoming.get('docker_image_agent_%d' % agent_id), 72 | 'aid': 73 | incoming.get('aid_%d' % agent_id), 74 | 'agent_id': 75 | agent_id 76 | } for agent_id in range(4)] 77 | battle_info = incoming['config'] 78 | battle_info += '-%d-%d-%d-%d' % [agent['aid'] for agent in agents] 79 | success, message = notify_containers(agents, battle_info) 80 | if success: 81 | return jsonify(success=True, error="") 82 | else: 83 | return jsonify(success=False, error=message) 84 | except Exception as e: 85 | return jsonify(success=False, error=e) 86 | 87 | 88 | def notify_containers(agents, battle_info): 89 | """Tell the servers to pull and start the given containers.""" 90 | for agent in enumerate(agents): 91 | if pommerman.helpers.use_game_servers: 92 | server = pommerman.helpers.game_servers[agent['agent_id']] 93 | else: 94 | server = "http://localhost" 95 | 96 | port = "8000" 97 | url = ':'.join([server, port]) 98 | request_url = url + "/start_container" 99 | # This includes the aid, the docker_image, and the agent_id 100 | request_json = agent.copy() 101 | request_json["access"] = os.getenv('PLAYGROUND_GAME_MANAGER_ACCESS') 102 | request_json["battle_info"] = battle_info 103 | request_json["url"] = url 104 | requests.post(request_url, json=request_json) 105 | 106 | 107 | # From Game Manager, To Game Servers. 108 | @app.route('/start_container', methods=['POST']) 109 | def start_container(): 110 | """Server endpoint for requests to pull and then start containers.""" 111 | game_manager_url = os.getenv("PLAYGROUND_GAME_MANAGER_SERVER") + ":8000" 112 | 113 | try: 114 | incoming = request.get_json() 115 | 116 | # The battle's unique identifier, my url (server:port), and the docker 117 | # image. I'm going to send these back when I report we're good to go. 118 | agent_id = incoming["agent_id"] 119 | battle_info = incoming["battle_info"] 120 | docker_image = incoming["docker_image"] 121 | url = incoming["url"] 122 | 123 | client = docker.from_env() 124 | client.login( 125 | os.getenv("PLAYGROUND_DOCKER_LOGIN"), 126 | os.getenv("PLAYGROUND_DOCKER_PASSWORD")) 127 | logging.warn("Pulling the image %s..." % docker_image) 128 | img = client.images.pull(docker_image, tag="latest") 129 | 130 | if img: 131 | request_url = game_manager_url + "/container_is_ready" 132 | request_json = { 133 | 'aid': incoming['aid'], 134 | 'battle_info': battle_info, 135 | 'docker_image': docker_image, 136 | 'agent_id': agent_id 137 | } 138 | requests.post(request_url, json=request_json) 139 | else: 140 | pass 141 | except Exception as e: 142 | print("Failed to pull container: %s" % e) 143 | 144 | 145 | # From Game Servers, To Game Manager. 146 | @app.route('/container_is_ready', methods=['POST']) 147 | def container_is_ready(): 148 | """A ready container alert from a server came in. Feed this to celery.""" 149 | try: 150 | incoming = request.get_json() 151 | celery_.add_server_ready_notif(incoming) 152 | return jsonify(success=True, error="") 153 | except Exception as e: 154 | return jsonify(success=False, error=e) 155 | 156 | 157 | # From Game Manager, To Game Servers. 158 | @app.route('/run_container', methods=['POST']) 159 | def run_container(): 160 | incoming = request.get_json() 161 | docker_image = incoming['docker_image'] 162 | env_vars = incoming['env_vars'] 163 | port = incoming['port'] 164 | 165 | client = docker.from_env() 166 | client.login( 167 | os.getenv("PLAYGROUND_DOCKER_LOGIN"), 168 | os.getenv("PLAYGROUND_DOCKER_PASSWORD")) 169 | container = client.containers.run( 170 | docker_image, 171 | detach=True, 172 | auto_remove=True, 173 | ports={10080: port}, 174 | environment=env_vars) 175 | for line in container.logs(stream=True): 176 | print(line.decode("utf-8").strip()) 177 | 178 | 179 | if __name__ == '__main__': 180 | app.run() 181 | -------------------------------------------------------------------------------- /manager/app.wsgi: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | path_to_server_files = '/var/www/manager' 5 | sys.path.insert(0, path_to_server_files) 6 | activate_this = os.path.join(path_to_server_files, 'venv/bin/activate_this.py') 7 | with open(activate_this) as file_: 8 | exec(file_.read(), dict(__file__=activate_this)) 9 | 10 | from app import app as application 11 | -------------------------------------------------------------------------------- /manager/requirements.txt: -------------------------------------------------------------------------------- 1 | amqp==2.3.1 2 | billiard==3.5.0.3 3 | celery==5.2.2 4 | certifi==2018.4.16 5 | chardet==3.0.4 6 | click==6.7 7 | docker==3.3.0 8 | docker-pycreds==0.2.3 9 | Flask==0.12.4 10 | future==0.16.0 11 | gitdb2==2.0.3 12 | GitPython==2.1.10 13 | gym==0.10.5 14 | idna==2.6 15 | itsdangerous==0.24 16 | Jinja2==2.10 17 | kombu==4.2.0 18 | MarkupSafe==1.0 19 | numpy==1.14.3 20 | Pillow==8.3.2 21 | pommerman==0.2.0 22 | pyglet==1.4.0a1 23 | pytz==2018.4 24 | requests==2.18.4 25 | ruamel.yaml==0.15.37 26 | scipy==1.1.0 27 | six==1.11.0 28 | smmap2==2.0.3 29 | urllib3==1.22 30 | vine==1.1.4 31 | websocket-client==0.48.0 32 | Werkzeug==0.15.3 33 | -------------------------------------------------------------------------------- /manager/run_celery.sh: -------------------------------------------------------------------------------- 1 | celery worker -A celery_.celery --loglevel=info 2 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Pommerman 2 | theme: material 3 | markdown_extensions: 4 | - codehilite 5 | repo_url: http://github.com/MultiAgentLearning/playground 6 | site_url: http://pommerman.com/ 7 | site_description: 'Documentation for Pommerman.' 8 | 9 | nav: 10 | - Home: index.md 11 | - Getting Started: getting_started.md 12 | - Command Line Interface: CLI.md 13 | - Multiplayer: multiplayer.md 14 | - Competitions: competitions.md 15 | - Environment Reference: environment.md 16 | - Research Topics: research.md -------------------------------------------------------------------------------- /notebooks/Playground.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pommerman Demo.\n", 8 | "\n", 9 | "This notebook demonstrates how to train Pommerman agents. Please let us know at support@pommerman.com if you run into any issues." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "import sys\n", 20 | "import numpy as np\n", 21 | "\n", 22 | "from pommerman.agents import SimpleAgent, RandomAgent, PlayerAgent, BaseAgent\n", 23 | "from pommerman.configs import ffa_v0_fast_env\n", 24 | "from pommerman.envs.v0 import Pomme\n", 25 | "from pommerman.characters import Bomber\n", 26 | "from pommerman import utility" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Random agents\n", 34 | "\n", 35 | "The following codes instantiates the environment with four random agents who take actions until the game is finished. (This will be a quick game.)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "\u001b[33mWARN: gym.spaces.Box autodetected dtype as . Please provide explicit dtype.\u001b[0m\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "# Instantiate the environment\n", 53 | "config = ffa_v0_fast_env()\n", 54 | "env = Pomme(**config[\"env_kwargs\"])" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# Add four random agents\n", 64 | "agents = {}\n", 65 | "for agent_id in range(4):\n", 66 | " agents[agent_id] = RandomAgent(config[\"agent\"](agent_id, config[\"game_type\"]))\n", 67 | "env.set_agents(list(agents.values()))\n", 68 | "env.set_init_game_state(None)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "{'result': , 'winners': [3]}\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "# Seed and reset the environment\n", 86 | "env.seed(0)\n", 87 | "obs = env.reset()\n", 88 | "\n", 89 | "# Run the random agents until we're done\n", 90 | "done = False\n", 91 | "while not done:\n", 92 | " env.render()\n", 93 | " actions = env.act(obs)\n", 94 | " obs, reward, done, info = env.step(actions)\n", 95 | "env.render(close=True)\n", 96 | "env.close()\n", 97 | "\n", 98 | "print(info)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# Human Agents\n", 106 | "\n", 107 | "The following code runs the environment with 3 random agents and one agent with human input (use the arrow keys on your keyboard). This can also be called on the command line with:\n", 108 | "\n", 109 | "`python run_battle.py --agents=player::arrows,random::null,random::null,random::null --config=PommeFFACompetition-v0`\n", 110 | "\n", 111 | "You can also run this with SimpleAgents by executing:\n", 112 | "\n", 113 | "`python run_battle.py --agents=player::arrows,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0`" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "\u001b[33mWARN: gym.spaces.Box autodetected dtype as . Please provide explicit dtype.\u001b[0m\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "# Instantiate the environment\n", 131 | "config = ffa_v0_fast_env()\n", 132 | "env = Pomme(**config[\"env_kwargs\"])\n", 133 | "\n", 134 | "# Add 3 random agents\n", 135 | "agents = {}\n", 136 | "for agent_id in range(3):\n", 137 | " agents[agent_id] = RandomAgent(config[\"agent\"](agent_id, config[\"game_type\"]))\n", 138 | "\n", 139 | "# Add human agent\n", 140 | "agents[3] = PlayerAgent(config[\"agent\"](agent_id, config[\"game_type\"]), \"arrows\")\n", 141 | "\n", 142 | "env.set_agents(list(agents.values()))\n", 143 | "env.set_init_game_state(None)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "{'result': }\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "# Seed and reset the environment\n", 161 | "env.seed(0)\n", 162 | "obs = env.reset()\n", 163 | "\n", 164 | "# Run the agents until we're done\n", 165 | "done = False\n", 166 | "while not done:\n", 167 | " env.render()\n", 168 | " actions = env.act(obs)\n", 169 | " obs, reward, done, info = env.step(actions)\n", 170 | "env.render(close=True)\n", 171 | "env.close()\n", 172 | "\n", 173 | "# Print the result\n", 174 | "print(info)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "# Training an Agent\n", 182 | "\n", 183 | "The following code uses Tensorforce to train a PPO agent. This is in the train_with_tensorforce.py module as well." 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 8, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# Make sure you have tensorforce installed: pip install tensorforce\n", 193 | "from tensorforce.agents import PPOAgent\n", 194 | "from tensorforce.execution import Runner\n", 195 | "from tensorforce.contrib.openai_gym import OpenAIGym" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 9, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "def make_np_float(feature):\n", 205 | " return np.array(feature).astype(np.float32)\n", 206 | "\n", 207 | "def featurize(obs):\n", 208 | " board = obs[\"board\"].reshape(-1).astype(np.float32)\n", 209 | " bomb_blast_strength = obs[\"bomb_blast_strength\"].reshape(-1).astype(np.float32)\n", 210 | " bomb_life = obs[\"bomb_life\"].reshape(-1).astype(np.float32)\n", 211 | " position = make_np_float(obs[\"position\"])\n", 212 | " ammo = make_np_float([obs[\"ammo\"]])\n", 213 | " blast_strength = make_np_float([obs[\"blast_strength\"]])\n", 214 | " can_kick = make_np_float([obs[\"can_kick\"]])\n", 215 | "\n", 216 | " teammate = obs[\"teammate\"]\n", 217 | " if teammate is not None:\n", 218 | " teammate = teammate.value\n", 219 | " else:\n", 220 | " teammate = -1\n", 221 | " teammate = make_np_float([teammate])\n", 222 | "\n", 223 | " enemies = obs[\"enemies\"]\n", 224 | " enemies = [e.value for e in enemies]\n", 225 | " if len(enemies) < 3:\n", 226 | " enemies = enemies + [-1]*(3 - len(enemies))\n", 227 | " enemies = make_np_float(enemies)\n", 228 | "\n", 229 | " return np.concatenate((board, bomb_blast_strength, bomb_life, position, ammo, blast_strength, can_kick, teammate, enemies))\n", 230 | "\n", 231 | "class TensorforceAgent(BaseAgent):\n", 232 | " def act(self, obs, action_space):\n", 233 | " pass" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 11, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "\u001b[33mWARN: gym.spaces.Box autodetected dtype as . Please provide explicit dtype.\u001b[0m\n", 246 | "INFO:tensorflow:Graph was finalized.\n", 247 | "INFO:tensorflow:Running local_init_op.\n", 248 | "INFO:tensorflow:Done running local_init_op.\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "# Instantiate the environment\n", 254 | "config = ffa_v0_fast_env()\n", 255 | "env = Pomme(**config[\"env_kwargs\"])\n", 256 | "env.seed(0)\n", 257 | "\n", 258 | "# Create a Proximal Policy Optimization agent\n", 259 | "agent = PPOAgent(\n", 260 | " states=dict(type='float', shape=env.observation_space.shape),\n", 261 | " actions=dict(type='int', num_actions=env.action_space.n),\n", 262 | " network=[\n", 263 | " dict(type='dense', size=64),\n", 264 | " dict(type='dense', size=64)\n", 265 | " ],\n", 266 | " batching_capacity=1000,\n", 267 | " step_optimizer=dict(\n", 268 | " type='adam',\n", 269 | " learning_rate=1e-4\n", 270 | " )\n", 271 | ")\n", 272 | "\n", 273 | "# Add 3 random agents\n", 274 | "agents = []\n", 275 | "for agent_id in range(3):\n", 276 | " agents.append(SimpleAgent(config[\"agent\"](agent_id, config[\"game_type\"])))\n", 277 | "\n", 278 | "# Add TensorforceAgent\n", 279 | "agent_id += 1\n", 280 | "agents.append(TensorforceAgent(config[\"agent\"](agent_id, config[\"game_type\"])))\n", 281 | "env.set_agents(agents)\n", 282 | "env.set_training_agent(agents[-1].agent_id)\n", 283 | "env.set_init_game_state(None)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 12, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "class WrappedEnv(OpenAIGym): \n", 293 | " def __init__(self, gym, visualize=False):\n", 294 | " self.gym = gym\n", 295 | " self.visualize = visualize\n", 296 | " \n", 297 | " def execute(self, action):\n", 298 | " if self.visualize:\n", 299 | " self.gym.render()\n", 300 | "\n", 301 | " actions = self.unflatten_action(action=action)\n", 302 | " \n", 303 | " obs = self.gym.get_observations()\n", 304 | " all_actions = self.gym.act(obs)\n", 305 | " all_actions.insert(self.gym.training_agent, actions)\n", 306 | " state, reward, terminal, _ = self.gym.step(all_actions)\n", 307 | " agent_state = featurize(state[self.gym.training_agent])\n", 308 | " agent_reward = reward[self.gym.training_agent]\n", 309 | " return agent_state, terminal, agent_reward\n", 310 | " \n", 311 | " def reset(self):\n", 312 | " obs = self.gym.reset()\n", 313 | " agent_obs = featurize(obs[3])\n", 314 | " return agent_obs" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 13, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "Stats: [-1, -1, -1, -1, -1] [15, 15, 27, 32, 26] [2.0443358421325684, 0.7581827640533447, 1.3421897888183594, 1.6136739253997803, 1.2573180198669434]\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "# Instantiate and run the environment for 5 episodes.\n", 332 | "wrapped_env = WrappedEnv(env, True)\n", 333 | "runner = Runner(agent=agent, environment=wrapped_env)\n", 334 | "runner.run(episodes=5, max_episode_timesteps=2000)\n", 335 | "print(\"Stats: \", runner.episode_rewards, runner.episode_timesteps, runner.episode_times)\n", 336 | "\n", 337 | "try:\n", 338 | " runner.close()\n", 339 | "except AttributeError as e:\n", 340 | " pass" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [] 349 | } 350 | ], 351 | "metadata": { 352 | "kernelspec": { 353 | "display_name": "pommerman", 354 | "language": "python", 355 | "name": "pommerman" 356 | }, 357 | "language_info": { 358 | "codemirror_mode": { 359 | "name": "ipython", 360 | "version": 3 361 | }, 362 | "file_extension": ".py", 363 | "mimetype": "text/x-python", 364 | "name": "python", 365 | "nbconvert_exporter": "python", 366 | "pygments_lexer": "ipython3", 367 | "version": "3.6.5" 368 | } 369 | }, 370 | "nbformat": 4, 371 | "nbformat_minor": 2 372 | } 373 | -------------------------------------------------------------------------------- /pommerman/README.md: -------------------------------------------------------------------------------- 1 | # Pommerman 2 | 3 | ### Game Overview: 4 | * Pommerman is a play on Bomberman. There are three different variants, each of which follow the same basic idea but have their own distinct flavors: 5 | * FFA: Free For All where four agents enter and one leaves. It tests planning, tactics, and cunning. The board is fully observable. 6 | * Team (The NIPS '18 Competition environment): 2v2 where two teams of agents enter and one team wins. It tests planning, and tactics, and cooperation. The board is partially observable. 7 | * Team Radio: Like team in that a it's a 2v2 game. Differences are that the agents each have a radio that they can use to convey 2 words from a dictionary of size 8 each step. 8 | 9 | ### Directory Overview: 10 | 11 | * agents: Baseline agents will reside here in addition to being available in the Docker directory. 12 | * characters.py: Here lies the actors in the game. This includes Agent, Bomb, and Flame. 13 | * configs.py: This configs module contains the setup. Feel free to edit this in your local directory for easy game loading. 14 | * envs (module): 15 | * utility.py has shared Enums, constants, and common functions to the different environments. 16 | * v0.py: This environment is the base one that we use. 17 | * v1.py: This is a modification of v0.py that collapses the walls in order to end the game more quickly. 18 | * v2.py: This is a modification of v0.py that adds in communication. It works by having the agents send a message as part of their actions and then includes that message in the next turn of observations. 19 | 20 | ### Agent Observations: 21 | 22 | * Each agent sees: 23 | * Board: The 11x11 board is a numpy array where each value corresponds to one of the representations below. 24 | * Passage = 0 25 | * Rigid Wall = 1 26 | * Wooden Wall = 2 27 | * Bomb = 3 28 | * Flames = 4 29 | * Fog = 5: This is only applicable in the partially observed (2v2 Team Radio) setting. 30 | * Extra Bomb Power-Up = 6: adds ammo. 31 | * Increase Range Power-Up = 7: increases the blast_strength 32 | * Can Kick Power-Up = 8: can kick bombs by touching them. 33 | * AgentDummy = 9 34 | * Agent0 = 10 35 | * Agent1 = 11 36 | * Agent2 = 12 37 | * Agent3 = 13 38 | * Position: A tuple of Ints of (X position, Y position) 39 | * Ammo: An Int representing the amount of ammo this agent has. 40 | * Blast Strength: An Int representing the blast strength of this agent's bombs. 41 | * Can Kick: Whether this agent can kick bombs. This ability is gained by stepping on the can kick power-up. 42 | * Teammate: One Int in [9, 13]. Which agent is this agent's teammate. In the FFA game, this is the AgentDummy. 43 | * Enemies: A list of three Ints, each in [9, 13]. Which agents are this agent's enemies. There are three here to be amenable to all variants of the game. When there are only two enemies like in the team competitions, the last Int will be the AgentDummy to reflect the fact that there are only two enemies. 44 | * Bomb Blast Strength: An 11x11 numpy int array representing the bombs' blast strengths in the agent's view. Everything outside of its view will be fogged out. 45 | * Bomb Life: An 11x11 numpy int array representing the bombs' life in the agent's view. Everything outside of its view will be fogged out. 46 | * Message: (Team Radio only) A list of two Ints, each in [0, 8]. The message being relayed from the teammate. Both ints are zero when a teammate is dead or it's the first step. Otherwise they are in [1, 8]. 47 | 48 | ### Agent Actions: 49 | 50 | * Each agent's actions are: 51 | * Movement: a single integer in [0, 5] representing which of the six actions that agent would like to take of the following 52 | * Stop (0): This action is a pass. 53 | * Up (1): Move up on the board. 54 | * Down (2): Move down on the board. 55 | * Left (3): Move left on the board. 56 | * Right (4): Move right on the board. 57 | * Bomb (5): Lay a bomb. 58 | * Message: (Team Radio only) A list of two Ints in [1, 8]. These represent the message. 59 | 60 | ### Game Rules: 61 | 62 | * Every battle starts on a randomly drawn symmetric 11x11 grid (`board'). There are four agents, one in each of the corners. An agent's teammate (if applicable) will be on the kitty corner. 63 | * The board is randomly constructed before each battle and, besides the agents, contains wood walls and rigid walls. We guarantee that the agents will have an accessible path to each other, possibly through wooden walls. 64 | * Rigid walls are indestructible and impassable. 65 | * Wooden walls can be destroyed by bombs (see below). Until they are destroyed, they are impassable. After they are destroyed, they become either a passage or a power-up. 66 | * In any given turn, an agent can choose from one of six actions: 67 | * Stop (0): This action is a pass. 68 | * Up (1): Move up on the board. 69 | * Left (2): Move left on the board. 70 | * Down (3): Move down on the board. 71 | * Right (4): Move right on the board. 72 | * Bomb (5): Lay a bomb. 73 | * If there is communication, each agent additionally emits a message on each turn consisting of two words from a dictionary of size eight. These words will be given to its teammate in the next step as part of the observation. 74 | * The agent starts with one bomb ("ammo"). Every time it lays a bomb, its ammo decreases by one. After that bomb explodes, its ammo will increase by one. 75 | * The agent also has a blast strength (starts at three). Every bomb it lays is imbued with the current blast strength, which is how far in the vertical and horizontal directions that bomb will effect. 76 | * A bomb has a life of 10 time steps. After its life expires, it explodes and any wooden walls, agents, power-ups or other bombs in its range (given by the blast strength) are destroyed. 77 | * Power-Ups: Half of the wooden walls have power-ups hidden underneath them that are revealed when they are destroyed. These are: 78 | * Extra Bomb: Picking this up increases the agent's ammo by one. 79 | * Increase Range: Picking this up increases the agent's blast strength by one. 80 | * Can Kick: Picking this up allows an agent to kick bombs. It does this by running into them. They then travel in the direction that the agent was moving at a speed of one unit per time step until they are impeded either by a player, a bomb, or a wall. 81 | * The game ends when both players on one team have been destroyed. The winning team is the one who has remaining members. 82 | * Ties can happen when the game does not end before the max steps or if both teams' last agents are destroyed on the same turn. If this happens in a competition, we will rerun the game once. If it happens again after that, then we will rerun it with collapsing walls until there is a winner. This is a variant where, after a large number of steps, the game board becomes smaller according to a specified cadence. See v1.py for a working example in the code. 83 | * If an agent does not respond in an appropriate time limit (100ms), then we will automatically issue them the Stop action and have them send out the message (0, 0). 84 | * The game setup does not allow for the agents to share a centralized controller. If, however, some clever participant figured out a way to force this, they will be subsequently disqualified. 85 | -------------------------------------------------------------------------------- /pommerman/__init__.py: -------------------------------------------------------------------------------- 1 | '''Entry point into the pommerman module''' 2 | import gym 3 | import inspect 4 | from . import agents 5 | from . import configs 6 | from . import constants 7 | from . import forward_model 8 | from . import helpers 9 | from . import utility 10 | from . import network 11 | 12 | gym.logger.set_level(40) 13 | REGISTRY = None 14 | 15 | 16 | def _register(): 17 | global REGISTRY 18 | REGISTRY = [] 19 | for name, f in inspect.getmembers(configs, inspect.isfunction): 20 | if not name.endswith('_env'): 21 | continue 22 | 23 | config = f() 24 | gym.envs.registration.register( 25 | id=config['env_id'], 26 | entry_point=config['env_entry_point'], 27 | kwargs=config['env_kwargs'] 28 | ) 29 | REGISTRY.append(config['env_id']) 30 | 31 | 32 | # Register environments with gym 33 | _register() 34 | 35 | def make(config_id, agent_list, game_state_file=None, render_mode='human'): 36 | '''Makes the pommerman env and registers it with gym''' 37 | assert config_id in REGISTRY, "Unknown configuration '{}'. " \ 38 | "Possible values: {}".format(config_id, REGISTRY) 39 | env = gym.make(config_id) 40 | 41 | for id_, agent in enumerate(agent_list): 42 | assert isinstance(agent, agents.BaseAgent) 43 | # NOTE: This is IMPORTANT so that the agent character is initialized 44 | agent.init_agent(id_, env.spec._kwargs['game_type']) 45 | 46 | env.set_agents(agent_list) 47 | env.set_init_game_state(game_state_file) 48 | env.set_render_mode(render_mode) 49 | return env 50 | 51 | 52 | from . import cli 53 | -------------------------------------------------------------------------------- /pommerman/agents/__init__.py: -------------------------------------------------------------------------------- 1 | '''Entry point into the agents module set''' 2 | from .base_agent import BaseAgent 3 | from .docker_agent import DockerAgent 4 | from .http_agent import HttpAgent 5 | from .player_agent import PlayerAgent 6 | from .player_agent_blocking import PlayerAgentBlocking 7 | from .random_agent import RandomAgent 8 | from .simple_agent import SimpleAgent 9 | from .tensorforce_agent import TensorForceAgent 10 | -------------------------------------------------------------------------------- /pommerman/agents/base_agent.py: -------------------------------------------------------------------------------- 1 | '''This is the base abstraction for agents in pommerman. 2 | All agents should inherent from this class''' 3 | from .. import characters 4 | 5 | 6 | class BaseAgent: 7 | """Parent abstract Agent.""" 8 | 9 | def __init__(self, character=characters.Bomber): 10 | self._character = character 11 | 12 | def __getattr__(self, attr): 13 | return getattr(self._character, attr) 14 | 15 | def act(self, obs, action_space): 16 | raise NotImplementedError() 17 | 18 | def episode_end(self, reward): 19 | """This is called at the end of the episode to let the agent know that 20 | the episode has ended and what is the reward. 21 | 22 | Args: 23 | reward: The single reward scalar to this agent. 24 | """ 25 | pass 26 | 27 | def init_agent(self, id_, game_type): 28 | self._character = self._character(id_, game_type) 29 | 30 | @staticmethod 31 | def has_user_input(): 32 | return False 33 | 34 | def shutdown(self): 35 | pass 36 | -------------------------------------------------------------------------------- /pommerman/agents/docker_agent.py: -------------------------------------------------------------------------------- 1 | '''An example docker agent.''' 2 | import json 3 | import time 4 | import os 5 | import threading 6 | import requests 7 | import docker 8 | 9 | from . import BaseAgent 10 | from .. import utility 11 | from .. import characters 12 | 13 | 14 | class DockerAgent(BaseAgent): 15 | """The Docker Agent that Connects to a Docker container where the character runs.""" 16 | 17 | def __init__(self, 18 | docker_image, 19 | port, 20 | server='http://localhost', 21 | character=characters.Bomber, 22 | docker_client=None, 23 | env_vars=None): 24 | super(DockerAgent, self).__init__(character) 25 | 26 | self._docker_image = docker_image 27 | self._docker_client = docker_client 28 | if not self._docker_client: 29 | self._docker_client = docker.from_env() 30 | self._docker_client.login( 31 | os.getenv("PLAYGROUND_DOCKER_LOGIN"), 32 | os.getenv("PLAYGROUND_DOCKER_PASSWORD")) 33 | 34 | self._acknowledged = False # Becomes True when the container is ready. 35 | self._server = server 36 | self._port = port 37 | self._timeout = 32 38 | self._container = None 39 | self._env_vars = env_vars or {} 40 | # Pass env variables starting with DOCKER_AGENT to the container. 41 | for key, value in os.environ.items(): 42 | if not key.startswith("DOCKER_AGENT_"): 43 | continue 44 | env_key = key.replace("DOCKER_AGENT_", "") 45 | self._env_vars[env_key] = value 46 | 47 | # Start the docker agent if it is on this computer. Otherwise, it's far 48 | # away and we need to tell that server to start it. 49 | if 'localhost' in server: 50 | container_thread = threading.Thread( 51 | target=self._run_container, daemon=True) 52 | container_thread.start() 53 | print("Waiting for docker agent at {}:{}...".format(server, port)) 54 | self._wait_for_docker() 55 | else: 56 | request_url = "{}:8000/run_container".format(server) 57 | request_json = { 58 | 'docker_image': self._docker_image, 59 | 'env_vars': self._env_vars, 60 | 'port': port 61 | } 62 | requests.post(request_url, json=request_json) 63 | waiting_thread = threading.Thread( 64 | target=self._wait_for_docker, daemon=True) 65 | waiting_thread.start() 66 | 67 | def _run_container(self): 68 | print("Starting container...") 69 | self._container = self._docker_client.containers.run( 70 | self._docker_image, 71 | detach=True, 72 | auto_remove=True, 73 | ports={10080: self._port}, 74 | environment=self._env_vars) 75 | for line in self._container.logs(stream=True): 76 | print(line.decode("utf-8").strip()) 77 | 78 | def _wait_for_docker(self): 79 | """Wait for network service to appear. A timeout of 0 waits forever.""" 80 | timeout = self._timeout 81 | backoff = .25 82 | max_backoff = min(timeout, 16) 83 | 84 | if timeout: 85 | # time module is needed to calc timeout shared between two exceptions 86 | end = time.time() + timeout 87 | 88 | while True: 89 | try: 90 | now = time.time() 91 | if timeout and end < now: 92 | print("Timed out - %s:%s" % (self._server, self._port)) 93 | raise 94 | 95 | request_url = '%s:%s/ping' % (self._server, self._port) 96 | req = requests.get(request_url) 97 | self._acknowledged = True 98 | return True 99 | except requests.exceptions.ConnectionError as e: 100 | print("ConnectionError: ", e) 101 | backoff = min(max_backoff, backoff * 2) 102 | time.sleep(backoff) 103 | except requests.exceptions.HTTPError as e: 104 | print("HTTPError: ", e) 105 | backoff = min(max_backoff, backoff * 2) 106 | time.sleep(backoff) 107 | except docker.errors.APIError as e: 108 | print("This is a Docker error. Please fix: ", e) 109 | raise 110 | 111 | def init_agent(self, id, game_type): 112 | super(DockerAgent, self).init_agent(id, game_type) 113 | request_url = "http://localhost:{}/init_agent".format(self._port) 114 | try: 115 | req = requests.post( 116 | request_url, 117 | timeout=0.5, 118 | json={ 119 | "id": json.dumps(id, cls=utility.PommermanJSONEncoder), 120 | "game_type": json.dumps(game_type, cls=utility.PommermanJSONEncoder) 121 | }) 122 | except requests.exceptions.Timeout as e: 123 | print('Timeout in init_agent()!') 124 | 125 | def act(self, obs, action_space): 126 | obs_serialized = json.dumps(obs, cls=utility.PommermanJSONEncoder) 127 | request_url = "http://localhost:{}/action".format(self._port) 128 | try: 129 | req = requests.post( 130 | request_url, 131 | timeout=0.15, 132 | json={ 133 | "obs": 134 | obs_serialized, 135 | "action_space": 136 | json.dumps(action_space, cls=utility.PommermanJSONEncoder) 137 | }) 138 | action = req.json()['action'] 139 | except requests.exceptions.Timeout as e: 140 | print('Timeout!') 141 | # TODO: Fix this. It's ugly. 142 | num_actions = len(action_space.shape) 143 | if num_actions > 1: 144 | return [0] * num_actions 145 | else: 146 | return 0 147 | return action 148 | 149 | def episode_end(self, reward): 150 | request_url = "http://localhost:{}/episode_end".format(self._port) 151 | try: 152 | req = requests.post( 153 | request_url, 154 | timeout=0.5, 155 | json={ 156 | "reward": json.dumps(reward, cls=utility.PommermanJSONEncoder) 157 | }) 158 | except requests.exceptions.Timeout as e: 159 | print('Timeout in episode_end()!') 160 | 161 | def shutdown(self): 162 | request_url = "http://localhost:{}/shutdown".format(self._port) 163 | try: 164 | req = requests.post( 165 | request_url, 166 | timeout=0.5, 167 | json={ }) 168 | except requests.exceptions.Timeout as e: 169 | print('Timeout in shutdown()!') 170 | 171 | print("Stopping container..") 172 | if self._container: 173 | try: 174 | return self._container.remove(force=True) 175 | except docker.errors.NotFound as e: 176 | return True 177 | -------------------------------------------------------------------------------- /pommerman/agents/http_agent.py: -------------------------------------------------------------------------------- 1 | '''The HTTP agent - provides observation using http push to remote 2 | agent and expects action in the reply''' 3 | import json 4 | import time 5 | import os 6 | import threading 7 | import requests 8 | 9 | from . import BaseAgent 10 | from .. import utility 11 | from .. import characters 12 | 13 | 14 | class HttpAgent(BaseAgent): 15 | """The HTTP Agent that connects to a port with a remote agent where the 16 | character runs. It uses the same interface as the docker agent and 17 | is useful for debugging.""" 18 | 19 | def __init__(self, 20 | port=8080, 21 | host='localhost', 22 | timeout=120, 23 | character=characters.Bomber): 24 | self._port = port 25 | self._host = host 26 | self._timeout = timeout 27 | super(HttpAgent, self).__init__(character) 28 | self._wait_for_remote() 29 | 30 | def _wait_for_remote(self): 31 | """Wait for network service to appear. A timeout of 0 waits forever.""" 32 | timeout = self._timeout 33 | backoff = .25 34 | max_backoff = min(timeout, 16) 35 | 36 | if timeout: 37 | # time module is needed to calc timeout shared between two exceptions 38 | end = time.time() + timeout 39 | 40 | while True: 41 | try: 42 | now = time.time() 43 | if timeout and end < now: 44 | print("Timed out - %s:%s" % (self._host, self._port)) 45 | raise 46 | 47 | request_url = 'http://%s:%s/ping' % (self._host, self._port) 48 | req = requests.get(request_url) 49 | self._acknowledged = True 50 | return True 51 | except requests.exceptions.ConnectionError as e: 52 | print("ConnectionError: ", e) 53 | backoff = min(max_backoff, backoff * 2) 54 | time.sleep(backoff) 55 | except requests.exceptions.HTTPError as e: 56 | print("HTTPError: ", e) 57 | backoff = min(max_backoff, backoff * 2) 58 | time.sleep(backoff) 59 | 60 | def init_agent(self, id, game_type): 61 | super(HttpAgent, self).init_agent(id, game_type) 62 | request_url = "http://{}:{}/init_agent".format(self._host, self._port) 63 | try: 64 | req = requests.post( 65 | request_url, 66 | timeout=0.5, 67 | json={ 68 | "id": json.dumps(id, cls=utility.PommermanJSONEncoder), 69 | "game_type": json.dumps(game_type, cls=utility.PommermanJSONEncoder) 70 | }) 71 | except requests.exceptions.Timeout as e: 72 | print('Timeout in init_agent()!') 73 | 74 | def act(self, obs, action_space): 75 | obs_serialized = json.dumps(obs, cls=utility.PommermanJSONEncoder) 76 | request_url = "http://{}:{}/action".format(self._host, self._port) 77 | try: 78 | req = requests.post( 79 | request_url, 80 | timeout=0.15, 81 | json={ 82 | "obs": 83 | obs_serialized, 84 | "action_space": 85 | json.dumps(action_space, cls=utility.PommermanJSONEncoder) 86 | }) 87 | action = req.json()['action'] 88 | except requests.exceptions.Timeout as e: 89 | print('Timeout!') 90 | # TODO: Fix this. It's ugly. 91 | num_actions = len(action_space.shape) 92 | if num_actions > 1: 93 | return [0] * num_actions 94 | else: 95 | return 0 96 | return action 97 | 98 | def episode_end(self, reward): 99 | request_url = "http://{}:{}/episode_end".format(self._host, self._port) 100 | try: 101 | req = requests.post( 102 | request_url, 103 | timeout=0.5, 104 | json={ 105 | "reward": json.dumps(reward, cls=utility.PommermanJSONEncoder) 106 | }) 107 | except requests.exceptions.Timeout as e: 108 | print('Timeout in episode_end()!') 109 | 110 | def shutdown(self): 111 | request_url = "http://{}:{}/shutdown".format(self._host, self._port) 112 | try: 113 | req = requests.post( 114 | request_url, 115 | timeout=0.5, 116 | json={ }) 117 | except requests.exceptions.Timeout as e: 118 | print('Timeout in shutdown()!') 119 | -------------------------------------------------------------------------------- /pommerman/agents/player_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | NOTE: 3 | 4 | There are a few minor complications to fluid human control which make this 5 | code a little more involved than trivial. 6 | 7 | 1. Key press-release cycles can be, and often are, faster than one tick of 8 | the game/simulation, but the player still wants that cycle to count, i.e. 9 | to lay a bomb! 10 | 2. When holding down a key, the player expects that action to be repeated, 11 | at least after a slight delay. 12 | 3. But when holding a key down (say, move left) and simultaneously doing a 13 | quick press-release cycle (put a bomb), we want the held-down key to keep 14 | being executed, but the cycle should have happened in-between. 15 | 16 | The way we solve this problem is by separating key-state and actions-to-do. 17 | We hold the actions that need be executed in a queue (`self._action_q`) and 18 | a state for all considered keys. 19 | 20 | 1. When a key is pressed down, we note the time and mark it as down. 21 | 2. If it is released quickly thereafter, before a game tick could happen, 22 | we add its action into the queue. This often happens when putting bombs. 23 | 3. If it's still pressed down as we enter a game tick, we do some math to see 24 | if it's time for a "repeat" event and, if so, push an action to the queue. 25 | 4. Just work off one item from the queue each tick. 26 | 27 | This way, the input is "natural" and things like dropping a bomb while doing 28 | a diagonal walk from one end to the other "just work". 29 | """ 30 | 31 | from time import time 32 | 33 | from . import BaseAgent 34 | from .. import characters 35 | 36 | REPEAT_DELAY = 0.2 # seconds 37 | REPEAT_INTERVAL = 0.1 38 | 39 | 40 | class Keystate: 41 | '''Handles keyboard state for a human player''' 42 | def __init__(self): 43 | self.keydown_time = time() 44 | self.last_repeat_time = None 45 | self.fired = False 46 | 47 | def should_fire(self): 48 | if self.last_repeat_time is None: 49 | # The first repetition: 50 | if time() - self.keydown_time > REPEAT_DELAY: 51 | return True 52 | else: 53 | # A repetition after the first: 54 | if time() - self.last_repeat_time > REPEAT_INTERVAL: 55 | return True 56 | 57 | # No repetition yet 58 | return False 59 | 60 | def mark_fired(self): 61 | self.last_repeat_time = time() 62 | self.fired = True 63 | 64 | 65 | class PlayerAgent(BaseAgent): 66 | """The Player Agent that lets the user control a character.""" 67 | 68 | def __init__(self, character=characters.Bomber, agent_control='arrows'): 69 | super(PlayerAgent, self).__init__(character) 70 | 71 | ## 72 | # @NOTE: DO NOT move this import outside the constructor. It will 73 | # not work in headless environments like a Docker container 74 | # and prevents Pommerman from running. 75 | # 76 | from pyglet.window import key 77 | controls = { 78 | 'arrows': { 79 | key.UP: 1, 80 | key.DOWN: 2, 81 | key.LEFT: 3, 82 | key.RIGHT: 4, 83 | key.SPACE: 5, 84 | key.M: 6 # In Pommerman, this will freeze the game. 85 | }, 86 | 'wasd': { 87 | key.W: 1, 88 | key.S: 2, 89 | key.A: 3, 90 | key.D: 4, 91 | key.E: 5, 92 | key.Q: 6 # In Pommerman, this will freeze the game. 93 | } 94 | } 95 | 96 | assert agent_control in controls, "Unknown control: {}".format( 97 | agent_control) 98 | self._key2act = controls[agent_control] 99 | 100 | self._action_q = [] 101 | self._keystate = {} 102 | 103 | def act(self, obs, action_space): 104 | # Go through the keys and fire for those that needs repetition (because they're held down) 105 | for k, state in self._keystate.items(): 106 | if state.should_fire(): 107 | self._action_q.append(k) 108 | state.mark_fired() 109 | 110 | act = 0 111 | if self._action_q: # Work off the keys that are queued. 112 | act = self._key2act[self._action_q.pop(0)] 113 | return act 114 | 115 | @staticmethod 116 | def has_user_input(): 117 | return True 118 | 119 | def on_key_press(self, k, mod): 120 | # Ignore if we're not handling the key. Avoids "shadowing" ticks in 121 | # multiplayer mode. 122 | if k in self._key2act: 123 | self._keystate[k] = Keystate() 124 | 125 | def on_key_release(self, k, mod): 126 | # We only need to act on keys for which we did something in the 127 | # `key_press` event, and ignore any other key releases. 128 | if k in self._keystate: 129 | # Only mark this as a "press" upon release if it was a quick one, 130 | # i.e. not held down and executed already 131 | if not self._keystate[k].fired: 132 | self._action_q.append(k) 133 | del self._keystate[k] 134 | -------------------------------------------------------------------------------- /pommerman/agents/player_agent_blocking.py: -------------------------------------------------------------------------------- 1 | """ 2 | This variant is blocking, that is the game pauses for keyboard input. 3 | """ 4 | 5 | from time import time 6 | import click 7 | 8 | from . import BaseAgent 9 | from .. import characters 10 | from .. import constants 11 | 12 | # keypad control codes 13 | K_PREFIX = '\x1b' 14 | K_RT = '[C' 15 | K_LF = '[D' 16 | K_UP = '[A' 17 | K_DN = '[B' 18 | 19 | 20 | class PlayerAgentBlocking(BaseAgent): 21 | """Block for keyboard input.""" 22 | 23 | def __init__(self, character=characters.Bomber, agent_control='arrows'): 24 | super(PlayerAgentBlocking, self).__init__(character) 25 | self.agent_control = agent_control 26 | 27 | def act(self, obs, action_space): 28 | key = click.getchar() 29 | if self.agent_control == 'arrows': 30 | if key == K_RT + K_PREFIX: return constants.Action.Right.value 31 | if key == K_LF + K_PREFIX: return constants.Action.Left.value 32 | if key == K_UP + K_PREFIX: return constants.Action.Up.value 33 | if key == K_DN + K_PREFIX: return constants.Action.Down.value 34 | if key == ' ': return constants.Action.Bomb.value 35 | return constants.Action.Stop.value 36 | 37 | if self.agent_control == 'wasd': 38 | if key == 'd': return constants.Action.Right.value 39 | if key == 'a': return constants.Action.Left.value 40 | if key == 'w': return constants.Action.Up.value 41 | if key == 's': return constants.Action.Down.value 42 | if key == 'e': return constants.Action.Bomb.value 43 | if key == 'q': return constants.Action.Stop.value 44 | return constants.Action.Stop.value 45 | -------------------------------------------------------------------------------- /pommerman/agents/random_agent.py: -------------------------------------------------------------------------------- 1 | '''An agent that preforms a random action each step''' 2 | from . import BaseAgent 3 | 4 | 5 | class RandomAgent(BaseAgent): 6 | """The Random Agent that returns random actions given an action_space.""" 7 | 8 | def act(self, obs, action_space): 9 | return action_space.sample() 10 | -------------------------------------------------------------------------------- /pommerman/agents/tensorforce_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | A Work-In-Progress agent using Tensorforce 3 | """ 4 | from . import BaseAgent 5 | from .. import characters 6 | 7 | 8 | class TensorForceAgent(BaseAgent): 9 | """The TensorForceAgent. Acts through the algorith, not here.""" 10 | 11 | def __init__(self, character=characters.Bomber, algorithm='ppo'): 12 | super(TensorForceAgent, self).__init__(character) 13 | self.algorithm = algorithm 14 | 15 | def act(self, obs, action_space): 16 | """This agent has its own way of inducing actions. See train_with_tensorforce.""" 17 | return None 18 | 19 | def initialize(self, env): 20 | from gym import spaces 21 | from tensorforce.agents import PPOAgent 22 | 23 | if self.algorithm == "ppo": 24 | if type(env.action_space) == spaces.Tuple: 25 | actions = { 26 | str(num): { 27 | 'type': 'int', 28 | 'num_actions': space.n 29 | } 30 | for num, space in enumerate(env.action_space.spaces) 31 | } 32 | else: 33 | actions = dict(type='int', num_actions=env.action_space.n) 34 | 35 | return PPOAgent( 36 | states=dict(type='float', shape=env.observation_space.shape), 37 | actions=actions, 38 | network=[ 39 | dict(type='dense', size=64), 40 | dict(type='dense', size=64) 41 | ], 42 | batching_capacity=1000, 43 | step_optimizer=dict(type='adam', learning_rate=1e-4)) 44 | return None 45 | -------------------------------------------------------------------------------- /pommerman/characters.py: -------------------------------------------------------------------------------- 1 | """These are objects in the game world. Please do not edit this file. The competition will be run with it as is.""" 2 | 3 | import random 4 | 5 | from . import constants 6 | from . import utility 7 | 8 | 9 | class Bomber(object): 10 | """Container to keep the agent state.""" 11 | 12 | def __init__(self, agent_id=None, game_type=None): 13 | self._game_type = game_type 14 | self.ammo = 1 15 | self.is_alive = True 16 | self.blast_strength = constants.DEFAULT_BLAST_STRENGTH 17 | self.can_kick = False 18 | if agent_id is not None: 19 | self.set_agent_id(agent_id) 20 | 21 | def set_agent_id(self, agent_id): 22 | self.agent_id = agent_id 23 | if self._game_type == constants.GameType.FFA: 24 | self.teammate = constants.Item.AgentDummy 25 | self.enemies = [ 26 | getattr(constants.Item, f'Agent{id_}') 27 | for id_ in range(4) 28 | if id_ != agent_id 29 | ] 30 | elif self._game_type == constants.GameType.OneVsOne: 31 | self.teammate = constants.Item.AgentDummy 32 | self.enemies = [ 33 | getattr(constants.Item, f'Agent{id_}') 34 | for id_ in range(2) 35 | if id_ != agent_id 36 | ] 37 | else: 38 | teammate_id = (agent_id + 2) % 4 39 | self.teammate = getattr(constants.Item, f'Agent{teammate_id}') 40 | self.enemies = [ 41 | getattr(constants.Item, f'Agent{id_}') 42 | for id_ in range(4) 43 | if id_ != agent_id and id_ != teammate_id 44 | ] 45 | self.enemies.append(constants.Item.AgentDummy) 46 | 47 | def maybe_lay_bomb(self): 48 | if self.ammo > 0: 49 | self.ammo -= 1 50 | return Bomb(self, self.position, constants.DEFAULT_BOMB_LIFE + 1, 51 | self.blast_strength) 52 | return None 53 | 54 | def incr_ammo(self): 55 | self.ammo = min(self.ammo + 1, 10) 56 | 57 | def get_next_position(self, direction): 58 | action = constants.Action(direction) 59 | return utility.get_next_position(self.position, action) 60 | 61 | def move(self, direction): 62 | self.position = self.get_next_position(direction) 63 | 64 | def stop(self): 65 | pass 66 | 67 | def in_range(self, exploded_map): 68 | row, col = self.position 69 | return exploded_map[row][col] == 1 70 | 71 | def die(self): 72 | self.is_alive = False 73 | 74 | def set_start_position(self, start_position): 75 | self.start_position = start_position 76 | 77 | def reset(self, ammo=1, is_alive=True, blast_strength=None, can_kick=False): 78 | self.position = self.start_position 79 | self.ammo = ammo 80 | self.is_alive = is_alive 81 | self.blast_strength = blast_strength or constants.DEFAULT_BLAST_STRENGTH 82 | self.can_kick = can_kick 83 | 84 | def pick_up(self, item, max_blast_strength): 85 | if item == constants.Item.ExtraBomb: 86 | self.incr_ammo() 87 | elif item == constants.Item.IncrRange: 88 | self.blast_strength = min(self.blast_strength + 1, 89 | max_blast_strength) 90 | elif item == constants.Item.Kick: 91 | self.can_kick = True 92 | 93 | def to_json(self): 94 | return { 95 | "agent_id": self.agent_id, 96 | "is_alive": self.is_alive, 97 | "position": self.position, 98 | "ammo": self.ammo, 99 | "blast_strength": self.blast_strength, 100 | "can_kick": self.can_kick 101 | } 102 | 103 | 104 | class Bomb(object): 105 | """Container for the Bomb object.""" 106 | 107 | def __init__(self, 108 | bomber, 109 | position, 110 | life, 111 | blast_strength, 112 | moving_direction=None): 113 | self.bomber = bomber 114 | self.position = position 115 | self.life = life 116 | self.blast_strength = blast_strength 117 | self.moving_direction = moving_direction 118 | 119 | def tick(self): 120 | self.life -= 1 121 | 122 | def fire(self): 123 | """Encounter Flames and blow up.""" 124 | self.life = 0 125 | 126 | def move(self): 127 | if self.is_moving(): 128 | self.position = utility.get_next_position(self.position, 129 | self.moving_direction) 130 | 131 | def stop(self): 132 | self.moving_direction = None 133 | 134 | def exploded(self): 135 | return self.life == 0 136 | 137 | def explode(self): 138 | row, col = self.position 139 | indices = { 140 | 'up': ([row - i, col] for i in range(1, self.blast_strength)), 141 | 'down': ([row + i, col] for i in range(self.blast_strength)), 142 | 'left': ([row, col - i] for i in range(1, self.blast_strength)), 143 | 'right': ([row, col + i] for i in range(1, self.blast_strength)) 144 | } 145 | return indices 146 | 147 | def in_range(self, exploded_map): 148 | row, col = self.position 149 | return exploded_map[row][col] == 1 150 | 151 | def is_moving(self): 152 | return self.moving_direction is not None 153 | 154 | def to_json(self): 155 | return { 156 | "position": self.position, 157 | "bomber_id": self.bomber.agent_id, 158 | "life": self.life, 159 | "blast_strength": self.blast_strength, 160 | "moving_direction": self.moving_direction 161 | } 162 | 163 | 164 | class Flame(object): 165 | """Container for Flame object.""" 166 | 167 | def __init__(self, position, life=2): 168 | self.position = position 169 | self.life = life 170 | 171 | def tick(self): 172 | self.life -= 1 173 | 174 | def is_dead(self): 175 | return self.life == 0 176 | 177 | def to_json(self): 178 | return {"position": self.position, "life": self.life} 179 | -------------------------------------------------------------------------------- /pommerman/cli/__init__.py: -------------------------------------------------------------------------------- 1 | '''CLI module entry point''' 2 | from . import run_battle 3 | -------------------------------------------------------------------------------- /pommerman/cli/run_battle.py: -------------------------------------------------------------------------------- 1 | """Run a battle among agents. 2 | 3 | Call this with a config, a game, and a list of agents. The script will start separate threads to operate the agents 4 | and then report back the result. 5 | 6 | An example with all four test agents running ffa: 7 | python run_battle.py --agents=test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent --config=PommeFFACompetition-v0 8 | 9 | An example with one player, two random agents, and one test agent: 10 | python run_battle.py --agents=player::arrows,test::agents.SimpleAgent,random::null,random::null --config=PommeFFACompetition-v0 11 | 12 | An example with a docker agent: 13 | python run_battle.py --agents=player::arrows,docker::pommerman/test-agent,random::null,random::null --config=PommeFFACompetition-v0 14 | """ 15 | import atexit 16 | from datetime import datetime 17 | import os 18 | import random 19 | import sys 20 | import time 21 | 22 | import argparse 23 | import numpy as np 24 | 25 | from .. import helpers 26 | from .. import make 27 | from pommerman import utility 28 | 29 | 30 | def run(args, num_times=1, seed=None): 31 | '''Wrapper to help start the game''' 32 | config = args.config 33 | record_pngs_dir = args.record_pngs_dir 34 | record_json_dir = args.record_json_dir 35 | agent_env_vars = args.agent_env_vars 36 | game_state_file = args.game_state_file 37 | render_mode = args.render_mode 38 | do_sleep = args.do_sleep 39 | 40 | agents = [ 41 | helpers.make_agent_from_string(agent_string, agent_id) 42 | for agent_id, agent_string in enumerate(args.agents.split(',')) 43 | ] 44 | 45 | env = make(config, agents, game_state_file, render_mode=render_mode) 46 | 47 | def _run(record_pngs_dir=None, record_json_dir=None): 48 | '''Runs a game''' 49 | print("Starting the Game.") 50 | if record_pngs_dir and not os.path.isdir(record_pngs_dir): 51 | os.makedirs(record_pngs_dir) 52 | if record_json_dir and not os.path.isdir(record_json_dir): 53 | os.makedirs(record_json_dir) 54 | 55 | obs = env.reset() 56 | done = False 57 | 58 | while not done: 59 | if args.render: 60 | env.render( 61 | record_pngs_dir=record_pngs_dir, 62 | record_json_dir=record_json_dir, 63 | do_sleep=do_sleep) 64 | if args.render is False and record_json_dir: 65 | env.save_json(record_json_dir) 66 | time.sleep(1.0 / env._render_fps) 67 | actions = env.act(obs) 68 | obs, reward, done, info = env.step(actions) 69 | 70 | print("Final Result: ", info) 71 | if args.render: 72 | env.render( 73 | record_pngs_dir=record_pngs_dir, 74 | record_json_dir=record_json_dir, 75 | do_sleep=do_sleep) 76 | if do_sleep: 77 | time.sleep(5) 78 | env.render(close=True) 79 | 80 | if args.render is False and record_json_dir: 81 | env.save_json(record_json_dir) 82 | time.sleep(1.0 / env._render_fps) 83 | 84 | if record_json_dir: 85 | finished_at = datetime.now().isoformat() 86 | _agents = args.agents.split(',') 87 | utility.join_json_state(record_json_dir, _agents, finished_at, 88 | config, info) 89 | 90 | return info 91 | 92 | if seed is None: 93 | # Pick a random seed between 0 and 2^31 - 1 94 | seed = random.randint(0, np.iinfo(np.int32).max) 95 | np.random.seed(seed) 96 | random.seed(seed) 97 | env.seed(seed) 98 | 99 | infos = [] 100 | times = [] 101 | for i in range(num_times): 102 | start = time.time() 103 | 104 | record_pngs_dir_ = record_pngs_dir + '/%d' % (i+1) \ 105 | if record_pngs_dir else None 106 | record_json_dir_ = record_json_dir + '/%d' % (i+1) \ 107 | if record_json_dir else None 108 | infos.append(_run(record_pngs_dir_, record_json_dir_)) 109 | 110 | times.append(time.time() - start) 111 | print("Game Time: ", times[-1]) 112 | 113 | atexit.register(env.close) 114 | return infos 115 | 116 | 117 | def main(): 118 | '''CLI entry pointed used to bootstrap a battle''' 119 | simple_agent = 'test::agents.SimpleAgent' 120 | player_agent = 'player::arrows' 121 | docker_agent = 'docker::pommerman/simple-agent' 122 | 123 | parser = argparse.ArgumentParser(description='Playground Flags.') 124 | parser.add_argument( 125 | '--config', 126 | default='PommeFFACompetition-v0', 127 | help='Configuration to execute. See env_ids in ' 128 | 'configs.py for options.') 129 | parser.add_argument( 130 | '--agents', 131 | default=','.join([simple_agent] * 4), 132 | # default=','.join([player_agent] + [simple_agent]*3]), 133 | # default=','.join([docker_agent] + [simple_agent]*3]), 134 | help='Comma delineated list of agent types and docker ' 135 | 'locations to run the agents.') 136 | parser.add_argument( 137 | '--agent_env_vars', 138 | help='Comma delineated list of agent environment vars ' 139 | 'to pass to Docker. This is only for the Docker Agent.' 140 | " An example is '0:foo=bar:baz=lar,3:foo=lam', which " 141 | 'would send two arguments to Docker Agent 0 and one ' 142 | 'to Docker Agent 3.', 143 | default="") 144 | parser.add_argument( 145 | '--record_pngs_dir', 146 | default=None, 147 | help='Directory to record the PNGs of the game. ' 148 | "Doesn't record if None.") 149 | parser.add_argument( 150 | '--record_json_dir', 151 | default=None, 152 | help='Directory to record the JSON representations of ' 153 | "the game. Doesn't record if None.") 154 | parser.add_argument( 155 | "--render", 156 | default=False, 157 | action='store_true', 158 | help="Whether to render or not. Defaults to False.") 159 | parser.add_argument( 160 | '--render_mode', 161 | default='human', 162 | help="What mode to render. Options are human, rgb_pixel, and rgb_array") 163 | parser.add_argument( 164 | '--game_state_file', 165 | default=None, 166 | help="File from which to load game state.") 167 | parser.add_argument( 168 | '--do_sleep', 169 | default=True, 170 | help="Whether we sleep after each rendering.") 171 | args = parser.parse_args() 172 | run(args) 173 | 174 | 175 | if __name__ == "__main__": 176 | main() 177 | -------------------------------------------------------------------------------- /pommerman/cli/train_with_tensorforce.py: -------------------------------------------------------------------------------- 1 | """Train an agent with TensorForce. 2 | 3 | Call this with a config, a game, and a list of agents, one of which should be a 4 | tensorforce agent. The script will start separate threads to operate the agents 5 | and then report back the result. 6 | 7 | An example with all three simple agents running ffa: 8 | python train_with_tensorforce.py \ 9 | --agents=tensorforce::ppo,test::agents.SimpleAgent,test::agents.SimpleAgent,test::agents.SimpleAgent \ 10 | --config=PommeFFACompetition-v0 11 | """ 12 | import atexit 13 | import functools 14 | import os 15 | 16 | import argparse 17 | import docker 18 | from tensorforce.execution import Runner 19 | from tensorforce.contrib.openai_gym import OpenAIGym 20 | import gym 21 | 22 | from pommerman import helpers, make 23 | from pommerman.agents import TensorForceAgent 24 | 25 | 26 | CLIENT = docker.from_env() 27 | 28 | 29 | def clean_up_agents(agents): 30 | """Stops all agents""" 31 | return [agent.shutdown() for agent in agents] 32 | 33 | 34 | class WrappedEnv(OpenAIGym): 35 | '''An Env Wrapper used to make it easier to work 36 | with multiple agents''' 37 | 38 | def __init__(self, gym, visualize=False): 39 | self.gym = gym 40 | self.visualize = visualize 41 | 42 | def execute(self, action): 43 | if self.visualize: 44 | self.gym.render() 45 | 46 | actions = self.unflatten_action(action=action) 47 | 48 | obs = self.gym.get_observations() 49 | all_actions = self.gym.act(obs) 50 | all_actions.insert(self.gym.training_agent, actions) 51 | state, reward, terminal, _ = self.gym.step(all_actions) 52 | agent_state = self.gym.featurize(state[self.gym.training_agent]) 53 | agent_reward = reward[self.gym.training_agent] 54 | return agent_state, terminal, agent_reward 55 | 56 | def reset(self): 57 | obs = self.gym.reset() 58 | agent_obs = self.gym.featurize(obs[3]) 59 | return agent_obs 60 | 61 | 62 | def main(): 63 | '''CLI interface to bootstrap taining''' 64 | parser = argparse.ArgumentParser(description="Playground Flags.") 65 | parser.add_argument("--game", default="pommerman", help="Game to choose.") 66 | parser.add_argument( 67 | "--config", 68 | default="PommeFFACompetition-v0", 69 | help="Configuration to execute. See env_ids in " 70 | "configs.py for options.") 71 | parser.add_argument( 72 | "--agents", 73 | default="tensorforce::ppo,test::agents.SimpleAgent," 74 | "test::agents.SimpleAgent,test::agents.SimpleAgent", 75 | help="Comma delineated list of agent types and docker " 76 | "locations to run the agents.") 77 | parser.add_argument( 78 | "--agent_env_vars", 79 | help="Comma delineated list of agent environment vars " 80 | "to pass to Docker. This is only for the Docker Agent." 81 | " An example is '0:foo=bar:baz=lar,3:foo=lam', which " 82 | "would send two arguments to Docker Agent 0 and one to" 83 | " Docker Agent 3.", 84 | default="") 85 | parser.add_argument( 86 | "--record_pngs_dir", 87 | default=None, 88 | help="Directory to record the PNGs of the game. " 89 | "Doesn't record if None.") 90 | parser.add_argument( 91 | "--record_json_dir", 92 | default=None, 93 | help="Directory to record the JSON representations of " 94 | "the game. Doesn't record if None.") 95 | parser.add_argument( 96 | "--render", 97 | default=False, 98 | action='store_true', 99 | help="Whether to render or not. Defaults to False.") 100 | parser.add_argument( 101 | "--game_state_file", 102 | default=None, 103 | help="File from which to load game state. Defaults to " 104 | "None.") 105 | args = parser.parse_args() 106 | 107 | config = args.config 108 | record_pngs_dir = args.record_pngs_dir 109 | record_json_dir = args.record_json_dir 110 | agent_env_vars = args.agent_env_vars 111 | game_state_file = args.game_state_file 112 | 113 | # TODO: After https://github.com/MultiAgentLearning/playground/pull/40 114 | # this is still missing the docker_env_dict parsing for the agents. 115 | agents = [ 116 | helpers.make_agent_from_string(agent_string, agent_id + 1000) 117 | for agent_id, agent_string in enumerate(args.agents.split(",")) 118 | ] 119 | 120 | env = make(config, agents, game_state_file) 121 | training_agent = None 122 | 123 | for agent in agents: 124 | if type(agent) == TensorForceAgent: 125 | training_agent = agent 126 | env.set_training_agent(agent.agent_id) 127 | break 128 | 129 | if args.record_pngs_dir: 130 | assert not os.path.isdir(args.record_pngs_dir) 131 | os.makedirs(args.record_pngs_dir) 132 | if args.record_json_dir: 133 | assert not os.path.isdir(args.record_json_dir) 134 | os.makedirs(args.record_json_dir) 135 | 136 | # Create a Proximal Policy Optimization agent 137 | agent = training_agent.initialize(env) 138 | 139 | atexit.register(functools.partial(clean_up_agents, agents)) 140 | wrapped_env = WrappedEnv(env, visualize=args.render) 141 | runner = Runner(agent=agent, environment=wrapped_env) 142 | runner.run(episodes=10, max_episode_timesteps=2000) 143 | print("Stats: ", runner.episode_rewards, runner.episode_timesteps, 144 | runner.episode_times) 145 | 146 | try: 147 | runner.close() 148 | except AttributeError as e: 149 | pass 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | -------------------------------------------------------------------------------- /pommerman/constants.py: -------------------------------------------------------------------------------- 1 | """The set of constants in the game. 2 | 3 | This includes not just ints but also classes like Item, GameType, Action, etc. 4 | """ 5 | from enum import Enum 6 | 7 | RENDER_FPS = 15 8 | BOARD_SIZE = 11 9 | NUM_RIGID = 36 10 | NUM_WOOD = 36 11 | NUM_ITEMS = 20 12 | BOARD_SIZE_ONE_VS_ONE = 8 13 | NUM_RIGID_ONE_VS_ONE = 16 14 | NUM_WOOD_ONE_VS_ONE = 16 15 | NUM_ITEMS_ONE_VS_ONE = 10 16 | AGENT_VIEW_SIZE = 4 17 | HUMAN_FACTOR = 32 18 | DEFAULT_BLAST_STRENGTH = 2 19 | DEFAULT_BOMB_LIFE = 9 20 | # color for each of the 4 agents 21 | AGENT_COLORS = [[231, 76, 60], [46, 139, 87], [65, 105, 225], [238, 130, 238]] 22 | # color for each of the items. 23 | ITEM_COLORS = [[240, 248, 255], [128, 128, 128], [210, 180, 140], 24 | [255, 153, 51], [241, 196, 15], [141, 137, 124]] 25 | ITEM_COLORS += [(153, 153, 255), (153, 204, 204), (97, 169, 169), (48, 117, 26 | 117)] 27 | # If using collapsing boards, the step at which the board starts to collapse. 28 | FIRST_COLLAPSE = 500 29 | MAX_STEPS = 800 30 | RADIO_VOCAB_SIZE = 8 31 | RADIO_NUM_WORDS = 2 32 | 33 | # Files for images and and fonts 34 | RESOURCE_DIR = 'resources/' 35 | FILE_NAMES = [ 36 | 'Passage', 'Rigid', 'Wood', 'Bomb', 'Flames', 'Fog', 'ExtraBomb', 37 | 'IncrRange', 'Kick', 'AgentDummy', 'Agent0', 'Agent1', 'Agent2', 'Agent3', 38 | 'AgentDummy-No-Background', 'Agent0-No-Background', 'Agent1-No-Background', 39 | 'Agent2-No-Background', 'Agent3-No-Background', 'X-No-Background', 40 | 'Agent0-Team', 'Agent1-Team', 'Agent2-Team', 'Agent3-Team', 41 | 'Agent0-Team-No-Background', 'Agent1-Team-No-Background', 42 | 'Agent2-Team-No-Background', 'Agent3-Team-No-Background', 43 | ] 44 | IMAGES_DICT = { 45 | num: { 46 | 'id': num, 47 | 'file_name': '%s.png' % file_name, 48 | 'name': file_name, 49 | 'image': None 50 | } for num, file_name in enumerate(FILE_NAMES) 51 | } 52 | BOMB_FILE_NAMES = [ 53 | 'Bomb-1', 'Bomb-2', 'Bomb-3', 'Bomb-4', 'Bomb-5', 'Bomb-6', 54 | 'Bomb-7', 'Bomb-8', 'Bomb-9', 'Bomb-10', 55 | ] 56 | BOMB_DICT = { 57 | num: { 58 | 'id': num, 59 | 'file_name': '%s.png' % file_name, 60 | 'name': file_name, 61 | 'image': None 62 | } for num, file_name in enumerate(BOMB_FILE_NAMES) 63 | } 64 | FONTS_FILE_NAMES = ['Cousine-Regular.ttf'] 65 | 66 | # Human view board configurations 67 | BORDER_SIZE = 20 68 | MARGIN_SIZE = 10 69 | TILE_SIZE = 50 70 | BACKGROUND_COLOR = (41, 39, 51, 255) 71 | TILE_COLOR = (248, 221, 82, 255) 72 | TEXT_COLOR = (170, 170, 170, 255) 73 | 74 | 75 | class Item(Enum): 76 | """The Items in the game. 77 | 78 | When picked up: 79 | - ExtraBomb increments the agent's ammo by 1. 80 | - IncrRange increments the agent's blast strength by 1. 81 | - Kick grants the agent the ability to kick items. 82 | 83 | AgentDummy is used by team games to denote the third enemy and by ffa to 84 | denote the teammate. 85 | """ 86 | Passage = 0 87 | Rigid = 1 88 | Wood = 2 89 | Bomb = 3 90 | Flames = 4 91 | Fog = 5 92 | ExtraBomb = 6 93 | IncrRange = 7 94 | Kick = 8 95 | AgentDummy = 9 96 | Agent0 = 10 97 | Agent1 = 11 98 | Agent2 = 12 99 | Agent3 = 13 100 | 101 | 102 | class GameType(Enum): 103 | """The Game Types. 104 | 105 | FFA: 1v1v1v1. Submit an agent; it competes against other submitted agents. 106 | Team: 2v2. Submit an agent; it is matched up randomly with another agent 107 | and together take on two other similarly matched agents. 108 | TeamRadio: 2v2. Submit two agents; they are matched up against two other 109 | agents. Each team passes discrete communications to each other. 110 | OneVsOne: 1v1. A research environment for dueling between two agents 111 | """ 112 | FFA = 1 113 | Team = 2 114 | TeamRadio = 3 115 | OneVsOne = 4 116 | 117 | 118 | class Action(Enum): 119 | '''The Actions an agent can take''' 120 | Stop = 0 121 | Up = 1 122 | Down = 2 123 | Left = 3 124 | Right = 4 125 | Bomb = 5 126 | 127 | 128 | class Result(Enum): 129 | '''The results available for the end of the game''' 130 | Win = 0 131 | Loss = 1 132 | Tie = 2 133 | Incomplete = 3 134 | 135 | 136 | class InvalidAction(Exception): 137 | '''Invalid Actions Exception''' 138 | pass 139 | -------------------------------------------------------------------------------- /pommerman/envs/__init__.py: -------------------------------------------------------------------------------- 1 | '''Allows each evn to be accessed through this module.''' 2 | from . import v0 3 | from . import v1 4 | from . import v2 5 | -------------------------------------------------------------------------------- /pommerman/envs/v1.py: -------------------------------------------------------------------------------- 1 | """The Pommerman v1 Environment, which implements a collapsing board. 2 | 3 | This environment is the same as v0.py, except that the board will collapse 4 | according to a uniform schedule beginning at the kwarg first_collapse. 5 | 6 | The collapsing works in the following manner: 7 | 1. Set the collapsing schedule. This is uniform starting at step first_collapse 8 | and ending at step max_steps. 9 | 2. Number the rings on the board from 0 to board_size-1 s.t. the outermost ring 10 | is 0 and the innermost ring is board_size-1. The cells in the former are 11 | [[0, i], [i, 0], [board_size-1, i], [i, board_size-1] for i in 12 | [0, board_size-1]] and the latter is, assuming an odd board_size, 13 | [[(board_size-1)/2, (board_size-1)/2]]. 14 | 3. When we are at a step in the collapsing schedule, we take the matching ring 15 | and turn it into rigid walls. This has the effect of destroying any items, 16 | bombs (which don't go off), and agents in those squares. 17 | """ 18 | from .. import constants 19 | from .. import utility 20 | from . import v0 21 | 22 | 23 | class Pomme(v0.Pomme): 24 | '''The second hardest pommerman env. v1 addes a collapsing board.''' 25 | metadata = { 26 | 'render.modes': ['human', 'rgb_array', 'rgb_pixel'], 27 | 'video.frames_per_second': constants.RENDER_FPS 28 | } 29 | 30 | def __init__(self, *args, **kwargs): 31 | super().__init__(*args, **kwargs) 32 | first_collapse = kwargs.get('first_collapse') 33 | self.collapses = list( 34 | range(first_collapse, self._max_steps, 35 | int((self._max_steps - first_collapse) / 4))) 36 | 37 | def _collapse_board(self, ring): 38 | """Collapses the board at a certain ring radius. 39 | 40 | For example, if the board is 13x13 and ring is 0, then the the ring of 41 | the first row, last row, first column, and last column is all going to 42 | be turned into rigid walls. All agents in that ring die and all bombs 43 | are removed without detonating. 44 | 45 | For further rings, the values get closer to the center. 46 | 47 | Args: 48 | ring: Integer value of which cells to collapse. 49 | """ 50 | board = self._board.copy() 51 | 52 | def collapse(r, c): 53 | '''Handles the collapsing of the board. Will 54 | kill of remove any item/agent that is on the 55 | collapsing tile.''' 56 | if utility.position_is_agent(board, (r, c)): 57 | # Agent. Kill it. 58 | num_agent = board[r][c] - constants.Item.Agent0.value 59 | agent = self._agents[num_agent] 60 | agent.die() 61 | if utility.position_is_bomb(self._bombs, (r, c)): 62 | # Bomb. Remove the bomb. Update agent's ammo tally. 63 | new_bombs = [] 64 | for b in self._bombs: 65 | if b.position == (r, c): 66 | b.bomber.incr_ammo() 67 | else: 68 | new_bombs.append(b) 69 | self._bombs = new_bombs 70 | if utility.position_is_flames(board, (r, c)): 71 | self._flames = [f for f in self._flames if f.position != (r,c)] 72 | if (r, c) in self._items: 73 | # Item. Remove the item. 74 | del self._items[(r, c)] 75 | board[r][c] = constants.Item.Rigid.value 76 | 77 | for cell in range(ring, self._board_size - ring): 78 | collapse(ring, cell) 79 | if ring != cell: 80 | collapse(cell, ring) 81 | 82 | end = self._board_size - ring - 1 83 | collapse(end, cell) 84 | if end != cell: 85 | collapse(cell, end) 86 | 87 | return board 88 | 89 | def get_json_info(self): 90 | ret = super().get_json_info() 91 | ret['collapses'] = json.dumps(self.collapses, cls=json_encoder) 92 | return ret 93 | 94 | def set_json_info(self): 95 | super().set_json_info() 96 | self.collapses = json.loads(self._init_game_state['collapses']) 97 | 98 | def step(self, actions): 99 | obs, reward, done, info = super().step(actions) 100 | 101 | for ring, collapse in enumerate(self.collapses): 102 | if self._step_count == collapse: 103 | self._board = self._collapse_board(ring) 104 | break 105 | 106 | return obs, reward, done, info 107 | -------------------------------------------------------------------------------- /pommerman/envs/v2.py: -------------------------------------------------------------------------------- 1 | """The Pommerman v2 Environment, which has communication across the agents. 2 | 3 | The communication works by allowing each agent to send a vector of 4 | radio_num_words (default = 2) from a vocabulary of size radio_vocab_size 5 | (default = 8) to its teammate each turn. These vectors are passed into the 6 | observation stream for each agent. 7 | """ 8 | from gym import spaces 9 | import numpy as np 10 | import json 11 | 12 | from .. import constants 13 | from .. import utility 14 | from . import v0 15 | 16 | 17 | class Pomme(v0.Pomme): 18 | '''The hardest pommerman environment. This class expands env v0 19 | adding communication between agents.''' 20 | metadata = { 21 | 'render.modes': ['human', 'rgb_array', 'rgb_pixel'], 22 | 'video.frames_per_second': constants.RENDER_FPS 23 | } 24 | 25 | def __init__(self, *args, **kwargs): 26 | self._radio_vocab_size = kwargs.get('radio_vocab_size') 27 | self._radio_num_words = kwargs.get('radio_num_words') 28 | if (self._radio_vocab_size and 29 | not self._radio_num_words) or (not self._radio_vocab_size and 30 | self._radio_num_words): 31 | assert ("Include both radio_vocab_size and radio_num_words.") 32 | 33 | self._radio_from_agent = { 34 | agent: (0, 0) 35 | for agent in [ 36 | constants.Item.Agent0, constants.Item.Agent1, 37 | constants.Item.Agent2, constants.Item.Agent3 38 | ] 39 | } 40 | super().__init__(*args, **kwargs) 41 | 42 | def _set_action_space(self): 43 | self.action_space = spaces.Tuple( 44 | tuple([spaces.Discrete(6)] + 45 | [spaces.Discrete(self._radio_vocab_size 46 | )] * self._radio_num_words)) 47 | 48 | def _set_observation_space(self): 49 | """The Observation Space for each agent. 50 | 51 | Total observatiosn: 3*board_size^2 + 12 + radio_vocab_size * radio_num_words: 52 | - all of the board (board_size^2) 53 | - bomb blast strength (board_size^2). 54 | - bomb life (board_size^2) 55 | - agent's position (2) 56 | - player ammo counts (1) 57 | - blast strength (1) 58 | - can_kick (1) 59 | - teammate (one of {AgentDummy.value, Agent3.value}). 60 | - enemies (three of {AgentDummy.value, Agent3.value}). 61 | - radio (radio_vocab_size * radio_num_words) 62 | """ 63 | bss = self._board_size**2 64 | min_obs = [0] * 3 * bss + [0] * 5 + [constants.Item.AgentDummy.value 65 | ] * 4 66 | max_obs = [len(constants.Item)] * bss + [self._board_size 67 | ] * bss + [25] * bss 68 | max_obs += [self._board_size] * 2 + [self._num_items] * 2 + [1] 69 | max_obs += [constants.Item.Agent3.value] * 4 70 | min_obs.extend([0] * self._radio_vocab_size * self._radio_num_words) 71 | max_obs.extend([1] * self._radio_vocab_size * self._radio_num_words) 72 | self.observation_space = spaces.Box( 73 | np.array(min_obs), np.array(max_obs)) 74 | 75 | def get_observations(self): 76 | observations = super().get_observations() 77 | for obs in observations: 78 | obs['message'] = self._radio_from_agent[obs['teammate']] 79 | 80 | self.observations = observations 81 | return observations 82 | 83 | def step(self, actions): 84 | personal_actions = [] 85 | radio_actions = [] 86 | for agent_actions, agent in zip(actions, self._agents): 87 | if type(agent_actions) == int or not agent.is_alive: 88 | personal_actions.append(agent_actions) 89 | radio_actions.append((0, 0)) 90 | elif type(agent_actions) in [tuple, list]: 91 | personal_actions.append(agent_actions[0]) 92 | radio_actions.append( 93 | tuple(agent_actions[1:(1+self._radio_num_words)])) 94 | else: 95 | raise 96 | 97 | self._radio_from_agent[getattr( 98 | constants.Item, 'Agent%d' % agent.agent_id)] = radio_actions[-1] 99 | 100 | return super().step(personal_actions) 101 | 102 | @staticmethod 103 | def featurize(obs): 104 | ret = super().featurize(obs) 105 | message = obs['message'] 106 | message = utility.make_np_float(message) 107 | return np.concatenate((ret, message)) 108 | 109 | def get_json_info(self, json_encoder=utility.PommermanJSONEncoder): 110 | ret = super().get_json_info() 111 | ret['radio_vocab_size'] = json.dumps( 112 | self._radio_vocab_size, cls=json_encoder) 113 | ret['radio_num_words'] = json.dumps( 114 | self._radio_num_words, cls=json_encoder) 115 | 116 | # enum to json dict 117 | radio_from_agent = {} 118 | for agent, radio in self._radio_from_agent.items(): 119 | radio_from_agent.update({agent.name: radio}) 120 | ret['radio_from_agent'] = json.dumps( 121 | radio_from_agent, cls=json_encoder) 122 | 123 | return ret 124 | 125 | def set_json_info(self): 126 | super().set_json_info() 127 | self.radio_vocab_size = json.loads( 128 | self._init_game_state['radio_vocab_size']) 129 | self.radio_num_words = json.loads( 130 | self._init_game_state['radio_num_words']) 131 | 132 | # json dict to enum 133 | radio_from_agent = json.loads(self._init_game_state['radio_from_agent']) 134 | for agent, radio in radio_from_agent.items(): 135 | self._radio_from_agent.update({constants.Item[agent]: radio}) 136 | -------------------------------------------------------------------------------- /pommerman/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Helpers''' 2 | import os 3 | from .. import agents 4 | 5 | USE_GAME_SERVERS = os.getenv("PLAYGROUND_USE_GAME_SERVERS") 6 | GAME_SERVERS = {id_: os.getenv("PLAYGROUND_GAME_INSTANCE_%d" % id_) 7 | for id_ in range(4)} 8 | 9 | 10 | # NOTE: This routine is meant for internal usage. 11 | def make_agent_from_string(agent_string, agent_id, docker_env_dict=None): 12 | '''Internal helper for building an agent instance''' 13 | 14 | agent_type, agent_control = agent_string.split("::") 15 | 16 | assert agent_type in ["player", "playerblock", "simple", "random", "docker", "http" , "test", "tensorforce"] 17 | 18 | agent_instance = None 19 | 20 | if agent_type == "player": 21 | agent_instance = agents.PlayerAgent(agent_control=agent_control) 22 | elif agent_type == "playerblock": 23 | agent_instance = agents.PlayerAgentBlocking(agent_control=agent_control) 24 | elif agent_type == "simple": 25 | agent_instance = agents.SimpleAgent() 26 | elif agent_type == "random": 27 | agent_instance = agents.RandomAgent() 28 | elif agent_type == "docker": 29 | port = agent_id + 1000 30 | if not USE_GAME_SERVERS: 31 | server = 'http://localhost' 32 | else: 33 | server = GAME_SERVERS[agent_id] 34 | assert port is not None 35 | agent_instance = agents.DockerAgent( 36 | agent_control, port=port, server=server, env_vars=docker_env_dict) 37 | elif agent_type == "http": 38 | host, port = agent_control.split(":") 39 | agent_instance = agents.HttpAgent(port=port, host=host) 40 | elif agent_type == "test": 41 | agent_instance = eval(agent_control)() 42 | elif agent_type == "tensorforce": 43 | agent_instance = agents.TensorForceAgent(algorithm=agent_control) 44 | 45 | return agent_instance 46 | -------------------------------------------------------------------------------- /pommerman/network/README.md: -------------------------------------------------------------------------------- 1 | ## IonPlayer (Pommerman network module) 2 | ## Running: 3 | Run the command `ion_client` for running the client and `ion_server` for running the server 4 | ## How does this work: 5 | ``` 6 | Match-making pseudo-code: 7 | Client: 8 | 1. Run wrapper on client which handles network + environment 9 | 2. Connect to Server and send a "match" request 10 | Server: 11 | 1. Receive match request and look for other users who have sent a match request as well 12 | 2. If amount of players is equal to 4 and amount of matches running in parallel aren't more than a specified amount then send an "ready" request to the 4 players and wait 13 | Client: 14 | 3. Respond to "ready" request with another "ready" 15 | Server: 16 | 3A. If ready was not received from a user: Remove user from active players list and go back to step 1 (Look for another pair) 17 | 3B. If ready was received from everyone: Delegate a process to that match 18 | ``` 19 | ``` 20 | Match-processing loop pseudo-code: 21 | Server: 22 | 1. Send observation to all players with timeout 23 | Client: 24 | 1. Send single action integer to server 25 | Server: 26 | 2A. If action was received within timeout then parse it 27 | 2B. If action wasn't received or was received after timeout then issue a STOP action 28 | ``` 29 | ``` 30 | Security considerations: 31 | 1. Isolated channels must be kept for each and every player as to prevent cheating by reading other messages on a single channel 32 | 2. In addition to 1 everything should also work on a single port 33 | Both of these can be easily handled using WebSocket (https://en.wikipedia.org/wiki/WebSocket) 34 | ``` 35 | ## The network code originated from the following repositories: 36 | * ionclient - https://github.com/PixelyIon/ionplayer-client 37 | * ionserver - https://github.com/PixelyIon/ionplayer-server -------------------------------------------------------------------------------- /pommerman/network/__init__.py: -------------------------------------------------------------------------------- 1 | """Import the network modules""" 2 | from . import client 3 | from . import server 4 | -------------------------------------------------------------------------------- /pommerman/network/client/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """IonClient Entry Point 3 | 4 | This library allows playing of matches on network via the WebSockets protocol. 5 | Functions: 6 | init() - If you want to run the application normally 7 | intent(network) - If you want to pass a pre-existing network object but want to 8 | prompt the user about what they want to do 9 | match(network, room=False, agent=False, ui_en=False) - If you want 10 | to start a match directly 11 | replay(network, id=False, ui_en=False) - If you want to start a replay directly 12 | """ 13 | 14 | import ui 15 | from . import constants 16 | from .network import Network 17 | import signal 18 | import sys 19 | import os 20 | import pommerman 21 | import importlib 22 | import gym 23 | import numpy 24 | 25 | 26 | def _exit_handler(_s=None, _h=None): 27 | """Arguments: 28 | * _s: Unused argument 29 | * _h: Unused argument 30 | Description: Handle exiting the application""" 31 | ui.info(ui.yellow, "Exiting..") 32 | exit(0) 33 | 34 | 35 | def init(): 36 | """Description: Starts up the application normally by asking the user about 37 | the server they want to connect to""" 38 | if ui.ask_yes_no(constants.Strings.server_prompt.value): 39 | domain = ui.ask_string(constants.Strings.server_ip.value) 40 | if domain is None: 41 | ui.fatal(constants.Exceptions.invalid_ip.value) 42 | else: 43 | domain = "play.pommerman.com:5050" 44 | ui.info( 45 | constants.Strings.server_connecting_p1.value, 46 | ui.yellow, 47 | constants.Strings.server_connecting_p2.value, 48 | ui.reset, 49 | constants.Strings.server_connecting_p3.value, 50 | ) 51 | network = Network(domain) 52 | try: 53 | status = network.server_status() 54 | except Exception as e: 55 | ui.fatal(e) 56 | signal.signal(signal.SIGINT, _exit_handler) 57 | ui.info( 58 | constants.Strings.server_connected.value, 59 | ui.yellow, 60 | constants.Strings.server_players.value, 61 | str(status[0]) + ",", 62 | constants.Strings.server_matches.value, 63 | status[1], 64 | ) 65 | intent(network) 66 | 67 | 68 | def _agent_prompt(): 69 | """Description: Prompt the user to import their agent""" 70 | sys.path.append(os.getcwd()) 71 | agent = importlib.import_module(ui.ask_string(constants.Strings.match_import.value)) 72 | agent_class = ui.ask_string(constants.Strings.match_class_name.value) 73 | if agent_class not in agent.__dir__(): 74 | ui.fatal(constants.Strings.error_invalid_class.value) 75 | agent = getattr(agent, agent_class) 76 | if getattr(agent, "act"): 77 | ui.info(ui.green, constants.Strings.match_agent_success.value) 78 | return agent 79 | 80 | 81 | def intent(network): 82 | """Description: This creates a prompt for the user where they can choose to: 83 | * Play a match 84 | * Create/Join a room 85 | * Replay a match 86 | * Exit the application 87 | Arguments: 88 | * network: An `network`(pommerman.network.ion_client.network) object 89 | """ 90 | i = ui.ask_choice( 91 | constants.Strings.intent.value, 92 | [ 93 | constants.Strings.intent_match.value, 94 | constants.Strings.intent_room.value, 95 | constants.Strings.intent_replay.value, 96 | constants.Strings.intent_exit.value, 97 | ], 98 | ) 99 | if i == constants.Strings.intent_match.value: 100 | agent = _agent_prompt() 101 | match(network, agent=agent, ui_en=True) 102 | elif i == constants.Strings.intent_room.value: 103 | room = str(ui.ask_string(constants.Strings.room_code.value)) 104 | agent = _agent_prompt() 105 | match(network, room=room, agent=agent, ui_en=True) 106 | elif i == constants.Strings.intent_replay.value: 107 | replay(network, ui_en=True) 108 | elif i == constants.Strings.intent_exit.value: 109 | exit(0) 110 | 111 | 112 | def match(network, room=False, agent=False, ui_en=False): 113 | """Description: This facilitates playing a match 114 | Arguments: 115 | * network: An `network`(pommerman.network.ion_client.network) object 116 | * room: If String, The room to be created/joined. If False, the public \ 117 | room will be joined 118 | * agent: The class of the agent should be a derivative of BaseAgent 119 | * ui_en: If the UI is enabled or disabled (This also controls if exception \ 120 | are raised or not) 121 | Returns: Array [reward, match_id]""" 122 | agent = agent() 123 | if ui_en: 124 | ui.info(ui.yellow, constants.Strings.server_comm.value) 125 | try: 126 | network.join_list(room) 127 | except Exception as e: 128 | if ui_en: 129 | ui.fatal(e) 130 | raise e 131 | if ui_en: 132 | ui.info(constants.Strings.match_variant.value, ui.yellow, network.mode) 133 | ui.info(ui.yellow, constants.Strings.match_wait.value) 134 | try: 135 | network.wait_match() 136 | except Exception as e: 137 | if ui_en: 138 | ui.fatal(e) 139 | raise e 140 | if ui_en: 141 | ui.info(constants.Strings.match_run.value, "#" + network.match_id) 142 | for mode in pommerman.constants.GameType: 143 | if mode.name in network.mode: 144 | agent.init_agent( 145 | id=0, game_type=mode 146 | ) # We always use ID as 0 as the server doesn't return it 147 | while True: 148 | try: 149 | match_obj = network.match_get() 150 | except Exception as e: 151 | if ui_en: 152 | ui.fatal(e) 153 | raise e 154 | # match_obj[0] is the intent: 0 = OBS, 1 = Agent Dead, 2 = Match End 155 | if match_obj[0] is 0: 156 | action = agent.act(match_obj[1], gym.spaces.Discrete(6)) 157 | try: 158 | network.send_move(action, match_obj[2]) 159 | except Exception as e: 160 | if ui_en: 161 | ui.fatal(e) 162 | raise e 163 | elif match_obj[0] is 2: 164 | agent.episode_end(reward=match_obj[1]) 165 | if ui_en: 166 | if match_obj[1] == 1: 167 | ui.info(constants.Strings.match_won.value) 168 | if match_obj[1] == -1: 169 | ui.info(constants.Strings.match_loss_draw.value) 170 | ui.info( 171 | constants.Strings.match_agent.value, 172 | ui.yellow, 173 | pommerman.constants.Item(match_obj[2]).name, 174 | ) 175 | else: 176 | return [match_obj[1], network.match_id] 177 | break 178 | ui.info(constants.Strings.match_replay.value, ui.yellow, network.match_id) 179 | if ui.ask_yes_no(constants.Strings.match_ask_replay.value): 180 | replay(network, network.match_id) 181 | else: 182 | intent(network) 183 | 184 | 185 | def replay(network, id=False, ui_en=False): 186 | """Description: This replays a particular match 187 | Arguments: 188 | * network: An `network`(pommerman.network.ion_client.network) object 189 | * id: The ID of a match to be played. If False, the user is prompted about \ 190 | it. 191 | * ui_en: If the UI is enabled or disabled (This also controls if exception are\ 192 | raised or not)""" 193 | if not id and ui_en: 194 | id = ui.ask_string(constants.Strings.replay_prompt.value) 195 | if id is None: 196 | ui.fatal(constants.Strings.error_invalid_id.value) 197 | id = str(id) 198 | if id[0] == "#": 199 | id = id[1:] 200 | ui.info( 201 | constants.Strings.server_replay_p1.value, 202 | ui.yellow, 203 | "#" + str(id), 204 | ui.reset, 205 | constants.Strings.server_replay_p2.value, 206 | ) 207 | try: 208 | replay_obj = network.get_replay(id) 209 | except Exception as e: 210 | if ui_en: 211 | ui.fatal(e) 212 | raise e 213 | if ui_en: 214 | ui.info(constants.Strings.replay_start.value, ui.yellow, "#" + str(id)) 215 | env = pommerman.make( 216 | replay_obj["mode"], 217 | [ 218 | pommerman.agents.BaseAgent(), 219 | pommerman.agents.BaseAgent(), 220 | pommerman.agents.BaseAgent(), 221 | pommerman.agents.BaseAgent(), 222 | ], 223 | ) 224 | env.reset() 225 | env._board = numpy.array(replay_obj["board"]) 226 | # Note: Render FPS is set to 30 as it'll be smoother 227 | env._render_fps = 30 228 | for i in replay_obj["actions"]: 229 | env.render() 230 | reward, done = env.step(i)[1:3] 231 | if done: 232 | break 233 | if reward != replay_obj["reward"]: 234 | if ui_en: 235 | ui.info(ui.yellow, constants.Exceptions.replay_no_reward.value) 236 | else: 237 | raise Exception(constants.Exceptions.replay_no_reward.value) 238 | env.close() 239 | if ui_en: 240 | ui.info(ui.yellow, constants.Strings.replay_end.value) 241 | intent(network) 242 | 243 | 244 | if __name__ == "__main__": 245 | init() 246 | -------------------------------------------------------------------------------- /pommerman/network/client/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This holds all of the constants used by ion_client""" 3 | 4 | import enum 5 | 6 | 7 | class Strings(enum.Enum): 8 | """Define all the strings""" 9 | server_prompt = "Connect to custom server ?" 10 | server_ip = "Enter IP of server to connect to:" 11 | server_connecting_p1 = "Connecting to" 12 | server_connecting_p2 = "IonPlayer" 13 | server_connecting_p3 = "servers" 14 | server_connected = "Connected to server:" 15 | server_players = "Concurrent players:" 16 | server_matches = "Concurrent matches:" 17 | server_comm = "Communicating with server ✈" 18 | server_replay_p1 = "Retrieving match" 19 | server_replay_p2 = "from server" 20 | intent = "What do you want to do ?" 21 | intent_match = "Join the public room" 22 | intent_room = "Join a room" 23 | intent_replay = "Look at a replay" 24 | intent_exit = "Quit the application" 25 | room_code = "Enter the name of the room you want to join/create" 26 | match_import = "Input the module which has your agent's class. EG: pommerman.agents" 27 | match_class_name = "Input the name of your agent's class (So class.act() is valid). EG: SimpleAgent" 28 | match_agent_success = "The agent was successfully imported" 29 | match_variant = "The variant of Pommerman used:" 30 | match_wait = "Waiting for Server to allocate players to match.." 31 | match_run = "Running match.." 32 | match_won = "Your agent has won!" 33 | match_loss_draw = "Your agent has either lost or there was a draw" 34 | match_replay = "You can now view the replay using the match ID:" 35 | match_ask_replay = "Do you want to replay the current match ?" 36 | match_agent = "Your agent was" 37 | replay_prompt = "Input the ID of the match ?" 38 | replay_start = "Replaying match.." 39 | replay_nomatch = "The reward of the replay doesn't match that of the game ?" 40 | replay_end = "The replay is over." 41 | error_no_recv = "The observation wasn't successfully retrieved from the server" 42 | 43 | 44 | class NetworkCommands(enum.Enum): 45 | """Define all the network commands""" 46 | check = 0 47 | ping = 1 48 | match = 2 49 | room = 3 50 | match_start = 4 51 | match_act = 5 52 | match_end = 6 53 | replay = 7 54 | status_ok = 10 55 | status_fail = 11 56 | status_full = 12 57 | status_reg = 13 58 | 59 | 60 | class Exceptions(enum.Enum): 61 | """Define all the exceptions""" 62 | net_connect_fail = "Couldn't connect to the server" 63 | net_respond_fail = "The server didn't respond correctly" 64 | net_invalid_response = "The server sent an invalid response" 65 | net_server_full = "The server is full" 66 | net_server_closed = "The connection to the server was closed" 67 | match_full = "The maximum amount of concurrent matches on the has been exceeded" 68 | room_full = "The room is full" 69 | replay_notfound = "Couldn't find replay on server" 70 | replay_no_reward = "The current reward doesn't match the expected reward" 71 | invalid_ip = "The provided IP is invalid" 72 | invalid_id = "The provided match ID is invalid" 73 | invalid_room = "The provided room name is invalid" 74 | invalid_class = "Cannot find class in module file" 75 | -------------------------------------------------------------------------------- /pommerman/network/client/network.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """IonClient Network Manager 3 | 4 | This file contains the Network class that can be used directly. It is also 5 | thread-safe so feel free to run multiple network clients""" 6 | 7 | import pommerman 8 | import websocket 9 | from . import constants 10 | import rapidjson 11 | import threading 12 | import gzip 13 | import numpy 14 | 15 | 16 | class Network(object): 17 | """This class is responsible for handling communication b/w Client 18 | and Server""" 19 | 20 | def __init__(self, ip): 21 | """Arguments: 22 | * ip: The IP of the server""" 23 | self.ws_ = websocket.create_connection( 24 | "ws://" + str(ip)) 25 | self.lock = threading.Lock() 26 | 27 | def server_status(self): 28 | """Description: Retrieves the status of the server""" 29 | self._send(intent=constants.NetworkCommands.check.value) 30 | message_recieved = self._recieve() 31 | if message_recieved[ 32 | "intent"] == constants.NetworkCommands.status_ok.value: 33 | return [message_recieved["players"], message_recieved["matches"]] 34 | elif message_recieved[ 35 | "intent"] == constants.NetworkCommands.status_full.value: 36 | raise Exception(constants.Exceptions.net_server_full.value) 37 | else: 38 | raise Exception(constants.Exceptions.net_respond_fail.value) 39 | 40 | def join_list(self, room=False): 41 | """Description: Check if the server actually responds 42 | Arguments: 43 | * room: The room to be created/joined. If False, the public room will \ 44 | be joined, it should be a String""" 45 | if not room: 46 | self._send(intent=constants.NetworkCommands.match.value) 47 | else: 48 | self._send( 49 | intent=constants.NetworkCommands.room.value, room=str(room)) 50 | message_recieved = self._recieve() 51 | if message_recieved[ 52 | "intent"] == constants.NetworkCommands.status_full.value: 53 | if room: 54 | raise Exception(constants.Exceptions.room_full.value) 55 | else: 56 | raise Exception(constants.Exceptions.match_full.value) 57 | self.id = message_recieved["player_id"] 58 | self.mode = message_recieved["mode"] 59 | 60 | def wait_match(self): 61 | """Description: Wait for a response from the server regarding a match 62 | request""" 63 | message_recieved = self._recieve() 64 | if message_recieved[ 65 | "intent"] == constants.NetworkCommands.match_start.value: 66 | self.match_id = message_recieved["match_id"] 67 | 68 | def match_get(self): 69 | """Description: Get the next step of the match 70 | Return values(Format: "0th element - Meaning - Other elements"): 71 | * 0 - Agent is still alive - OBS and Turn ID 72 | * 1 - Agent is dead - Nothing 73 | * 2 - Match has ended - Reward and agent item ID correlating to \ 74 | pommerman.constants.Items 75 | """ 76 | self.lock.acquire() 77 | try: 78 | message_recieved = self.ws_.recv() 79 | except: 80 | raise Exception(constants.Exceptions.net_respond_fail.value) 81 | finally: 82 | self.lock.release() 83 | try: 84 | # Messages with normal match data are compressed using GZIP while 85 | # match end notifications aren't. So we move on to that if this 86 | # fails to decompress with gzip. 87 | message_decoded = rapidjson.loads( 88 | str(gzip.decompress(message_recieved), "utf-8")) 89 | except: 90 | try: 91 | message_decoded = rapidjson.loads(message_recieved) 92 | if message_decoded[ 93 | "intent"] == constants.NetworkCommands.match_end.value: 94 | return [ 95 | 2, 96 | int(message_decoded["reward"]), 97 | int(message_decoded["agent"]) 98 | ] 99 | except: 100 | raise Exception(constants.Exceptions.net_invalid_response.value) 101 | # Info: message_decoded - ["d"]=Dead, ["o"]=OBS, ["i"] = Turn ID 102 | if message_decoded["d"]: 103 | return [1] 104 | obs = message_decoded["o"] 105 | obs["teammate"] = pommerman.constants.Item[obs["teammate"]] 106 | # Note: If position is not tuple SimpleAgent *will* error out 107 | obs["position"] = tuple(obs["position"]) 108 | for x, y in enumerate(obs["enemies"]): 109 | obs["enemies"][x] = pommerman.constants.Item[y] 110 | for i in ["board", "bomb_life", "bomb_blast_strength"]: 111 | obs[i] = numpy.asarray(obs[i]) 112 | return [0, obs, message_decoded["i"]] 113 | 114 | def send_move(self, action, turn_id): 115 | """Description: Send the action to the server for playing out 116 | Arguments: 117 | * action: The action that has to be sent 118 | * turn_id: The ID of the step taken by the server (To sync up the \ 119 | action of the agent and server)""" 120 | self._send( 121 | intent=constants.NetworkCommands.match_act.value, 122 | player_id=self.id, 123 | act=action, 124 | match_id=self.match_id, 125 | turn_id=turn_id) 126 | 127 | def get_replay(self, id): 128 | """Description: Send the action to the server for playing out 129 | Arguments: 130 | * id: The ID of the match to be replayed""" 131 | self._send(intent=constants.NetworkCommands.replay.value, replay_id=id) 132 | try: 133 | status, replay = rapidjson.loads( 134 | str(gzip.decompress(self.ws_.recv()), "utf-8")) 135 | except ValueError: 136 | raise Exception(constants.Exceptions.replay_notfound.value) 137 | if status == constants.NetworkCommands.status_ok.value: 138 | return replay 139 | else: 140 | raise Exception(constants.Exceptions.replay_notfound.value) 141 | 142 | def _send(self, **kwargs): 143 | self.lock.acquire() 144 | try: 145 | self.ws_.send(rapidjson.dumps(kwargs)) 146 | except: 147 | raise Exception(constants.Exceptions.net_server_closed.value) 148 | finally: 149 | self.lock.release() 150 | 151 | def _recieve(self): 152 | self.lock.acquire() 153 | try: 154 | message_recieved = self.ws_.recv() 155 | except: 156 | if not self.ws_.connected: 157 | raise Exception(constants.Exceptions.net_server_closed.value) 158 | else: 159 | raise Exception(constants.Exceptions.net_invalid_response.value) 160 | finally: 161 | self.lock.release() 162 | try: 163 | return rapidjson.loads(message_recieved) 164 | except: 165 | raise Exception(constants.Exceptions.net_invalid_response.value) -------------------------------------------------------------------------------- /pommerman/network/server/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """IonServer - This library allows playing of matches on network via the 3 | WebSockets protocol. 4 | Functions: 5 | init() - If you want to run the application normally 6 | run(max_players, max_matches, port, timeout, mode, ui_en=True, 7 | exit_handler=True) - If you want to programatically launch the server with 8 | predefined parameters""" 9 | 10 | import ui 11 | import multiprocessing 12 | from . import constants 13 | from . import network 14 | from . import match 15 | import time 16 | import random 17 | import signal 18 | import pommerman 19 | 20 | MATCH_SUBPROCESS = [] 21 | 22 | 23 | def _exit_handler(subprocess_net): 24 | """Description: Return the exit handler with a reference to the subprocess_net 25 | variable.""" 26 | 27 | def exit_handler(_s, _h): 28 | """Description: Handle exiting the application.""" 29 | ui.info(ui.yellow, "Exiting..") 30 | subprocess_net.terminate() 31 | for i in MATCH_SUBPROCESS: 32 | i.terminate() 33 | exit(0) 34 | 35 | return exit_handler 36 | 37 | 38 | def init(): 39 | """Description: Initiate the application by asking questions.""" 40 | ui.info(ui.yellow, constants.Strings.sever_starting.value) 41 | port = int(ui.ask_string(constants.Strings.server_port.value)) 42 | max_players = int(ui.ask_string(constants.Strings.server_maxp.value)) 43 | if max_players < 4: 44 | # If the maximum players allowed on the server is less than 4 45 | # which is the minimum required for a pommerman match then 46 | # notify the user about that and quit. 47 | ui.fatal(ui.yellow, constants.Strings.server_playercount_too_low.value) 48 | modes = [] 49 | for i in pommerman.configs.__dir__(): 50 | if i[-4:] == "_env": 51 | id = getattr(pommerman.configs, i)()["env_id"] 52 | if id[-2:] != "v2": 53 | modes.append(id) 54 | timeout = float(ui.ask_string(constants.Strings.server_timeout.value)) 55 | mode = str(ui.ask_choice(constants.Strings.server_mode.value, modes)) 56 | run(port, max_players, timeout, mode, ui_en=True, exit_handler=True) 57 | 58 | 59 | def run(port, 60 | max_players, 61 | timeout, 62 | mode, 63 | max_matches=False, 64 | ui_en=False, 65 | exit_handler=False): 66 | """Description: This function is responsible for running the server. 67 | Arguments: 68 | * port: The port used by the server 69 | * max_players: The maximum amount of concurrent players 70 | * timeout: (In Seconds) The time to wait before issuing the STOP action 71 | * mode: The flavor of pommerman 72 | * max_matches: The maximum amount of concurrent matches (If not defined this \ 73 | is set to int(max_players/4)) 74 | * ui_en: If True, UI is enabled else UI is disabled 75 | * exit_handler: If True, the exit handler is set else the exit handler \ 76 | isn't set""" 77 | netpipe, rnetpipe = multiprocessing.Pipe() 78 | netqueue = multiprocessing.Queue() 79 | subprocess_net = multiprocessing.Process( 80 | target=network.thread, 81 | args=(rnetpipe, netqueue, port, max_players, mode, timeout), daemon=True) 82 | subprocess_net.start() 83 | if not max_matches: 84 | max_matches = int(max_players / 4) 85 | if exit_handler: 86 | signal.signal(signal.SIGINT, _exit_handler(subprocess_net)) 87 | if ui_en: 88 | ui.info(ui.yellow, constants.Strings.server_ready.value, ui.white, 89 | ui.Symbol("✔", ":)")) 90 | while True: 91 | netpipe.send([constants.SubprocessCommands.get_players.value]) 92 | concurrent_list, num_players, num_matches = netpipe.recv() 93 | if int(num_matches) < max_matches: 94 | for x in list(concurrent_list["room"].keys()): 95 | i = concurrent_list["room"][x] 96 | if len(i) >= 4: 97 | MATCH_SUBPROCESS.append(_create_match(i, netqueue, mode)) 98 | del concurrent_list["room"][x] 99 | if len(concurrent_list["noroom"]) >= 4: 100 | e = random.sample(concurrent_list["noroom"], 101 | (int(len(concurrent_list["noroom"]) / 4) * 4)) 102 | for group in range(int(len(concurrent_list["noroom"]) / 4)): 103 | MATCH_SUBPROCESS.append( 104 | _create_match(e[group * 4:(group + 1) * 4], netqueue, 105 | mode)) 106 | for player in e[group * 4:(group + 1) * 4]: 107 | del concurrent_list["noroom"][concurrent_list["noroom"] 108 | .index(player)] 109 | netpipe.send( 110 | [constants.SubprocessCommands.update_cc.value, concurrent_list]) 111 | if ui_en: 112 | ui.info( 113 | "\033[2K\r", 114 | ui.white, 115 | constants.Strings.server_players.value, 116 | ui.yellow, 117 | "[", 118 | num_players, 119 | "/", 120 | max_players, 121 | "]", 122 | ui.white, 123 | constants.Strings.server_matches.value, 124 | ui.yellow, 125 | "[", 126 | num_matches, 127 | "/", 128 | max_matches, 129 | "]", 130 | end="") 131 | for process in tuple(MATCH_SUBPROCESS): 132 | if not process.is_alive(): 133 | MATCH_SUBPROCESS.remove(process) 134 | time.sleep(2) 135 | 136 | 137 | def _create_match(players, queue_subproc, mode): 138 | """Description: This function is responsible for creating a match""" 139 | subprocess = multiprocessing.Process( 140 | target=match.thread, args=(players, queue_subproc, mode), daemon=True) 141 | subprocess.start() 142 | return subprocess 143 | 144 | 145 | if __name__ == "__main__": 146 | multiprocessing.freeze_support() 147 | init() 148 | -------------------------------------------------------------------------------- /pommerman/network/server/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This holds all of the constants used by ion_server""" 3 | 4 | import enum 5 | 6 | 7 | class Strings(enum.Enum): 8 | """Define all the strings""" 9 | sever_starting = "Server is being initiated.." 10 | server_ready = "Server is ready" 11 | server_players = "Concurrent players" 12 | server_matches = "Concurrent matches" 13 | server_exit_prompt = "Are you sure you want to stop the server ?" 14 | server_exit = "The server is quitting.." 15 | server_port = "What port would you want the server to start on ?" 16 | server_maxp = "What's the maximum amount of players that can be concurrently connected to the server ?" 17 | server_playercount_too_low = "Input a number greater than 4" 18 | server_timeout = "What's the timeout for the player response (In seconds) ?" 19 | server_mode = "Which variant of Pommerman would you like the Server to run ?" 20 | 21 | 22 | class SubprocessCommands(enum.Enum): 23 | """Define all the sub-process commands""" 24 | get_players = 0 25 | update_cc = 1 26 | match_next = 2 27 | player_drop = 3 28 | match_end = 4 29 | 30 | 31 | class NetworkCommands(enum.Enum): 32 | """Define all the network commands""" 33 | check = 0 34 | ping = 1 35 | match = 2 36 | room = 3 37 | match_start = 4 38 | match_act = 5 39 | match_end = 6 40 | replay = 7 41 | status_ok = 10 42 | status_fail = 11 43 | status_full = 12 44 | status_reg = 13 45 | -------------------------------------------------------------------------------- /pommerman/network/server/match.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """IonServer Match handler 3 | 4 | This contains functions responsible for playing matches 5 | (You shouldn't use this file directly due to the very specialized 6 | interactions required for it to function in addition to parameters 7 | i.e: Pipes, Queues)""" 8 | 9 | import multiprocessing 10 | from . import constants 11 | import uuid 12 | import os 13 | import rapidjson 14 | import gzip 15 | import enum 16 | import pommerman 17 | import numpy 18 | 19 | 20 | def unique_uuid(dir): 21 | """Generates a unique UUID and checks for collision with files within the 22 | specified directory (So we don't override a pre-existing file)""" 23 | try: 24 | ls_dir = os.listdir(dir) 25 | except FileNotFoundError: 26 | os.makedirs(dir) 27 | ls_dir = [] 28 | uuid_ = str(uuid.uuid4())[:10] 29 | while uuid_ + ".json" in ls_dir: 30 | uuid_ = str(uuid.uuid4())[:10] 31 | return uuid_ 32 | 33 | 34 | def resolve_classes(i): 35 | """Resolves observation into JSONable types by looping over every element 36 | in it""" 37 | if isinstance(i, tuple): 38 | i = list(i) 39 | for key, value in enumerate(i): 40 | if isinstance(i, dict): 41 | key = value 42 | value = i[key] 43 | if hasattr(value, '__iter__') and not isinstance( 44 | i[key], str) and not isinstance(i[key], numpy.ndarray): 45 | i[key] = resolve_classes(value) 46 | elif isinstance(value, enum.Enum): 47 | i[key] = str(value.name) 48 | elif isinstance(value, numpy.ndarray): 49 | i[key] = value.tolist() 50 | elif isinstance(value, numpy.uint8) or isinstance(value, numpy.int64): 51 | i[key] = int(value) 52 | return i 53 | 54 | 55 | def thread(players, queue_subproc, mode): 56 | """Handles running of the match loop""" 57 | uuid_ = unique_uuid("matches") 58 | base_agent = pommerman.agents.BaseAgent 59 | env = pommerman.make( 60 | mode, 61 | [base_agent(), base_agent(), 62 | base_agent(), base_agent()]) 63 | net, net_end = multiprocessing.Pipe() 64 | queue_subproc.put([net_end, players, uuid_]) 65 | obs = env.reset() 66 | record = { 67 | "board": numpy.array(env._board, copy=True).tolist(), 68 | "actions": [], 69 | "mode": str(mode) 70 | } 71 | done = False 72 | while not done: 73 | obs_res = resolve_classes(obs.copy()) 74 | turn_id = str(uuid.uuid4())[:5] 75 | try: 76 | obs_bytes = [] 77 | for key, value in enumerate(obs_res): 78 | if 10 + key in obs[0]["alive"]: 79 | obs_bytes.append( 80 | gzip.compress( 81 | bytes( 82 | rapidjson.dumps({ 83 | "o": value, # o = obs 84 | "i": turn_id, # i = Turn ID 85 | "d": False # d = Dead 86 | }), 87 | "utf8"))) 88 | else: 89 | obs_bytes.append( 90 | gzip.compress( 91 | bytes( 92 | rapidjson.dumps({ 93 | "d": True # d = Dead 94 | }), 95 | "utf8"))) 96 | net.send([ 97 | constants.SubprocessCommands.match_next.value, turn_id, 98 | obs_bytes, 99 | len(obs[0]["alive"]) 100 | ]) 101 | act = net.recv() 102 | except: 103 | act = [0, 0, 0, 0] 104 | record["actions"].append(numpy.array(act, copy=True).tolist()) 105 | obs, rew, done = env.step(act)[:3] 106 | record["reward"] = rew 107 | env.close() 108 | with open("./matches/" + uuid_ + ".json", "w") as file: 109 | rapidjson.dump(record, file) 110 | net.send([constants.SubprocessCommands.match_end.value, rew]) 111 | net.recv() 112 | exit(0) 113 | -------------------------------------------------------------------------------- /pommerman/network/server/network.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """IonServer Network handler 3 | 4 | This contains functions responsible Server-Client communication 5 | (You shouldn't use this file directly due to the very specialized 6 | interactions required for it to function in addition to parameters 7 | i.e: Pipes, Queues. This is the reason for the simple docstrings 8 | for functions)""" 9 | 10 | import asyncio 11 | import websockets 12 | import threading 13 | import time 14 | from . import constants 15 | import os 16 | import re 17 | import gzip 18 | import rapidjson 19 | import uuid 20 | 21 | CONCURRENTLY_LOOKING = { 22 | "room": {}, 23 | "noroom": [] 24 | } # This holds the IDs of players concurrently looking for a room 25 | PLAYER_WS = {} # This stores the mapping from player ID to the websocket object 26 | MATCH_PROCESS = {} # This holds pipes to match processes 27 | MAX_PLAYERS = 0 28 | PIPE_MAIN = False # This holds the queue (Main-proc <-> Network-proc) 29 | QUEUE_SUBPROC = False # This holds the queue (Subproc <-> Network-proc) 30 | MODE = "" 31 | STOP_TIMEOUT = 0 32 | 33 | 34 | async def message_parse(message, websocket): 35 | """Parse the messages recieved from the clients""" 36 | if message["intent"] is constants.NetworkCommands.check.value: 37 | await websocket.send( 38 | rapidjson.dumps({ 39 | "intent": 40 | constants.NetworkCommands.status_ok.value, 41 | "players": 42 | len(PLAYER_WS), 43 | "matches": 44 | len(MATCH_PROCESS) 45 | })) 46 | elif message["intent"] is constants.NetworkCommands.match_act.value: 47 | if message["turn_id"] == MATCH_PROCESS[message["match_id"]]["turn_id"]: 48 | # Note: The statements below assign the action to the respective players 49 | MATCH_PROCESS[message["match_id"]]["act"][ 50 | MATCH_PROCESS[message["match_id"]]["players"].index( 51 | message["player_id"])] = message["act"] 52 | MATCH_PROCESS[message["match_id"]]["recv"][MATCH_PROCESS[message[ 53 | "match_id"]]["players"].index(message["player_id"])] = True 54 | elif message["intent"] is constants.NetworkCommands.replay.value: 55 | try: 56 | with open( 57 | os.path.join( 58 | os.path.join(os.getcwd(), "matches"), 59 | str(message["replay_id"]) + ".json"), 'r') as f: 60 | # Note: Registry expression match comes after as it's an expensive operation as compared to file I/O 61 | if re.fullmatch("^[a-z0-9-]*$", 62 | message["replay_id"]) is not None: 63 | f = rapidjson.load(f) 64 | await websocket.send( 65 | gzip.compress( 66 | bytes( 67 | rapidjson.dumps([ 68 | constants.NetworkCommands.status_ok.value, f 69 | ]), "utf8"))) 70 | else: 71 | await websocket.send( 72 | gzip.compress( 73 | bytes( 74 | rapidjson.dumps([ 75 | constants.NetworkCommands.status_fail.value 76 | ]), "utf8"))) 77 | except: 78 | await websocket.send( 79 | gzip.compress( 80 | bytes( 81 | rapidjson.dumps( 82 | [constants.NetworkCommands.status_fail.value]), 83 | "utf8"))) 84 | elif message["intent"] in [ 85 | constants.NetworkCommands.match.value, 86 | constants.NetworkCommands.room.value 87 | ]: 88 | if len(PLAYER_WS) >= MAX_PLAYERS: 89 | await websocket.send( 90 | rapidjson.dumps({ 91 | "intent": 92 | constants.NetworkCommands.status_full.value 93 | })) 94 | return 95 | uuid_ = str(uuid.uuid4()) 96 | while uuid_ in PLAYER_WS: 97 | uuid_ = str(uuid.uuid4()) 98 | PLAYER_WS[uuid_] = {"ws": websocket} 99 | if message["intent"] is constants.NetworkCommands.match.value: 100 | CONCURRENTLY_LOOKING["noroom"].append(uuid_) 101 | PLAYER_WS[uuid_]["noroom"] = True 102 | elif message["intent"] is constants.NetworkCommands.room.value: 103 | if message["room"] in CONCURRENTLY_LOOKING["room"]: 104 | if len(CONCURRENTLY_LOOKING["room"][message["room"]]) <= 4: 105 | CONCURRENTLY_LOOKING["room"][message["room"]].append(uuid_) 106 | else: 107 | await websocket.send( 108 | rapidjson.dumps({ 109 | "intent": 110 | constants.NetworkCommands.status_full.value 111 | })) 112 | return 113 | else: 114 | CONCURRENTLY_LOOKING["room"][message["room"]] = [uuid_] 115 | PLAYER_WS[uuid_]["noroom"] = False 116 | PLAYER_WS[uuid_]["room"] = str(message["room"]) 117 | await websocket.send( 118 | rapidjson.dumps({ 119 | "intent": 120 | constants.NetworkCommands.status_reg.value, 121 | "player_id": 122 | uuid_, 123 | "mode": 124 | MODE 125 | })) 126 | 127 | 128 | async def ws_handler(websocket, pth=None): # pylint: disable=unused-argument 129 | """Handle the messages recieved by WebSocket (pth is not required but still\ 130 | returned by the 'websockets' library)""" 131 | try: 132 | async for message in websocket: 133 | try: 134 | await message_parse(rapidjson.loads(message), websocket) 135 | except: 136 | pass 137 | except websockets.exceptions.ConnectionClosed: 138 | pass 139 | 140 | 141 | async def program_loop(): 142 | """Handles other network-related function""" 143 | global CONCURRENTLY_LOOKING 144 | while (True): 145 | try: 146 | for uuid_ in list(PLAYER_WS.keys()): 147 | i = PLAYER_WS[uuid_] 148 | if not i["ws"].open: 149 | if i["noroom"] is True: 150 | try: 151 | del CONCURRENTLY_LOOKING["noroom"][CONCURRENTLY_LOOKING[ 152 | "noroom"].index(uuid_)] 153 | except: 154 | pass 155 | elif i["noroom"] is False: 156 | try: 157 | del CONCURRENTLY_LOOKING["room"][i["room"]][ 158 | CONCURRENTLY_LOOKING["room"][i["room"]].index( 159 | uuid_)] 160 | except: 161 | pass 162 | try: 163 | del PLAYER_WS[uuid_] 164 | except: 165 | pass 166 | if PIPE_MAIN.poll(): 167 | queue_msg = PIPE_MAIN.recv() 168 | if queue_msg[0] is constants.SubprocessCommands.get_players.value: 169 | PIPE_MAIN.send( 170 | [CONCURRENTLY_LOOKING, 171 | len(PLAYER_WS), 172 | len(MATCH_PROCESS)]) 173 | elif queue_msg[0] is constants.SubprocessCommands.update_cc.value: 174 | CONCURRENTLY_LOOKING = queue_msg[1] 175 | if not QUEUE_SUBPROC.empty(): 176 | queue_msg = QUEUE_SUBPROC.get() 177 | MATCH_PROCESS[queue_msg[2]] = { 178 | "pipe": queue_msg[0], 179 | "players": queue_msg[1], 180 | "match_id": queue_msg[2], 181 | "free": False, 182 | "delete": False 183 | } 184 | for i in queue_msg[1]: 185 | if i in PLAYER_WS: # If the players didn't quits during matching 186 | await PLAYER_WS[i]["ws"].send( 187 | rapidjson.dumps({ 188 | "intent": 189 | constants.NetworkCommands.match_start.value, 190 | "match_id": 191 | queue_msg[2] 192 | })) 193 | for key in list(MATCH_PROCESS.keys()): 194 | value = MATCH_PROCESS[key] 195 | if value["pipe"].poll() and not value["free"]: 196 | pipe_msg = value["pipe"].recv() 197 | if pipe_msg[0] == constants.SubprocessCommands.match_next.value: 198 | value["free"] = True 199 | value["act"] = [0, 0, 0, 0] 200 | value["recv"] = [False, False, False, False] 201 | value["time"] = time.time() 202 | value["turn_id"] = pipe_msg[1] 203 | value["alive"] = pipe_msg[3] 204 | for x, y in enumerate(value["players"]): 205 | if y in list(PLAYER_WS.keys()): 206 | try: 207 | await PLAYER_WS[y]["ws"].send(pipe_msg[2][x]) 208 | except: 209 | pass 210 | elif y not in PLAYER_WS: 211 | value["act"][x] = 5 212 | if pipe_msg[0] is constants.SubprocessCommands.match_end.value: 213 | value["delete"] = True 214 | for x, y in enumerate(value["players"]): 215 | if y in PLAYER_WS: 216 | await PLAYER_WS[y]["ws"].send( 217 | rapidjson.dumps({ 218 | "intent": 219 | constants.NetworkCommands.match_end.value, 220 | "reward": 221 | pipe_msg[1][x], 222 | "agent": 223 | 10 + x 224 | })) 225 | if value["free"]: 226 | if value["time"] + STOP_TIMEOUT < time.time( 227 | ) or value["recv"].count(True) == value["alive"]: 228 | value["pipe"].send(value["act"]) 229 | value["free"] = False 230 | if value["delete"]: 231 | value["pipe"].send("END") 232 | del MATCH_PROCESS[key] 233 | finally: 234 | time.sleep(0.0001) # Sleep for a while so other threads get the GIL 235 | 236 | 237 | def _run_server(port): 238 | """Handles running the websocket thread""" 239 | asyncio.set_event_loop(asyncio.new_event_loop()) 240 | asyncio.get_event_loop().run_until_complete( 241 | websockets.serve(ws_handler, 'localhost', port)) 242 | asyncio.get_event_loop().run_forever() 243 | 244 | 245 | def thread(pipe_main, queue_subproc, port, max_players, mode, stop_timeout): 246 | """Creates a network thread""" 247 | # Note: Multiple threads are used so globals are used to share data b/w them 248 | global MAX_PLAYERS, PIPE_MAIN, QUEUE_SUBPROC, MODE, STOP_TIMEOUT 249 | MAX_PLAYERS = max_players 250 | PIPE_MAIN = pipe_main 251 | QUEUE_SUBPROC = queue_subproc 252 | MODE = mode 253 | STOP_TIMEOUT = stop_timeout 254 | ws_thread = threading.Thread(target=_run_server, args=(port,)) 255 | ws_thread.start() 256 | asyncio.set_event_loop(asyncio.new_event_loop()) 257 | asyncio.get_event_loop().run_until_complete(program_loop()) 258 | asyncio.get_event_loop().run_forever() 259 | ws_thread.join() 260 | -------------------------------------------------------------------------------- /pommerman/resources/Agent0-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent0-Team-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0-Team-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent0-Team.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0-Team.png -------------------------------------------------------------------------------- /pommerman/resources/Agent0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent0.png -------------------------------------------------------------------------------- /pommerman/resources/Agent1-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent1-Team-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1-Team-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent1-Team.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1-Team.png -------------------------------------------------------------------------------- /pommerman/resources/Agent1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent1.png -------------------------------------------------------------------------------- /pommerman/resources/Agent2-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent2-Team-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2-Team-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent2-Team.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2-Team.png -------------------------------------------------------------------------------- /pommerman/resources/Agent2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent2.png -------------------------------------------------------------------------------- /pommerman/resources/Agent3-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent3-Team-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3-Team-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/Agent3-Team.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3-Team.png -------------------------------------------------------------------------------- /pommerman/resources/Agent3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Agent3.png -------------------------------------------------------------------------------- /pommerman/resources/AgentDummy-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/AgentDummy-No-Background.png -------------------------------------------------------------------------------- /pommerman/resources/AgentDummy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/AgentDummy.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-1.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-10.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-2.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-3.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-4.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-5.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-6.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-7.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-8.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb-9.png -------------------------------------------------------------------------------- /pommerman/resources/Bomb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Bomb.png -------------------------------------------------------------------------------- /pommerman/resources/Cousine-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Cousine-Regular.ttf -------------------------------------------------------------------------------- /pommerman/resources/ExtraBomb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/ExtraBomb.png -------------------------------------------------------------------------------- /pommerman/resources/Flames.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Flames.png -------------------------------------------------------------------------------- /pommerman/resources/Fog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Fog.png -------------------------------------------------------------------------------- /pommerman/resources/IncrRange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/IncrRange.png -------------------------------------------------------------------------------- /pommerman/resources/Kick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Kick.png -------------------------------------------------------------------------------- /pommerman/resources/Passage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Passage.png -------------------------------------------------------------------------------- /pommerman/resources/Rigid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Rigid.png -------------------------------------------------------------------------------- /pommerman/resources/Skull.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Skull.png -------------------------------------------------------------------------------- /pommerman/resources/Wood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/Wood.png -------------------------------------------------------------------------------- /pommerman/resources/X-No-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultiAgentLearning/playground/5315f6da378f495737dfe34a4ba7f50c84423ce7/pommerman/resources/X-No-Background.png -------------------------------------------------------------------------------- /pommerman/runner/__init__.py: -------------------------------------------------------------------------------- 1 | '''Module entry point for the base docker agent.''' 2 | from .docker_agent_runner import DockerAgentRunner 3 | -------------------------------------------------------------------------------- /pommerman/runner/docker_agent_runner.py: -------------------------------------------------------------------------------- 1 | '''This is the basic docker agent runner''' 2 | import abc 3 | import logging 4 | import json 5 | from .. import constants 6 | import numpy as np 7 | from flask import Flask, jsonify, request 8 | 9 | LOGGER = logging.getLogger(__name__) 10 | 11 | 12 | class DockerAgentRunner(metaclass=abc.ABCMeta): 13 | """Abstract base class to implement Docker-based agent""" 14 | 15 | def __init__(self): 16 | pass 17 | 18 | @abc.abstractmethod 19 | def act(self, observation, action_space): 20 | """Given an observation, returns the action the agent should""" 21 | raise NotImplementedError() 22 | 23 | def run(self, host="0.0.0.0", port=10080): 24 | """Runs the agent by creating a webserver that handles action requests.""" 25 | app = Flask(self.__class__.__name__) 26 | 27 | @app.route("/action", methods=["POST"]) 28 | def action(): #pylint: disable=W0612 29 | '''handles an action over http''' 30 | data = request.get_json() 31 | observation = data.get("obs") 32 | observation = json.loads(observation) 33 | 34 | observation['teammate'] = constants.Item(observation['teammate']) 35 | for enemy_id in range(len(observation['enemies'])): 36 | observation['enemies'][enemy_id] = constants.Item(observation['enemies'][enemy_id]) 37 | observation['position'] = tuple(observation['position']) 38 | observation['board'] = np.array(observation['board'], dtype=np.uint8) 39 | observation['bomb_life'] = np.array(observation['bomb_life'], dtype=np.float64) 40 | observation['bomb_blast_strength'] = np.array(observation['bomb_blast_strength'], dtype=np.float64) 41 | observation['bomb_moving_direction'] = np.array(observation['bomb_moving_direction'], dtype=np.float64) 42 | observation['flame_life'] = np.array(observation['flame_life'], dtype=np.float64) 43 | 44 | action_space = data.get("action_space") 45 | action_space = json.loads(action_space) 46 | action = self.act(observation, action_space) 47 | return jsonify({"action": action}) 48 | 49 | @app.route("/init_agent", methods=["POST"]) 50 | def init_agent(): #pylint: disable=W0612 51 | '''initiates agent over http''' 52 | data = request.get_json() 53 | id = data.get("id") 54 | id = json.loads(id) 55 | game_type = data.get("game_type") 56 | game_type = constants.GameType(json.loads(game_type)) 57 | self.init_agent(id, game_type) 58 | return jsonify(success=True) 59 | 60 | @app.route("/shutdown", methods=["POST"]) 61 | def shutdown(): #pylint: disable=W0612 62 | '''Requests destruction of any created objects''' 63 | self.shutdown() 64 | return jsonify(success=True) 65 | 66 | @app.route("/episode_end", methods=["POST"]) 67 | def episode_end(): #pylint: disable=W0612 68 | '''Info about end of a game''' 69 | data = request.get_json() 70 | reward = data.get("reward") 71 | reward = json.loads(reward) 72 | self.episode_end(reward) 73 | return jsonify(success=True) 74 | 75 | @app.route("/ping", methods=["GET"]) 76 | def ping(): #pylint: disable=W0612 77 | '''Basic agent health check''' 78 | return jsonify(success=True) 79 | 80 | LOGGER.info("Starting agent server on port %d", port) 81 | app.run(host=host, port=port) 82 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docker~=3.1 2 | gym~=0.10.5 3 | scipy~=1.0 4 | Pillow~=8.3 5 | ruamel.yaml~=0.15 6 | Flask~=0.12 7 | requests~=2.18 8 | jsonmerge~=1.5.1 9 | astroid>=2 10 | isort~=4.3.4 11 | pylint>=2 12 | websockets~=6.0 13 | websocket-client~=0.53.0 14 | python-cli-ui~=0.7.1 15 | python-rapidjson~=0.6.3 16 | Click==7.0 17 | -------------------------------------------------------------------------------- /requirements_extra.txt: -------------------------------------------------------------------------------- 1 | tensorforce~=0.4.0 2 | tensorflow~=1.6 3 | jupyter~=1.0 4 | recommonmark~=0.3.0 -------------------------------------------------------------------------------- /scripts/build_sample_docker_agent: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | pushd $(dirname $0) > /dev/null 6 | pushd .. 7 | docker build . -f examples/docker-agent/Dockerfile 8 | popd > /dev/null 9 | popd > /dev/null 10 | 11 | -------------------------------------------------------------------------------- /scripts/run_sample_docker_agent: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | if [ -z "$1" ]; then 6 | echo "Usage: run_sample_docker_agent " 7 | exit 1 8 | fi 9 | 10 | container_id="$(docker run -d -p 10080:10080 "$1")" 11 | 12 | echo "Started running container $container_id" 13 | exit 0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from setuptools import setup, find_packages 3 | 4 | CURRENT_PYTHON = sys.version_info[:2] 5 | MIN_PYTHON = (3, 6) 6 | 7 | if CURRENT_PYTHON < MIN_PYTHON: 8 | sys.stderr.write(""" 9 | ============================ 10 | Unsupported Python Version 11 | ============================ 12 | 13 | Python {}.{} is unsupported. Please use a version newer than Python {}.{}. 14 | """.format(*CURRENT_PYTHON, *MIN_PYTHON)) 15 | sys.exit(1) 16 | 17 | with open('requirements.txt', 'r') as f: 18 | install_requires = f.readlines() 19 | 20 | with open('requirements_extra.txt', 'r') as f: 21 | extras_require = f.readlines() 22 | 23 | with open('VERSION') as f: 24 | VERSION = f.read().strip() 25 | 26 | files = ["resources/*"] 27 | 28 | setup(name='pommerman', 29 | version=VERSION, 30 | description='PlayGround: AI Research into Multi-Agent Learning', 31 | url='https://www.pommerman.com', 32 | author='Pommerman', 33 | author_email='support@pommerman.com', 34 | license='Apache 2.0', 35 | classifiers=[ 36 | 'Programming Language :: Python :: 3.6', 37 | ], 38 | packages=find_packages(), 39 | package_data = {'pommerman' : files }, 40 | install_requires=install_requires, 41 | extras_require={ 42 | 'extras': extras_require # @TODO this might need refinement 43 | }, 44 | entry_points={ 45 | 'console_scripts': [ 46 | 'pom_battle=pommerman.cli.run_battle:main', 47 | 'pom_tf_battle=pommerman.cli.train_with_tensorforce:main', 48 | 'ion_client=pommerman.network.client:init', 49 | 'ion_server=pommerman.network.server:init' 50 | ], 51 | }, 52 | zip_safe=False) 53 | --------------------------------------------------------------------------------